In [1]:
import pandas as pd
import time
import psutil
import os

# Benchmark: read the rename-config workbook from OneLake and report wall-clock
# time, process CPU time, resident-memory growth and an approximate CU figure.
process = psutil.Process(os.getpid())
# cpu_count() can return None when the core count cannot be determined;
# fall back to 1 so the CU arithmetic below never multiplies by None.
cpu_cores = psutil.cpu_count(logical=True) or 1

# Return value is discarded; nothing later in this cell reads cpu_percent().
process.cpu_percent(interval=None)

mem_before = process.memory_info().rss / (1024 ** 2)  # MB
# Take one cpu_times() snapshot so user and system come from the same sample.
cpu_times_start = process.cpu_times()
cpu_start = cpu_times_start.user + cpu_times_start.system
# perf_counter() is monotonic and high resolution; time.time() can jump if the
# system clock is adjusted while the read is in flight.
start_time = time.perf_counter()

df_rename_config_sem = pd.read_excel(
    "abfss://FabricCICDQA@onelake.dfs.fabric.microsoft.com/SemanticModelConfig.Lakehouse/Files/sales.xlsx",
    sheet_name="Sheet1"
)

end_time = time.perf_counter()
cpu_times_end = process.cpu_times()
cpu_end = cpu_times_end.user + cpu_times_end.system
mem_after = process.memory_info().rss / (1024 ** 2)  # MB

# Record-style view of the sheet (list of one dict per row).
rename_mapping_data = df_rename_config_sem.to_dict(orient="records")
# Work on an independent copy. Rebuilding the frame from the records list would
# drop every column when the sheet has headers but no rows.
df_rename_config = df_rename_config_sem.copy()

elapsed = end_time - start_time
cpu_used = cpu_end - cpu_start
# NOTE(review): process CPU seconds are scaled by the logical core count and
# divided by 3600 to get the "CU" figure; this is reported as approximate and
# the conversion should be confirmed against the capacity's billing definition.
cus_consumed = (cpu_used * cpu_cores) / 3600

print("Read Successfully ")
print(f"Time Taken: {elapsed:.4f} sec")
print(f"CUs Consumed: {cus_consumed:.6f} approx")
print(f"CPU Time Used: {cpu_used:.4f} sec")
print(f"Memory Used: {mem_after - mem_before:.2f} MB")
print(f"Data Size: {df_rename_config.shape[0]} rows × {df_rename_config.shape[1]} cols")


Read Successfully 
Time Taken: 6.1551 sec
CUs Consumed: 0.000256 approx
CPU Time Used: 0.4600 sec
Memory Used: 18.09 MB
Data Size: 6 rows × 5 cols


In [2]:
import pandas as pd
import time
import psutil
import re
import os

# Benchmark: apply two small column manipulations to df_rename_config, build a
# data-quality summary, and report time / CPU / memory for that work.
process = psutil.Process(os.getpid())
# cpu_count() can return None when the core count cannot be determined;
# fall back to 1 so the CU arithmetic below never multiplies by None.
cpu_cores = psutil.cpu_count(logical=True) or 1

mem_before = process.memory_info().rss / (1024 ** 2)  # MB
# Take one cpu_times() snapshot so user and system come from the same sample.
cpu_times_start = process.cpu_times()
cpu_start = cpu_times_start.user + cpu_times_start.system
# perf_counter() is monotonic and high resolution, which matters for a
# sub-second measurement like this one.
start_time = time.perf_counter()

# Multiplying the 'Type' column by 2; for string values this repeats the text
# (e.g. "Table" -> "TableTable").
df_rename_config['New Column'] = df_rename_config['Type'] * 2
# Assign the filled column back instead of calling fillna(inplace=True) on the
# selected column: that chained in-place form warns on recent pandas and does
# not update the frame when Copy-on-Write is enabled.
df_rename_config['Current Name'] = df_rename_config['Current Name'].fillna('Unknown')

dq_report = {
    # Per-column count of missing values.
    "Null Values": df_rename_config.isnull().sum().to_dict(),
    # Cast to a built-in int so the report prints as a plain number rather
    # than a NumPy scalar repr.
    "Duplicate Rows": int(df_rename_config.duplicated().sum()),
}

end_time = time.perf_counter()
cpu_times_end = process.cpu_times()
cpu_end = cpu_times_end.user + cpu_times_end.system
mem_after = process.memory_info().rss / (1024 ** 2)  # MB

elapsed = end_time - start_time
cpu_used = cpu_end - cpu_start
# NOTE(review): process CPU seconds are scaled by the logical core count and
# divided by 3600 to get the "CU" figure; this is reported as approximate and
# the conversion should be confirmed against the capacity's billing definition.
cus_consumed = (cpu_used * cpu_cores) / 3600

print("Data After Manipulation:")
print(df_rename_config.head())

print("DQ Report (Python):", dq_report)

print(f"Time Taken: {elapsed:.4f} sec")
print(f"CUs Consumed: {cus_consumed:.6f} approx")
print(f"CPU Time Used: {cpu_used:.4f} sec")
print(f"Memory Used: {mem_after - mem_before:.2f} MB")
print(f"Data Size: {df_rename_config.shape[0]} rows × {df_rename_config.shape[1]} cols")


Data After Manipulation:
     Type      Table Name    Current Name      New Name  \
0   Table      Fact Table      Fact Table     SalesFact   
1   Table  Customer Table  Customer Table   CustomerDim   
2  Column       SalesFact           Sales   SalesAmount   
3  Column       SalesFact       YearMonth   Year_Period   
4  Column     CustomerDim        CustName  CustomerName   

                Description    New Column  
0          Sales Fact Table    TableTable  
1  Customer Dimension Table    TableTable  
2              Sales Amount  ColumnColumn  
3         Year Month Period  ColumnColumn  
4             Customer Name  ColumnColumn  
DQ Report (Python): {'Null Values': {'Type': 0, 'Table Name': 0, 'Current Name': 0, 'New Name': 0, 'Description': 0, 'New Column': 0}, 'Duplicate Rows': 0}
Time Taken: 0.0710 sec
CUs Consumed: 0.000006 approx
CPU Time Used: 0.0100 sec
Memory Used: 0.00 MB
Data Size: 6 rows × 6 cols
