In [1]:
import pandas as pd
import os
import glob

# Specify the path to subfolders with CSV files
main_path = r'C:\ResearchFiles\AllFiles_Cleaned'

# List of buildings to include
selected_buildings = ['Atkinson_Hall', 'EBU3B']

# Recursively search for all CSV files in the subfolders of the selected buildings
all_files = []
for building in selected_buildings:
    building_path = os.path.join(main_path, building)
    all_files.extend(glob.glob(os.path.join(building_path, "*.csv")))

# Combine all files into a single DataFrame
df_list = []
for filename in all_files:
    df = pd.read_csv(filename)
    df['smart_plug'] = os.path.basename(filename).split('_')[1]  # Add a column for the smart plug identifier
    df['building'] = os.path.basename(os.path.dirname(filename))  # Add a column for the building
    df_list.append(df)

# Combine all the dataframes into one
combined_df = pd.concat(df_list, ignore_index=True)

In [2]:
combined_df = combined_df.drop(columns = ['analogInput_2', 'analogInput_4', 'analogInput_5', 'binaryInput_3', 'binaryValue_1', 'building'])

In [3]:
combined_df

Unnamed: 0,time,analogInput_3,smart_plug
0,2023-09-14T18:16,30392.714286,180940
1,2023-09-14T18:17,31416.000000,180940
2,2023-09-14T18:18,30044.000000,180940
3,2023-09-14T18:19,30740.000000,180940
4,2023-09-14T18:20,29952.000000,180940
...,...,...,...
36967007,2024-06-10T23:55,0.000000,944300
36967008,2024-06-10T23:56,0.000000,944300
36967009,2024-06-10T23:57,0.000000,944300
36967010,2024-06-10T23:58,0.000000,944300


In [4]:
# Convert timestamp column to datetime
combined_df['time'] = pd.to_datetime(combined_df['time'])

In [5]:
# Create a new column for the hour
combined_df['15min'] = combined_df['time'].dt.floor('15T')

In [6]:
combined_df

Unnamed: 0,time,analogInput_3,smart_plug,15min
0,2023-09-14 18:16:00,30392.714286,180940,2023-09-14 18:15:00
1,2023-09-14 18:17:00,31416.000000,180940,2023-09-14 18:15:00
2,2023-09-14 18:18:00,30044.000000,180940,2023-09-14 18:15:00
3,2023-09-14 18:19:00,30740.000000,180940,2023-09-14 18:15:00
4,2023-09-14 18:20:00,29952.000000,180940,2023-09-14 18:15:00
...,...,...,...,...
36967007,2024-06-10 23:55:00,0.000000,944300,2024-06-10 23:45:00
36967008,2024-06-10 23:56:00,0.000000,944300,2024-06-10 23:45:00
36967009,2024-06-10 23:57:00,0.000000,944300,2024-06-10 23:45:00
36967010,2024-06-10 23:58:00,0.000000,944300,2024-06-10 23:45:00


In [7]:
# Group by 'building', 'smart_plug', and 'hour', then sum the power readings
min15_df = combined_df.groupby(['smart_plug', '15min']).agg({'analogInput_3': 'sum'}).reset_index()

# Optionally, rename 'power_reading' to 'average_power_usage'
min15_df.rename(columns={'analogInput_3': 'average_power_usage'}, inplace=True)

In [8]:
min15_df

Unnamed: 0,smart_plug,15min,average_power_usage
0,180408,2023-09-14 18:15:00,26075.0
1,180408,2023-09-14 18:30:00,39545.0
2,180408,2023-09-14 18:45:00,192324.0
3,180408,2023-09-14 19:00:00,28517.0
4,180408,2023-09-14 19:15:00,28002.0
...,...,...,...
2480137,944300,2024-06-10 22:45:00,0.0
2480138,944300,2024-06-10 23:00:00,0.0
2480139,944300,2024-06-10 23:15:00,0.0
2480140,944300,2024-06-10 23:30:00,0.0


In [9]:
plug_info = pd.read_csv('helper_spreadsheet(2).csv')

In [10]:
plug_info

Unnamed: 0,smart_plug,building_name,Load_Type,Inspection
0,291824,Atkinson Hall,Computer,disconnected
1,291956,Atkinson Hall,Printer,checked
2,292032,Atkinson Hall,Printer,checked
3,183436,Atkinson Hall,Water Dispenser,disconnected
4,181084,Atkinson Hall,TV,checked
...,...,...,...,...
112,283884,EBU3B,Printer,checked
113,288480,EBU3B,Printer,checked
114,291792,EBU3B,Printer,missing
115,565612,EBU3B,Printer,checked


In [11]:
min15_df['smart_plug'] = min15_df['smart_plug'].astype(str)
plug_info['smart_plug'] = plug_info['smart_plug'].astype(str)

# Merge the aggregated data with the plug info data
merged_15min = pd.merge(min15_df, plug_info, on='smart_plug')
merged_15min = merged_15min[
    (merged_15min['Inspection'] == 'checked') & 
    (~merged_15min['smart_plug'].isin(['299184', '183436', '186204', '634584', '284068', '298808', 
                                       '180552', '944300', '186212', '291792', '283892', '288848', 
                                       '565996', '283884', '287196', '654200', '286460', '182136', 
                                       '297624', '181084', '291824', '284416', '293688', '291080', 
                                       '290240', '297984', '289840', '182552', '291728', '180672', 
                                       '183428', '284556', '285120', '291076', '291740', '183232', 
                                       '284372', '180940', '182268', '182584', '292032', '294576', 
                                       '296488', '298776', '299852', '639140', '651712', '652512', 
                                       '301192', '562240', '566540', '780512']))
]

In [12]:
merged_15min = merged_15min.rename(columns = {'15min': 'Timestamp'})

In [18]:
merged_15min = merged_15min[merged_15min['Load_Type'] == 'Air Purifier']

In [14]:
merged_15min

Unnamed: 0,smart_plug,Timestamp,average_power_usage,building_name,Load_Type,Inspection
1060496,286456,2023-09-14 18:15:00,669143.571429,EBU3B,Air Purifier,checked
1060497,286456,2023-09-14 18:30:00,716445.000000,EBU3B,Air Purifier,checked
1060498,286456,2023-09-14 18:45:00,712120.000000,EBU3B,Air Purifier,checked
1060499,286456,2023-09-14 19:00:00,715228.000000,EBU3B,Air Purifier,checked
1060500,286456,2023-09-14 19:15:00,711481.000000,EBU3B,Air Purifier,checked
...,...,...,...,...,...,...
1083022,286456,2024-06-10 22:45:00,0.000000,EBU3B,Air Purifier,checked
1083023,286456,2024-06-10 23:00:00,0.000000,EBU3B,Air Purifier,checked
1083024,286456,2024-06-10 23:15:00,0.000000,EBU3B,Air Purifier,checked
1083025,286456,2024-06-10 23:30:00,0.000000,EBU3B,Air Purifier,checked


In [15]:
total_building_load = merged_15min.groupby(['building_name', 'Timestamp']).agg({'average_power_usage': 'sum'}).reset_index()

In [16]:
total_building_load

Unnamed: 0,building_name,Timestamp,average_power_usage
0,EBU3B,2023-09-14 18:15:00,669143.571429
1,EBU3B,2023-09-14 18:30:00,716445.000000
2,EBU3B,2023-09-14 18:45:00,712120.000000
3,EBU3B,2023-09-14 19:00:00,715228.000000
4,EBU3B,2023-09-14 19:15:00,711481.000000
...,...,...,...
22526,EBU3B,2024-06-10 22:45:00,0.000000
22527,EBU3B,2024-06-10 23:00:00,0.000000
22528,EBU3B,2024-06-10 23:15:00,0.000000
22529,EBU3B,2024-06-10 23:30:00,0.000000


In [252]:
ebu3b_submetering = pd.read_csv('helper_spreadsheet(4).csv')

In [253]:
ebu3b_submetering = ebu3b_submetering.drop(columns = ['Lights 1st Floor', 'Lights 2nd Floor', 'Lights 3rd Floor', 'Lights 4th Floor', 'WARREN.EBU3B_1st_Floor_E2545#Real Power Mean#kW', 'WARREN.EBU3B_1st_Floor_E2546#Real Power Mean#kW', 'WARREN.EBU3B_1st_Floor_E2548#Real Power Mean#kW', 'WARREN.EBU3B_E2544#Real Power Mean#kW', 'WARREN.EBU3B_Panel_M_E2526#Real Power Mean#kW'])

In [254]:
# Convert timestamp column to datetime
ebu3b_submetering['Timestamp'] = pd.to_datetime(ebu3b_submetering['Timestamp'])

In [255]:
ebu3b_submetering

Unnamed: 0,Timestamp,Elevator,Total Lights,Total Servers
0,2023-01-01 00:15:00,0.629064,7.893459,102.036439
1,2023-01-01 00:30:00,0.658419,7.896329,102.235192
2,2023-01-01 00:45:00,0.504297,7.924650,102.482906
3,2023-01-01 01:00:00,5.773901,8.140687,102.294906
4,2023-01-01 01:15:00,5.650762,7.918884,101.985907
...,...,...,...,...
54521,2024-07-21 23:30:00,0.838737,11.850697,94.435776
54522,2024-07-21 23:45:00,4.860091,11.810819,94.522451
54523,2024-07-22 00:00:00,6.950442,11.724997,94.519934
54524,NaT,,0.000000,0.000000


In [260]:
both_merged_15min = pd.merge(total_building_load, ebu3b_submetering, on='Timestamp')

In [261]:
both_merged_15min['average_power_usage'] = both_merged_15min['average_power_usage'] / 1_000_000

In [262]:
both_merged_15min

Unnamed: 0,building_name,Timestamp,average_power_usage,Elevator,Total Lights,Total Servers
0,EBU3B,2023-09-14 18:15:00,0.669144,1.962154,21.980021,83.907898
1,EBU3B,2023-09-14 18:30:00,0.716445,5.825698,21.541027,83.989771
2,EBU3B,2023-09-14 18:45:00,0.712120,2.009048,20.936879,83.645997
3,EBU3B,2023-09-14 19:00:00,0.715228,3.713328,21.466072,83.340961
4,EBU3B,2023-09-14 19:15:00,0.711481,2.007112,22.291043,83.441196
...,...,...,...,...,...,...
22526,EBU3B,2024-06-10 22:45:00,0.000000,2.471272,17.096050,90.355329
22527,EBU3B,2024-06-10 23:00:00,0.000000,3.017766,17.677648,90.254372
22528,EBU3B,2024-06-10 23:15:00,0.000000,5.134647,17.154149,90.496993
22529,EBU3B,2024-06-10 23:30:00,0.000000,4.824259,16.606649,90.396309


In [263]:
both_merged_15min['average_power_usage'] = both_merged_15min['average_power_usage'] / 4

In [264]:
both_merged_15min['Elevator'] = both_merged_15min['Elevator'] / 4

In [265]:
both_merged_15min['Total Lights'] = both_merged_15min['Total Lights'] / 4

In [266]:
both_merged_15min['Total Servers'] = both_merged_15min['Total Servers'] / 4

In [267]:
both_merged_15min = both_merged_15min[both_merged_15min['building_name'] == 'EBU3B']

In [268]:
both_merged_15min

Unnamed: 0,building_name,Timestamp,average_power_usage,Elevator,Total Lights,Total Servers
0,EBU3B,2023-09-14 18:15:00,0.167286,0.490538,5.495005,20.976975
1,EBU3B,2023-09-14 18:30:00,0.179111,1.456425,5.385257,20.997443
2,EBU3B,2023-09-14 18:45:00,0.178030,0.502262,5.234220,20.911499
3,EBU3B,2023-09-14 19:00:00,0.178807,0.928332,5.366518,20.835240
4,EBU3B,2023-09-14 19:15:00,0.177870,0.501778,5.572761,20.860299
...,...,...,...,...,...,...
22526,EBU3B,2024-06-10 22:45:00,0.000000,0.617818,4.274012,22.588832
22527,EBU3B,2024-06-10 23:00:00,0.000000,0.754441,4.419412,22.563593
22528,EBU3B,2024-06-10 23:15:00,0.000000,1.283662,4.288537,22.624248
22529,EBU3B,2024-06-10 23:30:00,0.000000,1.206065,4.151662,22.599077


In [269]:
building_metering = pd.read_csv('building_metering.csv')

In [270]:
building_metering = building_metering.drop(columns = ['WARREN.CAL_IT_E2531#Real Power Mean#kW', 'WARREN.CAL_IT_E2532#Real Power Mean#kW', 'WARREN.CAL_IT_Gate_E2530#Real Power Mean#kW', 'WARREN.EBU3B_E2520#Real Power Mean#kW', 'WARREN.EBU3B_E2521#Real Power Mean#kW', 'Atkinson total'])

In [271]:
# Convert timestamp column to datetime
building_metering['Timestamp'] = pd.to_datetime(building_metering['Timestamp'])

In [272]:
building_metering

Unnamed: 0,Timestamp,EBU3B total
0,2023-01-01 00:15:00,312.613174
1,2023-01-01 00:30:00,313.511780
2,2023-01-01 00:45:00,317.042526
3,2023-01-01 01:00:00,318.071701
4,2023-01-01 01:15:00,316.035248
...,...,...
54521,2024-07-21 23:30:00,397.910767
54522,2024-07-21 23:45:00,396.301117
54523,2024-07-22 00:00:00,394.709076
54524,NaT,0.000000


In [273]:
all_merged_15min = pd.merge(both_merged_15min, building_metering, on='Timestamp')

In [274]:
all_merged_15min

Unnamed: 0,building_name,Timestamp,average_power_usage,Elevator,Total Lights,Total Servers,EBU3B total
0,EBU3B,2023-09-14 18:15:00,0.167286,0.490538,5.495005,20.976975,311.203552
1,EBU3B,2023-09-14 18:30:00,0.179111,1.456425,5.385257,20.997443,303.462402
2,EBU3B,2023-09-14 18:45:00,0.178030,0.502262,5.234220,20.911499,303.112823
3,EBU3B,2023-09-14 19:00:00,0.178807,0.928332,5.366518,20.835240,307.819702
4,EBU3B,2023-09-14 19:15:00,0.177870,0.501778,5.572761,20.860299,304.548492
...,...,...,...,...,...,...,...
22534,EBU3B,2024-06-10 22:45:00,0.000000,0.617818,4.274012,22.588832,372.360016
22535,EBU3B,2024-06-10 23:00:00,0.000000,0.754441,4.419412,22.563593,381.564133
22536,EBU3B,2024-06-10 23:15:00,0.000000,1.283662,4.288537,22.624248,385.190674
22537,EBU3B,2024-06-10 23:30:00,0.000000,1.206065,4.151662,22.599077,381.697632


In [275]:
# Calculate the Pearson correlation coefficient
correlation = all_merged_15min['Total Lights'].corr(all_merged_15min['average_power_usage'])

print(f"The Pearson correlation coefficient between the average plug load in EBU3B and the lights submetering for air purifiers specifically is: {correlation}")

The Pearson correlation coefficient between the average plug load in EBU3B and the lights submetering for air purifiers specifically is: 0.06660529713037946


In [246]:
# Calculate the Pearson correlation coefficient
correlation = all_merged_15min['Total Lights'].corr(all_merged_15min['average_power_usage'])

print(f"The Pearson correlation coefficient between the average plug load in EBU3B and the lights submetering for water dispensers specifically is: {correlation}")

The Pearson correlation coefficient between the average plug load in EBU3B and the lights submetering for water dispensers specifically is: 0.05071571669192818


In [217]:
# Calculate the Pearson correlation coefficient
correlation = all_merged_15min['Total Lights'].corr(all_merged_15min['average_power_usage'])

print(f"The Pearson correlation coefficient between the average plug load in EBU3B and the lights submetering for printers specifically is: {correlation}")

The Pearson correlation coefficient between the average plug load in EBU3B and the lights submetering for printers specifically is: 0.03733126309712079


In [188]:
# Calculate the Pearson correlation coefficient
correlation = all_merged_15min['Total Lights'].corr(all_merged_15min['average_power_usage'])

print(f"The Pearson correlation coefficient between the average plug load in EBU3B and the lights submetering for TVs specifically is: {correlation}")

The Pearson correlation coefficient between the average plug load in EBU3B and the lights submetering for TVs specifically is: -0.042086105304801814


In [159]:
# Calculate the Pearson correlation coefficient
correlation = all_merged_15min['Total Lights'].corr(all_merged_15min['average_power_usage'])

print(f"The Pearson correlation coefficient between the average plug load in EBU3B and the lights submetering for computers specifically is: {correlation}")

The Pearson correlation coefficient between the average plug load in EBU3B and the lights submetering for computers specifically is: 0.20477201193802738


In [129]:
# Calculate the Pearson correlation coefficient
correlation = all_merged_15min['Total Lights'].corr(all_merged_15min['average_power_usage'])

print(f"The Pearson correlation coefficient between the average plug load in EBU3B and the lights submetering is: {correlation}")

The Pearson correlation coefficient between the average plug load in EBU3B and the lights submetering is: 0.09226563085375943


In [66]:
# Calculate the Pearson correlation coefficient
correlation = all_merged_15min['Elevator'].corr(all_merged_15min['average_power_usage'])

print(f"The Pearson correlation coefficient between the average plug load in EBU3B and the elevator submetering is: {correlation}")

The Pearson correlation coefficient between the average plug load in EBU3B and the elevator submetering is: -0.04008337272749189


In [67]:
# Calculate the Pearson correlation coefficient
correlation = all_merged_15min['Elevator'].corr(all_merged_15min['Total Lights'])

print(f"The Pearson correlation coefficient between the lights submetering in EBU3B and the elevator submetering is: {correlation}")

The Pearson correlation coefficient between the lights submetering in EBU3B and the elevator submetering is: 0.6190652712882163


In [68]:
# Calculate the Pearson correlation coefficient
correlation = all_merged_15min['Elevator'].corr(all_merged_15min['EBU3B total'])

print(f"The Pearson correlation coefficient between the elevator submetering and the total power consumption in EBU3B is: {correlation}")

The Pearson correlation coefficient between the elevator submetering and the total power consumption in EBU3B is: 0.3009439064501389


In [69]:
# Calculate the Pearson correlation coefficient
correlation = all_merged_15min['Total Lights'].corr(all_merged_15min['EBU3B total'])

print(f"The Pearson correlation coefficient between the lights submetering and the total power consumption in EBU3B is: {correlation}")

The Pearson correlation coefficient between the lights submetering and the total power consumption in EBU3B is: 0.06017390607190594


In [70]:
# Calculate the Pearson correlation coefficient
correlation = all_merged_15min['Total Servers'].corr(all_merged_15min['EBU3B total'])

print(f"The Pearson correlation coefficient between the servers submetering and the total power consumption in EBU3B is: {correlation}")

The Pearson correlation coefficient between the servers submetering and the total power consumption in EBU3B is: 0.2466114495551074
