In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import os

In [2]:
def range_datetime(data : pd.DataFrame):
    time = 'timestamp'if 'timestamp' in data.columns else 'datetime'
    print(data[time].iloc[0] + " - "+ data[time].iloc[-1])

In [3]:
m11 = pd.read_csv("dataset/meter1_phase1.csv")
m12 = pd.read_csv("dataset/meter1_phase2.csv")
m13 = pd.read_csv("dataset/meter1_phase3.csv")
m21 = pd.read_csv("dataset/meter2_phase1.csv")
m22 = pd.read_csv("dataset/meter2_phase2.csv")
m23 = pd.read_csv("dataset/meter2_phase3.csv")
weather = pd.read_csv("dataset/Weather.csv")

### Look all information.

In [4]:
path, dirs, files = next(os.walk("dataset"))
for _,name in  enumerate(files):
    print(f"File {_} : {name}")

File 0 : meter1_phase2.csv
File 1 : meter2_phase2.csv
File 2 : Weather.csv
File 3 : meter1_phase1.csv
File 4 : meter2_phase3.csv
File 5 : meter1_phase3.csv
File 6 : people.csv
File 7 : meter2_phase1.csv


### Preprocessing Weather data.

In [5]:
weather["Date"] = pd.to_datetime(weather['Date'])
weather["timestamp"] = weather["Date"].astype(str) + " " + weather["Time"].astype(str)
weather['timestamp'] = pd.to_datetime(weather['timestamp'])
weather['Temp'] = weather['Temp'].map(lambda x: x.lstrip('+-').rstrip('aAbBcC%° ')).str.strip()
weather['Humidity'] = weather['Humidity'].map(lambda x: x.lstrip('+-').rstrip('aAbBcC%')).str.strip()

In [6]:
weather.drop(['Condition','Wind', 'Direction', 'Barometer', 'Barometer', 'Visibility', 'Date', 'Time'], axis = 1, inplace = True)

In [7]:
weather = weather.set_index('timestamp')
weather['Temp'] = pd.to_numeric(weather['Temp']) 
weather['Humidity'] = pd.to_numeric(weather['Humidity']) 
weather.head()

Unnamed: 0_level_0,Temp,Humidity
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-12-19 01:00:00,23,79
2020-12-19 04:00:00,21,83
2020-12-19 07:00:00,21,84
2020-12-19 10:00:00,23,72
2020-12-19 13:00:00,28,54


In [8]:
# weather.to_csv('cleandata/c_weather.csv', index = True)

### Preprocessing Energy data.

In [9]:
m11.rename(columns = {'energy': 'energy1', 'power': '1powerA'}, inplace = True)
m12.rename(columns = {'power': '1powerB'}, inplace = True)
m13.rename(columns = {'power': '1powerC'}, inplace = True)

m21.rename(columns = {'energy': 'energy2', 'power': '2powerA'}, inplace = True)
m22.rename(columns = {'power': '2powerB'}, inplace = True)
m23.rename(columns = {'power': '2powerC'}, inplace = True)

In [10]:
Total_Energy = m11['energy1'].iloc[-1] - m11['energy1'].iloc[0]
Total_Bill1= Total_Energy * 4.0

In [11]:
Total_Energy = m21['energy2'].iloc[-1] - m21['energy2'].iloc[0]
Total_Bill2= Total_Energy * 4.0

In [12]:
print("Total Bill : ",Total_Bill1 + Total_Bill2, 'Baht.-')

Total Bill :  89413.625 Baht.-


### Drop useless columns

In [13]:
m11.head()

Unnamed: 0,timestamp,energy_reactive,current,power_reactive,energy_reactive_to_grid,voltage,energy1,energy_to_grid,1powerA,power_factor
0,2020-12-19 00:00:27,37268.03125,24.109783,-1.756851,6832.749023,403.224792,140042.203125,0,5.314347,0.949463
1,2020-12-19 00:00:57,37268.03125,24.554781,-1.800468,6832.780762,402.509125,140042.296875,0,5.400183,0.948662
2,2020-12-19 00:01:27,37268.03125,16.898428,-1.738132,6832.790039,402.380371,140042.40625,0,3.506593,0.895972
3,2020-12-19 00:01:57,37268.03125,16.988962,-1.746844,6832.799805,402.8815,140042.5,0,3.533533,0.89644
4,2020-12-19 00:02:28,37268.03125,17.363531,-1.746971,6832.809082,402.580475,140042.59375,0,3.625191,0.900855


In [14]:
m11 = m11.loc[:, m11.columns.intersection(['timestamp','energy1', '1powerA'])]
m12 = m12.loc[:, m12.columns.intersection(['timestamp', '1powerB'])]
m13 = m13.loc[:, m13.columns.intersection(['timestamp', '1powerC'])]

m21 = m21.loc[:, m21.columns.intersection(['timestamp','energy2', '2powerA'])]
m22 = m22.loc[:, m22.columns.intersection(['timestamp', '2powerB'])]
m23 = m23.loc[:, m23.columns.intersection(['timestamp', '2powerC'])]

In [15]:
m11.head()

Unnamed: 0,timestamp,energy1,1powerA
0,2020-12-19 00:00:27,140042.203125,5.314347
1,2020-12-19 00:00:57,140042.296875,5.400183
2,2020-12-19 00:01:27,140042.40625,3.506593
3,2020-12-19 00:01:57,140042.5,3.533533
4,2020-12-19 00:02:28,140042.59375,3.625191


In [16]:
m11.describe()

Unnamed: 0,energy1,1powerA
count,72446.0,72446.0
mean,143093.745012,3.545585
std,1764.910862,1.990561
min,140042.203125,0.037927
25%,141292.316406,2.064173
50%,143196.015625,3.40152
75%,144810.96875,4.727881
max,145398.203125,13.00934


### Merge 6 table, and make frequency of data every 15 minutes

In [17]:
m11['timestamp'] = pd.to_datetime(m11['timestamp'])
m12['timestamp'] = pd.to_datetime(m12['timestamp'])
m13['timestamp'] = pd.to_datetime(m13['timestamp'])
m1 = pd.merge_asof(m11, m12, on='timestamp', tolerance=pd.Timedelta("30ms"))
m1 = pd.merge_asof(m1, m13, on='timestamp', tolerance=pd.Timedelta("30ms"))
m1 = m1.groupby(pd.Grouper(freq='15T',key='timestamp')).mean()

In [18]:
m21['timestamp'] = pd.to_datetime(m21['timestamp'])
m22['timestamp'] = pd.to_datetime(m22['timestamp'])
m23['timestamp'] = pd.to_datetime(m23['timestamp'])
m2 = pd.merge_asof(m21, m22, on='timestamp', tolerance=pd.Timedelta("30ms"))
m2 = pd.merge_asof(m2, m23, on='timestamp', tolerance=pd.Timedelta("30ms"))
m2 = m2.groupby(pd.Grouper(freq='15T',key='timestamp')).mean()

In [19]:
Ans = pd.merge_asof(m1, m2, on='timestamp', tolerance=pd.Timedelta("30ms"))
Ans = Ans.sort_values(by="timestamp")
Ans['timestamp'] = Ans['timestamp'].dt.floor('Min')
Ans.fillna(0, inplace = True)
Ans

Unnamed: 0,timestamp,energy1,1powerA,1powerB,1powerC,energy2,2powerA,2powerB,2powerC
0,2020-12-19 00:00:00,140043.391146,3.255264,1.724024,4.219208,304480.550000,9.229886,9.135354,7.701414
1,2020-12-19 00:15:00,140045.516146,2.812916,1.777869,3.784695,304487.406250,9.877761,9.159706,9.134663
2,2020-12-19 00:30:00,140047.771875,2.818559,1.857305,5.051593,304494.794792,11.516459,10.269925,9.393754
3,2020-12-19 00:45:00,140049.958854,3.025429,1.750408,3.142562,304502.237500,11.469462,8.602655,8.568154
4,2020-12-19 01:00:00,140051.855729,2.585936,1.698523,3.203603,304509.079167,10.091112,8.232744,8.462210
...,...,...,...,...,...,...,...,...,...
2466,2021-01-13 16:30:00,145397.501562,0.613766,0.000000,0.000000,321467.808333,1.384582,1.098190,0.000000
2467,2021-01-13 16:45:00,145397.653533,0.801505,0.000000,0.000000,321468.691667,1.390026,1.098095,0.000000
2468,2021-01-13 17:00:00,145397.819111,0.692847,0.000000,0.190294,321469.573958,1.394100,1.100963,0.000000
2469,2021-01-13 17:15:00,145398.007212,0.603717,0.000000,0.190307,321470.480208,1.623777,1.326716,0.000000


In [20]:
Ans = Ans.merge(weather, on='timestamp', how='left')
Ans

Unnamed: 0,timestamp,energy1,1powerA,1powerB,1powerC,energy2,2powerA,2powerB,2powerC,Temp,Humidity
0,2020-12-19 00:00:00,140043.391146,3.255264,1.724024,4.219208,304480.550000,9.229886,9.135354,7.701414,,
1,2020-12-19 00:15:00,140045.516146,2.812916,1.777869,3.784695,304487.406250,9.877761,9.159706,9.134663,,
2,2020-12-19 00:30:00,140047.771875,2.818559,1.857305,5.051593,304494.794792,11.516459,10.269925,9.393754,,
3,2020-12-19 00:45:00,140049.958854,3.025429,1.750408,3.142562,304502.237500,11.469462,8.602655,8.568154,,
4,2020-12-19 01:00:00,140051.855729,2.585936,1.698523,3.203603,304509.079167,10.091112,8.232744,8.462210,23.0,79.0
...,...,...,...,...,...,...,...,...,...,...,...
2466,2021-01-13 16:30:00,145397.501562,0.613766,0.000000,0.000000,321467.808333,1.384582,1.098190,0.000000,,
2467,2021-01-13 16:45:00,145397.653533,0.801505,0.000000,0.000000,321468.691667,1.390026,1.098095,0.000000,,
2468,2021-01-13 17:00:00,145397.819111,0.692847,0.000000,0.190294,321469.573958,1.394100,1.100963,0.000000,,
2469,2021-01-13 17:15:00,145398.007212,0.603717,0.000000,0.190307,321470.480208,1.623777,1.326716,0.000000,,


### Fill nan value

In [21]:
Ans['Temp'].fillna(method='ffill', inplace=True)
Ans['Temp'].fillna(method='bfill', inplace=True)
Ans['Humidity'].fillna(method='ffill', inplace=True)
Ans['Humidity'].fillna(method='bfill', inplace=True)

In [22]:
Ans.head()

Unnamed: 0,timestamp,energy1,1powerA,1powerB,1powerC,energy2,2powerA,2powerB,2powerC,Temp,Humidity
0,2020-12-19 00:00:00,140043.391146,3.255264,1.724024,4.219208,304480.55,9.229886,9.135354,7.701414,23.0,79.0
1,2020-12-19 00:15:00,140045.516146,2.812916,1.777869,3.784695,304487.40625,9.877761,9.159706,9.134663,23.0,79.0
2,2020-12-19 00:30:00,140047.771875,2.818559,1.857305,5.051593,304494.794792,11.516459,10.269925,9.393754,23.0,79.0
3,2020-12-19 00:45:00,140049.958854,3.025429,1.750408,3.142562,304502.2375,11.469462,8.602655,8.568154,23.0,79.0
4,2020-12-19 01:00:00,140051.855729,2.585936,1.698523,3.203603,304509.079167,10.091112,8.232744,8.46221,23.0,79.0


In [24]:
Ans.to_csv('Outcome.csv', index = True)

### People data

In [25]:
people = pd.read_csv("dataset/people.csv")
people['datetime'] = pd.to_datetime(people['datetime'])
people

Unnamed: 0,datetime,zone1,zone2,zone3,zone4
0,2020-12-15 22:01:44,0,2,0,3
1,2020-12-15 22:03:32,0,0,0,3
2,2020-12-15 22:05:09,0,0,0,4
3,2020-12-15 22:06:45,0,0,0,4
4,2020-12-15 22:08:15,0,0,0,2
...,...,...,...,...,...
3624,2021-01-11 09:40:12,2,2,0,2
3625,2021-01-11 09:41:50,0,2,0,0
3626,2021-01-11 09:43:09,0,2,0,0
3627,2021-01-11 10:17:42,0,0,0,2


In [26]:
people = people.groupby(pd.Grouper(freq='d',key='datetime')).mean()
Apeople = pd.DataFrame()
Apeople['Total_people'] = people.zone1 + people.zone2 + people.zone3 + people.zone4

### For Power Bi

In [27]:
# Ans = Ans.resample('d', on='timestamp').mean().dropna(how='all')

In [28]:
# Ans['Day'] = Ans.index.day
# Ans['Month'] = Ans.index.month
# Ans['hour'] = Ans.index.hour
# Ans['min'] = Ans.index.minute

In [29]:
Ans['Total_Power'] = Ans['1powerA'] + Ans['1powerB'] + Ans['1powerC'] + Ans['2powerA'] + Ans['2powerB'] + Ans['2powerC']

In [30]:
Ans['Bill'] = Ans['Total_Power'] * 24 * 4

In [31]:
initial = Ans['energy1'].iloc[0] + Ans['energy2'].iloc[0]
initial

444523.94114583335

In [32]:
Ans['Total_Energy'] = Ans['energy1'] + Ans['energy2'] - initial

In [33]:
Ans

Unnamed: 0,timestamp,energy1,1powerA,1powerB,1powerC,energy2,2powerA,2powerB,2powerC,Temp,Humidity,Total_Power,Bill,Total_Energy
0,2020-12-19 00:00:00,140043.391146,3.255264,1.724024,4.219208,304480.550000,9.229886,9.135354,7.701414,23.0,79.0,35.265151,3385.454523,0.000000
1,2020-12-19 00:15:00,140045.516146,2.812916,1.777869,3.784695,304487.406250,9.877761,9.159706,9.134663,23.0,79.0,36.547610,3508.570593,8.981250
2,2020-12-19 00:30:00,140047.771875,2.818559,1.857305,5.051593,304494.794792,11.516459,10.269925,9.393754,23.0,79.0,40.907596,3927.129181,18.625521
3,2020-12-19 00:45:00,140049.958854,3.025429,1.750408,3.142562,304502.237500,11.469462,8.602655,8.568154,23.0,79.0,36.558670,3509.632336,28.255208
4,2020-12-19 01:00:00,140051.855729,2.585936,1.698523,3.203603,304509.079167,10.091112,8.232744,8.462210,23.0,79.0,34.274128,3290.316313,36.993750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2466,2021-01-13 16:30:00,145397.501562,0.613766,0.000000,0.000000,321467.808333,1.384582,1.098190,0.000000,21.0,58.0,3.096538,297.267665,22341.368750
2467,2021-01-13 16:45:00,145397.653533,0.801505,0.000000,0.000000,321468.691667,1.390026,1.098095,0.000000,21.0,58.0,3.289626,315.804128,22342.404053
2468,2021-01-13 17:00:00,145397.819111,0.692847,0.000000,0.190294,321469.573958,1.394100,1.100963,0.000000,21.0,58.0,3.378202,324.307428,22343.451923
2469,2021-01-13 17:15:00,145398.007212,0.603717,0.000000,0.190307,321470.480208,1.623777,1.326716,0.000000,21.0,58.0,3.744518,359.473731,22344.546274


In [34]:
Ans = Ans.resample('d', on='timestamp').mean().dropna(how='all')

In [35]:
Ans

Unnamed: 0_level_0,energy1,1powerA,1powerB,1powerC,energy2,2powerA,2powerB,2powerC,Temp,Humidity,Total_Power,Bill,Total_Energy
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-12-19,140161.659058,4.234113,2.694683,3.937955,304921.719935,16.839585,12.184673,12.359746,24.375,70.416667,52.250755,5016.072524,559.437847
2020-12-20,140428.035037,4.422865,0.477503,3.695493,305925.793368,16.234833,12.270182,11.961021,22.458333,73.375,49.061896,4709.942041,1829.887259
2020-12-21,140711.093556,4.253141,0.0,3.475088,306834.831223,13.154028,10.822928,11.872286,21.875,56.791667,43.57747,4183.437076,3021.983633
2020-12-22,140956.903352,3.5865,0.0,3.340292,307654.140648,12.191691,9.298458,10.220017,19.916667,60.375,38.636957,3709.147908,4087.102854
2020-12-23,139708.559681,3.252361,0.0,3.182525,305168.814787,10.765692,8.979339,9.260268,19.333333,68.791667,35.440186,3402.25784,353.433323
2020-12-24,141399.426717,3.446294,0.0,3.102851,309097.54638,12.096298,9.945538,9.687798,20.208333,67.083333,38.278779,3674.762804,5973.031951
2020-12-25,132799.332106,4.300166,0.0,4.145159,290502.220903,11.971994,9.702631,8.841125,21.166667,68.916667,38.961074,3740.263118,-21222.388137
2020-12-26,131606.312487,4.525475,0.0,3.662754,288030.04611,12.895971,10.432744,9.875908,24.0,68.416667,41.392852,3973.713802,-24887.582549
2020-12-27,137823.433827,5.965896,0.0,4.949633,301840.341584,15.559503,12.356949,0.861798,25.125,71.041667,39.693779,3810.602828,-4860.165735
2020-12-28,133714.472053,4.666896,0.0,4.167851,293012.736886,14.414483,11.811853,0.0,24.916667,67.041667,35.061083,3365.863922,-17796.732207


In [36]:
Ans.Bill.sum()

68702.93464311391

In [37]:
Ans.to_csv('OutcomeBI.csv', index = True)