# Profile builder

This notebook can be used to generate a profile:
* we combine _N_ profiles from the london database to create one household
* we then add PV power generation, temperature and irradiance from different sources, i.e. the __cases__

In [1]:
import pandas as pd

year = 2012
nb_profiles = 5

df = pd.read_csv('raw/london_smart_meters_firstclean.csv', index_col = 0, parse_dates=True)
df['KWH'] = pd.to_numeric(df['KWH'])
df.head()

Unnamed: 0_level_0,LCLid,KWH
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-10-12 00:30:00,MAC000002,0.0
2012-10-12 01:00:00,MAC000002,0.0
2012-10-12 01:30:00,MAC000002,0.0
2012-10-12 02:00:00,MAC000002,0.0
2012-10-12 02:30:00,MAC000002,0.0


In [2]:
df = df[df.index.year == year]

df_count = df.groupby(df['LCLid']).count()
profiles = df_count.sort_values(by='KWH', ascending=False).index[0:nb_profiles]
profiles

Index(['MAC000027', 'MAC000019', 'MAC000018', 'MAC000026', 'MAC000029'], dtype='object', name='LCLid')

In [4]:
df_profiles = df[df['LCLid'].isin(profiles)].drop(columns=['LCLid'])

df_profiles = df_profiles.groupby(pd.Grouper(freq='1H')).sum().round(4)
df_profiles = df_profiles.groupby(df_profiles.index).sum()
df_profiles

Unnamed: 0_level_0,KWH
DateTime,Unnamed: 1_level_1
2012-01-01 00:00:00,1.019
2012-01-01 01:00:00,0.976
2012-01-01 02:00:00,0.978
2012-01-01 03:00:00,0.903
2012-01-01 04:00:00,1.013
...,...
2012-12-31 19:00:00,1.552
2012-12-31 20:00:00,1.600
2012-12-31 21:00:00,1.925
2012-12-31 22:00:00,1.619


In [5]:
pvgis = pd.read_csv(f'raw/pvgis_{year}.csv', index_col=0, parse_dates=True)
pvgis.head()

Unnamed: 0_level_0,irradiance,temp,wind
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
20120101:0010,0.0,9.01,0.97
20120101:0110,0.0,8.85,0.91
20120101:0210,0.0,8.69,0.86
20120101:0310,0.0,8.53,0.8
20120101:0410,0.0,8.51,0.65


In [6]:

df_profiles['temp'] = pvgis['temp'].values
df_profiles['irradiance'] = pvgis['irradiance'].values
df_profiles.head()

Unnamed: 0_level_0,KWH,temp,irradiance
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-01-01 00:00:00,1.019,9.01,0.0
2012-01-01 01:00:00,0.976,8.85,0.0
2012-01-01 02:00:00,0.978,8.69,0.0
2012-01-01 03:00:00,0.903,8.53,0.0
2012-01-01 04:00:00,1.013,8.51,0.0


In [7]:
df_profiles.to_csv(f'new{year}.csv')