# General Imports


In [None]:
import os
import inspect
import sys
import pandas as pd
import charts
from opengrid_dev import config
# Configuration object; the cells below read the data folder from it
# via c.get('data', 'folder').
c = config.Config()

from opengrid_dev.library import misc, houseprint

import matplotlib.pyplot as plt
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size (width, height) for the plots produced in this notebook.
plt.rcParams['figure.figsize'] = 16,8

In [None]:
# Echo the `opengrid_libdir` attribute of the config object as the cell output.
c.opengrid_libdir

## Houseprint

In [None]:
# Build the Houseprint object that the data-extraction cells query via hp.get_data().
hp = houseprint.Houseprint()
# NOTE(review): presumably attaches a tmpo session to the houseprint — confirm in opengrid_dev.
hp.init_tmpo()
# Switch off the debug flag on the houseprint's (private) tmpo object.
hp._tmpos.debug = False

In [None]:
# NOTE(review): presumably synchronises the local tmpo data for the houseprint's
# sensors before any data is queried — confirm in opengrid_dev.
hp.sync_tmpos()

## Create dataframes with minute data for a single year, by sensortype

Only run this section when the minute pickles need to be (re)generated. The hourly frames further down are created by loading these minute pickles.

In [None]:
# Export one gzip-compressed pickle of minute-resolution data per sensor type
# and per year, plus a two-week December 2016 extract for three gas sensors.
for sensortype in ['gas', 'water', 'electricity']:
    print('Processing {}'.format(sensortype))
    for y in ['2016']:
        print('year {}'.format(y))

        # Query window: start of year `y` until 02:00 on 1 January of the next year.
        head = pd.Timestamp('{}0101'.format(y), tz='Europe/Brussels')
        tail = pd.Timestamp('{}0101 02:00:00'.format(int(y)+1), tz='Europe/Brussels')
        df = hp.get_data(
            sensortype=sensortype,
            head=head,
            tail=tail,
            diff=True,
            resample='min',
        )

        # Keep only the first four characters of every column label.
        df.rename(columns=lambda label: label[:4], inplace=True)
        df = df.tz_convert('Europe/Brussels')

        filename = '{}_{}_min.pkl'.format(sensortype, y)
        path = os.path.join(c.get('data', 'folder'), filename)
        df.to_pickle(path, compression='gzip')

        # Only gas gets the additional December extract.
        if sensortype != 'gas':
            continue

        # Minute values for three gas sensors between 5 and 19 December 2016.
        df = df[['313b', 'd5a7', 'ba14']]
        window_start = pd.Timestamp('2016-12-05 00:00:00', tz='Europe/Brussels')
        window_end = pd.Timestamp('2016-12-19 00:00:00', tz='Europe/Brussels')
        dflim = df.loc[window_start:window_end]

        filename = '{}_dec2016_min.pkl'.format(sensortype)
        path = os.path.join(c.get('data', 'folder'), filename)
        dflim.to_pickle(path, compression='gzip')
   

In [None]:
# Minute-resolution water data for March 2015, stored as a gzip-compressed pickle.
head = pd.Timestamp('20150301', tz='Europe/Brussels')
tail = pd.Timestamp('20150401', tz='Europe/Brussels')
df = hp.get_data(
    sensortype='water',
    head=head,
    tail=tail,
    diff=True,
    resample='min',
)

# Keep only the first four characters of every column label.
df.rename(columns=lambda label: label[:4], inplace=True)
df = df.tz_convert('Europe/Brussels')

data_folder = c.get('data', 'folder')
path = os.path.join(data_folder, 'water_march2015_min.pkl')
df.to_pickle(path, compression='gzip')

In [None]:
## Create dataframes with hourly data
# For every sensor type and year: load the minute pickle, resample to hourly
# means, drop unwanted or incomplete sensors, and save the result as a
# gzip-compressed pickle.

# Incomplete sensors and sensors we don't want in the test dataset.
excluded_sensors = ['565d']

for sensortype in ['water', 'gas', 'electricity']:
    print('Processing {}'.format(sensortype))
    for y in ['2016']:
        print('year {}'.format(y))
        path_min = os.path.join(c.get('data', 'folder'), '{}_{}_min.pkl'.format(sensortype, y))
        df = pd.read_pickle(path_min, compression='gzip')
        # hourly: mean values
        df_hour = df.resample(rule='H').mean()

        # errors='ignore' skips sensors that are not present in this frame,
        # without hiding unrelated failures the way a bare `except` would.
        df_hour = df_hour.drop(labels=excluded_sensors, axis=1, errors='ignore')

        # Derive the slice bounds from `y` instead of relying on a `head`
        # variable left behind by one of the optional minute-data cells
        # (undefined if they were skipped, 2015-03-01 if the water cell ran last).
        year_start = pd.Timestamp('{}0101'.format(y), tz='Europe/Brussels')
        year_end = pd.Timestamp('{}0101'.format(int(y)+1), tz='Europe/Brussels')
        df_hour = df_hour.loc[year_start:year_end]

        # Keep only sensors without any missing hourly value in the period.
        df_hour = df_hour.dropna(axis=1, how='any')

        # Plotting is best-effort: report a failure (e.g. no column survived
        # the filtering) and carry on, but let KeyboardInterrupt/SystemExit through.
        try:
            df_hour.plot()
        except Exception:
            print("No full hourly data for {}".format(y))

        path_hour = os.path.join(c.get('data', 'folder'), '{}_{}_hour.pkl'.format(sensortype, y))
        df_hour.to_pickle(path_hour, compression='gzip')

## Weather data 

In [None]:
from opengrid_dev.library import forecastwrapper
# Requested period: 25 December 2015 up to 1 January 2017, Europe/Brussels time.
start = pd.Timestamp('20151225', tz='Europe/Brussels')
end = pd.Timestamp('20170101', tz='Europe/Brussels')


# Weather object for location 'Ukkel' over the period above; its .hours()
# method is used below to obtain the hourly frame.
Weather_Ukkel = forecastwrapper.Weather(location='Ukkel', start=start, end=end)

In [None]:
# Keep four columns of the frame returned by Weather_Ukkel.hours().
columns = ['GlobalHorizontalIrradiance', 'humidity', 'temperature', 'windSpeed']
df = Weather_Ukkel.hours()[columns]
# Print a summary of the selected frame.
df.info()

In [None]:
# Cast every cell to float element-wise, then replace missing values with 0.
# NOTE(review): DataFrame.applymap is deprecated in recent pandas releases in
# favour of DataFrame.map — confirm the target pandas version before changing.
df = df.applymap(float).fillna(value=0)
# Print the summary again to check the result of the conversion.
df.info()

In [None]:
# Save the hourly weather frame as a gzip-compressed pickle in the configured data folder.
path = os.path.join(c.get('data', 'folder'), 'weather_2016_hour.pkl')
df.to_pickle(path, compression='gzip')