### This script is a port of EnergyID's code that calculates a linear regression on heating data

# Imports and setup

General imports

In [None]:
import pandas as pd

OpenGrid-specific imports

In [None]:
from opengrid.library import houseprint
from opengrid import config
from opengrid.library import linearregression

# Instantiate the OpenGrid configuration object.
# NOTE(review): `c` is not referenced again in the visible cells — confirm it is still needed.
c = config.Config()

Plotting settings

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
# Default size for every figure created in this notebook, as (width, height).
plt.rcParams['figure.figsize'] = (16, 8)

# Load Data

We are going to use gas consumption data and weather data. Because we don't want to overload the weather API, we will only use 1 location (Ukkel).

First, let's define the start and end date of our experiment. Let's take one year's worth of data, ending with last month.

In [None]:
# Monthly consumption is obtained by differencing monthly values, so twelve
# months of consumption require thirteen months of raw data.
# `end` is midnight on the first day of the current month.
end = pd.Timestamp.today().normalize().replace(day=1)
# `start` is one day before the same date a year earlier, which pulls in the
# extra (thirteenth) month.
start = end.replace(year=end.year - 1) - pd.Timedelta(days=1)

# Both timestamps are left timezone-naive; localising them is disabled:
#start = start.tz_localize('Europe/Brussels')
#end = end.tz_localize('Europe/Brussels')
print(start, end)

## Gas Data

In [None]:
# Create the Houseprint and call init_tmpo() on it (presumably this sets up
# the tmpo data client — confirm).
# NOTE: no data is synced here; hp.sync_tmpos() and loading the Houseprint from
# a cached file are both commented out. Uncomment them when needed.
hp = houseprint.Houseprint()
#hp = houseprint.load_houseprint_from_file('cache_hp.hp')
hp.init_tmpo()
#hp.sync_tmpos()

In [None]:
#hp.save('cache_hp.hp')

In [None]:
def gas_data_generator():
    """
    Yield the monthly gas consumption (kWh) of every gas sensor with usable data.

    Relies on the notebook globals ``hp``, ``start`` and ``end``. For each gas
    sensor the raw data between ``start`` and ``end`` is fetched with
    ``diff=False`` (presumably cumulative meter readings — confirm), converted
    to Europe/Brussels time, reduced to one value per calendar month and then
    differenced to obtain month-over-month consumption.

    Only strictly positive differences are kept; sensors for which nothing is
    left are skipped.

    Yields
    ------
    pandas object indexed by month start, one per sensor
    """
    for gas_sensor in hp.get_sensors(sensortype='gas'):
        df = gas_sensor.get_data(head=start, tail=end, unit='kWh', diff=False)
        df = df.tz_convert('Europe/Brussels')
        # Since pandas 0.18 resample() returns a lazy Resampler, which has no
        # diff(); the aggregation has to be explicit. mean() is what the
        # pre-0.18 API applied implicitly, so results are unchanged.
        df = df.resample('MS').mean()
        df = df.diff().dropna()
        # Discard zero/negative differences (no consumption or counter glitches)
        df = df[df>0]
        if df.empty:
            continue
        yield df

Let's have a peek

In [None]:
# Create the generator; being a generator, it does no work until next() is called on it.
gas_data = gas_data_generator()

In [None]:
# Take the first item produced by the gas generator (sensors without usable
# data are skipped by the generator itself) and plot it.
peek = next(gas_data)
#print(peek)
peek.plot()

## Weather Data

Run this block to download the weather data and save it to a pickle. This is a large request, and you can only do 2 or 3 of these per day before your credit with Forecast.io runs out!

TODO: Use the caching library for this.

To get the data run the cell below

In [None]:
from opengrid.library import forecastwrapper
# Build the weather request for Ukkel between `start` and `end`.
weather = forecastwrapper.Weather(location='Ukkel, Belgium', start=start, end=end)
# Sum the values returned by days() (presumably one row per day — confirm)
# into month-start ('MS') bins.
weather_data = weather.days().resample('MS').sum()

In [None]:
# Plot the monthly sums of the heating-degree-days column
# (column name suggests a 16.5 degree base temperature — confirm).
weather_data['heatingDegreeDays16.5'].plot()

# Put data together

We have defined an OpenGrid analysis as a class that takes a single DataFrame as input, so we'll create that dataframe.

I wrote a generator that uses our previously defined generator so you can generate while you generate.

In [None]:
def analysis_data_generator():
    """
    Yield one regression-ready DataFrame per gas sensor.

    Each item produced by ``gas_data_generator()`` is placed side by side with
    the ``'heatingDegreeDays16.5'`` column of the global ``weather_data``.
    Rows where either value is missing are dropped and the two columns are
    renamed to ``'gas'`` and ``'degreedays'``.
    """
    for consumption in gas_data_generator():
        # Align on the index; keep only the months present in both series
        combined = pd.concat(
            [consumption, weather_data['heatingDegreeDays16.5']],
            axis=1,
        )
        combined = combined.dropna()
        combined.columns = ['gas', 'degreedays']
        yield combined

Let's have another peek

In [None]:
# Create the generator of combined gas/degree-day frames; nothing is computed until next() is called.
analysis_data = analysis_data_generator()

In [None]:
# Take the first combined frame and draw both columns against time.
peek = next(analysis_data)
fig, ax1 = plt.subplots()
# Gas and degree days are on different scales, so each gets its own y-axis.
ax2 = ax1.twinx()
handles, labels = [], []
for axis, column, color in zip([ax1, ax2], peek.columns, ['b', 'r']):
    axis.plot_date(peek.index, peek[column], '-', color=color, label=column)
    # Collect the legend entries of this axes
    axis_handles, axis_labels = axis.get_legend_handles_labels()
    handles.extend(axis_handles)
    labels.extend(axis_labels)
# A bare plt.legend() only lists the artists of the current axes (ax2), which
# would leave the gas line out; pass the entries of both axes explicitly.
plt.legend(handles, labels)

# Run Regression Analysis

In [None]:
# Fit and plot a simple linear regression (gas vs. degree days) for every sensor.
analysis_data = analysis_data_generator()
for data in analysis_data:
    try:
        analysis = linearregression.LinearRegression(independent=data.degreedays, dependent=data.gas)
    except ValueError as e:
        # The regression could not be built for this sensor: report it and move
        # on. Without the `continue`, the plot below would reuse the previous
        # sensor's analysis (or raise NameError on the first iteration).
        print(e)
        continue

    fig = analysis.plot()
    fig.show()

In [None]:
# Same analysis with LinearRegression3, using breakpoint=60 and percentage=0.5.
analysis_data = analysis_data_generator()
for data in analysis_data:
    try:
        analysis = linearregression.LinearRegression3(independent=data.degreedays, dependent=data.gas,
                                                      breakpoint=60, percentage=0.5)
    except ValueError as e:
        # The regression could not be built for this sensor: report it and move
        # on. Without the `continue`, the plot below would reuse the previous
        # sensor's analysis (or raise NameError on the first iteration).
        print(e)
        continue

    fig = analysis.plot()
    fig.show()