In [4]:
import pandas as pd
import numpy as np
import datetime as dt

# My packages
from packages.thesis_sequence_algorithm_nominmax import thesis_sequence_algorithm
from packages.thesis_model_nominmax import evaluate_model

# Visualization
import matplotlib.pyplot as plt
import time
from dateutil.relativedelta import relativedelta

In [5]:
dataset = pd.read_csv('dataset_final_min.csv', infer_datetime_format=True, parse_dates=['datetime'], index_col=['datetime'])
dataset_h = pd.read_csv('dataset_final.csv', infer_datetime_format=True, parse_dates=['datetime'], index_col=['datetime'])

# Thesis Model

- Prepare warm up period (Define quantity)
- Train the ConvLSTM Model by warm-up period initially
- Prepare previous hourly step (Define Quantity)
- Select optimal correlated leg load by the Power Spectrum
- If Correlated leg > Theta
    - Update and reshape the input vector for ConvLSTM model
    - Implement single step ahead forecast
    - Retrain the ConvLSTM model by the observed hourly load of previous predicted time step
- Else 
    - Implement persistence model
    - Retrain the ConvLSTM model by the observed hourly load of previous predicted time step 


In [6]:
dataset_h = dataset_h.loc[:,['Global_active_power','Sub_metering_1','Sub_metering_2','Sub_metering_3','tempC','weekday','time']]
df = dataset_h[(dataset_h.index >= '2008-08-01') & (dataset_h.index < '2010-08-01')]
df

Unnamed: 0_level_0,Global_active_power,Sub_metering_1,Sub_metering_2,Sub_metering_3,tempC,weekday,time
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2008-08-01 00:00:00,0.460333,0.0,0.072,0.113,19,0,1
2008-08-01 01:00:00,0.405967,0.0,0.043,0.115,18,0,2
2008-08-01 02:00:00,0.390733,0.0,0.030,0.118,17,0,3
2008-08-01 03:00:00,0.361433,0.0,0.023,0.116,17,0,4
2008-08-01 04:00:00,0.401200,0.0,0.043,0.112,16,0,5
...,...,...,...,...,...,...,...
2010-07-31 19:00:00,0.342800,0.0,0.028,0.100,21,1,20
2010-07-31 20:00:00,0.310967,0.0,0.030,0.104,19,1,21
2010-07-31 21:00:00,0.297867,0.0,0.013,0.101,18,1,22
2010-07-31 22:00:00,0.337333,0.0,0.039,0.104,18,1,23


In [13]:
list_theta = ['convlstm','convlstm']
for i in list_theta:
        start_date = df.index[0]
        end_date = start_date + relativedelta(months = 1)
        while end_date < df.index[-1]:
            start_date = pd.to_datetime(start_date) + relativedelta(months = 1)
            end_date = pd.to_datetime(start_date) + relativedelta(months = 1)
            dataset_filtered = df[(df.index >= start_date) & (df.index < end_date)]
            autocorrelation_type = 'ConvLSTM'
            warm_up_time = 168
            theta_threshold = i
            leg_days_time = 168
            n_steps = 1
            n_length = 1
            n_input = n_steps * n_length
            n_features = df.shape[1]
            n_out = 1
            epochs = 20
            epochs_retrain = 1
            batch_size = 1
            filters = 36
            activation = 'relu'
            loss = 'mse'
            optimizer = 'adam'
            karnel = (1,2)
            dense_1 = 4
            dense_2 = 1
            model_parameters_PSConvLSTM = thesis_sequence_algorithm(dataset_filtered, 
                                                                    autocorrelation_type, 
                                                                    warm_up_time, 
                                                                    theta_threshold, 
                                                                    leg_days_time, 
                                                                    n_steps, 
                                                                    n_length, 
                                                                    n_input, 
                                                                    n_features, 
                                                                    n_out, 
                                                                    epochs, 
                                                                    batch_size, 
                                                                    filters, 
                                                                    activation, 
                                                                    loss, 
                                                                    optimizer,
                                                                    karnel,
                                                                    epochs_retrain,
                                                                    dense_1,
                                                                    dense_2)
            #RUN ALGO
            start_time = time.time()
            predict_list_PSConvLSTM, observed_list_PSConvLSTM = model_parameters_PSConvLSTM.master_algorithm_convlstm()
            scores_PSConvLSTM = evaluate_model(predict_list_PSConvLSTM, observed_list_PSConvLSTM)
            score_PSConvLSTM, scores_PSConvLSTM = scores_PSConvLSTM.evaluate_model()
            time_run = (time.time() - start_time)
            # Read results csv
            teste = pd.read_csv('params_test.csv', index_col= 'Unnamed: 0')
            # Register actual version
            resgister_version = {'start_date': start_date, 'end_date': end_date, 'warm_up_time': warm_up_time, 'leg_days' : leg_days_time,
                                'theta_threshold': theta_threshold, 'leg_days_time': leg_days_time ,'n_length': n_length, 'epochs':epochs, 'filters':filters, 
                                'epochs_retrain': epochs_retrain, 'dense_1': dense_1, 'karnels' : [karnel], 
                                'score': score_PSConvLSTM, 'max_loss': np.max(scores_PSConvLSTM), 'time_run': time_run}
            # Create Data Frame
            resgister_version_df = pd.DataFrame(data = resgister_version, index = [pd.to_datetime(dt.datetime.today())] )
            # Concat and save
            pd.concat([teste, resgister_version_df]).to_csv('params_test.csv')

5.639449119567871
45.98403310775757
6.226763486862183
54.34017848968506
6.820978403091431
49.68009972572327
5.557284832000732
47.62904167175293
5.7190001010894775
46.71410250663757
5.94853401184082
43.19856357574463
5.896388292312622
49.19789218902588
6.478708028793335
46.79299473762512
5.834232330322266
48.000205993652344
5.610048055648804
45.562904596328735
5.532902479171753
49.52644491195679
6.486979722976685
49.02045178413391
5.762521266937256
45.50121212005615
5.765562057495117
45.281899213790894
5.30837345123291
43.3446683883667
5.382407903671265
45.012051582336426
5.594332218170166
47.762853384017944
5.448354244232178
43.08028435707092
5.480658769607544
47.70358228683472
5.868391513824463
44.97753286361694
5.96391224861145
48.11321568489075
6.004065990447998
47.652184009552
5.879003286361694
46.717692375183105
