In [1]:
import pandas as pd
import numpy as np
import datetime as dt

# My packages
from packages.thesis_sequence_algorithm_nominmax import thesis_sequence_algorithm
from packages.thesis_model_nominmax import evaluate_model

# Visualization
import matplotlib.pyplot as plt
import time
from dateutil.relativedelta import relativedelta

In [2]:
# Both source files share one layout: a 'datetime' column that is parsed as
# timestamps and used as the index.
_read_options = {
    'infer_datetime_format': True,
    'parse_dates': ['datetime'],
    'index_col': ['datetime'],
}
# Minute-resolution readings.
dataset = pd.read_csv('dataset_final_min.csv', **_read_options)
# Companion file; presumably the hourly aggregation (hence `_h`) -- confirm.
dataset_h = pd.read_csv('dataset_final.csv', **_read_options)

# Thesis Model

- Prepare the warm-up period (define its length)
- Initially train the ConvLSTM model on the warm-up period
- Prepare the previous hourly steps (define how many)
- Select the optimally correlated lag of the load (named `leg` in the code) using the power spectrum
- If the correlation at the selected lag > theta
    - Update and reshape the input vector for the ConvLSTM model
    - Produce a single-step-ahead forecast
    - Retrain the ConvLSTM model with the observed hourly load of the previously predicted time step
- Else
    - Use the persistence model
    - Retrain the ConvLSTM model with the observed hourly load of the previously predicted time step


In [10]:
# Keep only the columns the 30-minute experiment uses.  The selection is bound
# to a new name instead of overwriting `dataset`: the "Minute Step" section
# further down selects the Sub_metering_* columns from `dataset`, which would
# raise a KeyError on a top-to-bottom run if the frame were narrowed here.
dataset_30 = dataset.loc[:, ['Global_active_power', 'tempC', 'weekday', 'time']]

# Restrict to April 2009 (upper bound exclusive).
df_min = dataset_30[(dataset_30.index >= '2009-04-01') & (dataset_30.index < '2009-05-01')]

# Down-sample to 30-minute bins, taking the mean of every column per bin.
df_30 = df_min.resample('30min').mean()

In [23]:
# ConvLSTM experiment on the 30-minute series.  Every run appends one row with
# its settings and scores to 'params_test.csv'.

# Evaluation window and labels.
start_date = '2009-04-01'
end_date = '2009-05-01'
autocorrelation_type = 'ConvLSTM'
algorithm = 'ConvLSTM 30min'  # used as the row label in params_test.csv
dataset_filtered = df_30[(df_30.index >= start_date) & (df_30.index < end_date)]

# Sequence settings, counted in 30-minute steps (336 steps = 7 days).
warm_up_time = 336
leg_days_time = 336
n_steps = 1
n_length = 1
n_input = n_steps * n_length
n_features = dataset_filtered.shape[1]
n_out = 1

# Network / training settings handed to thesis_sequence_algorithm.
epochs = 20
epochs_retrain = 1
batch_size = 1
filters = 36
activation = 'relu'
loss = 'mse'
optimizer = 'adam'
karnel = (1, 2)
dense_1 = 4
dense_2 = 1

# NOTE(review): the only "theta" tried here is the string 'ConvLSTM', not a
# numeric threshold; presumably the threshold is unused in this mode -- confirm.
list_theta = ['ConvLSTM']
for theta in list_theta:
    theta_threshold = theta

    # Arguments are positional: their order must match the signature of
    # thesis_sequence_algorithm (defined in the project package).
    model_parameters_PSConvLSTM = thesis_sequence_algorithm(
        dataset_filtered,
        autocorrelation_type,
        warm_up_time,
        theta_threshold,
        leg_days_time,
        n_steps,
        n_length,
        n_input,
        n_features,
        n_out,
        epochs,
        batch_size,
        filters,
        activation,
        loss,
        optimizer,
        karnel,
        epochs_retrain,
        dense_1,
        dense_2,
    )

    # Run the algorithm; the measured time covers forecasting and scoring.
    start_time = time.time()
    predict_list_PSConvLSTM, observed_list_PSConvLSTM = model_parameters_PSConvLSTM.master_algorithm_convlstm()
    evaluator = evaluate_model(predict_list_PSConvLSTM, observed_list_PSConvLSTM)
    score_PSConvLSTM, scores_PSConvLSTM = evaluator.evaluate_model()
    time_run = time.time() - start_time

    # Settings and results of this run.  'leg_days' and 'leg_days_time' both
    # record leg_days_time; both keys are kept so the CSV columns stay stable.
    # `karnel` is wrapped in a list so the tuple lands in a single cell.
    resgister_version = {
        'start_date': start_date, 'end_date': end_date,
        'warm_up_time': warm_up_time, 'leg_days': leg_days_time,
        'theta_threshold': theta_threshold, 'leg_days_time': leg_days_time,
        'n_length': n_length, 'epochs': epochs, 'filters': filters,
        'epochs_retrain': epochs_retrain, 'dense_1': dense_1, 'karnels': [karnel],
        'score': score_PSConvLSTM, 'max_loss': np.max(scores_PSConvLSTM),
        'time_run': time_run,
    }
    resgister_version_df = pd.DataFrame(data=resgister_version, index=[algorithm])

    # Append to the results log.  A missing log must not discard the result of
    # the (expensive) run above: pd.concat drops None entries, so in that case
    # the file is simply created with this first row.
    try:
        teste = pd.read_csv('params_test.csv', index_col='Unnamed: 0')
    except FileNotFoundError:
        teste = None
    pd.concat([teste, resgister_version_df]).to_csv('params_test.csv')

14.345156908035278
109.98995971679688


In [16]:
# Persistence baseline on the 30-minute load: each forecast is the value
# observed one step earlier.  The observed slice starts at position 336, the
# same number as warm_up_time in the ConvLSTM 30-minute run.
load_30 = df_30['Global_active_power']
persistence_forecast = load_30.iloc[335:-1]  # values at t-1
persistence_observed = load_30.iloc[336:]    # values at t
scores_persistence = evaluate_model(persistence_forecast, persistence_observed)
score_persistence, scores_persistence_s = scores_persistence.evaluate_model()
score_persistence

0.5534433955985042

In [20]:
# Largest individual score returned for the persistence baseline.
np.asarray(scores_persistence_s).max()

4.1806

In [4]:
# Theta sweep over one-month windows, repeated 10 times.  Every run appends one
# row with its settings and scores to 'params_test.csv'.
#
# NOTE(review): `df` is not created by any visible cell of this notebook;
# presumably it is an hourly frame derived from `dataset_h` -- confirm and
# define it explicitly so the notebook survives Restart & Run All.

autocorrelation_type = 'acf'
algorithm = 'ConvLSTM (Adapted with autoregressive lag being t-1 and higher length)'

# Sequence settings (168 and 24 presumably count hours: 7 days / 1 day -- confirm).
warm_up_time = 168
leg_days_time = 168
n_steps = 1
n_length = 24
n_input = n_steps * n_length
n_features = df.shape[1]
n_out = 1

# Network / training settings handed to thesis_sequence_algorithm.
epochs = 20
epochs_retrain = 1
batch_size = 1
filters = 36
activation = 'relu'
loss = 'mse'
optimizer = 'adam'
karnel = (1, 2)
dense_1 = 4
dense_2 = 1

list_theta = [0.52, 0.56, 0.60, 0.64, 0.68]

# The repetition counter and the theta value now use distinct names; the
# original reused `i` for both loops.
for repetition in range(10):
    for theta in list_theta:
        theta_threshold = theta
        start_date = df.index[0]
        end_date = start_date + relativedelta(months=1)
        while end_date < df.index[-1]:
            # NOTE(review): the window is advanced BEFORE it is used, so the
            # first month of `df` is never evaluated and the last window may
            # extend past the end of the data.  Left unchanged so results stay
            # comparable with the rows already logged -- confirm the intent.
            start_date = start_date + relativedelta(months=1)
            end_date = start_date + relativedelta(months=1)
            dataset_filtered = df[(df.index >= start_date) & (df.index < end_date)]

            # Arguments are positional: their order must match the signature
            # of thesis_sequence_algorithm (defined in the project package).
            model_parameters_PSConvLSTM = thesis_sequence_algorithm(
                dataset_filtered,
                autocorrelation_type,
                warm_up_time,
                theta_threshold,
                leg_days_time,
                n_steps,
                n_length,
                n_input,
                n_features,
                n_out,
                epochs,
                batch_size,
                filters,
                activation,
                loss,
                optimizer,
                karnel,
                epochs_retrain,
                dense_1,
                dense_2,
            )

            # Run the algorithm; the measured time covers forecasting and scoring.
            start_time = time.time()
            predict_list_PSConvLSTM, observed_list_PSConvLSTM = model_parameters_PSConvLSTM.master_algorithm_convlstm()
            evaluator = evaluate_model(predict_list_PSConvLSTM, observed_list_PSConvLSTM)
            score_PSConvLSTM, scores_PSConvLSTM = evaluator.evaluate_model()
            time_run = time.time() - start_time

            # Settings and results of this run.  'leg_days' and 'leg_days_time'
            # both record leg_days_time; both keys are kept so the CSV columns
            # stay stable.  `karnel` is wrapped in a list so the tuple lands in
            # a single cell.
            resgister_version = {
                'start_date': start_date, 'end_date': end_date,
                'warm_up_time': warm_up_time, 'leg_days': leg_days_time,
                'theta_threshold': theta_threshold, 'leg_days_time': leg_days_time,
                'n_length': n_length, 'epochs': epochs, 'filters': filters,
                'epochs_retrain': epochs_retrain, 'dense_1': dense_1, 'karnels': [karnel],
                'score': score_PSConvLSTM, 'max_loss': np.max(scores_PSConvLSTM),
                'time_run': time_run,
            }
            resgister_version_df = pd.DataFrame(data=resgister_version, index=[algorithm])

            # Append to the results log.  A missing log must not discard the
            # result of the (expensive) run above: pd.concat drops None
            # entries, so in that case the file is created with this row.
            try:
                teste = pd.read_csv('params_test.csv', index_col='Unnamed: 0')
            except FileNotFoundError:
                teste = None
            pd.concat([teste, resgister_version_df]).to_csv('params_test.csv')

5.334256649017334
55.28827738761902
7.021195650100708
59.15669584274292
6.088602304458618
51.83251690864563
5.86128044128418
48.201913833618164
5.643917798995972
48.47294044494629
6.181128025054932
44.047576665878296
5.457446336746216
55.28456091880798
6.012292861938477
52.243613958358765
6.611326694488525
51.985827922821045
5.631502151489258
47.88785791397095
6.426384449005127
44.485976219177246
7.153369426727295
46.687654972076416
6.9168243408203125
53.58592414855957
6.3741090297698975
53.05450081825256
6.4519431591033936
57.218974590301514
6.420414209365845
58.180877923965454
6.5415239334106445
58.547393560409546
8.073017835617065
53.1292519569397
8.685153722763062
68.49013805389404
7.118254661560059
53.546995401382446
7.585211515426636
61.76826524734497
7.259969472885132
59.87935256958008
7.091346740722656
47.66328263282776
6.731237173080444
49.0529100894928
6.623762607574463
54.206931352615356
6.284086227416992
52.42994546890259
6.206117153167725
60.06817150115967
6.70212650299072

KeyboardInterrupt: 

# Minute Step

In [3]:
# Columns kept for the minute-resolution experiments.
minute_columns = [
    'Global_active_power',
    'Sub_metering_1',
    'Sub_metering_2',
    'Sub_metering_3',
    'tempC',
    'weekday',
    'time',
]
dataset = dataset.loc[:, minute_columns]

# Two-year window: 2008-08-01 (inclusive) up to 2010-08-01 (exclusive).
in_window = (dataset.index >= '2008-08-01') & (dataset.index < '2010-08-01')
df_min = dataset[in_window]
df_min

Unnamed: 0_level_0,Global_active_power,Sub_metering_1,Sub_metering_2,Sub_metering_3,tempC,weekday,time
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2008-08-01 00:00:00,0.500,0.0,0.00,0.06,20,0,0
2008-08-01 00:01:00,0.482,0.0,0.00,0.06,20,0,0
2008-08-01 00:02:00,0.502,0.0,0.00,0.00,20,0,0
2008-08-01 00:03:00,0.556,0.0,0.00,0.06,20,0,0
2008-08-01 00:04:00,0.854,0.0,0.06,0.42,20,0,0
...,...,...,...,...,...,...,...
2010-07-31 23:55:00,0.152,0.0,0.00,0.06,18,1,23
2010-07-31 23:56:00,0.152,0.0,0.00,0.00,18,1,23
2010-07-31 23:57:00,0.152,0.0,0.00,0.06,18,1,23
2010-07-31 23:58:00,0.154,0.0,0.00,0.00,18,1,23


In [None]:
# ConvLSTM experiment on the minute-resolution series.  Every run appends one
# row with its settings and scores to 'params_test.csv'.

# Evaluation window: February 2009 (upper bound exclusive).
start_date = '2009-02-01'
end_date = '2009-03-01'
autocorrelation_type = 'ConvLSTM'
dataset_filtered = df_min[(df_min.index >= start_date) & (df_min.index < end_date)]

# Sequence settings, counted in 1-minute steps (10080 steps = 7 days).
warm_up_time = 10080
leg_days_time = 10080
n_steps = 1
n_length = 1
n_input = n_steps * n_length
n_features = df_min.shape[1]
n_out = 1

# Network / training settings handed to thesis_sequence_algorithm.
epochs = 20
epochs_retrain = 1
batch_size = 1
filters = 36
activation = 'relu'
loss = 'mse'
optimizer = 'adam'
karnel = (1, 2)
dense_1 = 4
dense_2 = 1

# The same theta is listed twice, so the experiment is simply run twice.
list_theta = [500, 500]
for theta in list_theta:
    theta_threshold = theta

    # Arguments are positional: their order must match the signature of
    # thesis_sequence_algorithm (defined in the project package).
    model_parameters_PSConvLSTM = thesis_sequence_algorithm(
        dataset_filtered,
        autocorrelation_type,
        warm_up_time,
        theta_threshold,
        leg_days_time,
        n_steps,
        n_length,
        n_input,
        n_features,
        n_out,
        epochs,
        batch_size,
        filters,
        activation,
        loss,
        optimizer,
        karnel,
        epochs_retrain,
        dense_1,
        dense_2,
    )

    # Run the algorithm; the measured time covers forecasting and scoring.
    start_time = time.time()
    predict_list_PSConvLSTM, observed_list_PSConvLSTM = model_parameters_PSConvLSTM.master_algorithm_convlstm()
    evaluator = evaluate_model(predict_list_PSConvLSTM, observed_list_PSConvLSTM)
    score_PSConvLSTM, scores_PSConvLSTM = evaluator.evaluate_model()
    time_run = time.time() - start_time

    # Settings and results of this run.  'leg_days' and 'leg_days_time' both
    # record leg_days_time; both keys are kept so the CSV columns stay stable.
    # `karnel` is wrapped in a list so the tuple lands in a single cell.
    resgister_version = {
        'start_date': start_date, 'end_date': end_date,
        'warm_up_time': warm_up_time, 'leg_days': leg_days_time,
        'theta_threshold': theta_threshold, 'leg_days_time': leg_days_time,
        'n_length': n_length, 'epochs': epochs, 'filters': filters,
        'epochs_retrain': epochs_retrain, 'dense_1': dense_1, 'karnels': [karnel],
        'score': score_PSConvLSTM, 'max_loss': np.max(scores_PSConvLSTM),
        'time_run': time_run,
    }
    # Rows of this experiment are labelled with the time the row was written.
    resgister_version_df = pd.DataFrame(data=resgister_version, index=[pd.to_datetime(dt.datetime.today())])

    # Append to the results log.  A missing log must not discard the result of
    # the (expensive) run above: pd.concat drops None entries, so in that case
    # the file is simply created with this first row.
    try:
        teste = pd.read_csv('params_test.csv', index_col='Unnamed: 0')
    except FileNotFoundError:
        teste = None
    pd.concat([teste, resgister_version_df]).to_csv('params_test.csv')