In [1]:
import sys 

import numpy as np 
from numpy.random import randn
from numpy.random import randint
from numpy import expand_dims
from numpy import zeros
from numpy import ones

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv), data manipulation as in SQL
import matplotlib.pyplot as plt # this is used for the plot the graph 
import seaborn as sns # used for plot interactive graph. 

# learning models 
from sklearn.model_selection import train_test_split # to split the data into two parts
from sklearn.model_selection import KFold # use for cross validation
from sklearn.preprocessing import StandardScaler # for normalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline # pipeline making
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import SelectFromModel
from sklearn import metrics # for the check the error and accuracy of the model
from sklearn.metrics import mean_squared_error,r2_score

## for Deep-learing:
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical
from keras.optimizers import SGD, Adam 
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
import itertools
from keras.layers import LSTM
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import Dropout, Flatten, Reshape

Using TensorFlow backend.


In [2]:
df = pd.read_csv('TimeForecasting/data/household_power_consumption.txt', sep=';',
                parse_dates={'dt':['Date', 'Time']}, infer_datetime_format=True, 
                 low_memory=False, na_values=['nan','?'], index_col='dt')

In [3]:
df.head()

Unnamed: 0_level_0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006-12-16 17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
2006-12-16 17:25:00,5.36,0.436,233.63,23.0,0.0,1.0,16.0
2006-12-16 17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
2006-12-16 17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
2006-12-16 17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0


In [12]:
df_global_active_power = df.Global_active_power
df_global_active_power.head()

dt
2006-12-16 17:24:00    4.216
2006-12-16 17:25:00    5.360
2006-12-16 17:26:00    5.374
2006-12-16 17:27:00    5.388
2006-12-16 17:28:00    3.666
Name: Global_active_power, dtype: float64

In [28]:
def timeseries_to_supervised(data, lag=1):
    """
    This method will create 2 column one that lags the other 
    """
    df = pd.DataFrame(data)
    columns = [df.shift(i) for i in range(1, lag+1)]
    columns.append(df)
    df = pd.concat(columns, axis=1)
    # this line needs to be made more general for the different columns names
    col_name = df.columns[0]
    df.columns = ['{}_previous'.format(col_name),col_name]
    df.dropna(inplace=True)
    return df

In [58]:
def scale(data):
    """
    Method to scale the data to [-1, 1]
    Inputs:
        data
    Output: 
        scaler: scaler instance 
        data scaled in range [-1, 1]
    """
    # fit scaler
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler.fit(data)
    
    # transform train data
    values = data.values
    data_scaled = scaler.transform(values)
    
    return scaler, data_scaled

def inverse_scale(scaler, X, value):
    # fix this code and use 
    """
    Method to reverse the data scaling the data to [-1, 1]
    and the forecasted value
    Inputs:
        scaler object
        scaled_data: scaled X value
        value: predicted y value 
    Output: 
        data with scaling removed
    """
    # fix the first 3 lines
    new_row = np.concatenate(X, value)#[x for x in X]+ [value]
    array = np.array(new_row)
    array = array.reshape(1, len(array))
    inverted = scaler.inverse_transform(array)
    return inverted[0, -1]

In [71]:
def fit_lstm(train_X, train_y,  epochs=20, batch_size=70):
    """
       Method that define  and fit the LSTM model
       Args:
           train_X
           train_y
           epochs
           batch_size
        Returns:
            trained model
    """
    predication_model = Sequential()
    predication_model.add(LSTM(100, input_shape=(train_X.shape[1], train_X.shape[2])))
    predication_model.add(Dropout(0.2))
    predication_model.add(Dense(1))
    predication_model.compile(loss = 'mse', optimizer='adam')

    # fit network
    history = predication_model.fit(train_X, train_y, epochs=epochs, batch_size=batch_size, validation_data=(test_X, test_y), verbose=2, shuffle=False)
    
    return predication_model

def forecast_lstm(model,test_X):
    """
       Method that define  and fit the LSTM model
       Args:
           test_data
           model
           batch_size
        Returns:
            forecasted time point
    """
    yhat = model.predict(test_X)
    return yhat

def model_performance(y, yhat):
    """
       Method that summarize the performance of the model 
       Args:

        Returns: rmse
            
    """
    return sqrt(mean_squared_error(y, yhat))

    

In [None]:
# convert time series to supervised

def series_to_supervised(data,n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.
    Arguments:
        data: Sequence of observations as a list or NumPy array.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    
    # input sequence
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
   
    # forecast sequence(t, t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    agg = pd.concat(cols, axis =1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg

def inverse_transformation(scaler, X, y, num_features):
    """
    TODO:
        This method inverse the reshaping, scaling, timeseries_to_supervised conversion
        to restore the data to the original shape
        Args:
            data
        Returns:
            restored data to preprocessing
    """
    X = reshape((X.shape[0], num_features))
    inv_y = np.concatenate((y, X[:, 1-num_features:]), axis=1)
    inv_yhat = scaler.inverse_transform(inv_yhat)
    inv_yhat = inv_yhat[:,0]
    return inv_yhat

In [29]:
# prepare train and split data
data = df_global_active_power

# convert the data into supervised 
data_time_series = timeseries_to_supervised(data)

Unnamed: 0_level_0,Global_active_power_previous,Global_active_power
dt,Unnamed: 1_level_1,Unnamed: 2_level_1
2006-12-16 17:25:00,4.216,5.36
2006-12-16 17:26:00,5.36,5.374
2006-12-16 17:27:00,5.374,5.388
2006-12-16 17:28:00,5.388,3.666
2006-12-16 17:29:00,3.666,3.52


In [38]:
# scale the data
scaler, scaled_data = scale(data_time_series)

# split the data into train and test data
n_train_time = 365*24
train = scaled_data[:n_train_time, :]
test = scaled_data[n_train_time:, :]
# split into input and output
train_X, train_y = train[:,:-1],train[:,-1]
test_X, test_y = test[:, :-1], test[:, -1]

# reshape into required format [samples, timestamp, features]
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))

In [70]:
test_X.shape

(2040448, 1, 1)

In [73]:
# predication 
yhat = model.predict(test_X)
inv_yhat = inverse_scale(scaler, test_X, yhat)
inv_y =  inverse_scale(scaler, test_X, test_y)

rmse = np.sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)

TypeError: only integer scalar arrays can be converted to a scalar index

## GAN :