In [121]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from datetime import date
import calendar
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.model_selection import train_test_split
import operator
from sklearn.ensemble import GradientBoostingRegressor  
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import LeaveOneLabelOut
from sklearn import cross_validation 
from math import sqrt

In [122]:
df=pd.read_csv('../Data/energydata_complete.csv',index_col='date',header=0)
df['datetime'] = df.index
df['datetime'] = pd.to_datetime(df['datetime'])
df['day_of_week']=df['datetime'].dt.strftime('%w').astype('int64')
df['month']=df['datetime'].dt.strftime('%m').astype('int64')
df['hour']=df['datetime'].dt.hour
df['day_number']=df['datetime'].dt.day
df['Week_no'] = df['datetime'].dt.strftime('%W').astype('int64')
df['min'] = df['datetime'].dt.minute
df['day_of_week']=df['day_of_week'].apply(str)
df['hour']=df['hour'].apply(str)
df['min']=df['min'].apply(str)
df['period']=df[['day_of_week','hour','min']].apply(lambda x:''.join(x),axis=1)


In [123]:
phase = df.groupby(['date','month','day_of_week','hour','day_number','min','datetime','period']).mean()
phase["Total"]=phase["Appliances"] + phase["lights"]
phase = phase.reset_index()
phase.set_index('date', inplace=True)
phase['datetime'] = df.index
phase['datetime'] = pd.to_datetime(phase['datetime'])
phase['day_of_week']=phase['day_of_week'].apply(int)
phase['hour']=phase['hour'].apply(int)
phase['min']=phase['min'].apply(int)
phase['period']=phase['period'].apply(int)
phase['Press_mm_hg'] = np.log(phase['Press_mm_hg'])
phase['Visibility'] = np.log(phase['Visibility'])

In [124]:
phase1 = phase
phase1.drop(['datetime','Total','rv1','rv2'],axis=1,inplace=True)
y = phase1["Appliances"]
X=phase1.drop("Appliances",axis=1)

# 1.1 Features Selected Using EDA

## 1.1.1 RMSE,MAPE,R2,MAE calculation

In [125]:
rmse_dict = {}    
def rmse(correct,estimated):
    rmse_val = np.sqrt(mean_squared_error(correct,estimated)) 
    return rmse_val

def calc_error_metric(name,modelname, model, X_train, y_train, X_test, y_test):
    
    y_train_predicted = model.predict(X_train)
    y_test_predicted = model.predict(X_test)
        
    #MAE, RMS, MAPE, R2
    
    r2_train = r2_score(y_train, y_train_predicted)
    r2_test = r2_score(y_test, y_test_predicted)
    
    rms_train = rmse(y_train, y_train_predicted)
    rms_test = rmse(y_test, y_test_predicted)
        
    mae_train = mean_absolute_error(y_train, y_train_predicted)
    mae_test = mean_absolute_error(y_test, y_test_predicted)
        
    mape_train = np.mean(np.abs((y_train - y_train_predicted) / y_train)) * 100
    mape_test = np.mean(np.abs((y_test - y_test_predicted) / y_test)) * 100
    
    rmse_dict[modelname] = rms_test
        
    df_local = pd.DataFrame({'Model':[name],
                            'ModelType':[modelname],
                            'r2_train': [r2_train],
                            'r2_test': [r2_test],
                            'rms_train':[rms_train], 
                            'rms_test': [rms_test],
                            'mae_train': [mae_train],
                            'mae_test': [mae_test],
                            'mape_train':[mape_train],
                            'mape_test':[mape_test]})

    error_metric = pd.concat([df_local])
 
    return error_metric

## 1.1.2 RandomForest,LinearRegression,NeuralNetwork 

In [126]:
from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor

metric = pd.DataFrame({'Model':[],
                            'ModelType':[],
                            'r2_train': [],
                            'r2_test': [],
                            'rms_train':[], 
                            'rms_test': [],
                            'mae_train': [],
                            'mae_test': [],
                            'mape_train':[],
                            'mape_test':[]})


def models(name,X_train, y_train, X_test, y_test):
    global metric
    n = name

    # Linear Regression
    lm = linear_model.LinearRegression()
    modellinear = lm.fit(X_train,y_train)
    linear = calc_error_metric(n ,modellinear,lm, X_train, y_train, X_test, y_test)
    print("LINEAR")
    print(linear)
    
    # Random Forest
    rf = RandomForestRegressor(n_estimators =500, random_state = 1)
    modelrandomforest = rf.fit(X_train,y_train)
    randomforest = calc_error_metric(name,modelrandomforest,rf, X_train, y_train, X_test, y_test)

    # Neural Network
    nn = MLPRegressor(activation='relu',learning_rate='adaptive',alpha=0.55)
    modelneuralnetwork = nn.fit(X_train, y_train)
    neuralnetwork = calc_error_metric(name,modelneuralnetwork, nn, X_train, y_train, X_test, y_test)
    
    
    metric = pd.concat([metric,linear,randomforest,neuralnetwork])
    return metric

## 1.1.3 Training Testing split

#### 1.1.3.1  Selected Features from Exploratory Data Analysis

In [127]:
X = phase[['lights','hour','T1','RH_1','RH_5','RH_6','RH_8','T_out','Windspeed','Visibility','Tdewpoint']]
y = phase['Appliances']

In [128]:
# Test and Train Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

In [129]:
teston = models('EDA',X_train, y_train, X_test, y_test)

LINEAR
  Model                                          ModelType   mae_test  \
0   EDA  LinearRegression(copy_X=True, fit_intercept=Tr...  54.738137   

   mae_train  mape_test  mape_train   r2_test  r2_train   rms_test  rms_train  
0  54.655463  64.403084   62.843287  0.108627  0.109353  95.830482   97.16432  


In [130]:
teston

Unnamed: 0,Model,ModelType,mae_test,mae_train,mape_test,mape_train,r2_test,r2_train,rms_test,rms_train
0,EDA,"LinearRegression(copy_X=True, fit_intercept=Tr...",54.738137,54.655463,64.403084,62.843287,0.108627,0.109353,95.830482,97.16432
0,EDA,"(DecisionTreeRegressor(criterion='mse', max_de...",33.89672,12.718226,34.569303,12.628072,0.529036,0.933345,69.657464,26.580979
0,EDA,"MLPRegressor(activation='relu', alpha=0.55, ba...",55.15015,54.747871,66.840664,64.481153,0.166364,0.173413,92.674915,93.604872


#### 1.1.3.2 Selecting Features After Scaling

In [12]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit only to the training data
scaler.fit(X_train)
# Now apply the transformations to the data:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
teston = models('After Scaling',X_train, y_train, X_test, y_test)

LINEAR
           Model                                          ModelType  \
0  After Scaling  LinearRegression(copy_X=True, fit_intercept=Tr...   

    mae_test  mae_train  mape_test  mape_train   r2_test  r2_train   rms_test  \
0  54.738137  54.655463  64.403084   62.843287  0.108627  0.109353  95.830482   

   rms_train  
0   97.16432  




In [14]:
teston

Unnamed: 0,Model,ModelType,mae_test,mae_train,mape_test,mape_train,r2_test,r2_train,rms_test,rms_train
0,EDA,"LinearRegression(copy_X=True, fit_intercept=Tr...",54.738137,54.655463,64.403084,62.843287,0.108627,0.109353,95.830482,97.16432
0,EDA,"(DecisionTreeRegressor(criterion='mse', max_de...",33.89672,12.718226,34.569303,12.628072,0.529036,0.933345,69.657464,26.580979
0,EDA,"MLPRegressor(activation='relu', alpha=0.55, ba...",53.791953,53.608378,63.918038,62.004389,0.154167,0.15705,93.350404,94.526821
0,After Scaling,"LinearRegression(copy_X=True, fit_intercept=Tr...",54.738137,54.655463,64.403084,62.843287,0.108627,0.109353,95.830482,97.16432
0,After Scaling,"(DecisionTreeRegressor(criterion='mse', max_de...",33.91264,12.717627,34.588777,12.635847,0.528682,0.933351,69.683667,26.579837
0,After Scaling,"MLPRegressor(activation='relu', alpha=0.55, ba...",51.867077,51.153189,59.559731,57.088052,0.191979,0.21079,91.239981,91.464063


## 1.1.4 Best Model

In [15]:
best_model =  min(rmse_dict.items(),key=operator.itemgetter(1))[0]
print('Best Model is ', best_model)

Best Model is  RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=1,
           oob_score=False, random_state=1, verbose=0, warm_start=False)


# Feature Selection

# 1.2 Using Boruto Python Package

In [7]:
X_boruta = phase1
X_boruta = X_boruta.drop(['Appliances'],axis=1)
X_boruta = X_boruta.values

In [8]:
y_boruta = phase1
y_boruta = y_boruta['Appliances']
y_boruta = y.values

In [None]:
import pandas as pd
#from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from boruta import BorutaPy

# NOTE BorutaPy accepts numpy arrays only, hence the .values attribute
y_boruta = y_boruta.ravel()

# define random forest classifier, with utilising all cores and
# sampling in proportion to y labels
rf1 = RandomForestRegressor(n_jobs=-1, max_depth=5)

# define Boruta feature selection method
feat_selector = BorutaPy(rf1, n_estimators='auto', verbose=5, random_state=1)

# find all relevant features - 5 features should be selected
feat_selector.fit(X_boruta,y_boruta)

# check selected features - first 5 features are selected
feat_selector.support_

# check ranking of features
feat_selector.ranking_

# call transform() on X to filter it down to selected features
#X_filtered = feat_selector.transform(X)

Iteration: 	1 / 100
Confirmed: 	0
Tentative: 	32
Rejected: 	0
Iteration: 	2 / 100
Confirmed: 	0
Tentative: 	32
Rejected: 	0
Iteration: 	3 / 100
Confirmed: 	0
Tentative: 	32
Rejected: 	0
Iteration: 	4 / 100
Confirmed: 	0
Tentative: 	32
Rejected: 	0
Iteration: 	5 / 100
Confirmed: 	0
Tentative: 	32
Rejected: 	0
Iteration: 	6 / 100
Confirmed: 	0
Tentative: 	32
Rejected: 	0
Iteration: 	7 / 100
Confirmed: 	0
Tentative: 	32
Rejected: 	0
Iteration: 	8 / 100
Confirmed: 	11
Tentative: 	10
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	9 / 100
Confirmed: 	11
Tentative: 	10
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	10 / 100
Confirmed: 	11
Tentative: 	10
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	11 / 100
Confirmed: 	11
Tentative: 	10
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	12 / 100
Confirmed: 	12
Tentative: 	9
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	13 / 100
Confirmed: 	12
Tentative: 	9
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	14 / 100
Confirmed: 	12
Tentative: 	9
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	15 / 100
Confirmed: 	12
Tentative: 	9
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	16 / 100
Confirmed: 	13
Tentative: 	8
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	17 / 100
Confirmed: 	13
Tentative: 	8
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	18 / 100
Confirmed: 	13
Tentative: 	8
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	19 / 100
Confirmed: 	13
Tentative: 	7
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	20 / 100
Confirmed: 	13
Tentative: 	7
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	21 / 100
Confirmed: 	13
Tentative: 	7
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	22 / 100
Confirmed: 	13
Tentative: 	7
Rejected: 	12


In [None]:
print(feat_selector.ranking_)

In [None]:
phase.dtypes

In [None]:
5->lights
4->T1
1->RH_2
8->RH_3
2->RH_4
5->T5
7->Press_mm_hg
3->Visibility

### 1.2.1 Feature Selected 

#### 12.1.1 Training Testing Spliting

In [17]:
X = phase1[['lights','T1','RH_2','RH_3','RH_4','T5','Press_mm_hg','Visibility']]
y = phase1['Appliances']

In [18]:
# Test and Train Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [19]:
teston = models('Boruta',X_train, y_train, X_test, y_test)

LINEAR
    Model                                          ModelType  mae_test  \
0  Boruta  LinearRegression(copy_X=True, fit_intercept=Tr...  57.71063   

   mae_train  mape_test  mape_train   r2_test  r2_train   rms_test   rms_train  
0  57.757545  70.214249    68.73021  0.058925  0.054655  98.465907  100.103478  


In [20]:
teston

Unnamed: 0,Model,ModelType,mae_test,mae_train,mape_test,mape_train,r2_test,r2_train,rms_test,rms_train
0,EDA,"LinearRegression(copy_X=True, fit_intercept=Tr...",54.738137,54.655463,64.403084,62.843287,0.108627,0.109353,95.830482,97.16432
0,EDA,"(DecisionTreeRegressor(criterion='mse', max_de...",33.89672,12.718226,34.569303,12.628072,0.529036,0.933345,69.657464,26.580979
0,EDA,"MLPRegressor(activation='relu', alpha=0.55, ba...",53.791953,53.608378,63.918038,62.004389,0.154167,0.15705,93.350404,94.526821
0,After Scaling,"LinearRegression(copy_X=True, fit_intercept=Tr...",54.738137,54.655463,64.403084,62.843287,0.108627,0.109353,95.830482,97.16432
0,After Scaling,"(DecisionTreeRegressor(criterion='mse', max_de...",33.91264,12.717627,34.588777,12.635847,0.528682,0.933351,69.683667,26.579837
0,After Scaling,"MLPRegressor(activation='relu', alpha=0.55, ba...",51.867077,51.153189,59.559731,57.088052,0.191979,0.21079,91.239981,91.464063
0,Boruta,"LinearRegression(copy_X=True, fit_intercept=Tr...",57.71063,57.757545,70.214249,68.73021,0.058925,0.054655,98.465907,100.103478
0,Boruta,"(DecisionTreeRegressor(criterion='mse', max_de...",34.833664,13.288502,35.772812,13.517528,0.497772,0.927404,71.932358,27.740307
0,Boruta,"MLPRegressor(activation='relu', alpha=0.55, ba...",58.130616,58.178617,71.304127,69.786942,0.064542,0.060126,98.171656,99.813398


In [21]:
#### After scaling boruta

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit only to the training data
scaler.fit(X_train)
# Now apply the transformations to the data:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

teston = models('After Scaling_boruta',X_train, y_train, X_test, y_test)


LINEAR
                  Model                                          ModelType  \
0  After Scaling_boruta  LinearRegression(copy_X=True, fit_intercept=Tr...   

   mae_test  mae_train  mape_test  mape_train   r2_test  r2_train   rms_test  \
0  57.71063  57.757545  70.214249    68.73021  0.058925  0.054655  98.465907   

    rms_train  
0  100.103478  




In [22]:
teston

Unnamed: 0,Model,ModelType,mae_test,mae_train,mape_test,mape_train,r2_test,r2_train,rms_test,rms_train
0,EDA,"LinearRegression(copy_X=True, fit_intercept=Tr...",54.738137,54.655463,64.403084,62.843287,0.108627,0.109353,95.830482,97.16432
0,EDA,"(DecisionTreeRegressor(criterion='mse', max_de...",33.89672,12.718226,34.569303,12.628072,0.529036,0.933345,69.657464,26.580979
0,EDA,"MLPRegressor(activation='relu', alpha=0.55, ba...",53.791953,53.608378,63.918038,62.004389,0.154167,0.15705,93.350404,94.526821
0,After Scaling,"LinearRegression(copy_X=True, fit_intercept=Tr...",54.738137,54.655463,64.403084,62.843287,0.108627,0.109353,95.830482,97.16432
0,After Scaling,"(DecisionTreeRegressor(criterion='mse', max_de...",33.91264,12.717627,34.588777,12.635847,0.528682,0.933351,69.683667,26.579837
0,After Scaling,"MLPRegressor(activation='relu', alpha=0.55, ba...",51.867077,51.153189,59.559731,57.088052,0.191979,0.21079,91.239981,91.464063
0,Boruta,"LinearRegression(copy_X=True, fit_intercept=Tr...",57.71063,57.757545,70.214249,68.73021,0.058925,0.054655,98.465907,100.103478
0,Boruta,"(DecisionTreeRegressor(criterion='mse', max_de...",34.833664,13.288502,35.772812,13.517528,0.497772,0.927404,71.932358,27.740307
0,Boruta,"MLPRegressor(activation='relu', alpha=0.55, ba...",58.130616,58.178617,71.304127,69.786942,0.064542,0.060126,98.171656,99.813398
0,After Scaling_boruta,"LinearRegression(copy_X=True, fit_intercept=Tr...",57.71063,57.757545,70.214249,68.73021,0.058925,0.054655,98.465907,100.103478


## 1.3 Using featuretools
### It is a framework to perform automated feature engineering. It excels at transforming transactional and relational datasets into feature matrices for machine learning.

In [131]:
import featuretools as ft

In [132]:
phase_featuretools = df.groupby(['date','month','day_of_week','hour','day_number','min','period']).mean()
phase_featuretools = phase_featuretools.reset_index()
phase_featuretools['day_of_week']=phase_featuretools['day_of_week'].apply(int)
phase_featuretools['hour']=phase_featuretools['hour'].apply(int)
phase_featuretools['min']=phase_featuretools['min'].apply(int)
phase_featuretools['period']=phase_featuretools['period'].apply(int)
phase_featuretools['date'] = pd.to_datetime(phase_featuretools['date'])
phase_featuretools['Press_mm_hg'] = np.log(phase_featuretools['Press_mm_hg'])
phase_featuretools['Visibility'] = np.log(phase_featuretools['Visibility'])
phase_featuretools = phase_featuretools.drop(['rv1','rv2'],axis=1)

In [133]:
y_featuretools = phase_featuretools[['date','Appliances']]
X_featuretools = phase_featuretools.drop(['Appliances'],axis=1)
entities ={"appliances" :(y_featuretools,"date"),
          "rest" :(X_featuretools,"date")}
relationships = [("appliances","date","rest","date")]

In [135]:
feature_matrix_app,features_defs = ft.dfs(entities=entities,relationships=relationships,target_entity="appliances")

In [136]:
feature_matrix_app1 = feature_matrix_app

In [137]:
feature_matrix_app1.dtypes

Appliances                  int64
SUM(rest.month)             int64
SUM(rest.day_of_week)       int64
SUM(rest.hour)              int64
SUM(rest.day_number)        int64
SUM(rest.min)               int64
SUM(rest.period)            int64
SUM(rest.lights)            int64
SUM(rest.T1)              float64
SUM(rest.RH_1)            float64
SUM(rest.T2)              float64
SUM(rest.RH_2)            float64
SUM(rest.T3)              float64
SUM(rest.RH_3)            float64
SUM(rest.T4)              float64
SUM(rest.RH_4)            float64
SUM(rest.T5)              float64
SUM(rest.RH_5)            float64
SUM(rest.T6)              float64
SUM(rest.RH_6)            float64
SUM(rest.T7)              float64
SUM(rest.RH_7)            float64
SUM(rest.T8)              float64
SUM(rest.RH_8)            float64
SUM(rest.T9)              float64
SUM(rest.RH_9)            float64
SUM(rest.T_out)           float64
SUM(rest.Press_mm_hg)     float64
SUM(rest.RH_out)          float64
SUM(rest.Winds

#### 1.3.1 Test Train Split

In [None]:
y_featuretools = y_featuretools.drop('date',axis=1)
y_featuretools = y_featuretools.values

In [None]:
X_train, X_test, y_train, y_test = train_test_split(feature_matrix_app1, y_featuretools, test_size=0.30, random_state=42)

In [None]:
teston = models('featuretools',X_train, y_train, X_test, y_test)

In [None]:
teston

#### 1.3.2 After scaling featuretools

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit only to the training data
scaler.fit(X_train)
# Now apply the transformations to the data:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
teston = models('After Scaling_featuretools',X_train, y_train, X_test, y_test)

In [None]:
teston

## 1.4 Using tsfresh

In [10]:
phase_tsfresh = phase_featuretools

In [11]:
from tsfresh import extract_features

  from pandas.core import datetools


In [12]:
X_tsfresh = phase_tsfresh.drop('Appliances',axis=1)
y_tsfresh = phase_tsfresh['Appliances']

In [13]:
from tsfresh import extract_features
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute




In [14]:
p = phase_featuretools.drop('Appliances',axis=1)

In [15]:
# For Extracing Minimal Features
from tsfresh.feature_extraction import MinimalFCParameters
extracted_features = extract_features(p, column_id="date", column_sort="period",show_warnings=False, default_fc_parameters=MinimalFCParameters())

Feature Extraction: 100%|██████████| 20/20 [00:56<00:00,  2.84s/it]


In [16]:
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute
# for extracting relevant features
impute(extracted_features)
features_filtered = select_features(extracted_features, y)





#### 1.4.1 Test and Train Split

In [17]:
X_train, X_test, y_train, y_test = train_test_split(features_filtered, y, test_size=0.30, random_state=42)

In [18]:
tsfresh = models('tsfresh',X_train, y_train, X_test, y_test)

LINEAR
     Model                                          ModelType  mae_test  \
0  tsfresh  LinearRegression(copy_X=True, fit_intercept=Tr...  53.02121   

   mae_train  mape_test  mape_train   r2_test  r2_train   rms_test  rms_train  
0  52.843056  62.392016   60.635439  0.168859  0.171669  92.536131  93.703534  


In [19]:
tsfresh 

Unnamed: 0,Model,ModelType,mae_test,mae_train,mape_test,mape_train,r2_test,r2_train,rms_test,rms_train
0,tsfresh,"LinearRegression(copy_X=True, fit_intercept=Tr...",53.02121,52.843056,62.392016,60.635439,0.168859,0.171669,92.536131,93.703534
0,tsfresh,"(DecisionTreeRegressor(criterion='mse', max_de...",32.592461,12.334478,32.352695,12.024886,0.546053,0.934848,68.387492,26.279544
0,tsfresh,"MLPRegressor(activation='relu', alpha=0.55, ba...",48.811336,48.148924,51.915857,49.46015,0.1812,0.193453,91.846514,92.4632


#### 1.4.2 After scaling tsfresh

In [20]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit only to the training data
scaler.fit(X_train)
# Now apply the transformations to the data:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [21]:
teston = models('After Scaling_tsfresh',X_train, y_train, X_test, y_test)

LINEAR
                   Model                                          ModelType  \
0  After Scaling_tsfresh  LinearRegression(copy_X=True, fit_intercept=Tr...   

    mae_test  mae_train  mape_test  mape_train   r2_test  r2_train   rms_test  \
0  52.968279  52.847314  62.388528   60.725148  0.169117  0.171448  92.521727   

   rms_train  
0  93.716054  


In [22]:
teston

Unnamed: 0,Model,ModelType,mae_test,mae_train,mape_test,mape_train,r2_test,r2_train,rms_test,rms_train
0,tsfresh,"LinearRegression(copy_X=True, fit_intercept=Tr...",53.02121,52.843056,62.392016,60.635439,0.168859,0.171669,92.536131,93.703534
0,tsfresh,"(DecisionTreeRegressor(criterion='mse', max_de...",32.592461,12.334478,32.352695,12.024886,0.546053,0.934848,68.387492,26.279544
0,tsfresh,"MLPRegressor(activation='relu', alpha=0.55, ba...",48.811336,48.148924,51.915857,49.46015,0.1812,0.193453,91.846514,92.4632
0,After Scaling_tsfresh,"LinearRegression(copy_X=True, fit_intercept=Tr...",52.968279,52.847314,62.388528,60.725148,0.169117,0.171448,92.521727,93.716054
0,After Scaling_tsfresh,"(DecisionTreeRegressor(criterion='mse', max_de...",32.630809,12.328393,32.378313,12.030228,0.545768,0.93494,68.408937,26.261072
0,After Scaling_tsfresh,"MLPRegressor(activation='relu', alpha=0.55, ba...",48.645106,47.000525,55.551459,52.965478,0.30107,0.361856,84.857588,82.245725


## 1.5 tpot 

### 1.5.1 training testing spliting

In [8]:
X_tpot = phase.drop(['Appliances'],axis=1)
y_tpot = phase['Appliances']

In [9]:
# Test and Train Split
X_train, X_test, y_train, y_test = train_test_split(X_tpot, y_tpot, test_size=0.3, random_state=42)

### 1.5.2 pipeline

In [10]:
from tpot import TPOTRegressor
pipeline_optimizer = TPOTRegressor()

  return f(*args, **kwds)


In [12]:
pipeline_optimizer = TPOTRegressor(generations=10, population_size=20, cv = 3,
                                    random_state=42, verbosity=2)

In [13]:
pipeline_optimizer.fit(X_train,y_train)

Optimization Progress:  18%|█▊        | 40/220 [04:58<15:25,  5.14s/pipeline] 

Generation 1 - Current best internal CV score: -5462.105355916628


Optimization Progress:  27%|██▋       | 60/220 [16:49<31:25, 11.78s/pipeline]  

Generation 2 - Current best internal CV score: -5462.105355916628


Optimization Progress:  38%|███▊      | 84/220 [40:09<47:38, 21.02s/pipeline]   

Generation 3 - Current best internal CV score: -5462.105355916628


Optimization Progress:  47%|████▋     | 104/220 [44:18<24:14, 12.54s/pipeline] 

Generation 4 - Current best internal CV score: -5462.105355916628


Optimization Progress:  56%|█████▋    | 124/220 [49:54<33:31, 20.95s/pipeline]

Generation 5 - Current best internal CV score: -5462.105355916628


Optimization Progress:  65%|██████▌   | 144/220 [56:14<27:25, 21.65s/pipeline]  

Generation 6 - Current best internal CV score: -5447.387656797821


Optimization Progress:  75%|███████▍  | 164/220 [1:02:53<18:57, 20.31s/pipeline]

Generation 7 - Current best internal CV score: -5447.313903685393


Optimization Progress:  84%|████████▎ | 184/220 [1:08:37<12:10, 20.30s/pipeline]

Generation 8 - Current best internal CV score: -5447.313903685393


Optimization Progress:  93%|█████████▎| 205/220 [1:19:28<19:18, 77.26s/pipeline] 

Generation 9 - Current best internal CV score: -5330.967706336275


                                                                                

Generation 10 - Current best internal CV score: -5330.967706336275

Best pipeline: ElasticNetCV(RandomForestRegressor(input_matrix, bootstrap=False, max_features=0.15, min_samples_leaf=1, min_samples_split=2, n_estimators=100), l1_ratio=0.95, tol=0.001)


TPOTRegressor(config_dict={'sklearn.linear_model.ElasticNetCV': {'l1_ratio': array([ 0.  ,  0.05,  0.1 ,  0.15,  0.2 ,  0.25,  0.3 ,  0.35,  0.4 ,
        0.45,  0.5 ,  0.55,  0.6 ,  0.65,  0.7 ,  0.75,  0.8 ,  0.85,
        0.9 ,  0.95,  1.  ]), 'tol': [1e-05, 0.0001, 0.001, 0.01, 0.1]}, 'sklearn.ensemble.ExtraT....45,
        0.5 ,  0.55,  0.6 ,  0.65,  0.7 ,  0.75,  0.8 ,  0.85,  0.9 ,
        0.95,  1.  ])}}}},
       crossover_rate=0.1, cv=3, disable_update_check=False,
       early_stop=None, generations=10, max_eval_time_mins=5,
       max_time_mins=None, memory=None, mutation_rate=0.9, n_jobs=1,
       offspring_size=20, periodic_checkpoint_folder=None,
       population_size=20, random_state=42, scoring=None, subsample=1.0,
       verbosity=2, warm_start=False)

In [14]:
print(pipeline_optimizer.score(X_test, y_test))

-4178.76640353
