In [1]:
# Importing relevant libraries
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
# Adjusting the view
pd.options.display.max_columns = 30

In [3]:
#Reading in the data set
energy = pd.read_csv('energydata_complete.csv')
energy.sample(10)

Unnamed: 0,date,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,T5,RH_5,T6,RH_6,T7,RH_7,T8,RH_8,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
12310,2016-04-06 04:40:00,50,0,21.963333,41.0,18.79,46.5,24.2,39.23,20.79,40.7,20.7,51.09,7.19,62.326667,21.29,35.79,22.4175,39.8425,20.89,45.663333,6.366667,753.866667,95.0,4.0,21.333333,5.6,10.494831,10.494831
16550,2016-05-05 15:20:00,60,0,23.79,28.893333,24.426667,25.633333,23.245,33.045,22.79,30.89,21.0,40.59,20.033333,1.0,22.89,26.79,23.29,34.4,20.79,35.5,19.3,760.266667,29.666667,3.333333,40.0,1.033333,25.98105,25.98105
17031,2016-05-08 23:30:00,60,0,24.566667,38.4,23.7,36.863333,26.29,37.9,24.785,36.0475,23.6,39.26,15.69,7.0,24.236,38.17,24.5,42.356,23.2,41.296,17.0,751.65,59.0,2.5,40.0,8.85,24.124543,24.124543
10090,2016-03-21 18:40:00,560,30,21.356667,36.09,19.566667,37.163333,21.633333,35.03,21.39,36.126667,19.1,41.126667,9.36,30.133333,19.29,32.333333,21.26,37.1,19.29,36.0,8.333333,760.066667,71.666667,2.0,36.333333,3.466667,24.368483,24.368483
6430,2016-02-25 08:40:00,60,0,20.2,38.29,18.2,39.76,20.5,38.9,19.76,36.633333,18.0,50.38,0.633333,85.226667,19.1,34.465714,20.73,41.066667,18.066667,40.1,0.8,756.3,91.666667,1.0,57.666667,-0.433333,0.513427,0.513427
3238,2016-02-03 04:40:00,60,0,21.1,43.79,20.5,42.4,21.963333,45.9,20.633333,45.433333,19.5,62.545,3.863333,96.06,18.5,41.02,20.89,52.645,18.2,49.29,4.7,757.766667,89.666667,4.333333,32.666667,3.166667,47.178251,47.178251
13438,2016-04-14 00:40:00,60,0,21.633333,41.933333,18.76,45.79,23.666667,39.466667,21.7,39.2,20.6,47.09,3.845,52.5,20.89,35.856,23.0,45.5,20.39,45.9,3.966667,753.6,95.666667,0.333333,4.0,3.366667,34.688141,34.688141
5471,2016-02-18 16:50:00,80,0,20.0,33.9,18.39,34.826667,19.736,36.218,18.7,33.79,17.35,42.856,5.045,59.395,17.7,28.39,19.421429,35.647143,17.6,35.29,3.1,756.183333,79.5,2.166667,27.833333,-0.2,45.117356,45.117356
7810,2016-03-05 22:40:00,80,20,19.89,37.566667,17.6,39.826667,20.1,36.79,18.0,36.9,19.1,63.4,3.7,75.19,18.29,31.1,21.1,35.9,18.0,36.0,3.133333,744.366667,94.333333,3.666667,25.0,2.266667,43.106758,43.106758
2527,2016-01-29 06:10:00,40,0,18.23,43.03,17.29,43.79,19.133333,42.29,17.1,43.5,16.79,50.638889,3.1175,94.945,17.29,38.893889,17.39,45.09,16.39,43.045,4.35,764.116667,82.0,7.666667,40.0,1.55,44.8582,44.8582


## Understanding the relationship between in-house and outdoor temperature 

In [4]:
# Dropping irrelevant columns from our model

clean_energy = energy.drop(['date', 'lights'], axis=1).copy()

In [5]:
# Predicting the value of T6 from T2

t2 = clean_energy['T2'].values
t2 = t2.reshape(-1, 1)
t6 = clean_energy['T6'].values

from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(t2, t6)
pred_t6 = reg.predict(t2)

In [6]:
# Calculating the coefficient of determination for our model

from sklearn.metrics import r2_score 
r2 = r2_score(t6, pred_t6)
round(r2, 2)

0.64

## Training the model

In [7]:
# Scaling the data set 
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaled_energy = pd.DataFrame(scaler.fit_transform(clean_energy), columns = clean_energy.columns)

In [8]:
x = scaled_energy.drop(['Appliances'], axis=1)
y = scaled_energy['Appliances']

In [9]:
# Training the model
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(x_train, y_train)
y_pred = regressor.predict(x_test)

In [10]:
# Finding the mean absolute error for our model

from sklearn.metrics import mean_absolute_error 
mae = mean_absolute_error(y_test, y_pred) 
round(mae, 2)

0.05

In [11]:
# Finding the Residual Sum of Squares

rss = np.sum(np.square(y_test - y_pred))
round(rss, 2)

45.35

In [12]:
# Finding the RMSE for the Linear Model

from sklearn.metrics import mean_squared_error 
rmse = np.sqrt(mean_squared_error(y_test, y_pred)) 
round(rmse, 3)

0.088

In [13]:
# Finding the coefficient of determination

from sklearn.metrics import r2_score 
energy_r2_score = r2_score(y_test, y_pred) 
round(energy_r2_score, 2)

0.15

In [14]:
def get_weights_df(model, feat, col_name):
    #this function returns the weight of every feature 
    weights = pd.Series(model.coef_, feat.columns).sort_values() 
    weights_df = pd.DataFrame(weights).reset_index() 
    weights_df.columns = ['Features', col_name] 
    weights_df[col_name].round(3) 
    return weights_df 

In [15]:
# Getting the weights of our linear model
linear_model_weights = get_weights_df(regressor, x_train, 'Linear_Model_Weight')
linear_model_weights.sort_values(by='Linear_Model_Weight')

Unnamed: 0,Features,Linear_Model_Weight
0,RH_2,-0.456698
1,T_out,-0.32186
2,T2,-0.236178
3,T9,-0.189941
4,RH_8,-0.157595
5,RH_out,-0.077671
6,RH_7,-0.044614
7,RH_9,-0.0398
8,T5,-0.015657
9,T1,-0.003281


In [16]:
# Training our Ridge Regression model

from sklearn.linear_model import Ridge 
ridge_reg = Ridge(alpha= 0.4) 
ridge_reg.fit(x_train, y_train)
predicted_y = ridge_reg.predict(x_test)

In [17]:
# Calculating the RMSE value for the Ridge Regression model

ridge_rmse = np.sqrt(mean_squared_error(y_test, predicted_y)) 
round(ridge_rmse, 3)

0.088

In [18]:
# Deriving the weights for our lasso regression model

from sklearn.linear_model import Lasso
lasso_reg = Lasso(alpha= 0.001 )
lasso_reg.fit(x_train, y_train)

linear_model_weights = get_weights_df(lasso_reg, x_train, 'Lasso_Weight')
linear_model_weights.sort_values(by='Lasso_Weight')

Unnamed: 0,Features,Lasso_Weight
0,RH_out,-0.049557
1,RH_8,-0.00011
23,rv2,-0.0
22,RH_6,-0.0
21,T2,0.0
20,RH_2,-0.0
19,T3,0.0
18,RH_3,0.0
17,T4,-0.0
16,RH_4,0.0


In [19]:
# Calculating the RMSE for our lasso model

lasso_pred_y = lasso_reg.predict(x_test)
lasso_rmse = np.sqrt(mean_squared_error(y_test, lasso_pred_y)) 
round(lasso_rmse, 3)

0.094