<h2>Importing Libraries</h2>

In [1]:
#importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.linear_model import Ridge,Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

In [2]:
#Loading dataset
df = pd.read_csv('data/energydata_complete.csv')

In [3]:
df = df.drop(['date', 'lights'], axis=1)
scaler = MinMaxScaler()
n_df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)

<h1>Solutions</h1>

<h3>Question 12 - 16</h3>

In [21]:
#important functions

def predict(model,xtest):
    #this function returns the predictions of a fitted model
    predictions = model.predict(xtest)
    return predictions

def evaluate(ytest, predictions):
    #this returns an evaluation table
    mse = mean_squared_error(ytest, predictions).round(2)
    mae = mean_absolute_error(ytest,predictions).round(2)
    rss = np.sum(np.square(ytest - predictions)).round(2)
    r_score = r2_score(ytest, predictions).round(2)
    rmse = np.sqrt(mse).round(2)
    print(f"MAE is: {mae}")
    print(f"MSE is: {mse}")
    print(f"RSS is: {rss}")
    print(f"R2_SCORE is: {r_score}")
    print(f"RMSE is: {rmse}")
    metrics_df = pd.DataFrame({'MSE':mse, 'MAE':mae, 'RSS':rss, 'R2_SCORE':r_score, 'RMSE':rmse}, index = range(1)) 
    return metrics_df

def get_weights(model, feat, col_name):
    #this function returns the weight of every feature
    weights = pd.Series(model.coef_, feat.columns).sort_values()
    weights_df = pd.DataFrame(weights).reset_index()
    weights_df.columns = ['Features', col_name]
    weights_df[col_name].round(3)
    return weights_df

In [5]:
#defining the dependent and independent variable
X = df[['T2']]
Y = df[['T6']]

#splitting my dataset
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.3, random_state=42)

In [22]:
#Defining my model
Lr = LinearRegression()

#getting predictions/metrics
linear_model = Lr.fit(xtrain,ytrain)
linear_pred = predict(linear_model,xtest)
linear_metrics = evaluate(ytest, linear_pred)
linear_metrics

MAE is: 2.82
MSE is: 13.18
RSS is: T6   78033.970000
dtype: float64
R2_SCORE is: 0.64
RMSE is: 3.63


Unnamed: 0,MSE,MAE,RSS,R2_SCORE,RMSE
0,13.18,2.82,,0.64,3.63


In [8]:
#rounding my coefficient to two decimal places
linear_model.coef_.round(2)

array([[2.23]])

<h4>Question 17 - 20</h4>

In [10]:
#Defining my independent and dependent variable

X2 = df.drop('Appliances', axis=1)
Y2 = df['Appliances']

#splitting my dataset
xtrain2, xtest2, ytrain2, ytest2 = train_test_split(X2, Y2, test_size=0.3, random_state=42)

In [11]:
#Defining my model
linear_model2 = Lr.fit(xtrain2, ytrain2)

In [12]:
#getting my weights
linear_weights_df = get_weights(linear_model2, xtrain2, 'linear_weight')
linear_weights_df

Unnamed: 0,Features,linear_weight
0,T9,-21.148452
1,T2,-18.37003
2,RH_2,-13.740748
3,T_out,-11.073628
4,RH_8,-5.778861
5,RH_9,-1.76268
6,RH_7,-1.692787
7,T5,-1.600843
8,RH_out,-1.093521
9,T1,-0.370721


In [13]:
#getting my metrics
linear_pred2 = predict(linear_model2,xtest2)
linear_metrics2 = evaluate(ytest2, linear_pred2)
pd.set_option('float_format', '{:f}'.format)


MAE is: 53.64
MSE is: 8768.54
RSS is: 51918501.21
R2_SCORE is: 0.15
RMSE is: 93.64


In [14]:
#defining my ridge model, fitting it and getting my metrics
RR = Ridge(alpha=0.4)
Ridge_model = RR.fit(xtrain2,ytrain2)
Ridge_pred = predict(Ridge_model,xtest2)
Ridge_metrics = evaluate(ytest2, Ridge_pred)
ridge_weights_df = get_weights(Ridge_model, xtrain2, 'Ridge_weight')
ridge_weights_df

MAE is: 53.64
MSE is: 8768.54
RSS is: 51918548.95
R2_SCORE is: 0.15
RMSE is: 93.64


Unnamed: 0,Features,Ridge_weight
0,T9,-21.144194
1,T2,-18.365719
2,RH_2,-13.738775
3,T_out,-11.070412
4,RH_8,-5.778774
5,RH_9,-1.762822
6,RH_7,-1.692745
7,T5,-1.602047
8,RH_out,-1.093058
9,T1,-0.371657


In [16]:
#defining my lasso model, fitting it and getting my metrics
lasso = Lasso(alpha=0.001)
lasso_model = lasso.fit(xtrain2,ytrain2)
lasso_pred = predict(lasso_model,xtest2)
lasso_metrics = evaluate(ytest2, lasso_pred)
lasso_weights_df = get_weights(lasso_model, xtrain2, 'lasso_weight')
lasso_weights_df

MAE is: 53.64
MSE is: 8768.54
RSS is: 51918519.82
R2_SCORE is: 0.15
RMSE is: 93.64


  positive)


Unnamed: 0,Features,lasso_weight
0,T9,-21.138361
1,T2,-18.363178
2,RH_2,-13.737231
3,T_out,-11.059566
4,RH_8,-5.778977
5,RH_9,-1.762338
6,RH_7,-1.691936
7,T5,-1.601612
8,RH_out,-1.091186
9,T1,-0.366515


In [17]:
#finding the rmse of my lasso
mse = mean_squared_error(ytest2, lasso_pred).round(3)
rmse = np.sqrt(mse).round(3)
print(f"RMSE is: {rmse}")


RMSE is: 93.64
