# Importing important libraries necessary for the analysis

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Importing the dataset 

In [50]:
dataset = pd.read_csv('quiz dataset.csv')

# Removing the date and lights columns

In [31]:
dataset = dataset.drop(columns=['date', 'lights'])

Unnamed: 0,Appliances,T1,RH_1,T2,RH_2,T3,RH_3,T4,RH_4,T5,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,60,19.890000,47.596667,19.200000,44.790000,19.790000,44.730000,19.000000,45.566667,17.166667,...,17.033333,45.5300,6.600000,733.5,92.000000,7.000000,63.000000,5.300000,13.275433,13.275433
1,60,19.890000,46.693333,19.200000,44.722500,19.790000,44.790000,19.000000,45.992500,17.166667,...,17.066667,45.5600,6.483333,733.6,92.000000,6.666667,59.166667,5.200000,18.606195,18.606195
2,50,19.890000,46.300000,19.200000,44.626667,19.790000,44.933333,18.926667,45.890000,17.166667,...,17.000000,45.5000,6.366667,733.7,92.000000,6.333333,55.333333,5.100000,28.642668,28.642668
3,50,19.890000,46.066667,19.200000,44.590000,19.790000,45.000000,18.890000,45.723333,17.166667,...,17.000000,45.4000,6.250000,733.8,92.000000,6.000000,51.500000,5.000000,45.410389,45.410389
4,60,19.890000,46.333333,19.200000,44.530000,19.790000,45.000000,18.890000,45.530000,17.200000,...,17.000000,45.4000,6.133333,733.9,92.000000,5.666667,47.666667,4.900000,10.084097,10.084097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19730,100,25.566667,46.560000,25.890000,42.025714,27.200000,41.163333,24.700000,45.590000,23.200000,...,23.200000,46.7900,22.733333,755.2,55.666667,3.333333,23.666667,13.333333,43.096812,43.096812
19731,90,25.500000,46.500000,25.754000,42.080000,27.133333,41.223333,24.700000,45.590000,23.230000,...,23.200000,46.7900,22.600000,755.2,56.000000,3.500000,24.500000,13.300000,49.282940,49.282940
19732,270,25.500000,46.596667,25.628571,42.768571,27.050000,41.690000,24.700000,45.730000,23.230000,...,23.200000,46.7900,22.466667,755.2,56.333333,3.666667,25.333333,13.266667,29.199117,29.199117
19733,420,25.500000,46.990000,25.414000,43.036000,26.890000,41.290000,24.700000,45.790000,23.200000,...,23.200000,46.8175,22.333333,755.2,56.666667,3.833333,26.166667,13.233333,6.322784,6.322784


# Scaling the dataset due to variation in units

In [35]:
#importing the minmaxscaler class
from sklearn.preprocessing import MinMaxScaler

#creating a scaler object
scaler = MinMaxScaler()

#fitting the dataset to the scaler object
normalized_df = pd.DataFrame(scaler.fit_transform(dataset), columns=dataset.columns)

# Fitting a linear model on the relationship between the temperature in the living room in Celsius(x= T2) and the temperature outside the building(y= T6)

In [36]:
#defining the independent variable
features1 = normalized_df['T2']

features = np.array(features1).reshape(-1,1)


#defining the target/dependent variable
appliances_target = normalized_df['T6']

#splitting training and testing set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(features, appliances_target, test_size=0.3, random_state=42)

#fitting MLR to training set
from sklearn.linear_model import LinearRegression
linear_model = LinearRegression()
linear_model.fit(x_train,y_train)

#predicting test set results
y_pred = linear_model.predict(x_test)

In [10]:
print(linear_model.coef_)

[2.22531769]


# Obtaining the R-Squared value of the fit Linear Model

In [37]:
from sklearn.metrics import r2_score
r2_score = r2_score(y_test, y_pred)
round(r2_score, 2)

0.64

# Obtaining the Mean Absolute Error value of the fit Linear Model

In [38]:
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_test, y_pred)
#round(mae, 3)
mae
#Rounding off a number
round(mae, 2)

0.08

# Obtaining the Residual Sum of Squares value of the fit Linear Model

In [39]:
import numpy as np
rss = np.sum(np.square(y_test - y_pred))
round(rss, 2)

66.12

# Obtaining the Root Mean Squared Error value of the fit Linear Model

In [43]:
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
round(rmse, 3)

0.106

# Obtaining the Coefficient of Determination of the fit Linear Model

In [41]:
print(linear_model.coef_)

[0.8910771]


# Preparing the dataset

In [45]:
#defining the independent variable
features2 = normalized_df.drop(columns=['Appliances'])

#defining the target/dependent variable
appliances_target2 = normalized_df['Appliances']

#splitting training and testing set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(features2, appliances_target2, test_size=0.3, random_state=42)

#fitting MLR to training set
from sklearn.linear_model import LinearRegression
linear_model = LinearRegression()
linear_model.fit(x_train,y_train)

#predicting test set results
y_pred = linear_model.predict(x_test)

# Obtaining the lowest and highest weights

In [46]:
def get_weights_df(model, feat, col_name):
  #this function returns the weight of every feature
  weights = pd.Series(model.coef_, feat.columns).sort_values()
  weights_df = pd.DataFrame(weights).reset_index()
  weights_df.columns = ['Features', col_name]
  weights_df[col_name].round(3)
  return weights_df

linear_model_weights = get_weights_df(linear_model, x_train, 'Linear_Model_Weight')
linear_model_weights


Unnamed: 0,Features,Linear_Model_Weight
0,RH_2,-0.456698
1,T_out,-0.32186
2,T2,-0.236178
3,T9,-0.189941
4,RH_8,-0.157595
5,RH_out,-0.077671
6,RH_7,-0.044614
7,RH_9,-0.0398
8,T5,-0.015657
9,T1,-0.003281


# Training  ridge regression model 

In [47]:
from sklearn.linear_model import Ridge
ridge_reg = Ridge(alpha=0.4)
ridge_reg.fit(x_train, y_train)

y_pred1 = ridge_reg.predict(x_test)
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test, y_pred1))
round(rmse, 3)

0.088

# Training a Lasso regression model

In [48]:
from sklearn.linear_model import Lasso
lasso_reg = Lasso(alpha=0.001)
lasso_reg.fit(x_train, y_train)

y_pred2 = lasso_reg.predict(x_test)
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test, y_pred2))
round(rmse, 3)

0.094

# Finding out the non-zero feature weights

In [49]:
lasso_weights_df = get_weights_df(lasso_reg, x_train, 'Lasso_weight')
lasso_weights_df

Unnamed: 0,Features,Lasso_weight
0,RH_out,-0.049557
1,RH_8,-0.00011
2,T1,0.0
3,Tdewpoint,0.0
4,Visibility,0.0
5,Press_mm_hg,-0.0
6,T_out,0.0
7,RH_9,-0.0
8,T9,-0.0
9,T8,0.0
