### Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

import warnings
warnings.filterwarnings('ignore')

### Reading the dataset 

In [2]:
df=pd.read_csv(r"electricity.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Target
0,641925,6/3/2008,12:09:00,1.322,0.058,239.17,5.4,0.0
1,495667,25/11/2007,22:31:00,0.294,0.064,244.26,1.2,0.0
2,1302460,8/6/2009,05:04:00,0.336,0.094,239.77,1.6,1.0
3,749968,20/5/2008,12:52:00,1.548,0.19,239.85,6.4,23.0
4,482440,16/11/2007,18:04:00,2.838,0.06,238.33,11.8,18.0


In [3]:
df.drop(['Date', 'Time','Unnamed: 0'], axis=1,inplace=True) #Dropping the unnecessay columns for model building

In [4]:
df.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Target
0,1.322,0.058,239.17,5.4,0.0
1,0.294,0.064,244.26,1.2,0.0
2,0.336,0.094,239.77,1.6,1.0
3,1.548,0.19,239.85,6.4,23.0
4,2.838,0.06,238.33,11.8,18.0


### Creating Feature and Label columns 

In [5]:
X=df.drop('Target',axis=1)

In [6]:
X.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity
0,1.322,0.058,239.17,5.4
1,0.294,0.064,244.26,1.2
2,0.336,0.094,239.77,1.6
3,1.548,0.19,239.85,6.4
4,2.838,0.06,238.33,11.8


In [7]:
Y=df['Target']

In [8]:
#Train and test split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.20,random_state=40)

### Standarad Scaler 

In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
scaler=StandardScaler()
scaler

StandardScaler()

In [11]:
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

### Linear Regression  

In [12]:
from sklearn.linear_model import LinearRegression
Regression=LinearRegression()#Creating a Linear Regression model

In [13]:
Regression

LinearRegression()

In [14]:
Regression.fit(X_train,Y_train)#giving the training data inside the regression model

LinearRegression()

In [15]:
reg_pred=Regression.predict(X_test)#Predicting the label values by giving the feature

In [16]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
print("MSE =",mean_squared_error(Y_test,reg_pred))
print("MAE =",mean_absolute_error(Y_test,reg_pred))
print(np.sqrt(mean_squared_error(Y_test,reg_pred)))

MSE = 46.022316575491566
MAE = 4.260624862969165
6.783974983406967


In [17]:
from sklearn.metrics import r2_score
score=r2_score(Y_test,reg_pred)
print("R2 value",score)

R2 value 0.7042951669479176


In [18]:
print("Adjusted R2 value",1-(1-score)*(len(Y_test)-1)/(len(Y_test)-X_test.shape[1]-1))

Adjusted R2 value 0.7039002356182988


#### a)Ridge Regression 

In [19]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
ridge_regressor=Ridge()
parameters={'alpha':[1,1,2,4,5,10,20,30,40,50,60,70,80,90]}
ridgecv=GridSearchCV(ridge_regressor,parameters,cv=5)
ridgecv.fit(X_train,Y_train)

GridSearchCV(cv=5, estimator=Ridge(),
             param_grid={'alpha': [1, 1, 2, 4, 5, 10, 20, 30, 40, 50, 60, 70,
                                   80, 90]})

In [20]:
print(ridgecv.best_params_)

{'alpha': 1}


In [21]:
print(ridgecv.best_score_)

0.7149959682184683


In [22]:
ridge_pred=ridgecv.predict(X_test)
ridge_pred

array([ 0.76123494,  7.13595829, 11.51628736, ...,  2.13574326,
       16.84524981, 12.46157096])

In [23]:
#Performance matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
print(mean_squared_error(Y_test,ridge_pred))
print(mean_absolute_error(Y_test,ridge_pred))
print(np.sqrt(mean_squared_error(Y_test,ridge_pred)))

46.04429175175405
4.262759035755337
6.785594428769969


In [24]:
from sklearn.metrics import r2_score
ridge_score=r2_score(Y_test,ridge_pred)
print(ridge_score)

0.7041539709736271


In [25]:
1-(1-ridge_score)*(len(Y_test)-1)/(len(Y_test)-X_test.shape[1]-1)

0.7037588510684165

#### b) Lasso Regression 

In [26]:
from sklearn.linear_model import Lasso
lasso=Lasso()
parameters={'alpha':[1,2,5,10,20,30,40,50,60,70,80,90]}
lassocv=GridSearchCV(lasso,parameters,scoring='neg_mean_squared_error')
lassocv.fit(X_train,Y_train)

GridSearchCV(estimator=Lasso(),
             param_grid={'alpha': [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80,
                                   90]},
             scoring='neg_mean_squared_error')

In [27]:
print(lassocv.best_score_)

-46.405076337711705


In [28]:
lasso_pred=lassocv.predict(X_test)
lasso_pred

array([ 0.99460388,  7.70320159, 10.82648813, ...,  3.13839819,
       15.33584857, 11.36243671])

In [29]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
print(mean_squared_error(Y_test,lasso_pred))
print(mean_absolute_error(Y_test,lasso_pred))
print(np.sqrt(mean_squared_error(Y_test,lasso_pred)))

47.14657858822157
4.456497092054926
6.866336620660363


In [30]:
from sklearn.metrics import r2_score
Lasso_score=r2_score(Y_test,lasso_pred)
print(Lasso_score)

0.6970715038314425


In [31]:
1-(1-Lasso_score)*(len(Y_test)-1)/(len(Y_test)-X_test.shape[1]-1)

0.696666924871618

#### c)Elastic Net Regression 

In [32]:
from sklearn.linear_model import ElasticNet
elastic_net_reg=ElasticNet()
elastic_net_reg

ElasticNet()

In [33]:
elastic_net_reg.fit(X_train,Y_train)
parameters={'alpha':[1,2,5,10,20,30,40,50,60,70,80,90]}
elastic_netcv=GridSearchCV(elastic_net_reg,parameters,scoring='neg_mean_squared_error',cv=5)
elastic_netcv.fit(X_train,Y_train)

GridSearchCV(cv=5, estimator=ElasticNet(),
             param_grid={'alpha': [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80,
                                   90]},
             scoring='neg_mean_squared_error')

In [34]:
print(elastic_netcv.best_score_)

-52.044941626209116


In [35]:
elastic_net_pred=elastic_netcv.predict(X_test)

In [36]:
#Evaluation of metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
print(mean_squared_error(Y_test,elastic_net_pred))
print(mean_absolute_error(Y_test,elastic_net_pred))
print(np.sqrt(mean_squared_error(Y_test,elastic_net_pred)))

52.196714237795355
5.029875850031242
7.224729353947825


In [37]:
#R2 square value
from sklearn.metrics import r2_score
elastic_net_score=r2_score(Y_test,elastic_net_pred)
print(elastic_net_score)

0.6646231259515936


In [38]:
#Adjusted R2 value
1-(1-elastic_net_score)*(len(Y_test)-1)/(len(Y_test)-X_test.shape[1]-1)

0.6641752102600431

### Scaling the data 

In [39]:
X_train_tf=scaler.transform(X_train)
X_test_tf=scaler.transform(X_test)

### Support vector Regression model 

In [40]:
classifier=SVR(kernel='linear')
classifier.fit(X_train_tf,Y_train)

SVR(kernel='linear')

In [41]:
classifier.score(X_train_tf,Y_train)

0.7012860490230135

In [42]:
#parameters=[{'C':[1,10,20],'kernel':['polynomial',]},{'C':[1,10,20],'kernel':['rbf'],'gamma':[0.6,0.7,0.8,0.9,0.10]}]
#grid_search=GridSearchCV(estimator=classifier,param_grid=parameters,scoring='accuracy',cv=5,n_jobs=-1)
#grid_search=grid_search.fit(X_train_tf,Y_train)

In [43]:
#grid_search.best_params_

### Saving the model 

In [44]:
import pickle
filename = 'standard_sacler.pkl'
pickle.dump(scaler, open(filename, 'wb'))

In [45]:
import pickle
filename = 'SVR.pkl'
pickle.dump(classifier, open(filename, 'wb'))

In [46]:
import pickle
filename = 'Linear_Regression.pkl'
pickle.dump(Regression, open(filename, 'wb'))

In [47]:
import pickle
filename = 'Ridge.pkl'
pickle.dump(ridgecv, open(filename, 'wb'))

In [48]:
import pickle
filename = 'Lasso.pkl'
pickle.dump(lassocv, open(filename, 'wb'))

In [49]:
import pickle
filename = 'elasticnet.pkl'
pickle.dump(elastic_netcv, open(filename, 'wb'))