In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import FunctionTransformer
warnings.filterwarnings("ignore")

%matplotlib inline

In [2]:
# Read the 'outlier_handled' CSV and discard the 'Unnamed: 0' column
# (presumably a saved index - verify) together with the raw Date/Time fields.
dataset = pd.read_csv('outlier_handled').drop(columns=['Unnamed: 0', 'Date', 'Time'])

In [3]:
# Show the first five rows of the loaded frame.
dataset.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,year,month,day,Total_power_consumed
0,0.332,0.074,241.44,1.4,2010,2,19,1.0
1,0.624,0.128,242.91,2.6,2008,9,20,0.0
2,0.214,0.0,240.92,0.8,2007,2,28,0.0
3,0.876,0.238,246.75,3.6,2010,10,27,1.0
4,0.638,0.198,244.19,3.4,2007,7,14,3.0


In [4]:
## Independent features (first four columns) and dependent target

target_column = 'Total_power_consumed'
X = dataset.iloc[:, 0:4]
y = dataset[target_column]

In [5]:
# Show the first five rows of the feature matrix.
X.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity
0,0.332,0.074,241.44,1.4
1,0.624,0.128,242.91,2.6
2,0.214,0.0,240.92,0.8
3,0.876,0.238,246.75,3.6
4,0.638,0.198,244.19,3.4


In [6]:
# Show the first five values of the target series.
y.head()

0    1.0
1    0.0
2    0.0
3    1.0
4    3.0
Name: Total_power_consumed, dtype: float64

In [7]:
## Train/test split: 75% of the rows for training, 25% held out for testing

from sklearn.model_selection import train_test_split

# A fixed random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [8]:
from sklearn.preprocessing import StandardScaler

In [9]:
## Standardise the features

# The scaling statistics are learned from the training split only and then
# applied unchanged to both splits, so nothing from the test data leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


#### pickling the scaler object

In [23]:
import pickle

# Serialise the fitted scaler to 'scaler.pkl'. The context manager flushes and
# closes the file even if pickling raises part-way through. The name
# `pickle_out` stays bound to the (now closed) handle, so a later
# `pickle_out.close()` is a harmless no-op.
with open('scaler.pkl', "wb") as pickle_out:
    pickle.dump(scaler, pickle_out)

In [24]:
# Close the scaler pickle handle (a no-op if it is already closed).
pickle_out.close()

#### loading the pickled object

In [25]:
# Read the scaler back from 'scaler.pkl'. The context manager closes the read
# handle as soon as loading finishes instead of leaving it open.
# NOTE: pickle.load can execute arbitrary code - only load files you created.
with open('scaler.pkl', 'rb') as pickle_in:
    scaler_loaded = pickle.load(pickle_in)

## Model Building

In [26]:
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error


In [27]:
## Linear regression baseline
model = LinearRegression().fit(X_train_scaled, y_train)

# Fit quality on the training split.
x_predicted = model.predict(X_train_scaled)
score = r2_score(y_train, x_predicted)
n_train = len(y_train)
train_adjusted = 1 - (1 - score) * (n_train - 1) / (n_train - X_train.shape[1] - 1)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {train_adjusted}")

# Generalisation on the held-out split.
y_pred = model.predict(X_test_scaled)
score = r2_score(y_test, y_pred)
n_test = len(y_test)
test_adjusted = 1 - (1 - score) * (n_test - 1) / (n_test - X_test.shape[1] - 1)
print(f"Testing r2 score {score}")
print(f"Testing Adjusted r2 score {test_adjusted}")

## MAE MSE AND RMSE

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean squared error {mse}")
print(f"Mean absolute error {mae}")
print(f"Root Mean squared error {np.sqrt(mse)}")

Training r2 score 0.698763112426554
Training Adjusted r2 score 0.6987224019883388
Testing r2 score 0.7013699957242017
Testing Adjusted r2 score 0.701248884498702
Mean squared error 38.88877640814374
Mean absolute error 4.135562303883284
Root Mean squared error 6.2360866260936225


-  The MSE is high because of outliers in the target feature.

In [28]:
## Ridge regression (estimator defaults)

model = Ridge().fit(X_train_scaled, y_train)

# Scores on the data the model was fitted on.
x_predicted = model.predict(X_train_scaled)
score = r2_score(y_train, x_predicted)
n_train = len(y_train)
train_adjusted = 1 - (1 - score) * (n_train - 1) / (n_train - X_train.shape[1] - 1)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {train_adjusted}")

# Scores on the held-out data.
y_pred = model.predict(X_test_scaled)
score = r2_score(y_test, y_pred)
n_test = len(y_test)
test_adjusted = 1 - (1 - score) * (n_test - 1) / (n_test - X_test.shape[1] - 1)
print(f"Testing r2 score {score}")
print(f"Testing Adjusted r2 score {test_adjusted}")

## MAE MSE AND RMSE

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean squared error {mse}")
print(f"Mean absolute error {mae}")
print(f"Root Mean squared error {np.sqrt(mse)}")

Training r2 score 0.6987529656955421
Training Adjusted r2 score 0.6987122538860544
Testing r2 score 0.7014411507498601
Testing Adjusted r2 score 0.7013200683817165
Mean squared error 38.87951032019703
Mean absolute error 4.133987294550904
Root Mean squared error 6.235343640906812


In [29]:
## Lasso Regression

model = Lasso()
model.fit(X_train_scaled, y_train)

# Scores on the training split.
x_predicted = model.predict(X_train_scaled)
score = r2_score(y_train, x_predicted)
n_train = len(y_train)
train_adjusted = 1 - (1 - score) * (n_train - 1) / (n_train - X_train.shape[1] - 1)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {train_adjusted}")

# Scores on the held-out split. The adjusted value is computed from y_test and
# X_test, so it is reported as a *testing* score.
y_pred = model.predict(X_test_scaled)
score = r2_score(y_test, y_pred)
n_test = len(y_test)
test_adjusted = 1 - (1 - score) * (n_test - 1) / (n_test - X_test.shape[1] - 1)
print(f"Testing r2 score {score}")
print(f"Testing Adjusted r2 score {test_adjusted}")

## MAE MSE AND RMSE

mse = mean_squared_error(y_test, y_pred)
print(f"Mean squared error {mse}")
print(f"Mean absolute error {mean_absolute_error(y_test, y_pred)}")
print(f"Root Mean squared error {np.sqrt(mse)}")

Training r2 score 0.6844962279685323
Training Adjusted r2 score 0.6844535894426818
Testing r2 score 0.6882375515390201
Training Adjusted r2 score 0.6881111143704259
Mean squared error 40.5989350603142
Mean absolute error 4.383799421411811
Root Mean squared error 6.371729361822754


In [30]:
## Elastic Net Regression

model = ElasticNet()
model.fit(X_train_scaled, y_train)

# Scores on the training split.
x_predicted = model.predict(X_train_scaled)
score = r2_score(y_train, x_predicted)
n_train = len(y_train)
train_adjusted = 1 - (1 - score) * (n_train - 1) / (n_train - X_train.shape[1] - 1)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {train_adjusted}")

# Scores on the held-out split. The adjusted value is computed from y_test and
# X_test, so it is reported as a *testing* score.
y_pred = model.predict(X_test_scaled)
score = r2_score(y_test, y_pred)
n_test = len(y_test)
test_adjusted = 1 - (1 - score) * (n_test - 1) / (n_test - X_test.shape[1] - 1)
print(f"Testing r2 score {score}")
print(f"Testing Adjusted r2 score {test_adjusted}")

## MAE MSE AND RMSE

mse = mean_squared_error(y_test, y_pred)
print(f"Mean squared error {mse}")
print(f"Mean absolute error {mean_absolute_error(y_test, y_pred)}")
print(f"Root Mean squared error {np.sqrt(mse)}")

Training r2 score 0.6492953817408369
Training Adjusted r2 score 0.6492479860224425
Testing r2 score 0.6521328848395906
Training Adjusted r2 score 0.6519918052024983
Mean squared error 45.300627088781454
Mean absolute error 4.943105687511112
Root Mean squared error 6.730574053435669


In [31]:
from sklearn.svm import SVR

In [32]:
## Baseline SVR, before applying Grid Search CV
## (constructed with the estimator's default hyperparameters)


model = SVR()


In [33]:
# Fit the SVR on the standardised training features.
model.fit(X_train_scaled,y_train)

SVR()

In [34]:
# R^2 and adjusted R^2 of the fitted SVR on the training split.
x_predicted = model.predict(X_train_scaled)
score = r2_score(y_train, x_predicted)
n_train = len(y_train)
train_adjusted = 1 - (1 - score) * (n_train - 1) / (n_train - X_train.shape[1] - 1)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {train_adjusted}")

Training r2 score 0.7131126639350096
Training Adjusted r2 score 0.7130738927564076


In [35]:
# R^2 and adjusted R^2 of the fitted SVR on the held-out split. The adjusted
# value is computed from y_test and X_test, so it is reported as a *testing*
# score.
y_pred = model.predict(X_test_scaled)
score = r2_score(y_test, y_pred)
n_test = len(y_test)
test_adjusted = 1 - (1 - score) * (n_test - 1) / (n_test - X_test.shape[1] - 1)
print(f"Testing r2 score {score}")
print(f"Testing Adjusted r2 score {test_adjusted}")

Testing r2 score 0.7147240446808569
Training Adjusted r2 score 0.7146083492716228


In [36]:
## MAE, MSE and RMSE on the held-out split

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"Mean squared error {mse}")
print(f"Mean absolute error {mae}")
print(f"Root Mean squared error {rmse}")

Mean squared error 37.149759509027504
Mean absolute error 3.0108481106702008
Root Mean squared error 6.095060254749538


In [37]:
import pickle

# Persist the fitted SVR to 'svm_model.pkl'. The context manager flushes and
# closes the file, so the pickle on disk is complete as soon as the cell ends.
with open('svm_model.pkl', "wb") as pickle_out:
    pickle.dump(model, pickle_out)

In [38]:
## Hyperparameter tuning

from sklearn.model_selection import GridSearchCV



In [41]:
# Hyperparameter search space for the support-vector regressor.
svr = SVR()
params = [
    # Linear kernel: only the regularisation strength C is tuned.
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    # RBF kernel: tune C together with the kernel coefficient gamma.
    # The grid is 0.1 .. 0.9 in steps of 0.1; gamma must stay positive for the
    # RBF kernel to distinguish samples.
    {'C': [1, 10, 100, 1000],
     'kernel': ['rbf'],
     'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]},
]

# SVR predicts a continuous target, so candidates are ranked by R^2, the same
# metric reported for the other models. The classification-only 'accuracy'
# scorer cannot score continuous predictions.
Grid = GridSearchCV(estimator=svr, param_grid=params, scoring='r2')

In [None]:
# Run the cross-validated search on the standardised training split; the
# result of fit() is bound back to Grid.
Grid=Grid.fit(X_train_scaled,y_train)

In [None]:
from sklearn.linear_model import SGDRegressor