In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import FunctionTransformer
warnings.filterwarnings("ignore")

%matplotlib inline

In [2]:
dataset=pd.read_csv('https://raw.githubusercontent.com/medashabari/MongoDB_test/master/outlier_handled')
dataset.drop(['Unnamed: 0','Date','Time'],axis=1,inplace=True)

In [3]:
dataset.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,year,month,day,Total_power_consumed
0,0.332,0.074,241.44,1.4,2010,2,19,1.0
1,0.624,0.128,242.91,2.6,2008,9,20,0.0
2,0.214,0.0,240.92,0.8,2007,2,28,0.0
3,0.876,0.238,246.75,3.6,2010,10,27,1.0
4,0.638,0.198,244.19,3.4,2007,7,14,3.0


In [4]:
## Dependent and independent features

X=dataset.iloc[:,:4]
y=dataset.loc[:,'Total_power_consumed']

In [5]:
X.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity
0,0.332,0.074,241.44,1.4
1,0.624,0.128,242.91,2.6
2,0.214,0.0,240.92,0.8
3,0.876,0.238,246.75,3.6
4,0.638,0.198,244.19,3.4


In [6]:
y.head()

0    1.0
1    0.0
2    0.0
3    1.0
4    3.0
Name: Total_power_consumed, dtype: float64

In [7]:
## Splitting train and test data

from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=42)

In [8]:
from sklearn.preprocessing import StandardScaler

In [9]:
## Applying standard Scaler

scaler = StandardScaler()
X_train_scaled=scaler.fit_transform(X_train)
X_test_scaled=scaler.transform(X_test)


#### pickling the scaler object

In [10]:

import pickle
pickle_out=open('scaler.pkl',"wb")
pickle.dump(scaler,pickle_out)

In [11]:
pickle_out.close()

#### loading the pickled object

In [12]:
pickle_in = open('scaler.pkl','rb')
scaler_loaded=pickle.load(pickle_in)

## Model Building

In [13]:
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error


In [14]:
## linear regression
model = LinearRegression()
model.fit(X_train_scaled,y_train)
x_predicted=model.predict(X_train_scaled)
score=r2_score(y_train,x_predicted)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1)}")
y_pred = model.predict(X_test_scaled)
score=r2_score(y_test,y_pred)
print(f"Testing r2 score {score}")
print(f"Testing Adjusted r2 score {1 - (1-score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)}")

## MAE MSE AND RMSE

print(f"Mean squared error {mean_squared_error(y_test,y_pred)}")
print(f"Mean absolute error {mean_absolute_error(y_test,y_pred)}")
print(f"Root Mean squared error {np.sqrt(mean_squared_error(y_test,y_pred))}")

Training r2 score 0.698763112426554
Training Adjusted r2 score 0.6987224019883388
Testing r2 score 0.7013699957242017
Testing Adjusted r2 score 0.701248884498702
Mean squared error 38.88877640814374
Mean absolute error 4.135562303883285
Root Mean squared error 6.2360866260936225


-  Because of outliers in target feature we got high MSE value

In [15]:
## Ridge Regression

model = Ridge()
model.fit(X_train_scaled,y_train)
x_predicted=model.predict(X_train_scaled)
score=r2_score(y_train,x_predicted)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1)}")
y_pred = model.predict(X_test_scaled)
score=r2_score(y_test,y_pred)
print(f"Testing r2 score {score}")
print(f"Testing Adjusted r2 score {1 - (1-score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)}")

## MAE MSE AND RMSE

print(f"Mean squared error {mean_squared_error(y_test,y_pred)}")
print(f"Mean absolute error {mean_absolute_error(y_test,y_pred)}")
print(f"Root Mean squared error {np.sqrt(mean_squared_error(y_test,y_pred))}")

Training r2 score 0.6987529656955578
Training Adjusted r2 score 0.6987122538860701
Testing r2 score 0.7014411507498122
Testing Adjusted r2 score 0.7013200683816685
Mean squared error 38.87951032020328
Mean absolute error 4.133987294552062
Root Mean squared error 6.235343640907314


In [16]:
## Lasso Regression

model = Lasso()
model.fit(X_train_scaled,y_train)
x_predicted=model.predict(X_train_scaled)
score=r2_score(y_train,x_predicted)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1)}")
y_pred = model.predict(X_test_scaled)
score=r2_score(y_test,y_pred)
print(f"Testing r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)}")

## MAE MSE AND RMSE

print(f"Mean squared error {mean_squared_error(y_test,y_pred)}")
print(f"Mean absolute error {mean_absolute_error(y_test,y_pred)}")
print(f"Root Mean squared error {np.sqrt(mean_squared_error(y_test,y_pred))}")

Training r2 score 0.6844962279685323
Training Adjusted r2 score 0.6844535894426818
Testing r2 score 0.68823755153902
Training Adjusted r2 score 0.6881111143704259
Mean squared error 40.59893506031422
Mean absolute error 4.383799421411812
Root Mean squared error 6.371729361822755


In [17]:
## Elastic Net Regression

## linear regression
model = ElasticNet()
model.fit(X_train_scaled,y_train)
x_predicted=model.predict(X_train_scaled)
score=r2_score(y_train,x_predicted)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1)}")
y_pred = model.predict(X_test_scaled)
score=r2_score(y_test,y_pred)
print(f"Testing r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)}")

## MAE MSE AND RMSE

print(f"Mean squared error {mean_squared_error(y_test,y_pred)}")
print(f"Mean absolute error {mean_absolute_error(y_test,y_pred)}")
print(f"Root Mean squared error {np.sqrt(mean_squared_error(y_test,y_pred))}")

Training r2 score 0.6492953817408369
Training Adjusted r2 score 0.6492479860224425
Testing r2 score 0.6521328848395906
Training Adjusted r2 score 0.6519918052024983
Mean squared error 45.300627088781454
Mean absolute error 4.943105687511112
Root Mean squared error 6.730574053435669


In [18]:
from sklearn.svm import SVR

In [19]:
## before applying Grid Search CV
## SVR regression


model = SVR()


In [20]:
model.fit(X_train_scaled,y_train)

In [21]:
x_predicted=model.predict(X_train_scaled)
score=r2_score(y_train,x_predicted)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1)}")

Training r2 score 0.7131126639350096
Training Adjusted r2 score 0.7130738927564076


In [22]:
y_pred = model.predict(X_test_scaled)
score=r2_score(y_test,y_pred)
print(f"Testing r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)}")

Testing r2 score 0.7147240446808569
Training Adjusted r2 score 0.7146083492716228


In [23]:
## MAE MSE AND RMSE

print(f"Mean squared error {mean_squared_error(y_test,y_pred)}")
print(f"Mean absolute error {mean_absolute_error(y_test,y_pred)}")
print(f"Root Mean squared error {np.sqrt(mean_squared_error(y_test,y_pred))}")

Mean squared error 37.1497595090275
Mean absolute error 3.0108481106702008
Root Mean squared error 6.095060254749537


In [24]:
import pickle
pickle_out=open('svm_model.pkl',"wb")
pickle.dump(model,pickle_out)

In [25]:
## Hyper parametet tuning

from sklearn.model_selection import GridSearchCV



In [26]:
svr=SVR()
params=[{'C':[1,10],'kernel':['linear']}]

Grid_linear = GridSearchCV(estimator=svr,param_grid=params,scoring='accuracy')

In [27]:
Grid_linearK=Grid_linear.fit(X_train_scaled,y_train)

In [28]:
Grid_linearK.best_params_

{'C': 1, 'kernel': 'linear'}

In [31]:
model1=SVR(kernel='linear',C=1)

In [32]:
model.fit(X_train_scaled,y_train)

In [33]:
pred1=model.predict(X_test_scaled)

In [34]:
model1.get_params

<bound method BaseEstimator.get_params of SVR(C=1, kernel='linear')>

In [35]:
r2_score(y_test,pred1)

0.7147240446808569

In [36]:
from sklearn.svm import LinearSVR

svr=LinearSVR()

In [42]:
params=[{'C':[1,10,100,100],'epsilon':[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0,9]}]

Grid_rbf = GridSearchCV(estimator=svr,param_grid=params,scoring='accuracy')

In [43]:
Grid_rbf.fit(X_train_scaled,y_train)

In [44]:
Grid_rbf.best_params_

{'C': 1, 'epsilon': 0.1}

In [45]:
Grid_rbf.best_score_

nan

In [46]:
pred2=Grid_rbf.predict(X_test_scaled)

In [47]:
r2_score(y_test,pred2)

0.6713131709420594

In [48]:
from sklearn.linear_model import SGDRegressor

In [50]:
sdg.fit(X_train_scaled,y_train)

In [51]:
pred3=sdg.predict(X_test_scaled)

In [53]:
r2_score(y_test,pred3)

0.7000053028875659

In [56]:
sdg=SGDRegressor()
params=[{'penalty':['l2', 'l1', 'elasticnet'],'alpha':[0.0001,0.0002,0.0003,0.0004,0.0005,0.01,0.02]}]

grid_sdg= GridSearchCV(estimator=sdg,param_grid=params,scoring='accuracy')

In [57]:
grid_sdg.fit(X_train_scaled,y_train)

In [60]:
grid_sdg.best_params_

{'alpha': 0.0001, 'penalty': 'l2'}

In [61]:
grid_sdg.best_score_

nan

In [58]:
pred4=grid_sdg.predict(X_test_scaled)

In [59]:
r2_score(y_test,pred4)

0.6991708594858645