In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import FunctionTransformer
warnings.filterwarnings("ignore")

%matplotlib inline

In [2]:
dataset=pd.read_csv('outlier_handled_target')
dataset.drop(['Unnamed: 0','Date'],axis=1,inplace=True)

In [3]:
dataset.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Total_power_consumed
0,1209.176,50.212,341733.055,5180.8,14680.933333
1,2985.47,226.006,345725.32,12375.6,27354.983333
2,2203.826,161.792,347373.64,9247.2,19028.433333
3,1666.194,150.942,348479.01,7094.0,13131.9
4,2225.748,160.998,348923.61,9313.0,20384.8


In [4]:
## Dependent and independent features

X=dataset.iloc[:,:4]
y=dataset.loc[:,'Total_power_consumed']

In [5]:
X.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity
0,1209.176,50.212,341733.055,5180.8
1,2985.47,226.006,345725.32,12375.6
2,2203.826,161.792,347373.64,9247.2
3,1666.194,150.942,348479.01,7094.0
4,2225.748,160.998,348923.61,9313.0


In [6]:
y.head()

0    14680.933333
1    27354.983333
2    19028.433333
3    13131.900000
4    20384.800000
Name: Total_power_consumed, dtype: float64

In [45]:
## Splitting train and test data

from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=42)

In [46]:
from sklearn.preprocessing import StandardScaler

In [47]:
## Applying standard Scaler

scaler = StandardScaler()
X_train_scaled=scaler.fit_transform(X_train)
X_test_scaled=scaler.transform(X_test)


In [29]:
scaler=StandardScaler()
y_train_scaled=scaler.fit_transform(pd.DataFrame(y_train))
y_test_scaled=scaler.fit_transform(pd.DataFrame(y_test))

#### pickling the scaler object

In [24]:
import pickle
pickle_out=open('scaler.pkl',"wb")
pickle.dump(scaler,pickle_out)

In [25]:
pickle_out.close()

#### loading the pickled object

In [26]:
pickle_in = open('scaler.pkl','rb')
scaler_loaded=pickle.load(pickle_in)

## Model Building

In [66]:
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error


In [67]:
## linear regression
model = LinearRegression()
model.fit(X_train_scaled,y_train)
x_predicted=model.predict(X_train_scaled)
score=r2_score(y_train,x_predicted)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1)}")
y_pred = model.predict(X_test_scaled)
score=r2_score(y_test,y_pred)
print(f"Testing r2 score {score}")
print(f"Testing Adjusted r2 score {1 - (1-score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)}")

## MAE MSE AND RMSE

print(f"Mean squared error {mean_squared_error(y_test,y_pred)}")
print(f"Mean absolute error {mean_absolute_error(y_test,y_pred)}")
print(f"Root Mean squared error {np.sqrt(mean_squared_error(y_test,y_pred))}")

Training r2 score 0.8179604789750587
Training Adjusted r2 score 0.8172793208047129
Testing r2 score 0.8373470471206067
Testing Adjusted r2 score 0.8355091606473932
Mean squared error 4755914.598822532
Mean absolute error 1704.7459596618262
Root Mean squared error 2180.805951666157


-  Because of outliers in target feature we got high MSE value

In [68]:
## Ridge Regression

model = Ridge()
model.fit(X_train_scaled,y_train)
x_predicted=model.predict(X_train_scaled)
score=r2_score(y_train,x_predicted)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1)}")
y_pred = model.predict(X_test_scaled)
score=r2_score(y_test,y_pred)
print(f"Testing r2 score {score}")
print(f"Testing Adjusted r2 score {1 - (1-score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)}")

## MAE MSE AND RMSE

print(f"Mean squared error {mean_squared_error(y_test,y_pred)}")
print(f"Mean absolute error {mean_absolute_error(y_test,y_pred)}")
print(f"Root Mean squared error {np.sqrt(mean_squared_error(y_test,y_pred))}")

Training r2 score 0.817953234436507
Training Adjusted r2 score 0.8172720491584398
Testing r2 score 0.8373866016319191
Testing Adjusted r2 score 0.8355491621023363
Mean squared error 4754758.038953993
Mean absolute error 1703.8809277998455
Root Mean squared error 2180.540767551479


In [69]:
## Lasso Regression

model = Lasso()
model.fit(X_train_scaled,y_train)
x_predicted=model.predict(X_train_scaled)
score=r2_score(y_train,x_predicted)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1)}")
y_pred = model.predict(X_test_scaled)
score=r2_score(y_test,y_pred)
print(f"Testing r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)}")

## MAE MSE AND RMSE

print(f"Mean squared error {mean_squared_error(y_test,y_pred)}")
print(f"Mean absolute error {mean_absolute_error(y_test,y_pred)}")
print(f"Root Mean squared error {np.sqrt(mean_squared_error(y_test,y_pred))}")

Training r2 score 0.8179437208308962
Training Adjusted r2 score 0.8172624999546787
Testing r2 score 0.837280881311848
Training Adjusted r2 score 0.8354422472023774
Mean squared error 4757849.263581135
Mean absolute error 1705.98206362707
Root Mean squared error 2181.2494730271305


In [70]:
## Elastic Net Regression

## linear regression
model = ElasticNet()
model.fit(X_train_scaled,y_train)
x_predicted=model.predict(X_train_scaled)
score=r2_score(y_train,x_predicted)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1)}")
y_pred = model.predict(X_test_scaled)
score=r2_score(y_test,y_pred)
print(f"Testing r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)}")

## MAE MSE AND RMSE

print(f"Mean squared error {mean_squared_error(y_test,y_pred)}")
print(f"Mean absolute error {mean_absolute_error(y_test,y_pred)}")
print(f"Root Mean squared error {np.sqrt(mean_squared_error(y_test,y_pred))}")

Training r2 score 0.782305036323244
Training Adjusted r2 score 0.78149046209059
Testing r2 score 0.8049970339531879
Training Adjusted r2 score 0.8027936106080261
Mean squared error 5701817.499270399
Mean absolute error 1871.2979654179633
Root Mean squared error 2387.847880261722


In [71]:
from sklearn.svm import SVR

In [72]:
## before applying Grid Search CV
## SVR regression


model = SVR(kernel='rbf')


In [73]:
model.fit(X_train_scaled,y_train_scaled)

SVR()

In [74]:
x_predicted=model.predict(X_train_scaled)
score=r2_score(y_train_scaled,x_predicted)
print(f"Training r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1)}")

Training r2 score 0.8407504722207204
Training Adjusted r2 score 0.8401545899839411


In [75]:
y_pred = model.predict(X_test_scaled)
score=r2_score(y_test_scaled,y_pred)
print(f"Testing r2 score {score}")
print(f"Training Adjusted r2 score {1 - (1-score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)}")

Testing r2 score 0.847470224823999
Training Adjusted r2 score 0.8457467245395245


In [76]:
## MAE MSE AND RMSE

print(f"Mean squared error {mean_squared_error(y_test_scaled,y_pred)}")
print(f"Mean absolute error {mean_absolute_error(y_test_scaled,y_pred)}")
print(f"Root Mean squared error {np.sqrt(mean_squared_error(y_test_scaled,y_pred))}")

Mean squared error 0.152529775176001
Mean absolute error 0.2990193180187865
Root Mean squared error 0.3905506051409996


In [63]:
import pickle
pickle_out=open('svm_model.pkl',"wb")
pickle.dump(model,pickle_out)