# <span style='color:green'> Linear Regression, SVR on House Power Consumption/FE and Model </span>

# <span style='color:red'> 1.0 Importing required libraries </span>

In [1]:
### Pandas and Numpy
import pandas as pd
import numpy as np

### MongoDB Library
import pymongo

### To ignore warnings
import warnings
warnings.filterwarnings('ignore')

### Machine Learning librarie
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.svm import SVR
from sklearn.metrics import r2_score

# <span style='color:red'> 2.4 Uploading Data to MongoDB  </span>

In [2]:
data_sample = pd.read_csv('Desktop/Datasets/household_power_consumption_cleaned.csv')
data_sample.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month,Total_power_use
0,0.284,0.0,240.03,1.2,0,0,0,1,9,0
1,0.488,0.194,242.18,2.2,0,1,1,18,10,2
2,0.35,0.098,243.71,1.4,0,0,0,27,4,0
3,0.464,0.0,237.06,2.0,0,1,0,25,5,1
4,0.43,0.248,244.67,2.0,0,1,1,20,1,2


In [3]:
data_sample.shape

(48412, 10)

In [4]:
### creating connection with MongoDB
import pymongo
from pymongo import MongoClient

In [5]:
Client = MongoClient('Localhost',27017)

In [6]:
db=Client['Power_consumption1']
collection=db['Household_power_data1']

In [7]:
data_dict = data_sample.to_dict("records")

In [8]:
collection.insert_many(data_dict)

<pymongo.results.InsertManyResult at 0x1f3798066a0>

# <span style='color:red'> 2.0 Retrieving data from MongoDB </span>

In [9]:
### Locating our collection and data in MongoDb using find() method
data_from_pymongodb = collection.find()

In [10]:
### converting data from MongoDb to Dataframe in pandas
data_pymongodb = pd.DataFrame(data_from_pymongodb)

In [11]:
data_pymongodb.head()

Unnamed: 0,_id,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month,Total_power_use
0,641ecbbb0fc48164255a4ed8,0.284,0.0,240.03,1.2,0,0,0,1,9,0
1,641ecbbb0fc48164255a4ed9,0.488,0.194,242.18,2.2,0,1,1,18,10,2
2,641ecbbb0fc48164255a4eda,0.35,0.098,243.71,1.4,0,0,0,27,4,0
3,641ecbbb0fc48164255a4edb,0.464,0.0,237.06,2.0,0,1,0,25,5,1
4,641ecbbb0fc48164255a4edc,0.43,0.248,244.67,2.0,0,1,1,20,1,2


In [13]:
data_pymongodb.drop(['_id'] , axis=1, inplace=True)

In [14]:
data_pymongodb.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month,Total_power_use
0,0.284,0.0,240.03,1.2,0,0,0,1,9,0
1,0.488,0.194,242.18,2.2,0,1,1,18,10,2
2,0.35,0.098,243.71,1.4,0,0,0,27,4,0
3,0.464,0.0,237.06,2.0,0,1,0,25,5,1
4,0.43,0.248,244.67,2.0,0,1,1,20,1,2


# <span style='color:red'> 3.0 Model and Evaluation </span>

# <span style='color:red'> 3.1 Seperating Independent and Dependent features </span>

In [15]:
### Splitting data into independent feature dataframe and dependent feature series

X= data_pymongodb.iloc[ :,:-1]
y= data_pymongodb.iloc[ :,-1]

In [16]:
X.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month
0,0.284,0.0,240.03,1.2,0,0,0,1,9
1,0.488,0.194,242.18,2.2,0,1,1,18,10
2,0.35,0.098,243.71,1.4,0,0,0,27,4
3,0.464,0.0,237.06,2.0,0,1,0,25,5
4,0.43,0.248,244.67,2.0,0,1,1,20,1


In [17]:
y.head()

0    0
1    2
2    0
3    1
4    2
Name: Total_power_use, dtype: int64

# <span style='color:red'> 3.2 Train Test Split </span>

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state= 19)
X_train.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month
27148,0.25,0.118,240.71,1.2,0,0,0,15,7
18596,0.248,0.11,242.93,1.0,0,2,1,23,7
17779,2.158,0.486,238.9,9.2,0,1,30,28,8
25995,0.746,0.072,244.79,3.2,0,1,0,30,11
11270,0.404,0.0,241.84,2.0,0,0,0,26,10


In [19]:
y_train.head()

27148     0
18596     3
17779    31
25995     1
11270     0
Name: Total_power_use, dtype: int64

In [20]:
X_test.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month
15567,3.3,0.056,240.15,13.8,0,0,0,13,12
20711,2.17,0.238,235.82,9.4,0,32,0,4,6
35071,1.964,0.0,232.56,8.4,0,0,0,8,11
38605,4.41,0.314,237.5,18.6,0,36,17,10,8
4208,2.426,0.164,240.87,10.0,0,2,18,22,7


In [21]:
y_test.head()

15567     0
20711    32
35071     0
38605    53
4208     20
Name: Total_power_use, dtype: int64

In [22]:
### both will have same shape
X_train.shape,y_train.shape

((36309, 9), (36309,))

In [23]:
X_test.shape,y_test.shape

((12103, 9), (12103,))

# <span style='color:red'> 3.3 Feature Scaling </span>

In [24]:
Scaler = StandardScaler()
Scaler

StandardScaler()

In [25]:
X_train = Scaler.fit_transform(X_train)
X_train 

array([[-0.80258101, -0.05747984, -0.03034584, ..., -0.76909553,
        -0.07367583,  0.1908574 ],
       [-0.80449444, -0.12847529,  0.66809624, ..., -0.65080045,
         0.83529316,  0.1908574 ],
       [ 1.02283174,  3.20831075, -0.59979636, ...,  2.77975662,
         1.40339878,  0.4861967 ],
       ...,
       [-0.17114893, -1.10466269,  0.96383298, ..., -0.76909553,
        -0.64178145,  1.66755389],
       [ 3.60978985,  3.048571  , -2.05645709, ..., -0.76909553,
        -1.20988707, -0.99049979],
       [ 0.73773058,  0.31524626,  0.1238148 , ..., -0.76909553,
        -0.64178145, -1.58117839]])

In [26]:
x_test=Scaler.transform(X_test)
x_test

array([[ 2.11540058, -0.60769456, -0.20652942, ..., -0.76909553,
        -0.30091808,  1.66755389],
       [ 1.03431232,  1.00745188, -1.56880609, ..., -0.76909553,
        -1.3235082 , -0.1044819 ],
       [ 0.83722897, -1.10466269, -2.59444626, ..., -0.76909553,
        -0.8690237 ,  1.37221459],
       ...,
       [ 0.23258492, -0.43020594,  1.21866995, ...,  1.47851083,
         1.51701991, -1.58117839],
       [-0.81214817, -1.10466269,  0.40067473, ..., -0.65080045,
        -0.98264483,  0.78153599],
       [-0.95948232, -1.10466269, -0.25372146, ..., -0.76909553,
         0.38080867, -0.1044819 ]])

# <span style='color:red'> 3.4 Linear Regression </span>

In [27]:
linear_reg =LinearRegression()
linear_reg

LinearRegression()

In [28]:
linear_reg.fit(X_train,y_train)

LinearRegression()

In [29]:
linear_reg_pred = linear_reg.predict(X_test)
linear_reg_pred

array([  8.82084332, 188.34149684,   8.82084332, ..., 169.43615708,
        17.27428088,   8.82084332])

In [30]:
r2score_linear_reg = r2_score(y_test,linear_reg_pred)
print("Our Linear Regression model has {} % accuracy".format(round(r2score_linear_reg*100,3)))

Our Linear Regression model has -6579.553 % accuracy


In [31]:
adj_r2score_linear_reg= (1-(1-(r2score_linear_reg))*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))
print("Our Linear Regression model has {} % accuracy".format(round(adj_r2score_linear_reg*100,3)))

Our Linear Regression model has -6584.524 % accuracy


# <span style='color:red'> 3.5 Ridge Regression </span>

In [32]:
ridge=Ridge()
ridge

Ridge()

In [33]:
ridge.fit(X_train,y_train)

Ridge()

In [34]:
ridge.pred=ridge.predict(X_test)
ridge.pred

array([  8.80246419, 188.31293419,   8.80708443, ..., 169.41094039,
        17.26578381,   8.81357841])

In [35]:
r2_score_ridge=r2_score(y_test,ridge.pred)
print('our ridge regression has {} acuracy'.format(round(r2_score_ridge*100,3)))

our ridge regression has -6577.149 acuracy


In [36]:
adj_r2_score_ridge=(1-(1-r2_score_ridge)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))
print('our ridge regression has {} acuracy'.format(round(adj_r2_score_ridge*100,3)))

our ridge regression has -6582.118 acuracy


# <span style='color:red'> 3.5 Lasso Regression </span>

In [37]:
lasso = Lasso()
lasso

Lasso()

In [38]:
lasso.fit(X_train,y_train)

Lasso()

In [39]:
lasso_pred =lasso.predict(X_test)
lasso_pred

array([ 12.48171433, 149.71968619,  10.99961624, ..., 142.89953776,
        16.06609026,   8.91624783])

In [40]:
r2_score_lasso = r2_score(y_test,lasso_pred)
print('our lasso regression has {} acurracy'.format(r2_score_lasso*100,3))

our lasso regression has -4273.457803631254 acurracy


In [41]:
adj_r2_score_lasso=(1-(1-r2_score_lasso)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))
print('our lasso regression has {} acurracy'.format(adj_r2_score_lasso*100,3))

our lasso regression has -4276.71267175601 acurracy


# <span style='color:red'> 3.6 Elastic-Net Regression </span>

In [42]:
elasticnet = ElasticNet()
elasticnet 

ElasticNet()

In [43]:
elasticnet.fit(X_train,y_train)

ElasticNet()

In [44]:
elasticnet_pred=elasticnet.predict(X_test)
elasticnet_pred

array([ 20.18372282,  91.66214942,   7.08616422, ...,  72.78430847,
        -8.69870272, -13.95690664])

In [45]:
r2_score_elasticnet = r2_score(y_test,elasticnet_pred)
print('our elasticnet regression has {} acurracy'.format(r2_score_lasso*100,3))

our elasticnet regression has -4273.457803631254 acurracy


In [46]:
adj_r2_score_elasticnet=(1-(1-r2_score_elasticnet)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))
print('our elasticnet regression has {} acurracy'.format(adj_r2_score_elasticnet*100,3))

our elasticnet regression has -1088.769843506163 acurracy


# <span style='color:red'> 3.7 Support Vector Regressor </span>

In [47]:
svr=SVR()
svr

SVR()

In [48]:
svr.fit(X_train, y_train)

SVR()

In [49]:
svr_pred=svr.predict(X_test)
svr_pred

array([29.4927867, 29.4927867, 29.4927867, ..., 29.4927867, 29.4927867,
       29.4927867])

In [50]:
r2score_svr=r2_score(y_test, svr_pred)
print("Our Support Vector Regressor model has {} % accuracy".format(round(r2score_svr*100,3)))

Our Support Vector Regressor model has -257.858 % accuracy


In [51]:
adjusted_r2_score_svr=1-((1-r2score_svr)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))
print("Adjusted R square accuracy is {} % ".format(round(adjusted_r2_score_svr*100,3)))

Adjusted R square accuracy is -258.124 % 
