In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('combined_sorted_final.csv')

In [3]:
df=df.drop('unit',axis=1)

In [4]:
X = df.drop('Price',axis=1)

In [5]:
y = df['Price']

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [8]:
print(X_train.shape)
print(X_test.shape)

(3936, 4)
(1688, 4)


In [9]:
column_trans = make_column_transformer((OneHotEncoder(sparse=False), ['location']),remainder='passthrough')

In [10]:
scaler = StandardScaler()

In [11]:
lr = LinearRegression(normalize=True)

In [12]:
pipe = make_pipeline(column_trans,scaler,lr)

In [13]:
pipe.fit(X_train,y_train)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




In [14]:
y_pred_lr = pipe.predict(X_test)

In [15]:
r2_score(y_test,y_pred_lr)

0.83304208087044

In [16]:
lasso = Lasso()

In [17]:
pipe = make_pipeline(column_trans, lasso)

In [18]:
pipe.fit(X_train,y_train)

In [19]:
y_pred_lasso = pipe.predict(X_test)

In [20]:
r2_score(y_test,y_pred_lasso)

0.8165267021880898

In [21]:
ridge = Ridge()

In [22]:
pipe = make_pipeline(column_trans,scaler,ridge)

In [23]:
pipe.fit(X_train,y_train)

In [24]:
y_pred_ridge = pipe.predict(X_test)

In [25]:
r2_score(y_test,y_pred_ridge)

0.8334413044619742

In [26]:
print("No Regularization: ", r2_score(y_test,y_pred_lr))
print("Lasso: ",r2_score(y_test,y_pred_lasso))
print("Ridge: ",r2_score(y_test,y_pred_ridge))

No Regularization:  0.83304208087044
Lasso:  0.8165267021880898
Ridge:  0.8334413044619742


In [27]:
import pickle

In [28]:
pickle.dump(pipe, open('RidgeModel.pkl','wb'))