In [5]:
import pandas as pd
# Load the raw listings table from the CSV in the working directory.
df = pd.read_csv('CNX_HousePrice.csv')

# Regression target: the per-square-metre price column.
y = df['per_sqm_price']

# Features: everything except the two price columns and 'Location'.
# 'price' is dropped along with the target itself.
# NOTE(review): presumably 'price' would leak the target and 'Location' is
# non-numeric - confirm against the dataset.
excluded_columns = ['price', 'per_sqm_price', 'Location']
X = df.drop(columns=excluded_columns)

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. The seed is fixed so that the split -
# and therefore every metric computed from it - is identical on each
# Restart & Run All. Without random_state the partition changes on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [7]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

import pickle

# Pipeline stages, applied in order: fill missing values (SimpleImputer with
# its default settings), standardise the features, expand them into polynomial
# terms up to degree 3, then fit an ordinary least-squares regression on the
# expanded features.
steps = [
    ("imp_mean", SimpleImputer()),
    ("scale", StandardScaler()),
    ("polytransform", PolynomialFeatures(degree=3)),
    ("linear", LinearRegression()),
]
pipeline = Pipeline(steps)

# Pipeline.fit returns the fitted pipeline itself, so `model` is simply
# another name for the same fitted object.
pipeline.fit(X_train, y_train)
model = pipeline

# Persist the fitted pipeline (preprocessing + regressor) to disk.
with open('cnxmodel.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [8]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def evaluate(fitted_model, features, target):
    """Predict on `features` and score the predictions against `target`.

    Parameters
    ----------
    fitted_model : object exposing ``predict(features)``.
    features : feature table passed straight to ``fitted_model.predict``.
    target : true values that the predictions are compared with.

    Returns
    -------
    tuple
        ``(predictions, mae, mse, r2)``: the raw predictions followed by the
        mean absolute error, mean squared error and R^2 score.
    """
    predictions = fitted_model.predict(features)
    return (
        predictions,
        mean_absolute_error(target, predictions),
        mean_squared_error(target, predictions),
        r2_score(target, predictions),
    )


# Score the same fitted model on both partitions with one code path instead of
# two copy-pasted stanzas. After the loop, y_pred / mae / mse / r2 hold the
# test-set values, since 'testing' is the last partition evaluated.
for label, features, target in (
    ('training', X_train, y_train),
    ('testing', X_test, y_test),
):
    y_pred, mae, mse, r2 = evaluate(model, features, target)
    print(label)
    print('mae:', round(mae), 'mse:', round(mse), 'r2 score:', round(r2, 2))

training
mae: 18624 mse: 596240005 r2 score: 0.29
testing
mae: 19736 mse: 610180342 r2 score: 0.08
