In [None]:
import pandas as pd
import pickle

# Read the listings and keep only the rows whose Location is 'nimman'.
df = (
    pd.read_csv('CNX_HousePrice.csv')
    .loc[lambda frame: frame['Location'] == 'nimman']
)

# Features: every column except the target, the per-square-metre price, and
# 'Location' (a single value after the filter above).
X = df.drop(columns=['price', 'per_sqm_price', 'Location'])

# Target: the listing price.
y = df['price']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# identical across notebook runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.impute import SimpleImputer

from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Candidate regressors to compare. Each one is wrapped in the same
# preprocessing chain below so their scores are directly comparable.
regressors = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(),
    'Lasso Regression': Lasso(),
    # Seeded so the forest's scores are identical on every notebook run.
    'Random Forest Regression': RandomForestRegressor(random_state=42)
}

# One pipeline per candidate: impute -> standardise -> polynomial expansion
# -> regressor.
pipelines = {}
for name, regressor in regressors.items():
    pipelines[name] = Pipeline([
        # The pipeline pickled later in this notebook also imputes; without
        # this step PolynomialFeatures rejects inputs containing NaN.
        ('imputer', SimpleImputer()),
        ('scaler', StandardScaler()),
        # NOTE(review): degree=5 here, but the pipeline that is exported later
        # uses degree=3 -- confirm which degree is intended.
        ('transform', PolynomialFeatures(degree=5)),
        ('regressor', regressor)
    ])

# Fit every pipeline on the training split and record MAE / R^2 on both the
# training and the test split.
results = {}
for name, pipeline in pipelines.items():
    model = pipeline.fit(X_train, y_train)

    # In-sample fit quality.
    y_pred = model.predict(X_train)
    mae = mean_absolute_error(y_train, y_pred)
    r2 = r2_score(y_train, y_pred)

    # Out-of-sample quality on the held-out rows.
    y_predt = model.predict(X_test)
    maet = mean_absolute_error(y_test, y_predt)
    r2t = r2_score(y_test, y_predt)

    # Order must match the index labels of df_result below.
    results[name] = [mae, r2, maet, r2t]

# One column per model, one row per metric, rounded for display.
df_result = pd.DataFrame(
    results,
    index=['mae_train', 'r2_train', 'mae_test', 'r2_test'],
).round(2)
df_result

In [None]:
# Pipeline that gets exported: impute missing values (SimpleImputer defaults),
# standardise, expand to degree-3 polynomial features, then a random forest.
steps = [
    ("imp_mean", SimpleImputer()),
    ("scale", StandardScaler()),
    ("polytransform", PolynomialFeatures(degree=3)),
    # Seeded so re-running the notebook reproduces the same saved model.
    ("regressor", RandomForestRegressor(random_state=42)),
]

pipeline = Pipeline(steps)

# Fitted on the training split only.
model = pipeline.fit(X_train, y_train)

# Persist the whole fitted pipeline so the preprocessing travels with the
# regressor. Unpickle with the same scikit-learn version used to train, and
# only load pickle files from trusted sources.
with open('CNX_BestModel.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)