[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/saschaschworm/big-data-and-data-science/blob/master/notebooks/demos/rental-prices-polynomial-regression.ipynb)

In [1]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures

In [2]:
# Load the rental-price demo dataset and split it into the single feature
# column (kept as a DataFrame) and the target series.
data = pd.read_csv('https://raw.githubusercontent.com/saschaschworm/big-data-and-data-science/master/datasets/demos/rental-prices.csv')
X, y = data[['apartment_size']], data['rental_price']

# Stand-alone SGD configuration.
# - The loss is left at SGDRegressor's default (squared error): its string name
#   changed from 'squared_loss' to 'squared_error' across scikit-learn releases,
#   so relying on the default keeps this cell runnable on old and new versions.
# - penalty=None switches regularisation off; the string 'none' is rejected by
#   scikit-learn >= 1.2.
hyperparams = {'penalty': None, 'alpha': 0.0001, 'max_iter': 1000,
               'tol': 1e-3, 'random_state': 1909, 'eta0': 0.0001}

# NOTE(review): the pipeline cell below constructs its own SGDRegressor with
# different settings, so `model` is not what gets fitted there.
model = SGDRegressor(**hyperparams)

In [3]:
# Preprocessing for the numeric column: expand it into polynomial terms up to
# degree 3 (without a constant column), then min-max scale every term so the
# powers end up on a comparable range before gradient descent sees them.
numeric_features = ['apartment_size']
numeric_transformer = Pipeline([
    ('polynomials', PolynomialFeatures(degree=3, include_bias=False)),
    ('scaler', MinMaxScaler()),
])

# Route only the listed numeric columns through the transformer above.
preprocessor = ColumnTransformer([
    ('numeric_transformer', numeric_transformer, numeric_features),
])

# Unregularised SGD regression on the transformed features.
# penalty=None replaces the string 'none', which scikit-learn >= 1.2 rejects;
# older releases treat both spellings the same.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', SGDRegressor(max_iter=50000, penalty=None, eta0=0.01, random_state=1909)),
])

# The assignment keeps the cell from echoing the fitted estimator's repr.
pipeline = pipeline.fit(X, y)

In [4]:
# Ask the fitted pipeline for the price of one apartment; the input must be a
# DataFrame carrying the same column name the pipeline was trained on.
query = pd.DataFrame({'apartment_size': [44]})
prediction = pipeline.predict(query)
f'Prediction for a 44sqm Apartment: {prediction[0]:.2f} EUR'

'Prediction for a 44sqm Apartment: 504.18 EUR'

In [5]:
# 10-fold cross-validation of the whole pipeline. The scorer reports the
# negated mean squared error; training-fold scores are requested as well so
# they can be compared against the held-out folds.
scoring = 'neg_mean_squared_error'
res_cv = cross_validate(
    pipeline, X, y,
    scoring=scoring,
    cv=10,
    return_train_score=True,
)



In [6]:
# Turn each fold's negated MSE into an RMSE (magnitude first, then square
# root) and average over the folds, once for the training and once for the
# held-out scores.
res_rmse_tr, res_rmse_te = (
    np.mean(np.sqrt(np.abs(res_cv[score_key])))
    for score_key in ('train_score', 'test_score')
)
f'Average RMSE on Training and Test Set: {res_rmse_tr:.2f}/{res_rmse_te:.2f} EUR'

'Average RMSE on Training and Test Set: 42.24/49.86 EUR'