<a href="https://colab.research.google.com/github/rgumi/dataScience/blob/master/rental_prices_polynomial_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures

In [0]:
# Load the rental-price demo dataset and split it into the two predictor
# columns (X) and the target column (y).
data = pd.read_csv('https://raw.githubusercontent.com/saschaschworm/big-data-and-data-science/master/datasets/demos/rental-prices.csv')
X, y = data[['apartment_size', 'age']], data['rental_price']

# Settings forwarded verbatim to SGDRegressor.
# - 'squared_error' is the current name of the ordinary least-squares loss;
#   the former alias 'squared_loss' was deprecated in scikit-learn 1.0 and
#   removed in 1.2, so it fails on up-to-date runtimes.
# - Penalty names are lowercase in the scikit-learn API ('l1', not 'L1');
#   recent releases validate the value strictly and reject other spellings.
# - random_state is fixed so that repeated runs give the same fit.
hyperparams = {'loss': 'squared_error', 'penalty': 'l1', 'alpha': 0.0001, 'max_iter': 50000,
               'tol': 1e-3, 'random_state': 1909, 'eta0': 0.01}

model = SGDRegressor(**hyperparams)

In [0]:
# Columns that are passed through the plain min-max scaling branch.
numeric_features = ['apartment_size', 'age']

# Plain branch: rescale each selected column with MinMaxScaler.
numeric_transformer = Pipeline(steps=[('scaler', MinMaxScaler())])

# Polynomial branch: expand the input into powers up to degree 21 (no bias
# column), then rescale the expanded columns with MinMaxScaler.
polynomial_transformer = Pipeline(steps=[
    ('polynomials', PolynomialFeatures(degree=21, include_bias=False)),
    ('scaler', MinMaxScaler()),
])

# Route columns to the two branches; their outputs are concatenated.
# NOTE(review): 'apartment_size' is fed to both branches, so it appears to
# reach the model twice in scaled linear form - confirm this is intended.
preprocessor = ColumnTransformer(transformers=[
    ('p_transformer', polynomial_transformer, ['apartment_size']),
    ('n_transformer', numeric_transformer, numeric_features),
])

# Preprocessing followed by the regressor, fitted on the full dataset.
# fit() returns the pipeline itself, so `pipeline` is the fitted estimator.
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)]).fit(X, y)

In [0]:
# Single-row query: a 44 sqm apartment with age 10, using the same column
# names the pipeline was fitted on.
prediction = pipeline.predict(
    pd.DataFrame([{'apartment_size': 44, 'age': 10}])
)
f'Prediction for a 44sqm Apartment: {prediction[0]:.2f} EUR'

'Prediction for a 44sqm Apartment: 521.70 EUR'

In [0]:
# 10-fold cross-validation of the whole pipeline, scored with negated MSE.
# Training-fold scores are requested as well so both can be compared.
scoring = 'neg_mean_squared_error'
res_cv = cross_validate(
    estimator=pipeline,
    X=X,
    y=y,
    scoring=scoring,
    cv=10,
    return_train_score=True,
)

In [0]:
# The scorer reports negated MSE per fold: take the magnitude, convert each
# fold to RMSE, then average across folds.
res_rmse_tr = np.sqrt(np.abs(res_cv['train_score'])).mean()
res_rmse_te = np.sqrt(np.abs(res_cv['test_score'])).mean()
f'Average RMSE on Training and Test Set: {res_rmse_tr:.2f}/{res_rmse_te:.2f} EUR'

'Average RMSE on Training and Test Set: 23.92/34.39 EUR'