[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/saschaschworm/big-data-and-data-science/blob/master/notebooks/development-exercises/rental-prices-multiple-linear-regression.ipynb)

In [0]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import cross_validate
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures

In [0]:
# Read the rental-price exercise dataset from the course repository (needs network access).
DATASET_URL = 'https://raw.githubusercontent.com/saschaschworm/big-data-and-data-science/master/datasets/exercises/rental-prices.csv'
data = pd.read_csv(DATASET_URL)

# Feature matrix: the two predictor columns; target vector: the rental price column.
X = data[['apartment_size', 'age']]
y = data['rental_price']

In [0]:
# Settings for the stochastic-gradient-descent linear regressor.
hyperparams = {
    # Ordinary least-squares loss. This was spelled 'squared_loss' before
    # scikit-learn 1.0; that spelling was removed in 1.2 and is rejected by
    # current releases, so the current name is used here.
    'loss': 'squared_error',
    # L1 regularisation term, weighted by `alpha`.
    'penalty': 'l1',
    'alpha': 0.0001,
    # Upper bound on passes over the training data; `tol` is the tolerance of
    # the stopping criterion that can end training earlier.
    'max_iter': 50000,
    'tol': 1e-3,
    # Fixed seed so repeated runs give the same fit.
    'random_state': 1909,
    # Initial learning rate.
    'eta0': 0.01,
}
model = SGDRegressor(**hyperparams)

In [0]:
# --- Polynomial branch -------------------------------------------------------
# Expands `apartment_size` into its powers 1..21 (no constant column) and then
# rescales every generated column with MinMaxScaler.
polynomial_features = ['apartment_size']
# Backward-compatible alias for the original (misspelled) variable name, in
# case another cell still refers to it.
polymomial_features = polynomial_features
polynomial_transformer = Pipeline([
    ('polynomials', PolynomialFeatures(degree=21, include_bias=False)),
    ('scaler', MinMaxScaler()),
])

# --- Plain numeric branch ----------------------------------------------------
# Passes the raw predictors through MinMaxScaler only.
# NOTE(review): `apartment_size` is also the degree-1 term of the polynomial
# branch, so its scaled value ends up in the design matrix twice -- confirm
# that this duplication is intended.
numeric_features = ['apartment_size', 'age']
numeric_transformer = Pipeline([
    ('scaler', MinMaxScaler()),
])

# Apply each branch to its own column list and concatenate the results.
preprocessor = ColumnTransformer([
    ('polynomial_transformer', polynomial_transformer, polynomial_features),
    ('numeric_transformer', numeric_transformer, numeric_features),
])

# Full estimator: preprocessing followed by the SGD regressor, so the same
# transformations are applied at fit and predict time.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', model),
])

# Fit on the complete dataset; `fit` returns the fitted pipeline itself.
pipeline = pipeline.fit(X, y)

In [84]:
# Score one hypothetical listing: 44 sqm, 10 years old. The frame uses the same
# column names the pipeline was fitted on.
new_apartment = pd.DataFrame({'apartment_size': [44], 'age': [10]})
prediction = pipeline.predict(new_apartment)
f'Prediction for a 10-year old 44sqm Apartment: {prediction[0]:.2f} EUR'

'Prediction for a 10-year old 44sqm Apartment: 521.70 EUR'

In [85]:
# 10-fold cross-validation of the whole pipeline. The scorer reports the
# negated mean squared error, for both the training and the held-out folds.
res_cv = cross_validate(
    pipeline,
    X,
    y,
    scoring='neg_mean_squared_error',
    cv=10,
    return_train_score=True,
)

# Undo the sign flip, take the root per fold, then average across folds.
res_rmse_tr = np.sqrt(np.abs(res_cv['train_score'])).mean()
res_rmse_te = np.sqrt(np.abs(res_cv['test_score'])).mean()
f'Average RMSE on Training and Test Set: {res_rmse_tr:.2f}/{res_rmse_te:.2f} EUR'

'Average RMSE on Training and Test Set: 23.92/34.39 EUR'