In [None]:
import importlib

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn import metrics
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Run configuration: dotted import path of the estimator class and the keyword
# arguments it is constructed with.
model = 'sklearn.ensemble.RandomForestRegressor'
# random_state pins the forest's bootstrap/feature sampling so a fresh
# Restart & Run All reproduces the same predictions and metrics (the
# train/test split is already seeded with the same value).
params = {'min_samples_leaf': 1, 'n_estimators': 50, 'random_state': 42}

In [None]:
# Echo the configured estimator path so it is recorded in the notebook output.
model

In [None]:
# Fetch the California housing dataset and unpack the feature matrix and
# the regression target in one step.
d = fetch_california_housing()
X, y = d['data'], d['target']

In [None]:
# Resolve the dotted path held in `model` to the class object it names:
# everything before the last dot is the module, the remainder is the attribute.
module, _, name = model.rpartition('.')
class_ = getattr(importlib.import_module(module), name)

In [None]:
# Show the dataset's feature names; they are reused further down as the column
# labels of the per-sample results frame.
list(d['feature_names'])

In [None]:
# Hold out 33% of the samples for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [None]:
# Instantiate the estimator with the configured keyword arguments.
# NOTE: this rebinds `model` from the dotted-path string to the estimator
# instance, so the cell that calls model.split('.') cannot be re-run after this
# one without first re-running the configuration cell.
model = class_(**params)

In [None]:
# Show the parameters the estimator was built with (the explicit values from
# `params` plus the estimator's own defaults).
model.get_params()

In [None]:
# Train on the training split only; the test split is not seen until prediction.
model.fit(X_train, y_train)

In [None]:
# Predict on the held-out split; y_pred is compared against y_test in the
# plotting and metric cells below.
y_pred = model.predict(X_test)

In [None]:
# Predicted vs. true target on the held-out split. Points on the dashed
# diagonal are perfect predictions.
fig, ax = plt.subplots()
sns.scatterplot(x=y_pred, y=y_test, ax=ax)

# Take the axis range from the data (with a small margin) instead of a fixed
# window, so the points fill the axes whatever the scale of the target is.
# Both axes share the range so the diagonal really is y == x.
lower = min(0, np.min(y_pred), np.min(y_test))
upper = max(np.max(y_pred), np.max(y_test))
pad = 0.05 * (upper - lower)
lower, upper = lower - pad if lower < 0 else lower, upper + pad

ax.plot([lower, upper], [lower, upper], color='grey', linestyle='--')
ax.set_xlim(lower, upper)
ax.set_ylim(lower, upper)
ax.set_xlabel('Predicted value')
ax.set_ylabel('True value')
ax.grid()

In [None]:
# Score the held-out predictions with each metric; every scorer takes
# (y_true, y_pred) in that order.
scorers = {
    'mae': metrics.mean_absolute_error,
    'mse': metrics.mean_squared_error,
    'r2': metrics.r2_score,
}
metrics_ = {label: scorer(y_test, y_pred) for label, scorer in scorers.items()}

In [None]:
# Render the metrics as a one-row table. index=[0] is needed because every
# value in metrics_ is a scalar, so pandas has no index to infer.
pd.DataFrame(metrics_, index=[0])

In [None]:
# Per-sample results frame: the test features under their dataset names, plus
# the true/predicted target and the absolute and squared prediction error.
residual = y_test - y_pred
df = pd.DataFrame(X_test, columns=d['feature_names']).assign(**{
    'y_true': y_test,
    'y_pred': y_pred,
    'error_abs': np.abs(residual),
    'error_sq': np.square(residual),
})

In [None]:
# Error broken down by HouseAge: averaging the per-sample absolute and squared
# errors within each group gives that group's MAE and MSE.
error_houseage = df.groupby('HouseAge')[['error_abs', 'error_sq']].mean()
error_houseage.columns = ['mae', 'mse']


def r2_score(df):
    """Return the R² of ``df.y_pred`` against ``df.y_true``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to score; must provide ``y_true`` and ``y_pred`` columns.

    Returns
    -------
    The value returned by ``metrics.r2_score`` for those two columns.
    """
    return metrics.r2_score(df.y_true, df.y_pred)


# Hand apply() only the two columns the scorer reads. Applying to the whole
# group frame also passes the grouping column itself, which pandas 2.2+
# deprecates (it emits a warning); selecting columns explicitly gives the same
# result on every pandas version.
r2 = pd.DataFrame(df.groupby('HouseAge')[['y_true', 'y_pred']].apply(r2_score))
r2.columns = ['r2']

# Both frames are indexed by HouseAge, so this lines up MAE/MSE with R² per age.
error_houseage.merge(r2, on='HouseAge')