In [61]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import r2_score, recall_score

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [62]:
# Load the California housing data as a feature matrix and a target vector.
housing = fetch_california_housing()
X = housing.data
y = housing.target

# Hold out a test set using the default split proportions; the fixed seed
# keeps the partition the same on every run.
splits = train_test_split(X, y, random_state=10)
X_train, X_test, y_train, y_test = splits

In [63]:
# Baseline: ordinary least-squares regression on every feature column.
m = LinearRegression()
m.fit(X_train, y_train)

# score() on a regressor reports R^2, so the explicit r2_score() line below
# prints the same number as the test score.
train_r2 = m.score(X_train, y_train)
test_r2 = m.score(X_test, y_test)
print('train score ', train_r2)
print('test score ', test_r2)
print(r2_score(y_test, m.predict(X_test)))

# Fitted parameters: one coefficient per feature column, then the intercept.
print(m.coef_)
print(m.intercept_)

train score  0.6112671771315213
test score  0.5859370991928383
0.5859370991928383
[ 4.43836332e-01  9.73627052e-03 -1.17810160e-01  7.79638749e-01
  3.46556567e-07 -4.51607938e-03 -4.31295340e-01 -4.44407256e-01]
-37.89883514628247


In [64]:
# Turn the problem into binary classification: the label is 1 when a row's
# median income (column 0 of the raw feature matrix) is below the dataset
# mean, and 0 otherwise.
X_medinc_mean = X[:, 0].mean()
filter1 = X[:, 0] < X_medinc_mean
cat_y = filter1.astype(int)

# Column 0 defines the label, so it is excluded from the classifier's inputs.
# The reduced matrix gets its own name instead of overwriting X: reassigning
# X here would make the cell non-idempotent (each re-run would drop one more
# column and derive the label from a different feature).
X_cls = X[:, 1:]

# Stratify on the label so both splits keep the same class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X_cls, cat_y, stratify=cat_y, random_state=10
)

In [65]:
# Logistic regression on the binary "below-mean income" label.
m = LogisticRegression(solver='liblinear', max_iter=1000)
m.fit(X_train, y_train)

# Predict once on the held-out rows and reuse the result for recall.
test_pred = m.predict(X_test)

# For a classifier, score() is mean accuracy on the given split.
print('train score ', m.score(X_train, y_train))
print('test score ', m.score(X_test, y_test))
print('recall ', recall_score(y_test, test_pred))

train score  0.8224160206718346
test score  0.8217054263565892
recall  0.8733755414861712


In [67]:
# Size-limited decision tree on the same binary label.
# random_state is fixed (same seed as the train/test splits) because the tree
# builder permutes features at every split; without a seed, ties between
# equally good splits can resolve differently from run to run, so the printed
# metrics would not be reproducible.
m = DecisionTreeClassifier(max_depth=5, max_leaf_nodes=30, random_state=10)
m.fit(X_train, y_train)

# Accuracy on both splits, then recall on the held-out rows.
print('train score ', m.score(X_train, y_train))
print('test score ', m.score(X_test, y_test))
print('recall ', recall_score(y_test, m.predict(X_test)))

# Report the size the fitted tree actually reached under the two caps.
print(m.get_depth())
print(m.get_n_leaves())

train score  0.818217054263566
test score  0.8071705426356589
recall  0.8680439853382206
5
30
