In [17]:
from datasets import load_happines
from tools import *
from sklearn.linear_model import Lasso
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
import plotly.express as px

In [18]:
# Load the train and test frames via the project's `datasets` helper and
# show the available columns as the cell output.
df_train, df_test = load_happines()
df_train.columns

Index(['region', 'economy', 'family', 'health', 'freedom', 'trust',
       'generosity', 'target'],
      dtype='object')

In [19]:
# Predictors: every column except 'region' and the response 'target'.
# NOTE(review): 'region' is presumably excluded because it is categorical — confirm.
train_X = df_train.drop(columns=['region', 'target'])
test_X = df_test.drop(columns=['region', 'target'])

# Response vectors for both splits.
train_y, test_y = df_train['target'], df_test['target']

# Remember the predictor names; later cells use them to label columns.
features = train_X.columns

In [20]:
# Standardization is required. The scaler is fitted on the training split
# only and the same fitted transform is then applied to both splits.
scaler = StandardScaler()
scaler.fit(train_X)

# scaler.transform returns a bare array, so wrap it back into a DataFrame
# carrying the original feature names.
train_X, test_X = (
    pd.DataFrame(scaler.transform(frame), columns=features)
    for frame in (train_X, test_X)
)

## Alpha selection

In [26]:
# Candidate regularisation strengths: n_alphas values spaced evenly on a
# log scale from 10**-2 to 10**0.
n_alphas = 80
alphas = np.logspace(start=-2, stop=0, num=n_alphas)

In [30]:
# Choose the regularisation strength by cross-validation on the TRAINING
# split only. The test split must stay untouched during model selection,
# otherwise the test score reported later is optimistically biased.
# cross_val_score is used with its default `cv` and `scoring` settings.
scores = [
    cross_val_score(Lasso(alpha=alpha), train_X, train_y).mean()
    for alpha in alphas
]

# One row per candidate alpha with its mean cross-validated score.
result = pd.DataFrame({'alpha': alphas, 'R^2 (mean)': scores})
px.line(result, x='alpha', y='R^2 (mean)')

In [36]:
# Coefficient path: refit Lasso for every candidate alpha and collect the
# fitted coefficient vector (one entry per alpha, in the order of `alphas`).
# The fit uses the training split, matching the data the final model is
# trained on.
coefs = []
for a in alphas:
    lasso = Lasso(alpha=a)
    lasso.fit(train_X, train_y)
    # Read the coefficients from the model fitted in this iteration.
    coefs.append(lasso.coef_)

In [37]:
# 'economy' stays non-zero the longest, which makes sense from a
# feature-selection point of view.
# Build a frame with one row per alpha: a column per feature coefficient
# plus the alpha value itself, then draw one line per feature.
result2 = pd.DataFrame(coefs, columns=features).assign(alphas=alphas)
px.line(result2, x='alphas', y=features)

In [44]:
# Fit the final Lasso model on the training split.
# NOTE(review): alpha=0.3 is hard-coded; presumably read off the
# alpha-selection plot — confirm and consider naming it as a constant.
model = Lasso(alpha=0.3).fit(train_X, train_y)

In [45]:
# Report the fitted model's score on both splits (prints a train and a test
# score). NOTE(review): `model_performance` presumably comes from the
# wildcard `tools` import — confirm.
model_performance(model, train_X, train_y, test_X, test_y)

Train score
0.6592449581831044
Test score
0.6392368300650815


## Polynomial Lasso

In [46]:
# Expand both splits with the project's `polynomial_features` helper, called
# with 3 as its second argument. The shape shown below is (158, 18), so
# presumably per-feature powers up to degree 3 without interaction terms —
# TODO confirm against the helper.
# NOTE(review): this overwrites train_X/test_X in place, so re-running the
# cell applies the expansion to already-expanded data. Restart the kernel and
# run all cells, or keep the expanded frames under new names.
train_X = polynomial_features(train_X, 3)
test_X = polynomial_features(test_X, 3)
train_X.shape

(158, 18)

In [47]:
# Refresh the feature-name list: train_X now holds the expanded columns.
features = train_X.columns

In [48]:
# Cross-validate every candidate alpha on the polynomial features of the
# TRAINING split only. Tuning on the test split would leak hold-out
# information into model selection and bias any later test score.
# cross_val_score is used with its default `cv` and `scoring` settings.
scores = [
    cross_val_score(Lasso(alpha=alpha), train_X, train_y).mean()
    for alpha in alphas
]

# One row per candidate alpha with its mean cross-validated score.
result = pd.DataFrame({'alpha': alphas, 'R^2 (mean)': scores})
px.line(result, x='alpha', y='R^2 (mean)')

In [49]:
# Coefficient path on the polynomial features: refit Lasso for every
# candidate alpha on the training split and collect the coefficient vectors
# (one entry per alpha, in the order of `alphas`).
coefs = []
for a in alphas:
    # The estimator is a Lasso, so name it accordingly.
    lasso = Lasso(alpha=a)
    lasso.fit(train_X, train_y)
    coefs.append(lasso.coef_)

In [50]:
# One row per alpha: a column per (expanded) feature coefficient plus the
# alpha value, drawn as one line per feature.
result2 = pd.DataFrame(coefs, columns=features).assign(alphas=alphas)
px.line(result2, x='alphas', y=features)