In [4]:
import os
os.chdir('..')

In [16]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV, train_test_split
from common.eval import cross_val_rmse

np.random.seed = 70

In [17]:
data = pd.read_csv('./data/dropped_correlatated.csv')

X = data.drop('rings', axis=1)
y = data['rings']

In [26]:
grid = {
    'alpha': [0.01, 0.05, 0.1, 0.5, 1, 2, 3],
    'normalize': [True, False],
    'solver': ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
    }

search = GridSearchCV(Ridge(), grid, cv=10)
search.fit(X, y)
best_params = search.best_params_
print("Best parameters found: ", best_params)

Best parameters found:  {'alpha': 0.5, 'normalize': True, 'solver': 'saga'}


In [28]:
model = Ridge(**best_params)
scores = cross_val_rmse(model, X, y, cv=10)
print(f"Mean score {np.mean(scores)}")

Mean score 2.5794337761826247


In [29]:
model = Ridge(**best_params)
X_train, X_test, y_train, y_test = train_test_split(X, y)
model.fit(X_train, y_train)


Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=True, random_state=None, solver='saga', tol=0.001)

In [30]:
predicts = np.around(model.predict(X_test))
results = pd.DataFrame({'true': y_test, 'predicted': predicts})
results.head()

Unnamed: 0,predicted,true
2603,12.0,9
42,7.0,5
1040,12.0,11
1582,9.0,9
4107,8.0,7


In [34]:
def diff_error(y, yhat):
    "An error calculated by the difference of the rounded predicted age to the actual value"
    diff = y - yhat
    return np.mean(np.sqrt(np.square(diff)))

In [35]:
msg = f"""
The mean difference from the original target rings,
using the rounded transformation of the predicted
value is: {diff_error(y_test, predicts)}

"""

print(msg)


The mean difference from the original target rings,
using the rounded transformation of the predicted
value is: 1.8976076555023924




### Experiment Conclusion

Using the features Sex (One Hot Encoding), Diameter, and Whole Weight targeting the Rings amount.