# Regularisation methods
Fit LASSO and Ridge regression models, then fine-tune the regularisation strength alpha (called lambda in the notes).
## Creating LASSO and RIDGE models

In [69]:
# import libraries
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.linear_model import Lasso, Ridge, RidgeCV
from sklearn.model_selection import RepeatedKFold

In [70]:
# Read the university ranking data from the CSV in the working directory
df = pd.read_csv("uni_data.csv")
# Preview the first five rows to check the columns loaded as expected
df.head(n=5)

Unnamed: 0,institution,national_rank,quality_of_education,alumni_employment,quality_of_faculty,publications,influence,citations,patents,score
0,Harvard University,1,7,9,1,1,1,1,5,100.0
1,Massachusetts Institute of Technology,2,9,17,3,12,4,4,1,91.67
2,Stanford University,3,17,11,5,4,2,2,15,89.5
3,University of Cambridge,1,10,24,4,16,16,11,50,86.17
4,California Institute of Technology,4,2,29,7,37,22,22,18,85.21


In [71]:
# Split the frame into features (x) and target (y).
# The last row is held out so the fitted models can be checked against a
# sample they never saw during training.
# Column slice 1:-1 skips the first column and the final `score` column
# (presumably the first column is the non-numeric institution name - confirm).
y = df["score"].iloc[:-1].values
x = df.iloc[:-1, 1:-1].values

# Held-out sample: same feature columns, last row only
testData = df.iloc[-1, 1:-1]
# Positional lookup, so the answer always comes from the same row as testData
# even if the frame's index is not the default 0..n-1 range
testAns = df["score"].iloc[-1]

In [72]:
# Instantiate the two regularised regressors with fixed penalty strengths;
# they are unfitted estimator objects at this point, as with Linear Regression
modelRIDGE = Ridge(alpha=1)
modelLASSO = Lasso(alpha=0.01)

In [73]:
# Train both estimators on the same training features and targets
modelLASSO.fit(X=x, y=y)
modelRIDGE.fit(X=x, y=y)

In [74]:
# Predict the held-out sample with each fitted model.
# The single sample is wrapped in a list so it is passed as one row.
heldOutRow = [testData]
predLASSO = modelLASSO.predict(heldOutRow)
predRIDGE = modelRIDGE.predict(heldOutRow)

In [75]:
# Report the held-out score next to each model's prediction.
# Each prediction array holds a single value (one sample was predicted), so
# take element 0 and format it explicitly instead of slicing the array's
# text representation, which breaks for values of other widths.
print(
    f"Real = {testAns}\n"
    f"LASSO Prediction = {predLASSO[0]:.5f}\n"
    f"RIDGE Prediction = {predRIDGE[0]:.5f}"
)

Real = 44.02
LASSO Prediction = 42.87359
RIDGE Prediction = 42.87337


## Tuning Alpha
Alpha is chosen using repeated k-fold cross-validation (CV).

In [76]:
# Cross-validation scheme used to evaluate each candidate alpha:
# 10 folds, repeated 3 times, with a fixed seed so the splits are reproducible
cv = RepeatedKFold(random_state=1, n_repeats=3, n_splits=10)

In [77]:
# Candidate regularisation strengths: 0.01, 0.02, ..., 1.00.
# A bare tuple such as (0.5, 1, 0.01) is read by RidgeCV as three individual
# candidates, not as (start, stop, step), so an explicit grid is built instead.
alphaGrid = np.linspace(0.01, 1.0, 100)

# Ridge regression that chooses alpha by cross-validated mean absolute error
# (the scorer is the negated error, so larger scores are better)
model = RidgeCV(alphas=alphaGrid, cv=cv, scoring="neg_mean_absolute_error")

# Fit on the training data; the selected alpha is then available as model.alpha_
model.fit(x, y)

In [78]:
# Show the alpha value selected by the cross-validated RidgeCV fit
model.alpha_

1.0