In [67]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.metrics import \
    r2_score, get_scorer
from sklearn.preprocessing import \
    StandardScaler, PolynomialFeatures

import warnings

def fxn():
    warnings.warn("deprecated", DeprecationWarning)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    fxn()

In [68]:
# load artifical data set into a DataFrame
url = "https://raw.githubusercontent.com/nandikachirala/trust-lending-AI/main/ArtificialData-For-Algorithm.csv"
dataset = pd.read_csv(url)

In [69]:
# summarize shape + first few lines
print(dataset.shape)
print(dataset.head(10))

(484, 6)
   Promise5  Promise10  AmountSent  MadePromise5  MadePromise10  \
0         6         16          10             1              1   
1        10          0           5             1              0   
2         9         12           5             1              1   
3        11         20          10             1              1   
4         0         17          10             0              1   
5         0         12          10             0              1   
6         0         25          10             0              1   
7         0         14          10             0              1   
8         8         14          10             1              1   
9         8         13           5             1              1   

   AmountReturned  
0              16  
1              10  
2               9  
3              20  
4              17  
5              12  
6               3  
7              14  
8              14  
9               8  


In [70]:
headers = list(dataset)
print(headers)
data = dataset.values

['Promise5', 'Promise10', 'AmountSent', 'MadePromise5', 'MadePromise10', 'AmountReturned']


In [71]:
# Evaluating Lasso Model (default Lasso parameter)

X, y = data[:, :-1], data[:, -1]

#Scale X
sc = StandardScaler()
X_scaled = sc.fit_transform(X)
X_scaled = pd.DataFrame(data = X_scaled)

model = Lasso(alpha=1.0)
# define model evaluation method
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate model
scores = cross_val_score(model, X_scaled, y, scoring='r2', cv=cv, n_jobs=-1)
# force scores to be positive
scores = np.absolute(scores)
print('Mean Absolute Error: %.3f (%.3f)' % (np.mean(scores), np.std(scores)))
print(scores)

Mean Absolute Error: 0.643 (0.086)
[0.74319281 0.48292904 0.61755224 0.73397196 0.68014473 0.62082296
 0.71642537 0.74786884 0.40611324 0.7081892  0.4450918  0.5709716
 0.6868564  0.75488916 0.59954779 0.66074291 0.64132589 0.69756108
 0.7130625  0.61737704 0.67914465 0.71207368 0.58680991 0.71239748
 0.70095556 0.60939177 0.57139946 0.60716345 0.68718489 0.58688678]


In [72]:
# Using Lasso to make predictions

# fit model
model.fit(X, y)
# new fake data
row = [7, 20, 5, 1, 1]
# make a prediction
yhat = model.predict([row])
# summarize prediction
print('Predicted: %.3f' % yhat)

Predicted: 17.982


In [76]:
# Tuning Hyperparameter Alpha

In [64]:
#Use LassoCV class to automatically find good hyperparameters
from sklearn.linear_model import LassoCV
model = LassoCV(alphas=np.arange(0, 1, 0.01), cv=cv, n_jobs=-1)
model.fit(X, y)
print('alpha: %f' % model.alpha_)

alpha: 0.030000


In [81]:
# Making Predictions with Tuned Alpha

model = Lasso(alpha=0.03)
# fit model
model.fit(X, y)
# new fake data
row = [7, 12, 10, 1, 1]
# make a prediction
yhat = model.predict([row])
# summarize prediction
print('Predicted: %.3f' % yhat)

Predicted: 11.758
