In [14]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.metrics import \
    r2_score, get_scorer
from sklearn.preprocessing import \
    StandardScaler, PolynomialFeatures

import warnings
warnings.filterwarnings('ignore')

In [15]:
# load artifical data set into a DataFrame
url = "https://raw.githubusercontent.com/nandikachirala/trust-lending-AI/main/ArtificialData-For-Algorithm.csv"
dataset = pd.read_csv(url)

In [16]:
# summarize shape + first few lines
print(dataset.shape)
print(dataset.head(10))
dataset.loc[dataset['AmountSent'] == 5]

(484, 6)
   Promise5  Promise10  AmountSent  MadePromise5  MadePromise10  \
0         6         16          10             1              1   
1        10          0           5             1              0   
2         9         12           5             1              1   
3        11         20          10             1              1   
4         0         17          10             0              1   
5         0         12          10             0              1   
6         0         25          10             0              1   
7         0         14          10             0              1   
8         8         14          10             1              1   
9         8         13           5             1              1   

   AmountReturned  
0              16  
1              10  
2               9  
3              20  
4              17  
5              12  
6               3  
7              14  
8              14  
9               8  


Unnamed: 0,Promise5,Promise10,AmountSent,MadePromise5,MadePromise10,AmountReturned
1,10,0,5,1,0,10
2,9,12,5,1,1,9
9,8,13,5,1,1,8
10,9,0,5,1,0,9
11,12,0,5,1,0,12
...,...,...,...,...,...,...
466,12,15,5,1,1,12
469,9,0,5,1,0,9
470,9,0,5,1,0,9
478,6,0,5,1,0,6


In [17]:
headers = list(dataset)
print(headers)
data = dataset.values

['Promise5', 'Promise10', 'AmountSent', 'MadePromise5', 'MadePromise10', 'AmountReturned']


In [18]:
# Evaluating Lasso Model (default Lasso parameter)

X, y = data[:, :-1], data[:, -1]

#Scale X
sc = StandardScaler()
X_scaled = sc.fit_transform(X)
X_scaled = pd.DataFrame(data = X_scaled)

model = Lasso(alpha=1.0)
# define model evaluation method
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate model
scores = cross_val_score(model, X_scaled, y, scoring='r2', cv=cv, n_jobs=-1)
# force scores to be positive
scores = np.absolute(scores)
print('Mean Absolute Error: %.3f (%.3f)' % (np.mean(scores), np.std(scores)))
print(scores)

Mean Absolute Error: 0.643 (0.086)
[0.74319281 0.48292904 0.61755224 0.73397196 0.68014473 0.62082296
 0.71642537 0.74786884 0.40611324 0.7081892  0.4450918  0.5709716
 0.6868564  0.75488916 0.59954779 0.66074291 0.64132589 0.69756108
 0.7130625  0.61737704 0.67914465 0.71207368 0.58680991 0.71239748
 0.70095556 0.60939177 0.57139946 0.60716345 0.68718489 0.58688678]


In [19]:
# Using Lasso to make predictions

# fit model
model.fit(X, y)
# new fake data
row = [7, 20, 10, 1, 1]
# make a prediction
yhat = model.predict([row])
# summarize prediction
print('Predicted: %.3f' % yhat)

Predicted: 18.589


In [20]:
# Tuning Hyperparameter Alpha

In [21]:
#Use LassoCV class to automatically find good hyperparameters
from sklearn.linear_model import LassoCV
model = LassoCV(alphas=np.arange(0, 1, 0.01), cv=cv, n_jobs=-1)
model.fit(X, y)
print('alpha: %f' % model.alpha_)

alpha: 0.030000


In [22]:
# Making Predictions with Tuned Alpha

model = Lasso(alpha=0.03)
# fit model
model.fit(X, y)
# new fake data
row = [7, 20, 10, 1, 1]
# make a prediction
yhat = model.predict([row])
# summarize prediction
print('Predicted: %.3f' % yhat)
print(model.coef_)
print(model.intercept_)

Predicted: 19.040
[ 0.33066889  0.91024821  0.42739431 -0.         -8.00661198]
2.252941601399767


In [23]:
#Try to form two Lasso models: training Send 5 and Send 10
datasetFive = dataset.loc[dataset['AmountSent'] == 5]
datasetTen = dataset.loc[dataset['AmountSent'] == 10]
dataFive = datasetFive.values
dataTen = datasetTen.values

In [28]:
# Model for 5

Xfive, yfive = dataFive[:, :-1], dataFive[:, -1]

model = Lasso(alpha=1.0)
# define model evaluation method
cvFive = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate model
scores = cross_val_score(model, Xfive, yfive, scoring='r2', cv=cv, n_jobs=-1)
# force scores to be positive
scores = np.absolute(scores)
print('Mean Absolute Error: %.3f (%.3f)' % (np.mean(scores), np.std(scores)))

# tune alpha for 5
from sklearn.linear_model import LassoCV
modelFive = LassoCV(alphas=np.arange(0, 1, 0.01), cv=cvFive, n_jobs=-1)
modelFive.fit(Xfive, yfive)
print('alpha: %f' % modelFive.alpha_)

# predict using tuned alpha
modelFive = Lasso(alpha=0.18)
# fit model
modelFive.fit(Xfive, yfive)
# new fake data
row = [10, 20, 5, 1, 1]
# make a prediction
predictFive = modelFive.predict([row])
# summarize prediction
print('Predicted: %.3f' % predictFive)
print(modelFive.coef_)
print(modelFive.intercept_)

Mean Absolute Error: 0.872 (0.133)
alpha: 0.180000
Predicted: 9.662
[ 0.95142494 -0.          0.          0.         -0.        ]
0.1474723127870412


In [29]:
# Model for 10

Xten, yten = dataTen[:, :-1], dataTen[:, -1]

modelTen = Lasso(alpha=1.0)
# define model evaluation method
cvTen = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate model
scores = cross_val_score(modelTen, Xten, yten, scoring='r2', cv=cvTen, n_jobs=-1)
# force scores to be positive
scores = np.absolute(scores)
print('Mean Absolute Error: %.3f (%.3f)' % (np.mean(scores), np.std(scores)))

# tune alpha for 10
from sklearn.linear_model import LassoCV
modelTen = LassoCV(alphas=np.arange(0, 1, 0.01), cv=cvTen, n_jobs=-1)
modelTen.fit(Xten, yten)
print('alpha: %f' % modelTen.alpha_)

# predict using tuned alpha
modelTen = Lasso(alpha=0.03)
# fit model
modelTen.fit(Xten, yten)
# new fake data
row = [10, 20, 10, 1, 1]
# make a prediction
predictTen = modelTen.predict([row])
# summarize prediction
print('Predicted: %.3f' % predictTen)
print(modelTen.coef_)
print(modelTen.intercept_)

Mean Absolute Error: 0.746 (0.221)
alpha: 0.030000
Predicted: 19.643
[ 0.13214656  0.91321253  0.         -0.          0.        ]
0.057039738582982835


In [30]:
predictFive = float(predictFive) + 5
predictTen = float(predictTen)
outcomeMap = {10:0, predictFive:5, predictTen:10}
bestDecision = max(10, predictFive, predictTen)
print(bestDecision)
print(outcomeMap[bestDecision])

19.64275579874844
10
