In [1]:
import random
from sklearn import datasets, linear_model
from sklearn import metrics

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import roc_auc_score, roc_curve, auc
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

from sklearn.preprocessing import Normalizer

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate

In [10]:
def read_data(run_num):
    data = np.genfromtxt("datasets/iris/pima-indians-diabetes.csv", delimiter=",")
    data_X = data[:, :8]   # all features 1-7
    
    # transformer = Normalizer().fit(data_X)
    # data_X = transformer.transform(data_X)
    
    data_y = data[:, -1]   # target
    
    # percent_test = 0.4
    # testsize = int(percent_test * data_X.shape[0])
    # X_train = data_X[:-testsize]
    # X_test = data_X[-testsize:]
    # y_train = data_y[:-testsize]
    # y_test = data_y[-testsize:]
    
    X_train, X_test, y_train, y_test = train_test_split(data_X, data_y, test_size=0.4, random_state=run_num)
    
    return X_train, X_test, y_train, y_test

def scipy_linear_mod(X_train, X_test, y_train, y_test, type_model):
    if type_model == 0:
        log_reg = linear_model.LogisticRegression(tol=0.1, solver='saga')
    elif type_model == 1:
        log_reg = linear_model.LogisticRegression(tol=0.1, solver='saga', penalty='l1')
    else:
        log_reg = linear_model.LogisticRegression(tol=0.1, solver='saga', penalty='l2')
        
        # saga: Stochastic Average Gradient descent 
        # tol: Tolerance for stopping criteria
        
    # Train the model
    log_reg.fit(X_train, y_train)
    
    # Make prediction
    y_pred = log_reg.predict(X_test)
    #print("y_pred: \n", y_pred)
    
    print("\nCoefficients: \n", log_reg.coef_)
    print("RMSE: %.2f" % mean_squared_error(y_pred, y_test, squared=False))
    print("Explained variance score: %.2f" % r2_score(y_test, y_pred))
    
    acc = accuracy_score(y_pred, y_test)
    print("Accuracy score: ", acc)
    
    auc = roc_auc_score(y_pred, y_test, average=None)
    print("AUC score: ", auc)
    
    corr_mat = confusion_matrix(y_pred, y_test)
    print("Confusion matrix: \n", corr_mat)
    
    #Rather than passing the class predictions (y_pred), to pass the probabilities
    y_pred_ = log_reg.predict_proba(X_test)[:, -1]
    
    # Otherwise
    #y_pred_ = log_reg.decision_function(X_test)
    
    lr_fpr, lr_tpr, _ = roc_curve(y_test, y_pred_)
    
    # Plot the ROC curve for the method 1
    plt.plot(lr_fpr, lr_tpr, marker='.', label='Logistic-model')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    plt.savefig('figures/'+str(type_model)+'plot.png')
    plt.clf()
    
    # Cross validation
    cv_results = cross_validate(log_reg, X_train, y_train, cv=10)
    print("10-fold Cross Validation: ", cv_results)
    
    return auc, acc

def main():
    max_expruns = 3
    accbase_all = np.zeros(max_expruns)
    accl1_all = np.zeros(max_expruns)
    accl2_all = np.zeros(max_expruns)
    
    for run_num in range(max_expruns):
        X_train, X_test, y_train, y_test = read_data(run_num)
        auc_base, acc_base = scipy_linear_mod(X_train, X_test, y_train, y_test, 0)
        auc_l1, acc_l1 = scipy_linear_mod(X_train, X_test, y_train, y_test, 1)
        auc_l2, acc_l2 = scipy_linear_mod(X_train, X_test, y_train, y_test, 2)
        
        accbase_all[run_num] = acc_base
        accl1_all[run_num] = acc_l1
        accl2_all[run_num] = acc_l2
        
        print('-- run num --', run_num)
        print('acc_base, acc_l1, acc_l2', acc_base, acc_l1, acc_l2)
    
    print('\naccbase_all', accbase_all)
    print('mean accbase_all', np.mean(accbase_all))
    print('std accbase_all', np.std(accbase_all))
    
    print('accl1_all', accl1_all)
    print('mean accl1_all', np.mean(accl1_all))
    print('std accl1_all', np.std(accl1_all))
    

if __name__ == '__main__':
    main()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Coefficients: 
 [[ 6.79917033e-04  5.42756385e-03 -1.39193074e-02 -3.33983272e-03
   1.27651284e-03 -2.40839037e-03 -3.86565901e-06 -1.70212256e-03]]
RMSE: 0.58
Explained variance score: -0.50
Accuracy score:  0.6655844155844156
AUC score:  0.5917266187050358
Confusion matrix: 
 [[190  88]
 [ 15  15]]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

10-fold Cross Validation:  {'fit_time': array([0.00099754, 0.00099754, 0.00199485, 0.00099707, 0.00099707,
       0.0009973 , 0.0009973 , 0.0009973 , 0.0009973 , 0.00099778]), 'score_time': array([0.        , 0.        , 0.        , 0.00099707, 0.        ,
       0.00099707, 0.        , 0.00099754, 0.        , 0.00099683]), 'test_score': array([0.69565217, 0.58695652, 0.65217391, 0.67391304, 0.65217391,
       0.65217391, 0.58695652, 0.69565217, 0.65217391, 0.60869565])}

Coefficients: 
 [[ 1.35227026e-03  5.73784183e-03 -1.50475737e-02 -2.76312252e-03
   1.02830280e-03 -2.11197465e-03  1.97151380e-05 -4.80943746e-04]]
RMSE: 0.58
Explained variance score: -0.49
Accuracy score:  0.6688311688311688
AUC score:  0.6010248049376965
Confusion matrix: 
 [[190  87]
 [ 15  16]]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

10-fold Cross Validation:  {'fit_time': array([0.00199485, 0.00099707, 0.00099635, 0.00099707, 0.00199485,
       0.00099754, 0.00099707, 0.00099754, 0.0009973 , 0.00099754]), 'score_time': array([0.00099778, 0.00099778, 0.00099754, 0.        , 0.        ,
       0.00099707, 0.        , 0.0009973 , 0.        , 0.00099707]), 'test_score': array([0.69565217, 0.58695652, 0.65217391, 0.67391304, 0.65217391,
       0.67391304, 0.56521739, 0.7173913 , 0.65217391, 0.60869565])}

Coefficients: 
 [[ 1.28120385e-03  5.81970918e-03 -1.45569965e-02 -3.35070371e-03
   1.16422823e-03 -2.80971953e-03  2.11463993e-05 -1.08220813e-03]]
RMSE: 0.58
Explained variance score: -0.49
Accuracy score:  0.6688311688311688
AUC score:  0.6010248049376965
Confusion matrix: 
 [[190  87]
 [ 15  16]]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

10-fold Cross Validation:  {'fit_time': array([0.0009973 , 0.00099707, 0.00099754, 0.00100493, 0.00099707,
       0.0009973 , 0.0009973 , 0.0009973 , 0.00099754, 0.00099754]), 'score_time': array([0.0009973 , 0.        , 0.        , 0.00099015, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ]), 'test_score': array([0.69565217, 0.58695652, 0.65217391, 0.67391304, 0.63043478,
       0.65217391, 0.58695652, 0.7173913 , 0.65217391, 0.60869565])}
-- run num -- 0
acc_base, acc_l1, acc_l2 0.6655844155844156 0.6688311688311688 0.6688311688311688


<IPython.core.display.Javascript object>


Coefficients: 
 [[ 1.04870712e-03  6.79338108e-03 -1.77931601e-02 -5.10790691e-03
   2.39562265e-03 -1.99627508e-03 -3.19358386e-05 -3.94781928e-05]]
RMSE: 0.63
Explained variance score: -0.73
Accuracy score:  0.6038961038961039
AUC score:  0.51699604743083
Confusion matrix: 
 [[165  88]
 [ 34  21]]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

10-fold Cross Validation:  {'fit_time': array([0.00199294, 0.00099707, 0.00095367, 0.00104308, 0.00101757,
       0.00199413, 0.00099707, 0.00099945, 0.00095129, 0.        ]), 'score_time': array([0.        , 0.        , 0.        , 0.00097752, 0.        ,
       0.        , 0.        , 0.00099635, 0.        , 0.00100064]), 'test_score': array([0.65217391, 0.67391304, 0.58695652, 0.69565217, 0.73913043,
       0.65217391, 0.69565217, 0.76086957, 0.69565217, 0.69565217])}

Coefficients: 
 [[ 1.10059290e-03  7.63729291e-03 -2.03543362e-02 -4.50145009e-03
   2.02424617e-03 -2.18105668e-03 -1.23715608e-05 -8.99863956e-04]]
RMSE: 0.62
Explained variance score: -0.66
Accuracy score:  0.6201298701298701
AUC score:  0.5232943469785576
Confusion matrix: 
 [[176  94]
 [ 23  15]]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

10-fold Cross Validation:  {'fit_time': array([0.00095725, 0.00099754, 0.0009973 , 0.00099921, 0.00099874,
       0.00195432, 0.0020051 , 0.00098228, 0.00099754, 0.00200176]), 'score_time': array([0.        , 0.        , 0.00099421, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ]), 'test_score': array([0.60869565, 0.67391304, 0.58695652, 0.69565217, 0.73913043,
       0.65217391, 0.69565217, 0.76086957, 0.69565217, 0.69565217])}

Coefficients: 
 [[ 1.89456382e-03  7.96641823e-03 -2.09061795e-02 -4.93749413e-03
   2.16358245e-03 -2.53959431e-03 -6.44556025e-05 -3.66811001e-04]]
RMSE: 0.63
Explained variance score: -0.73
Accuracy score:  0.6038961038961039
AUC score:  0.5009716941275877
Confusion matrix: 
 [[170  93]
 [ 29  16]]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

10-fold Cross Validation:  {'fit_time': array([0.00095177, 0.00204468, 0.00099301, 0.00099874, 0.00099683,
       0.00099683, 0.00100088, 0.00096989, 0.00099897, 0.00099587]), 'score_time': array([0.        , 0.        , 0.        , 0.00099659, 0.        ,
       0.00100064, 0.        , 0.00101829, 0.        , 0.        ]), 'test_score': array([0.63043478, 0.67391304, 0.58695652, 0.69565217, 0.73913043,
       0.65217391, 0.69565217, 0.80434783, 0.69565217, 0.69565217])}
-- run num -- 1
acc_base, acc_l1, acc_l2 0.6038961038961039 0.6201298701298701 0.6038961038961039


<IPython.core.display.Javascript object>


Coefficients: 
 [[ 2.93651687e-03  7.38741019e-03 -1.79937126e-02 -3.72163719e-03
   1.25788933e-03 -3.69470682e-03  5.40436174e-05 -9.54720032e-04]]
RMSE: 0.58
Explained variance score: -0.50
Accuracy score:  0.6655844155844156
AUC score:  0.5910714285714287
Confusion matrix: 
 [[191  89]
 [ 14  14]]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

10-fold Cross Validation:  {'fit_time': array([0.00199008, 0.00100255, 0.00099635, 0.00099707, 0.00099659,
       0.00099826, 0.00096035, 0.0009973 , 0.00099683, 0.00099587]), 'score_time': array([0.        , 0.00099301, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ]), 'test_score': array([0.63043478, 0.67391304, 0.65217391, 0.63043478, 0.65217391,
       0.63043478, 0.69565217, 0.67391304, 0.67391304, 0.65217391])}

Coefficients: 
 [[ 2.60061810e-03  6.43233138e-03 -1.73165699e-02 -3.61658270e-03
   1.28379160e-03 -3.01959579e-03  5.33846987e-05  4.95198260e-04]]
RMSE: 0.58
Explained variance score: -0.52
Accuracy score:  0.6623376623376623
AUC score:  0.5805983919862923
Confusion matrix: 
 [[191  90]
 [ 14  13]]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

10-fold Cross Validation:  {'fit_time': array([0.0029664 , 0.00199318, 0.00199604, 0.00199437, 0.00199914,
       0.00099683, 0.00099683, 0.00099802, 0.00099683, 0.00197244]), 'score_time': array([0.        , 0.00099683, 0.00099683, 0.0009973 , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ]), 'test_score': array([0.65217391, 0.67391304, 0.63043478, 0.63043478, 0.67391304,
       0.65217391, 0.69565217, 0.65217391, 0.67391304, 0.69565217])}

Coefficients: 
 [[ 3.03421517e-03  7.64060682e-03 -1.80074436e-02 -4.17024968e-03
   1.40469712e-03 -3.75544933e-03  3.73167877e-05 -6.34191272e-04]]
RMSE: 0.58
Explained variance score: -0.53
Accuracy score:  0.6590909090909091
AUC score:  0.5749547101449275
Confusion matrix: 
 [[188  88]
 [ 17  15]]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

10-fold Cross Validation:  {'fit_time': array([0.00095725, 0.00103664, 0.00099611, 0.00102329, 0.0009973 ,
       0.00097561, 0.00102115, 0.00099444, 0.00099826, 0.00099754]), 'score_time': array([0.        , 0.00095892, 0.00099754, 0.        , 0.        ,
       0.        , 0.00099444, 0.        , 0.00099611, 0.        ]), 'test_score': array([0.63043478, 0.67391304, 0.63043478, 0.63043478, 0.65217391,
       0.63043478, 0.69565217, 0.67391304, 0.67391304, 0.65217391])}
-- run num -- 2
acc_base, acc_l1, acc_l2 0.6655844155844156 0.6623376623376623 0.6590909090909091

accbase_all [0.66558442 0.6038961  0.66558442]


<IPython.core.display.Javascript object>

mean accbase_all 0.6450216450216452


<IPython.core.display.Javascript object>

std accbase_all 0.029080149009836367
accl1_all [0.66883117 0.62012987 0.66233766]


<IPython.core.display.Javascript object>

mean accl1_all 0.6504329004329005


<IPython.core.display.Javascript object>

std accl1_all 0.021590841280584423


<Figure size 432x288 with 0 Axes>