In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from Adam import AdamOptim
from SGD import SGD
from utils import *
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score, accuracy_score
from Logreg import LogisticRegression
from irls_optimizer import IRLS
from sklearn.preprocessing import LabelEncoder
np.seterr(divide = 'ignore') 
np.seterr(invalid='ignore')
np.seterr(over ='ignore')
from sklearn.preprocessing import StandardScaler
from scipy.io import arff

# Biodeg

## Balanced accuracy

In [2]:
arff_file = arff.loadarff('data/big/biodeg.arff')
df = pd.DataFrame(arff_file[0])

FileNotFoundError: [Errno 2] No such file or directory: 'data/big/biodeg.arff'

In [None]:
y = df.iloc[:,-1].astype(int).values -1
X = df.iloc[:,:-1].values

In [None]:
#All
splitting_seeds = [42, 43, 44, 45, 46]

balancedAdam = []
balancedSGD = []
balancedIRLS = []

for seed in splitting_seeds:
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=seed)
    Xtrain, Xval, ytrain, yval = train_test_split(Xtrain, ytrain, test_size=0.2, random_state=seed)
    
    scaler = StandardScaler()
    Xtrain = scaler.fit_transform(Xtrain)
    Xval = scaler.transform(Xval)
    Xtest = scaler.transform(Xtest)

    
    logAdam = LogisticRegression(input_dim=Xtrain.shape[1])
    logSGD = LogisticRegression(input_dim=Xtrain.shape[1])
    logIRLS = LogisticRegression(input_dim=Xtrain.shape[1])
    logAdam.train(Xtrain, ytrain, optimizer=AdamOptim(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
    logSGD.train(Xtrain, ytrain, optimizer=SGD(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
    logIRLS.train(Xtrain, ytrain, optimizer=IRLS(), epochs=500, batch_size=Xtrain.shape[0], X_val=Xval, y_val=yval, patience=5)
    
    predAdam = logAdam.predict(Xtest)
    predSGD = logSGD.predict(Xtest)
    predIRLS = logIRLS.predict(Xtest)
    
    balancedAdam.append(balanced_accuracy_score(ytest,predAdam.round()))
    balancedSGD.append(balanced_accuracy_score(ytest,predSGD.round()))
    balancedIRLS.append(balanced_accuracy_score(ytest,predIRLS.round()))
    

    
biodegBalanced = {"adam": balancedAdam,
                    "sgd": balancedSGD,
                    "irls": balancedIRLS}

In [None]:
biodegBalanced

# Convergence

In [None]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=2)
Xtrain, Xval, ytrain, yval = train_test_split(Xtrain, ytrain, test_size=0.2, random_state=2)

scaler = StandardScaler()
Xtrain = scaler.fit_transform(Xtrain)
Xval = scaler.transform(Xval)

logAdam = LogisticRegression(input_dim=Xtrain.shape[1])
logSGD = LogisticRegression(input_dim=Xtrain.shape[1])
logIRLS = LogisticRegression(input_dim=Xtrain.shape[1])
logAdam.train(Xtrain, ytrain, optimizer=AdamOptim(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
logSGD.train(Xtrain, ytrain, optimizer=SGD(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
logIRLS.train(Xtrain, ytrain, optimizer=IRLS(), epochs=500, batch_size=Xtrain.shape[0], X_val=Xval, y_val=yval, patience=5)

biodegConvergence = {"adam": logAdam.get_params()[4],
                       "sgd": logSGD.get_params()[4],
                       "irls": logIRLS.get_params()[4]}

In [None]:
logAdam.plot_loss()

In [None]:
logSGD.plot_loss()

In [None]:
logIRLS.plot_loss()

In [None]:
for key, value in biodegConvergence.items():
    np.save(f"results/loss/{key}/biodeg", np.array(value))

## Comparison with other models

In [None]:
#All
splitting_seeds = [42, 43, 44, 45, 46]

# compAdam = []
# compSGD = []
# compIRLS = []
compLDA = []
compQDA = []
compTree = []
compForest = []

for seed in splitting_seeds:
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=seed)
    
    scaler = StandardScaler()
    Xtrain = scaler.fit_transform(Xtrain)
    Xtest = scaler.transform(Xtest)

    
#     logAdam = LogisticRegression(input_dim=Xtrain.shape[1])
#     logSGD = LogisticRegression(input_dim=Xtrain.shape[1])
#     logIRLS = LogisticRegression(input_dim=Xtrain.shape[1])
#     logAdam.train(Xtrain, ytrain, optimizer=AdamOptim(eta=0.01), epochs=500, batch_size=32, patience=5)
#     logSGD.train(Xtrain, ytrain, optimizer=SGD(eta=0.01), epochs=500, batch_size=32, patience=5)
#     logIRLS.train(Xtrain, ytrain, optimizer=IRLS(), epochs=500, batch_size=32, patience=5)
    
#     predAdam = logAdam.predict(Xtest)
#     predSGD = logSGD.predict(Xtest)
#     predIRLS = logIRLS.predict(Xtest)
    
#     compAdam.append(balanced_accuracy_score(ytest,predAdam.round()))
#     compSGD.append(balanced_accuracy_score(ytest,predSGD.round()))
#     compIRLS.append(balanced_accuracy_score(ytest,predIRLS.round()))
    
    lda, qda, tree, forest = fitComparisonModels(Xtrain, ytrain, Xtest)
    
    compLDA.append(balanced_accuracy_score(lda, ytest))
    compQDA.append(balanced_accuracy_score(qda, ytest))
    compTree.append(balanced_accuracy_score(tree, ytest))
    compForest.append(balanced_accuracy_score(forest, ytest))
    
    
biodegComp = { "lda": compLDA,
                "qda": compQDA,
                "dt": compTree,
                "rf": compForest}
    


In [None]:
biodegComp

# Parkinson

## Balanced accuracy

In [None]:
parkinson = pd.read_csv("data/big/parkinsons.csv")
y = parkinson["status"].values
X = parkinson.drop(['name', 'status'], axis=1).values

In [None]:
#All
splitting_seeds = [42, 43, 44, 45, 46]

balancedAdam = []
balancedSGD = []
balancedIRLS = []

for seed in splitting_seeds:
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=seed)
    Xtrain, Xval, ytrain, yval = train_test_split(Xtrain, ytrain, test_size=0.2, random_state=seed)
    
    scaler = StandardScaler()
    Xtrain = scaler.fit_transform(Xtrain)
    Xval = scaler.transform(Xval)
    Xtest = scaler.transform(Xtest)

    
    logAdam = LogisticRegression(input_dim=Xtrain.shape[1])
    logSGD = LogisticRegression(input_dim=Xtrain.shape[1])
    logIRLS = LogisticRegression(input_dim=Xtrain.shape[1])
    logAdam.train(Xtrain, ytrain, optimizer=AdamOptim(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
    logSGD.train(Xtrain, ytrain, optimizer=SGD(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
    logIRLS.train(Xtrain, ytrain, optimizer=IRLS(), epochs=500, batch_size=Xtrain.shape[0], X_val=Xval, y_val=yval, patience=5)
    
    predAdam = logAdam.predict(Xtest)
    predSGD = logSGD.predict(Xtest)
    predIRLS = logIRLS.predict(Xtest)
    
    balancedAdam.append(balanced_accuracy_score(ytest,predAdam.round()))
    balancedSGD.append(balanced_accuracy_score(ytest,predSGD.round()))
    balancedIRLS.append(balanced_accuracy_score(ytest,predIRLS.round()))
    

    
parkinsonBalanced = {"adam": balancedAdam,
                    "sgd": balancedSGD,
                    "irls": balancedIRLS}

In [None]:
parkinsonBalanced = {"adam": balancedAdam,
                    "sgd": balancedSGD,
                    "irls": balancedIRLS}

## Convergence

In [None]:
Xtrain, Xval, ytrain, yval = train_test_split(Xtrain, ytrain, test_size=0.2, random_state=1)
scaler = StandardScaler()
Xtrain = scaler.fit_transform(Xtrain)
Xval = scaler.transform(Xval)

logAdam = LogisticRegression(input_dim=Xtrain.shape[1])
logSGD = LogisticRegression(input_dim=Xtrain.shape[1])
logIRLS = LogisticRegression(input_dim=Xtrain.shape[1])
logAdam.train(Xtrain, ytrain, optimizer=AdamOptim(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
logSGD.train(Xtrain, ytrain, optimizer=SGD(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
logIRLS.train(Xtrain, ytrain, optimizer=IRLS(), epochs=500, batch_size=Xtrain.shape[0], X_val=Xval, y_val=yval, patience=5)

In [None]:
logAdam.plot_loss()

In [None]:
logSGD.plot_loss()

In [None]:
logIRLS.plot_loss()

In [None]:
parkinsonConvergence = {"adam": logAdam.get_params()[4],
                       "sgd": logSGD.get_params()[4],
                       "irls": logIRLS.get_params()[4]}

In [None]:
for key, value in parkinsonConvergence.items():
    np.save(f"results/loss/{key}/parkinson", np.array(value))

## Comparison with other models

In [None]:
#All
splitting_seeds = [42, 43, 44, 45, 46]

# compAdam = []
# compSGD = []
# compIRLS = []
compLDA = []
compQDA = []
compTree = []
compForest = []

for seed in splitting_seeds:
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=seed)
    
    scaler = StandardScaler()
    Xtrain = scaler.fit_transform(Xtrain)
    Xtest = scaler.transform(Xtest)

    
#     logAdam = LogisticRegression(input_dim=Xtrain.shape[1])
#     logSGD = LogisticRegression(input_dim=Xtrain.shape[1])
#     logIRLS = LogisticRegression(input_dim=Xtrain.shape[1])
#     logAdam.train(Xtrain, ytrain, optimizer=AdamOptim(eta=0.01), epochs=500, batch_size=32, patience=5)
#     logSGD.train(Xtrain, ytrain, optimizer=SGD(eta=0.01), epochs=500, batch_size=32, patience=5)
#     logIRLS.train(Xtrain, ytrain, optimizer=IRLS(), epochs=500, batch_size=32, patience=5)
    
#     predAdam = logAdam.predict(Xtest)
#     predSGD = logSGD.predict(Xtest)
#     predIRLS = logIRLS.predict(Xtest)
    
#     compAdam.append(balanced_accuracy_score(ytest,predAdam.round()))
#     compSGD.append(balanced_accuracy_score(ytest,predSGD.round()))
#     compIRLS.append(balanced_accuracy_score(ytest,predIRLS.round()))
    
    lda, qda, tree, forest = fitComparisonModels(Xtrain, ytrain, Xtest)
    
    compLDA.append(balanced_accuracy_score(lda, ytest))
    compQDA.append(balanced_accuracy_score(qda, ytest))
    compTree.append(balanced_accuracy_score(tree, ytest))
    compForest.append(balanced_accuracy_score(forest, ytest))
    
    
parkinsonComp = {#"adam": compAdam,
                #"sgd": compSGD,
                #"irls": compIRLS,
                "lda": compLDA,
                "qda": compQDA,
                "dt": compTree,
                "rf": compForest}
    


In [None]:
parkinsonComp

# Diabetes

## Balanced accuracy

In [49]:
diabetes = pd.read_csv("data/small/diabetes.csv")
y = diabetes["Outcome"].values
X = diabetes.iloc[:, :-1].values

In [None]:
#All
splitting_seeds = [42, 43, 44, 45, 46]

balancedAdam = []
balancedSGD = []
balancedIRLS = []

for seed in splitting_seeds:
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=seed)
    Xtrain, Xval, ytrain, yval = train_test_split(Xtrain, ytrain, test_size=0.2, random_state=seed)
    
    scaler = StandardScaler()
    Xtrain = scaler.fit_transform(Xtrain)
    Xval = scaler.transform(Xval)
    Xtest = scaler.transform(Xtest)

    
    logAdam = LogisticRegression(input_dim=Xtrain.shape[1])
    logSGD = LogisticRegression(input_dim=Xtrain.shape[1])
    logIRLS = LogisticRegression(input_dim=Xtrain.shape[1])
    logAdam.train(Xtrain, ytrain, optimizer=AdamOptim(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
    logSGD.train(Xtrain, ytrain, optimizer=SGD(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
    logIRLS.train(Xtrain, ytrain, optimizer=IRLS(), epochs=500, batch_size=Xtrain.shape[0], X_val=Xval, y_val=yval, patience=5)
    
    predAdam = logAdam.predict(Xtest)
    predSGD = logSGD.predict(Xtest)
    predIRLS = logIRLS.predict(Xtest)
    
    balancedAdam.append(balanced_accuracy_score(ytest,predAdam.round()))
    balancedSGD.append(balanced_accuracy_score(ytest,predSGD.round()))
    balancedIRLS.append(balanced_accuracy_score(ytest,predIRLS.round()))

diabetesBalanced = {"adam": balancedAdam,
                    "sgd": balancedSGD,
                    "irls": balancedIRLS}

In [None]:
print("adam:", balancedAdam)
print("sgd:", balancedSGD)
print("IRLS:", balancedIRLS)

## Convergence

In [None]:
Xtrain, Xval, ytrain, yval = train_test_split(Xtrain, ytrain, test_size=0.2, random_state=1)
scaler = StandardScaler()
Xtrain = scaler.fit_transform(Xtrain)
Xval = scaler.transform(Xval)

logAdam = LogisticRegression(input_dim=Xtrain.shape[1])
logSGD = LogisticRegression(input_dim=Xtrain.shape[1])
logIRLS = LogisticRegression(input_dim=Xtrain.shape[1])
logAdam.train(Xtrain, ytrain, optimizer=AdamOptim(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
logSGD.train(Xtrain, ytrain, optimizer=SGD(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
logIRLS.train(Xtrain, ytrain, optimizer=IRLS(), epochs=500, batch_size=Xtrain.shape[0], X_val=Xval, y_val=yval, patience=5)

In [None]:
logAdam.plot_loss()

In [None]:
logSGD.plot_loss()

In [None]:
logIRLS.plot_loss()

In [None]:
diabetesConvergence = {"adam": logAdam.get_params()[4],
                       "sgd": logSGD.get_params()[4],
                       "irls": logIRLS.get_params()[4]}

In [None]:
for key, value in diabetesConvergence.items():
    np.save(f"results/loss/{key}/diabetes", np.array(value))

## Comparison with othe models

In [None]:
#All
splitting_seeds = [42, 43, 44, 45, 46]

# compAdam = []
# compSGD = []
# compIRLS = []
compLDA = []
compQDA = []
compTree = []
compForest = []

for seed in splitting_seeds:
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=seed)
    
    scaler = StandardScaler()
    Xtrain = scaler.fit_transform(Xtrain)
    Xtest = scaler.transform(Xtest)

    
#     logAdam = LogisticRegression(input_dim=Xtrain.shape[1])
#     logSGD = LogisticRegression(input_dim=Xtrain.shape[1])
#     logIRLS = LogisticRegression(input_dim=Xtrain.shape[1])
#     logAdam.train(Xtrain, ytrain, optimizer=AdamOptim(eta=0.01), epochs=500, batch_size=32, patience=5)
#     logSGD.train(Xtrain, ytrain, optimizer=SGD(eta=0.01), epochs=500, batch_size=32, patience=5)
#     logIRLS.train(Xtrain, ytrain, optimizer=IRLS(), epochs=500, batch_size=32, patience=5)
    
#     predAdam = logAdam.predict(Xtest)
#     predSGD = logSGD.predict(Xtest)
#     predIRLS = logIRLS.predict(Xtest)
    
#     compAdam.append(balanced_accuracy_score(ytest,predAdam.round()))
#     compSGD.append(balanced_accuracy_score(ytest,predSGD.round()))
#     compIRLS.append(balanced_accuracy_score(ytest,predIRLS.round()))
    
    lda, qda, tree, forest = fitComparisonModels(Xtrain, ytrain, Xtest)
    
    compLDA.append(balanced_accuracy_score(lda, ytest))
    compQDA.append(balanced_accuracy_score(qda, ytest))
    compTree.append(balanced_accuracy_score(tree, ytest))
    compForest.append(balanced_accuracy_score(forest, ytest))
    
    
diabetesComp = {#"adam": compAdam,
                #"sgd": compSGD,
                #"irls": compIRLS,
                "lda": compLDA,
                "qda": compQDA,
                "dt": compTree,
                "rf": compForest}
    


In [None]:
diabetesComp

## Interactions

In [50]:
Xint = createFeatureInteractions(X)

In [None]:
splitting_seeds = [42, 43, 44, 45, 46]

intAdam = []
intSGD = []
intIRLS = []

for seed in splitting_seeds:
    Xtrain, Xtest, ytrain, ytest = train_test_split(Xint, y, test_size=0.2, random_state=seed)
    Xtrain, Xval, ytrain, yval = train_test_split(Xtrain, ytrain, test_size=0.2, random_state=seed)
    
    scaler = StandardScaler()
    Xtrain = scaler.fit_transform(Xtrain)
    Xval = scaler.transform(Xval)
    Xtest = scaler.transform(Xtest)

    
    logAdam = LogisticRegression(input_dim=Xtrain.shape[1])
    logSGD = LogisticRegression(input_dim=Xtrain.shape[1])
    logIRLS = LogisticRegression(input_dim=Xtrain.shape[1])
    logAdam.train(Xtrain, ytrain, optimizer=AdamOptim(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
    logSGD.train(Xtrain, ytrain, optimizer=SGD(eta=0.01), epochs=500, batch_size=32, X_val=Xval, y_val=yval, patience=5)
    logIRLS.train(Xtrain, ytrain, optimizer=IRLS(), epochs=500, batch_size=Xtrain.shape[0], X_val=Xval, y_val=yval, patience=5)
    
    predAdam = logAdam.predict(Xtest)
    predSGD = logSGD.predict(Xtest)
    predIRLS = logIRLS.predict(Xtest)
    
    intAdam.append(balanced_accuracy_score(ytest,predAdam.round()))
    intSGD.append(balanced_accuracy_score(ytest,predSGD.round()))
    intIRLS.append(balanced_accuracy_score(ytest,predIRLS.round()))
    

    
diabetesInt = {"adam": intAdam,
                    "sgd": intSGD,
                    "irls": intIRLS}

In [51]:
#All
splitting_seeds = [42, 43, 44, 45, 46]

# compAdam = []
# compSGD = []
# compIRLS = []
compLDA = []
compQDA = []
compTree = []
compForest = []

for seed in splitting_seeds:
    Xtrain, Xtest, ytrain, ytest = train_test_split(Xint, y, test_size=0.2, random_state=seed)
    
    scaler = StandardScaler()
    Xtrain = scaler.fit_transform(Xtrain)
    Xtest = scaler.transform(Xtest)

    
#     logAdam = LogisticRegression(input_dim=Xtrain.shape[1])
#     logSGD = LogisticRegression(input_dim=Xtrain.shape[1])
#     logIRLS = LogisticRegression(input_dim=Xtrain.shape[1])
#     logAdam.train(Xtrain, ytrain, optimizer=AdamOptim(eta=0.01), epochs=500, batch_size=32, patience=5)
#     logSGD.train(Xtrain, ytrain, optimizer=SGD(eta=0.01), epochs=500, batch_size=32, patience=5)
#     logIRLS.train(Xtrain, ytrain, optimizer=IRLS(), epochs=500, batch_size=32, patience=5)
    
#     predAdam = logAdam.predict(Xtest)
#     predSGD = logSGD.predict(Xtest)
#     predIRLS = logIRLS.predict(Xtest)
    
#     compAdam.append(balanced_accuracy_score(ytest,predAdam.round()))
#     compSGD.append(balanced_accuracy_score(ytest,predSGD.round()))
#     compIRLS.append(balanced_accuracy_score(ytest,predIRLS.round()))
    
    lda, qda, tree, forest = fitComparisonModels(Xtrain, ytrain, Xtest)
    
    compLDA.append(balanced_accuracy_score(lda, ytest))
    compQDA.append(balanced_accuracy_score(qda, ytest))
    compTree.append(balanced_accuracy_score(tree, ytest))
    compForest.append(balanced_accuracy_score(forest, ytest))
    
    
diabetesIntComp = {#"adam": compAdam,
                #"sgd": compSGD,
                #"irls": compIRLS,
                "lda": compLDA,
                "qda": compQDA,
                "dt": compTree,
                "rf": compForest}
    


In [52]:
diabetesIntComp

{'lda': [0.7974065598779558,
  0.8021101705312231,
  0.7983511285308825,
  0.7596558496212475,
  0.7927899686520377],
 'qda': [0.823069403714565,
  0.817120236102046,
  0.7819449927622055,
  0.7658904579319804,
  0.7953333956794164],
 'dt': [0.9784894194832083,
  0.9796115477230029,
  0.9605615161499254,
  0.9622908186341023,
  0.9582816505416356],
 'rf': [0.9892447097416042,
  0.9893333333333334,
  0.9752710317800852,
  0.9719728425541465,
  0.9757041858377287]}

In [None]:
diabetesIntConvergence = {"adam": logAdam.get_params()[4],
                       "sgd": logSGD.get_params()[4],
                       "irls": logIRLS.get_params()[4]}

In [None]:
for key, value in diabetesIntConvergence.items():
    np.save(f"results/loss/{key}/diabetesInt", np.array(value))

# wraping up the results

In [None]:
balancedAccuracy = pd.DataFrame(columns=["data_set", "optimizer", "seed_42_acc", "seed_43_acc", "seed_44_acc", "seed_45_acc", "seed_46_acc"])

In [None]:
balanced = {"biodeg": biodegBalanced,
           "parkinson": parkinsonBalanced,
           "diabetes": diabetesBalanced}
for dataSetName, value in balanced.items():
    for opt, acc in value.items():
        balancedAccuracy.loc[balancedAccuracy.shape[0]] = [dataSetName+"_data", opt] + acc

In [None]:
comp = {"biodeg": biodegComp,
           "parkinson": parkinsonComp,
           "diabetes": diabetesComp}
for dataSetName, value in comp.items():
    for opt, acc in value.items():
        balancedAccuracy.loc[balancedAccuracy.shape[0]] = [dataSetName+"_data", opt] + acc

In [None]:
for opt, acc in diabetesInt.items():
    balancedAccuracy.loc[balancedAccuracy.shape[0]] = ["diabetes_data", opt+"+int"] + acc

In [55]:
balancedAccuracy

Unnamed: 0,data_set,optimizer,seed_42_acc,seed_43_acc,seed_44_acc,seed_45_acc,seed_46_acc
0,biodeg_data,adam,0.853137,0.846035,0.855349,0.873536,0.86049
1,biodeg_data,sgd,0.864167,0.860651,0.862977,0.860234,0.849461
2,biodeg_data,irls,0.853824,0.83142,0.808837,0.881179,0.850147
3,parkinson_data,adam,0.667411,0.8125,0.757143,0.824286,0.748276
4,parkinson_data,sgd,0.683036,0.875,0.797143,0.852857,0.681034
5,parkinson_data,irls,0.738839,0.96875,0.852857,0.872857,0.763793
6,diabetes_data,adam,0.764069,0.746,0.728917,0.719246,0.737562
7,diabetes_data,sgd,0.747614,0.742667,0.719516,0.719246,0.73564
8,diabetes_data,irls,0.73741,0.746,0.728917,0.719246,0.733774
9,biodeg_data,lda,0.837645,0.874608,0.84372,0.854879,0.844853


In [56]:
balancedAccuracy.to_csv("results/balance_accuracy_3.csv", index=False)

In [54]:
balancedAccuracy = pd.read_csv("results/balance_accuracy_3.csv")
for opt, acc in diabetesIntComp.items():
        balancedAccuracy.loc[balancedAccuracy.shape[0]] = ["daibetes_data", opt+"+int"] + acc

# Plots

## Loss

In [57]:
import ast
with open(file_path, 'r') as file:
    # Read each line using a loop
    skip_first_line = True
    
    for line in file:
        if skip_first_line:
            skip_first_line = False
            continue      
        parts = line.strip().split(',')
        lista = ','.join(parts[2:])
        lista = lista.strip()
        loss = np.array(ast.literal_eval(lista))
        opt = parts[1].strip()
        dataSet = parts[0].strip()
        try:
            np.save(f"results/loss/{opt}/{dataSet}", loss)
        except:
            dataSet += "Int"
            opt = opt.split("+")[0]
            np.save(f"results/loss/{opt}/{dataSet}", loss)

In [42]:
lista

'[0.4256747085092538, 0.3350668882183697, 0.28988213888136743, 0.2626156594560849, 0.24427071012890195, 0.23103322849234267, 0.22100356810845903, 0.21312601713233456, 0.20676539907141644, 0.20151583998247943, 0.19710543634125377, 0.19334495735245408, 0.1900985066089237, 0.18726588551065, 0.18477154169144253, 0.1825573960798763, 0.1805780448580153, 0.17879746494625282, 0.17718669942080686, 0.1757221981217846, 0.17438460634435426, 0.17315786621708842, 0.17202854027180003, 0.17098529550327626, 0.1700185050852104, 0.16911993751609802, 0.16828251154429708, 0.1675001011499615, 0.16676737902179112, 0.16607968992634436, 0.16543294750035412, 0.16482354955142736, 0.1642483080987089, 0.16370439123866987, 0.16318927456294702, 0.16270070034200923, 0.1622366430608729, 0.16179528018026268, 0.1613749672197148, 0.1609742164336437, 0.1605916784888344, 0.1602261266607525, 0.15987644315290142, 0.15954160721307828, 0.1592206847764996, 0.15891281941125648, 0.15861722437856018, 0.15833317565055235, 0.1580600