In [1]:
import numpy as np
import time
import warnings

from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import train_test_split

from src.data_loader.DataLoader import DataLoader
from src.model.LogisticRegression import LogisticRegression
from src.optim.ADAM import ADAM
from src.optim.IWLS import IWLS
from src.optim.SGD import SGD
from src.optim.conditions import NoLogLikOrMaxIterCondition

warnings.filterwarnings('ignore')

# Example of usage
dl = DataLoader(product=True)
sd = dl.get_supported_datasets()

rows = []
random_states = [420, 69, 42, 24, 96, 1312, 777]
for d in sd:
    print(d)
    x, y = dl[d]

    for random_state in random_states:
        np.random.seed(random_state)
        train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, stratify=y, random_state=random_state)
        print("ADAM")
        start = time.time()
        model = LogisticRegression()
        optim = ADAM(model, NoLogLikOrMaxIterCondition(500, 5))
        model = optim.optimize(train_x, train_y)
        end = time.time()
        rows.append(
            {"dataset": d, "optim": "ADAM", "time": start - end, "accuracy": balanced_accuracy_score(test_y, model.predict(test_x)),
             "random_state": random_state, "iters": optim.stop_condition.epoch})

        print("IWLS")
        start = time.time()
        model = LogisticRegression()
        optim = IWLS(model, NoLogLikOrMaxIterCondition(500, 5), delta=1e-10, lambda_=1e-10)
        model = optim.optimize(train_x, train_y)
        end = time.time()
        rows.append(
            {"dataset": d, "optim": "IWLS", "time": start - end, "accuracy": balanced_accuracy_score(test_y, model.predict(test_x)),
             "random_state": random_state, "iters": optim.stop_condition.epoch})

        print("SGD")
        start = time.time()
        model = LogisticRegression()
        optim = SGD(model, NoLogLikOrMaxIterCondition(500, 5))
        model = optim.optimize(train_x, train_y)
        end = time.time()
        rows.append(
            {"dataset": d, "optim": "SGD", "time": start - end, "accuracy": balanced_accuracy_score(test_y, model.predict(test_x)),
             "random_state": random_state, "iters": optim.stop_condition.epoch})


banknote
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
kin8nm
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
phoneme
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
elevators
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
jm1
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
kdd_JapaneseVowels
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
mfeat-karhunen
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
mfeat-zernike
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
pc1
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD


In [4]:
import pandas as pd

ddf = pd.DataFrame(rows)
ddf["product"] = 1
ddf[ddf["optim"]=="IWLS"]["accuracy"].mean()

0.7324109627466839

In [5]:
import pandas as pd

df = pd.DataFrame(rows)
df["product"] = 1
df

Unnamed: 0,dataset,optim,time,accuracy,random_state,iters,product
0,banknote,ADAM,-2.021797,0.868363,420,16,1
1,banknote,IWLS,-0.002952,0.866729,420,5,1
2,banknote,SGD,-1.054757,0.865631,420,16,1
3,banknote,ADAM,-2.063798,0.857434,69,26,1
4,banknote,IWLS,-0.002595,0.857434,69,6,1
...,...,...,...,...,...,...,...
184,pc1,IWLS,-0.001368,0.557153,1312,5,1
185,pc1,SGD,-0.150587,0.498387,1312,5,1
186,pc1,ADAM,-0.434421,0.583731,777,15,1
187,pc1,IWLS,-0.001435,0.560379,777,5,1


In [3]:
import numpy as np
import time
import warnings

from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import train_test_split

from src.data_loader.DataLoader import DataLoader
from src.model.LogisticRegression import LogisticRegression
from src.optim.ADAM import ADAM
from src.optim.IWLS import IWLS
from src.optim.SGD import SGD
from src.optim.conditions import NoLogLikOrMaxIterCondition

warnings.filterwarnings('ignore')

# Example of usage
dl = DataLoader(product=False)
sd = dl.get_supported_datasets()

rows = []
random_states = [420, 69, 42, 24, 96, 1312, 777]
for d in sd:
    print(d)
    x, y = dl[d]

    for random_state in random_states:
        np.random.seed(random_state)
        train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, stratify=y, random_state=random_state)
        print("ADAM")
        start = time.time()
        model = LogisticRegression()
        optim = ADAM(model, NoLogLikOrMaxIterCondition(500, 5))
        model = optim.optimize(train_x, train_y)
        end = time.time()
        rows.append(
            {"dataset": d, "optim": "ADAM", "time": start - end, "accuracy": balanced_accuracy_score(test_y, model.predict(test_x)),
             "random_state": random_state, "iters": optim.stop_condition.epoch})

        print("IWLS")
        start = time.time()
        model = LogisticRegression()
        optim = IWLS(model, NoLogLikOrMaxIterCondition(500, 5))
        model = optim.optimize(train_x, train_y)
        end = time.time()
        rows.append(
            {"dataset": d, "optim": "IWLS", "time": start - end, "accuracy": balanced_accuracy_score(test_y, model.predict(test_x)),
             "random_state": random_state, "iters": optim.stop_condition.epoch})

        print("SGD")
        start = time.time()
        model = LogisticRegression()
        optim = SGD(model, NoLogLikOrMaxIterCondition(500, 5))
        model = optim.optimize(train_x, train_y)
        end = time.time()
        rows.append(
            {"dataset": d, "optim": "SGD", "time": start - end, "accuracy": balanced_accuracy_score(test_y, model.predict(test_x)),
             "random_state": random_state, "iters": optim.stop_condition.epoch})


banknote
(1372, 2)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
kin8nm
(8192, 8)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
phoneme
(5404, 5)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
elevators
(16599, 11)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
jm1
(10880, 4)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
kdd_JapaneseVowels
(9961, 14)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
mfeat-karhunen
(2000, 64)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
mfeat-zernike
(2000, 10)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
ADAM
IWLS
SGD
pc1
(1109, 4)
ADAM
IWLS
SGD
ADAM
IWLS
SGD
AD

In [4]:
import pandas as pd

ndf = pd.DataFrame(rows)
ndf["product"] = 0
ndf

Unnamed: 0,dataset,optim,time,accuracy,random_state,iters,product
0,banknote,ADAM,-1.071732,0.868912,420,28,0
1,banknote,IWLS,-0.001414,0.868912,420,8,0
2,banknote,SGD,-0.340031,0.865631,420,12,0
3,banknote,ADAM,-0.784716,0.858520,69,22,0
4,banknote,IWLS,-0.001125,0.854153,69,6,0
...,...,...,...,...,...,...,...
184,pc1,IWLS,-0.001167,0.561992,1312,7,0
185,pc1,SGD,-0.156167,0.500000,1312,7,0
186,pc1,ADAM,-0.274677,0.563604,777,9,0
187,pc1,IWLS,-0.001093,0.583731,777,6,0


In [5]:
total = pd.concat((df, ndf), axis=0)

In [6]:
total

Unnamed: 0,dataset,optim,time,accuracy,random_state,iters,product
0,banknote,ADAM,-1.766906,0.868363,420,16,1
1,banknote,IWLS,-0.002908,0.866729,420,5,1
2,banknote,SGD,-1.277053,0.865631,420,16,1
3,banknote,ADAM,-2.858903,0.857434,69,26,1
4,banknote,IWLS,-0.002416,0.857434,69,6,1
...,...,...,...,...,...,...,...
184,pc1,IWLS,-0.001167,0.561992,1312,7,0
185,pc1,SGD,-0.156167,0.500000,1312,7,0
186,pc1,ADAM,-0.274677,0.563604,777,9,0
187,pc1,IWLS,-0.001093,0.583731,777,6,0


In [7]:
total.groupby(["dataset", "optim", "product"])["accuracy"].mean()


dataset             optim  product
banknote            ADAM   0          0.863373
                           1          0.865009
                    IWLS   0          0.863373
                           1          0.864777
                    SGD    0          0.859780
                           1          0.859388
elevators           ADAM   0          0.634492
                           1          0.548180
                    IWLS   0          0.649665
                           1          0.649949
                    SGD    0          0.493347
                           1          0.494590
jm1                 ADAM   0          0.529331
                           1          0.538471
                    IWLS   0          0.519138
                           1          0.519311
                    SGD    0          0.549453
                           1          0.533879
kdd_JapaneseVowels  ADAM   0          0.907030
                           1          0.955611
                    IWLS 

In [8]:
total.to_csv("wyniki.csv")