In [1]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from joblib import Parallel, delayed


In [4]:
def get_data(n_class, perc):
    file_name = f"n_class_{n_class}_maj_perc_0_{perc}.txt"
    train_dir = f"../expert_load_train_ImageNetData_enc"
    test_dir = f"../expert_load_test_ImageNetData_enc"
    train_x, train_y = [], []

    with open(f"{train_dir}/{file_name}", "r") as f:
        for line in f:
            x_y = line.strip().split(",")
            if x_y[0] == 'l5e1':
                continue
            train_x.append(x_y[:16])
            train_y.append(x_y[16])

    test_x, test_y = [], []

    with open(f"{test_dir}/{file_name}", "r") as f:
        for line in f:
            x_y = line.strip().split(",")
            if x_y[0] == 'l5e1':
                continue
            test_x.append(x_y[:16])
            test_y.append(x_y[16])
    return train_x, train_y, test_x, test_y

In [10]:
def pred(n_class, perc):
    try:
        train_x, train_y, test_x, test_y = get_data(n_class, perc)
    except:
        return -1
    N = len(train_x) + len(test_x)
    train_x.extend(test_x)
    train_y.extend(test_y)
    train_x, test_x = train_test_split(train_x, test_size=N//5, random_state=42)
    train_y, test_y = train_test_split(train_y, test_size=N//5, random_state=42)

    train_x = np.array(train_x).astype(float)
    train_y = np.array(train_y).astype(int)
    test_x = np.array(test_x).astype(float)
    test_y = np.array(test_y).astype(int)

    clf = SVC(kernel='linear')
    clf.fit(train_x, train_y)
    pred = clf.predict(test_x)
    acc = np.mean(pred == test_y)
    return (n_class, perc, acc)

In [6]:
results = Parallel(n_jobs = 12)(delayed(get_acc)(n_class, perc) for perc in [25, 375, 5, 625, 875] for n_class in [2,4,6,8,16,32,64])


NameError: name 'get_acc' is not defined

In [39]:
for n_class, perc, accuracy_score in results:
    if n_class != 64:
        continue
    print(f"{n_class} {perc} {accuracy_score}")

64 25 0.35325820676139147
64 375 0.6380189366351056
64 5 0.8210831721470019
64 625 0.9149453219927096
64 875 0.9862542955326461


In [2]:
n_class = 64
def collapsed_acc(n_class, perc):
    train_x, train_y, test_x, test_y = get_data(n_class, perc)

    train_x = np.array(train_x).astype(float)
    train_y = np.array(train_y).astype(int)
    test_x = np.array(test_x).astype(float)
    test_y = np.array(test_y).astype(int)

    N = len(train_x) + len(test_x)

    for i in range(len(train_x)):
        for k in range(8):
            train_x[i][k] += train_x[i][k+8]
    for i in range(len(test_x)):
        for k in range(8):
            test_x[i][k] += test_x[i][k+8]
    train_x = train_x[:, :8]
    test_x = test_x[:, :8]
    
    train_x, test_x = train_test_split(train_x, test_size=N//5, random_state=42)
    train_y, test_y = train_test_split(train_y, test_size=N//5, random_state=42)
    # reshape and discard 

    clf = SVC(kernel='linear')
    clf = clf.fit(train_x, train_y)
    acc = clf.score(test_x, test_y)
    return acc

In [19]:
results = Parallel(n_jobs = 6)(delayed(collapsed_acc)(64, perc) for perc in [75])
results

[0.8695652173913043]

In [17]:
results_original = Parallel(n_jobs = 12)(delayed(pred)(n_class, perc) for perc in [75] for n_class in [2,4,6,8,16,32,64])
print(results_original)

[(2, 625, 0.96), (4, 625, 1.0), -1, (8, 625, 1.0), (16, 625, 0.9710144927536232), (32, 625, 0.9467312348668281), (64, 625, 0.9149453219927096)]


In [9]:
print(results)

[0.24448799608035277, 0.4537509104151493, 0.6537717601547389, 0.7885783718104495, 0.929553264604811]
