In [39]:
import multiprocessing
import time
import numpy as np
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score
import sys
sys.path.append("../")
import abus_classification

In [40]:
dataset = abus_classification.datasets.TDSCTumors(path="../data/tdsc")    

In [41]:
def normalize_(x:np, interval=(0,1)):
    x_std = (x - x.min())/x.max()-x.min()
    mi, ma = interval
    return x_std*(ma-mi) + mi

def normalize(x):
    for i in range(100):
        x[i] = normalize_(x[i])
    return x

In [42]:
def classify_with_svm(x, y):
    
    acc = 0
    train_acc = 0
    cfm = [[0,0],
           [0,0]]

    for i in range(100):
        x_test, y_test = x[i], y[i]
        X_train, Y_train = x[:i] + x[i+1:], y[:i] + y[i+1:]
        clf = SVC()
        clf.fit(X_train, Y_train)
        P = clf.predict(X_train)
        train_acc += accuracy_score(Y_train, P)
        res = clf.predict([x_test])[0]
        if res == y_test:
            acc += 1
            
        cfm[y_test][res] += 1
            
    print(f"Train accuracy: {train_acc/100}")
    print(f"Accuracy: {acc/100}")
    print(f"{cfm[0]}\n{cfm[1]}")

In [43]:
alpha = 1
beta = 1

def extract_signature(data):
    x, y = data
    sig = abus_classification.utils.features.boundary_signature_3d(x, resolution=(alpha,beta))
    return sig, y

s = time.time()
_, x, y = dataset[0]
print(time.time() - s)
extract_signature((x, y))
print(time.time()-s)

1.710676908493042
3.047036647796631


In [45]:
num_processes = 10

for a in range(0,20,5):
    alpha = a+1
    signature_dataset = []
    for b in range(0,20,5):
        beta = b+1
    loop = tqdm(range(0, 100, num_processes))

    for _ in loop:
        loop.set_postfix(processing=f"{i} to {i+num_processes}")
        data_chunk = []
        for j in range(i, i+num_processes):
            _, x, y = dataset[j]
            data_chunk.append((x,y))
        with multiprocessing.Pool(processes=num_processes) as pool:
            signature_dataset.append(pool.map(extract_signature, data_chunk))
    
    ds = [signature_dataset[i][j] for j in range(10) for i in range(10)]
    X, Y = zip(*ds)
    X = list(X)
    Y = list(Y)
    X = [x.flatten() for x in X]
    print(f"Results with resolution ({alpha}, {beta})")
    print("Not normalized")
    classify_with_svm(X,Y)
    print("Normalized")
    X = normalize(X)
    classify_with_svm(X,Y)

  0%|          | 0/10 [00:00<?, ?it/s]

Results with resolution (1, 16)
Not normalized
Train accuracy: 0.8040404040404038
Accuracy: 0.66
[44, 14]
[20, 22]
Normalized
Train accuracy: 0.8675757575757573
Accuracy: 0.61
[50, 8]
[31, 11]


  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
classify_with_svm(X,Y)
X = normalize(X)
classify_with_svm(X,Y)

Train accuracy: 0.7139393939393939
Accuracy: 0.69
[45, 13]
[18, 24]
Train accuracy: 0.7110101010101009
Accuracy: 0.68
[54, 4]
[28, 14]
