In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.dummy import DummyClassifier
from sklearn.metrics import f1_score, log_loss, accuracy_score

# Functions

In [7]:
def load_dummy_data(dataset:str, binary:bool = False, data_dir:str = 'data.nosync/networks_multi'):
    """Return the class label of every file listed in one split's CSV index.

    Reads ``{data_dir}/{dataset}_set_files.csv`` and derives a label from each
    entry of its ``file`` column: the directory part of the path is dropped and
    the fourth underscore-separated token of the file name is used as the class.

    Args:
        dataset: Split name used as the CSV file-name prefix (this notebook
            uses 'train', 'val' and 'test').
        binary: If True, the labels 'ADHD', 'ASD-ADHD' and 'ASD' are merged
            into the single label 'Non-TD'; any other label is left unchanged.
        data_dir: Directory holding the ``*_set_files.csv`` index files.
            Defaults to the location that was previously hard-coded.

    Returns:
        A list with one label string per CSV row, in row order.

    Raises:
        IndexError: If a file name has fewer than four underscore-separated
            tokens.
    """
    index = pd.read_csv(f'{data_dir}/{dataset}_set_files.csv')

    # Keep only the file name (text after the last '/'), then pick its 4th '_' token.
    labels = index['file'].map(lambda path: path.split('/')[-1].split('_')[3])

    if binary:
        # Exact-value replacement: every listed diagnosis collapses to 'Non-TD'.
        labels = labels.replace({'ADHD': 'Non-TD',
                                 'ASD-ADHD': 'Non-TD',
                                 'ASD': 'Non-TD'})
    return labels.to_list()

def train_dummy(train_set:list, strategy:str):
    """Fit a scikit-learn ``DummyClassifier`` baseline on the given labels.

    Args:
        train_set: Training labels. The same list is passed as both ``X`` and
            ``y``; only its length and label values are relevant to the
            dummy estimator.
        strategy: Name of the ``DummyClassifier`` strategy to use
            (this notebook uses 'uniform' and 'prior').

    Returns:
        The fitted ``DummyClassifier`` (seeded with ``random_state=42``).
    """
    baseline = DummyClassifier(random_state=42, strategy=strategy)
    baseline.fit(X=train_set, y=train_set)
    return baseline

def _report_split(clf, split_name, true_labels):
    """Print log-loss, accuracy (in %) and macro F1 of ``clf`` on one split."""
    # The labels double as the feature argument, mirroring how the classifier is fitted.
    predicted = clf.predict(true_labels)
    probabilities = clf.predict_proba(true_labels)

    # Pin the class order to the fitted classifier's classes: without `labels`,
    # log_loss infers the classes from this split alone and fails when a class
    # seen in training is absent from it.
    loss = log_loss(true_labels, probabilities, labels=clf.classes_)
    accuracy = accuracy_score(true_labels, predicted) * 100
    macro_f1 = f1_score(true_labels, predicted, average='macro')

    print(f"{split_name} - Loss {round(loss, 2)}, Accuracy: {round(accuracy, 2)}, F1-score: {round(macro_f1, 2)}")


def train_get_stats(binary, strategy):
    """Fit a dummy baseline on the train split and print its metrics per split.

    Loads the 'train', 'val' and 'test' label lists, fits the baseline on the
    training labels, and prints one line per split containing the log-loss,
    the accuracy in percent and the macro-averaged F1 score, each rounded to
    two decimals.

    Args:
        binary: Forwarded to ``load_dummy_data``; True merges the non-TD
            diagnoses into a single 'Non-TD' class.
        strategy: Forwarded to ``train_dummy`` as the ``DummyClassifier``
            strategy.

    Returns:
        None. Results are written to standard output only.
    """
    train_data = load_dummy_data(dataset='train', binary=binary)
    val_data = load_dummy_data(dataset='val', binary=binary)
    test_data = load_dummy_data(dataset='test', binary=binary)

    clf = train_dummy(train_set=train_data, strategy=strategy)

    # NOTE: "Valdiation" is a typo kept as-is; fixing it changes the printed report.
    for split_name, labels in (("Train", train_data),
                               ("Valdiation", val_data),
                               ("Test", test_data)):
        _report_split(clf, split_name, labels)


In [8]:
# Multi-class labels, 'uniform' strategy: predictions drawn uniformly at random over the classes.
train_get_stats(binary=False, strategy='uniform')

Train - Loss 1.39, Accuracy: 25.4, F1-score: 0.21
Valdiation - Loss 1.39, Accuracy: 19.75, F1-score: 0.18
Test - Loss 1.39, Accuracy: 19.48, F1-score: 0.16


In [9]:
# Multi-class labels, 'prior' strategy: always predicts the most frequent training class.
train_get_stats(binary=False, strategy='prior')

Train - Loss 1.13, Accuracy: 49.21, F1-score: 0.16
Valdiation - Loss 1.2, Accuracy: 39.51, F1-score: 0.14
Test - Loss 1.2, Accuracy: 41.56, F1-score: 0.15


In [10]:
# Binary labels (diagnoses merged into 'Non-TD'), 'uniform' strategy: uniformly random predictions.
train_get_stats(binary=True, strategy='uniform')

Train - Loss 0.69, Accuracy: 48.53, F1-score: 0.49
Valdiation - Loss 0.69, Accuracy: 45.68, F1-score: 0.45
Test - Loss 0.69, Accuracy: 53.25, F1-score: 0.53


In [11]:
# Binary labels (diagnoses merged into 'Non-TD'), 'prior' strategy: always the most frequent training class.
train_get_stats(binary=True, strategy='prior')

Train - Loss 0.69, Accuracy: 50.79, F1-score: 0.34
Valdiation - Loss 0.69, Accuracy: 60.49, F1-score: 0.38
Test - Loss 0.69, Accuracy: 58.44, F1-score: 0.37
