In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.dummy import DummyClassifier
from sklearn.metrics import f1_score, log_loss

# Functions

In [7]:
def load_dummy_data(dataset:str, binary:bool = False):
    """Return the class label of every file listed for one dataset split.

    Reads ``data.nosync/networks_multi/{dataset}_set_files.csv`` and derives a
    label for each row of its ``file`` column: the fourth underscore-separated
    token of the path's final component.

    Args:
        dataset: Split name interpolated into the CSV file name.
        binary: When True, the labels 'ADHD', 'ASD-ADHD' and 'ASD' are all
            relabelled 'Non-TD'; every other label is left untouched.

    Returns:
        list: One label per CSV row, in row order.
    """
    def _label_of(path):
        # Drop any directory part, then take the 4th '_'-delimited field.
        return path.rsplit('/', 1)[-1].split('_')[3]

    listing = pd.read_csv(f'data.nosync/networks_multi/{dataset}_set_files.csv')
    labels = [_label_of(path) for path in listing['file']]

    if not binary:
        return labels

    # Exact-match relabelling: 'ASD-ADHD' is its own key, never a partial hit.
    merged = dict.fromkeys(('ADHD', 'ASD-ADHD', 'ASD'), 'Non-TD')
    return [merged.get(label, label) for label in labels]

def train_dummy(train_set:list, strategy:str):
    """Fit and return a seeded ``DummyClassifier`` on the given labels.

    Args:
        train_set: Training labels. The same list is passed as both ``X`` and
            ``y``; the dummy estimator does not use feature values when
            predicting, so ``X`` here only supplies the sample count.
        strategy: Forwarded to ``DummyClassifier(strategy=...)``.

    Returns:
        The fitted ``DummyClassifier`` (``random_state`` fixed to 42).
    """
    # ``fit`` returns the estimator itself, so construction and fitting chain.
    return DummyClassifier(strategy=strategy, random_state=42).fit(train_set, train_set)

def train_get_stats(binary, strategy):
    """Fit a dummy baseline on the train labels and print loss / F1 per split.

    Loads the 'train', 'val' and 'test' label lists, fits a dummy classifier
    on the train labels, and prints the log-loss and micro-averaged F1-score
    for each split (both rounded to 2 decimals).

    Args:
        binary: Forwarded to ``load_dummy_data``; selects the binary labelling.
        strategy: Forwarded to ``train_dummy`` / ``DummyClassifier``.

    Returns:
        None. Results are only printed.
    """
    # (printed name, csv split key). Printed names are kept verbatim so that
    # previously recorded outputs remain comparable.
    splits = (('Train', 'train'), ('Valdiation', 'val'), ('Test', 'test'))

    # Load every split before fitting, as the labels are the only input needed.
    labels_by_split = {
        shown: load_dummy_data(dataset = key, binary = binary)
        for shown, key in splits
    }

    clf = train_dummy(train_set = labels_by_split['Train'], strategy = strategy)

    for shown, _ in splits:
        y_true = labels_by_split[shown]
        # The dummy estimator ignores feature values; the label list is passed
        # as X only so the number of predictions matches the split size.
        y_pred = clf.predict(y_true)
        y_prob = clf.predict_proba(y_true)

        # Pin the label set to the classes the classifier was trained on, so
        # the probability columns stay aligned even when a split happens to
        # lack one of the classes (otherwise log_loss infers labels from
        # y_true alone and rejects the mismatching column count).
        loss = log_loss(y_true, y_prob, labels = clf.classes_)
        # scikit-learn's signature is (y_true, y_pred).
        f1 = f1_score(y_true, y_pred, average='micro')

        print(f"{shown} - Loss {round(loss, 2)}, F1-score: {round(f1, 2)}")


In [8]:
# Multi-class baseline: labels drawn uniformly at random from the training classes.
train_get_stats(binary=False, strategy='uniform')

Train - Loss 1.39, F1-score: 0.25
Valdiation - Loss 1.39, F1-score: 0.2
Test - Loss 1.39, F1-score: 0.19


In [9]:
# Multi-class baseline: always predict the most frequent training class (class priors as probabilities).
train_get_stats(binary=False, strategy='prior')

Train - Loss 1.13, F1-score: 0.49
Valdiation - Loss 1.2, F1-score: 0.4
Test - Loss 1.2, F1-score: 0.42


In [12]:
# Binary baseline (ADHD / ASD / ASD-ADHD merged into 'Non-TD'): uniform random guessing.
train_get_stats(binary=True, strategy='uniform')

Train - Loss 0.69, F1-score: 0.49
Valdiation - Loss 0.69, F1-score: 0.46
Test - Loss 0.69, F1-score: 0.53


In [13]:
# Binary baseline (ADHD / ASD / ASD-ADHD merged into 'Non-TD'): always predict the most frequent training class.
train_get_stats(binary=True, strategy='prior')

Train - Loss 0.69, F1-score: 0.51
Valdiation - Loss 0.69, F1-score: 0.6
Test - Loss 0.69, F1-score: 0.58
