In [1]:
import binary_common as common
import pandas as pd

In [2]:
disorder_labels = [
    "Addictive disorder",
    "Anxiety disorder",
    "Mood disorder",
    "Obsessive compulsive disorder",
    "Schizophrenia",
    "Trauma and stress related disorder",
]

binary_predictions = {}
optimal_thresholds = {}

In [3]:
for label in disorder_labels:
    print(f"\n####### {label} #######\n")
    optimal_thresholds[label] = common.binary_training(label)
    print('\n\n')


####### Addictive disorder #######

Fitting 10 folds for each of 10 candidates, totalling 100 fits

 Tuned SVM Model for Addictive disorder vs. Healthy control
 Best Parameters: {'C': 0.1, 'kernel': 'rbf'}
 Accuracy: 0.4854
 AUC Score: 0.4996
🔹 Optimal Decision Threshold: 0.1980
[0 1 0 0 1 1 1 1 1 1 1 0 1 1 0 1 1 0 0 0 0 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1
 1 1 1 0 1 0 1 0 1 1 0 1 1 1 0 0 1 1 0 1 1 1 0 1 1 1 1 1 1 0 1 1 0 1 0 1 0
 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1 1 0 0 1 1 1 1 0 0 0 1 1
 1 1 0 1 1 1 0 1 1 1 0 1 1 1 0 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 1
 0 0 1 0 0 0 1 1 0 0 0 1 0 1 1 1 1 0 1 1 1 1 0]




####### Anxiety disorder #######

Fitting 10 folds for each of 10 candidates, totalling 100 fits

 Tuned SVM Model for Anxiety disorder vs. Healthy control
 Best Parameters: {'C': 1, 'kernel': 'linear'}
 Accuracy: 0.5380
 AUC Score: 0.6000
🔹 Optimal Decision Threshold: 0.1060
[1 1 0 1 0 0 1 1 0 0 1 1 0 1 0 0 1 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 1 1 1 0 0
 0 0 1 1 0 0 0 

In [None]:
for label in disorder_labels:
    print(f"\n####### {label} #######\n")
    binary_predictions[label] = common.binary_predictionsgg(optimal_thresholds[label], label)
    print('\n\n')


####### Addictive disorder #######


Predicted Labels:
 ['Healthy control', 'Addictive disorder', 'Addictive disorder', 'Healthy control', 'Healthy control', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Healthy control', 'Addictive disorder', 'Healthy control', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Healthy control', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Healthy control', 'Healthy control', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Healthy control', 'Addictive disorder', 'Healthy control', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Addictive disorder', 'Healthy control', 'Healthy control', 'Addictive

In [12]:
preds = []
for index in range(len(binary_predictions[disorder_labels[0]])):
    max_value = float('-inf')
    max_label = None
    for label in disorder_labels:
        diff = binary_predictions[label][index] - optimal_thresholds[label]
        if diff > 0 and diff > max_value:
            max_value = diff
            max_label = label
    if max_label is None:
        preds.append(common.false_label)
    else:
        preds.append(max_label)

In [13]:
import joblib
import numpy as np

def weighted_voting(svm_models, weights, X_test):
    """
    Perform weighted voting for multiclass classification using multiple SVM classifiers.
    
    :param svm_models: List of trained SVM models
    :param weights: List of weights for each model
    :param X_test: Feature matrix for test data
    :return: Final predicted class labels
    """
    num_samples = X_test.shape[0]
    num_classes = len(svm_models)  # Assuming each SVM corresponds to one class
    
    # Initialize a score matrix
    class_scores = np.zeros((num_samples, num_classes))

    # Collect weighted scores from each classifier
    for i, (svm, weight) in enumerate(zip(svm_models, weights)):
        decision_scores = svm.decision_function(X_test)  # Confidence scores
        class_scores[:, i] = weight * decision_scores  # Apply weight

    # Final prediction: Choose class with highest weighted score
    final_predictions = np.argmax(class_scores, axis=1)
    
    return final_predictions

# Example usage (assuming `svm_models` is a list of trained SVM classifiers and `X_test` is your test data)
# Define weights based on classifier performance (can be optimized)
weights = [0.8974359 , 0.87179487, 0.80769231, 0.93589744, 1, 0.79487179]  # Example weights

# Perform classification
svm_models = [joblib.load(f"{label}_Tuned.pkl") for label in disorder_labels]

cols = common.relevant_columns.copy()
new_data = common.read_file("data.csv", cols)

# Convert DataFrame to NumPy before applying StandardScaler
scaler = joblib.load("Mood disorder_Scaler.pkl")
pca = joblib.load("Mood disorder_Pca.pkl")
y_test = scaler.fit_transform(new_data.values)
new_data_pca = pca.fit_transform(y_test)
y_pred = weighted_voting(svm_models, weights, new_data_pca)
y_pred

array([5, 1, 1, 1, 5, 0, 4, 0, 5, 0, 5, 0, 0, 3, 5, 3, 5, 1, 0, 2, 0, 1,
       0, 2, 1, 0, 5, 0, 5, 4, 4, 5, 4, 1, 5, 5, 1, 5, 4, 1, 4, 1, 4, 4,
       5, 3, 1, 3, 3, 1, 5, 0, 0, 1, 2, 5, 4, 3, 1, 2, 4, 4, 1, 4, 0, 4,
       0, 4, 4, 2, 1, 5, 0, 0, 4, 3, 1, 2, 3, 1, 0, 5, 1, 4, 1, 0, 4, 0,
       0, 1, 3, 2, 4])

In [15]:
disorder_mapping = {
    0: "Addictive disorder",
    1: "Anxiety disorder",
    2: "Mood disorder",
    3: "Obsessive compulsive disorder",
    4: "Schizophrenia",
    5: "Trauma and stress related disorder"
}
labelled_predicted = [disorder_mapping[val] for val in y_pred]
labelled_predicted

['Trauma and stress related disorder',
 'Anxiety disorder',
 'Anxiety disorder',
 'Anxiety disorder',
 'Trauma and stress related disorder',
 'Addictive disorder',
 'Schizophrenia',
 'Addictive disorder',
 'Trauma and stress related disorder',
 'Addictive disorder',
 'Trauma and stress related disorder',
 'Addictive disorder',
 'Addictive disorder',
 'Obsessive compulsive disorder',
 'Trauma and stress related disorder',
 'Obsessive compulsive disorder',
 'Trauma and stress related disorder',
 'Anxiety disorder',
 'Addictive disorder',
 'Mood disorder',
 'Addictive disorder',
 'Anxiety disorder',
 'Addictive disorder',
 'Mood disorder',
 'Anxiety disorder',
 'Addictive disorder',
 'Trauma and stress related disorder',
 'Addictive disorder',
 'Trauma and stress related disorder',
 'Schizophrenia',
 'Schizophrenia',
 'Trauma and stress related disorder',
 'Schizophrenia',
 'Anxiety disorder',
 'Trauma and stress related disorder',
 'Trauma and stress related disorder',
 'Anxiety disorder

In [17]:
output_df = pd.DataFrame(
    {
        "ID": pd.read_csv('data/test.csv')['ID'],
        "main.disorder.class": labelled_predicted,
    }
)
output_df.to_csv("Test_Set_EEG.csv", index=False)

In [18]:
import numpy as np
from scipy.interpolate import UnivariateSpline

# Given accuracy values for each disorder's classifier
disorders = [
    "Addictive disorder", "Anxiety disorder", "Mood disorder", 
    "Obsessive compulsive disorder", "Schizophrenia", "Trauma and stress related disorder"
]
accuracy_values = np.array([0.70, 0.68, 0.63, 0.73, 0.78, 0.62])

# Create evenly spaced x values for interpolation
x_values = np.linspace(0, len(accuracy_values) - 1, len(accuracy_values))

# Fit a spline function
spline = UnivariateSpline(x_values, accuracy_values, s=0)

# Generate evenly spaced weights
weights = spline(x_values)

# Normalize weights to ensure they are in a reasonable range
weights = weights / np.max(weights)

# Display weights
weights


array([0.8974359 , 0.87179487, 0.80769231, 0.93589744, 1.        ,
       0.79487179])