In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Directory holding the per-lead feature CSVs. Kept in one place so the
# analysis can be pointed at another copy of the data by editing a single line.
DATA_DIR = '/Users/matildacotton/Desktop/semester_2_2024/topics/data'

# Train and evaluate one random forest per lead (files are numbered 0-11),
# printing accuracy, average sensitivity, weighted precision and the
# confusion matrix for each.
for i in range(12):
    # Load training and validation data for this lead
    train_df = pd.read_csv(f'{DATA_DIR}/training-hctsa-catch22-features-lead-{i}.csv')
    test_df = pd.read_csv(f'{DATA_DIR}/validating-hctsa-catch22-features-lead-{i}.csv')

    # Select features by the training column names so the validation matrix
    # is guaranteed to use the same columns in the same order; `.values`
    # discards the names, so a reordered CSV would otherwise go unnoticed.
    feature_cols = train_df.columns.drop('Condition')

    X_train = train_df[feature_cols].values
    y_train = train_df['Condition'].values

    X_test = test_df[feature_cols].values
    y_test = test_df['Condition'].values

    # Train the RandomForest classifier (fixed seed so reruns give the same model)
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Predict on the validation set
    y_pred = clf.predict(X_test)

    # Fraction of validation samples classified correctly
    accuracy = accuracy_score(y_test, y_pred)

    # Confusion matrix: rows are true classes, columns are predicted classes
    conf_matrix = confusion_matrix(y_test, y_pred)

    # Per-class sensitivity (recall) = TP / (TP + FN) = diagonal / row sum.
    # confusion_matrix also creates a row for any class that was predicted but
    # never occurs in y_test; that row sums to zero and has no defined recall,
    # so it is excluded from the average instead of being counted as 0.
    support = conf_matrix.sum(axis=1)
    has_support = support > 0
    sensitivities = np.diag(conf_matrix)[has_support] / support[has_support]

    # Unweighted (macro) mean over the classes present in y_test
    avg_sensitivity = sensitivities.mean() if sensitivities.size else 0.0

    # Generate classification report (for multi-class metrics).
    # zero_division=0 scores never-predicted classes as 0 (the value the
    # default already uses) without emitting a warning for every lead.
    report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)

    # PPV (precision) here is the support-weighted average, whereas the
    # sensitivity above is an unweighted mean across classes.
    ppv = report['weighted avg']['precision']

    # Output results
    print(f"Results for Lead {i}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Average Sensitivity (Recall): {avg_sensitivity:.4f}")
    print(f"PPV (Precision): {ppv:.4f}")
    print(f"Confusion Matrix:\n{conf_matrix}\n")


Results for Lead 0:
Accuracy: 0.8619
Average Sensitivity (Recall): 0.8135
PPV (Precision): 0.8757
Confusion Matrix:
[[49  0  0 ...  0  0  0]
 [ 0  6  0 ...  0  0  0]
 [ 0  0  5 ...  0  0  0]
 ...
 [ 0  0  0 ...  2  0  0]
 [ 0  0  0 ...  0  2  0]
 [ 0  0  0 ...  0  0 44]]

Results for Lead 1:
Accuracy: 0.8628
Average Sensitivity (Recall): 0.8103
PPV (Precision): 0.8761
Confusion Matrix:
[[49  0  0 ...  0  0  0]
 [ 0  6  0 ...  0  0  0]
 [ 0  0  5 ...  0  0  0]
 ...
 [ 0  0  0 ...  2  0  0]
 [ 0  0  0 ...  0  2  0]
 [ 0  0  0 ...  0  0 45]]

Results for Lead 2:
Accuracy: 0.8537
Average Sensitivity (Recall): 0.8138
PPV (Precision): 0.8706
Confusion Matrix:
[[49  0  0 ...  0  0  0]
 [ 0  6  0 ...  0  0  0]
 [ 0  0  5 ...  0  0  0]
 ...
 [ 0  0  0 ...  2  0  0]
 [ 0  0  0 ...  0  2  0]
 [ 0  0  0 ...  0  0 45]]

Results for Lead 3:
Accuracy: 0.8682
Average Sensitivity (Recall): 0.8175
PPV (Precision): 0.8805
Confusion Matrix:
[[49  0  0 ...  0  0  0]
 [ 0  6  0 ...  0  0  0]
 [ 0  0  5 ... 

In [7]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Condition labels kept for this experiment; every other class is dropped from
# both the training and validation data before fitting.
# NOTE(review): presumably the eight most frequent conditions - confirm
# against the class counts in the data.
top_classes = ['Normal','Atrial Flutter','Sinus Bradycardia','Sinus Irregularity','Sinus Rhythm','Sinus Tachycardia','T wave Change','T wave opposite']

# Directory holding the per-lead feature CSVs. Kept in one place so the
# analysis can be pointed at another copy of the data by editing a single line.
DATA_DIR = '/Users/matildacotton/Desktop/semester_2_2024/topics/data'

# Train and evaluate one random forest per lead (files are numbered 0-11),
# using only the samples whose Condition is in top_classes.
for i in range(12):
    # Load training and validation data for this lead
    train_df = pd.read_csv(f'{DATA_DIR}/training-hctsa-catch22-features-lead-{i}.csv')
    test_df = pd.read_csv(f'{DATA_DIR}/validating-hctsa-catch22-features-lead-{i}.csv')

    # Filter to include only the selected classes in both training and validation data
    train_df_filtered = train_df[train_df['Condition'].isin(top_classes)]
    test_df_filtered = test_df[test_df['Condition'].isin(top_classes)]

    # Select features by the training column names so the validation matrix
    # is guaranteed to use the same columns in the same order; `.values`
    # discards the names, so a reordered CSV would otherwise go unnoticed.
    feature_cols = train_df_filtered.columns.drop('Condition')

    X_train = train_df_filtered[feature_cols].values
    y_train = train_df_filtered['Condition'].values

    X_test = test_df_filtered[feature_cols].values
    y_test = test_df_filtered['Condition'].values

    # Train the RandomForest classifier (fixed seed so reruns give the same model)
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Predict on the validation set
    y_pred = clf.predict(X_test)

    # Fraction of validation samples classified correctly
    accuracy = accuracy_score(y_test, y_pred)

    # Confusion matrix: rows are true classes, columns are predicted classes
    conf_matrix = confusion_matrix(y_test, y_pred)

    # Per-class sensitivity (recall) = TP / (TP + FN) = diagonal / row sum.
    # confusion_matrix also creates a row for any class that was predicted but
    # never occurs in y_test; that row sums to zero and has no defined recall,
    # so it is excluded from the average instead of being counted as 0.
    support = conf_matrix.sum(axis=1)
    has_support = support > 0
    sensitivities = np.diag(conf_matrix)[has_support] / support[has_support]

    # Unweighted (macro) mean over the classes present in y_test
    avg_sensitivity = sensitivities.mean() if sensitivities.size else 0.0

    # Generate classification report (for multi-class metrics).
    # zero_division=0 scores never-predicted classes as 0 (the value the
    # default already uses) without emitting a warning for every lead.
    report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)

    # PPV (precision) here is the support-weighted average, whereas the
    # sensitivity above is an unweighted mean across classes.
    ppv = report['weighted avg']['precision']

    # Output results
    print(f"Results for Lead {i}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Average Sensitivity (Recall): {avg_sensitivity:.4f}")
    print(f"PPV (Precision): {ppv:.4f}")
    print(f"Confusion Matrix:\n{conf_matrix}\n")


Results for Lead 0:
Accuracy: 0.8712
Average Sensitivity (Recall): 0.8363
PPV (Precision): 0.8795
Confusion Matrix:
[[ 420   55   19    0   13    8    1    0]
 [   5 1787  100    2   40   26    3    0]
 [   3   69 1677    2   27   10    0    0]
 [   0   32   27  365   24    4    0    0]
 [   1   70   79    3 1025   12    2    0]
 [   2   76   24    2   21  683    0    0]
 [   0   57   66    0   28   10  578    1]
 [   1   26   28    1    9    7    0  199]]

Results for Lead 1:
Accuracy: 0.8765
Average Sensitivity (Recall): 0.8418
PPV (Precision): 0.8847
Confusion Matrix:
[[ 417   55   18    0   18    7    1    0]
 [   6 1780  110    0   33   33    1    0]
 [   5   69 1685    0   22    6    1    0]
 [   0   17   43  365   24    3    0    0]
 [   1   70   63    2 1037   18    1    0]
 [   0   43   33    1   19  712    0    0]
 [   1   59   72    2   17    8  580    1]
 [   1   32   24    0    7    8    0  199]]

Results for Lead 2:
Accuracy: 0.8666
Average Sensitivity (Recall): 0.8335
PP