In [None]:
import numpy as np
import matplotlib.pyplot as pl
import pandas as pd
import seaborn as sns
from pprint import pprint
import warnings
from sklearn.utils import resample
from sources.permutation_importance import* 
from sklearn.metrics import *
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# Random-forest classifier with scikit-learn defaults (not referenced by the cells shown below).
rf_model = RandomForestClassifier()
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# matplotlib 3.8 removed the old aliases, so 'seaborn-ticks' alone raises OSError
# on current installs. Use whichever name is available, else the default style.
pl.style.use(next(
    (name for name in ('seaborn-v0_8-ticks', 'seaborn-ticks') if name in pl.style.available),
    'default',
))

In [None]:
# X_train = pd.read_csv('normalised/X_train_bal_roc2.csv')
# y_train = pd.read_csv('normalised/y_train_bal_roc2.csv')
# X_train = pd.read_csv('X_train_table.csv')
# y_train = pd.read_csv('y_train_table.csv')
# # encoding target class
# y, clas = pd.factorize(y_train['class_labels']) #getting the class 0 = agn, 1 =notagn, 2 = no class
# y_target = pd.DataFrame(y, columns = ['labels'])

In [None]:
# Load the ROC input table from the 'normalised' directory.
df = pd.read_csv('normalised/roc_data.csv')

# Feature columns that are evaluated one at a time, and the raw class column.
cols = ['qir', 'Mstar', 'class_star', 'log(S8/S45)', 'log(S58/S36)']
X_train = df[cols]
y_train = df['class_labels']

# Encode the target class as integer codes; `clas` holds the original labels,
# indexed by code.
# NOTE(review): pd.factorize numbers labels in order of first appearance, so the
# intended mapping (0 = agn, 1 = notagn, 2 = no class) only holds if the rows of
# the CSV introduce the labels in that order -- inspect `clas` to confirm.
y, clas = pd.factorize(y_train)
y_target = pd.DataFrame(y, columns = ['labels'])

In [None]:
# # # column names
# # # feat = X_train.columns 
# feat = X_train.columns.tolist()

# color = [
#     'red', 'green', "blue", 'grey', 'orange', 'purple',  # original 6
#     # 'cyan', 'magenta', 'lime', 'pink', 'teal', 'lavender',  # added 6
#     # 'maroon', 'olive', 'navy', 'coral', 'indigo', 'gold'  # added 6 more
# ]

# # di_feat = [
# #     # True, True, False, False, True, True,  # original 6
# #     # False, False, False, False, False, False,  # repeated pattern
# #     # False, False, False, False, False, False   # repeated pattern
# # ]
# # di_feat = [
# #     False, False, True, True, False, False,  # original 6
# #     True, True, True, True, True, True,  # repeated pattern
# #     True, True, True, True, True, True   # repeated pattern
# # ]

# label = feat
# # # call ROC function
# # plot_roc_curve (di_feat, feat, label, X_train_roc, y_target, colors=color)
# # # draw_ROC (di_feat, feat, label, y=y_balanced, colors=colors)


In [None]:
# Column names of the features to analyse. The four lists in this cell are
# parallel: entry i of `color`, `label` and `di_feat` belongs to `feat[i]`.
feat = ['qir', 'Mstar', 'class_star', 'log(S8/S45)', 'log(S58/S36)'] 
# Line colour used for each feature's ROC curve.
color = [ "blue", 'red', 'green', 'grey', 'orange'] #, 'purple'] 
        
# Legend text for each feature (matplotlib mathtext where applicable).
label = [r'$q_\mathrm{IR}$',
         r'$log (M_{\rm star})$', 
         'class_star',
         r'$log (S_{8.0}/S_{4.5})$',
         r'$log (S_{5.8}/S_{3.6})$'
               ]
# Direction of each feature as interpreted by threshold(): True predicts class 1
# for samples whose value is below the swept threshold, False for samples above it.
di_feat = [False, True, True, False, True] #, True]
# di_feat = [True, False, False, True, False] #, True]


In [None]:
def threshold(X, Y, direction):
    """Sweep a decision threshold over one feature and collect ROC statistics.

    The feature is used as a one-dimensional classifier: 1000 evenly spaced
    cut values between the feature's minimum and maximum are tried, and for
    each cut the samples on one side of it are predicted as class 1.

    Args:
        X: Feature values (any array-like; flattened to 1-D).
        Y: True labels aligned with ``X`` (flattened to 1-D). Label 1 is the
            positive class and label 0 the negative class. Any other label
            can never match a prediction, so it lowers the accuracy and is
            ignored by the two rates.
        direction: If truthy, a sample is predicted 1 when its value is
            strictly below the cut; otherwise when it is strictly above.

    Returns:
        Tuple ``(acc, fp, tp)`` of three lists with one entry per cut:
        accuracy, false-positive rate and true-positive rate. With no
        positive samples the true-positive rate is 0; with no negative
        samples the true-negative rate is taken as 0, i.e. ``fp`` is 1.

    Raises:
        ValueError: If ``X`` is empty or ``X`` and ``Y`` differ in length.
    """
    values = np.asarray(X).ravel()
    labels = np.asarray(Y).ravel()
    if values.size == 0:
        raise ValueError("threshold() needs at least one sample")
    if values.size != labels.size:
        raise ValueError("X and Y must contain the same number of samples")

    # Count the classes directly instead of indexing a confusion matrix: the
    # matrix shrinks to 1x1 when only one class is present, which used to
    # raise IndexError (e.g. all labels 0 at the extreme cut).
    is_pos = labels == 1
    is_neg = labels == 0
    n_pos = int(np.count_nonzero(is_pos))
    n_neg = int(np.count_nonzero(is_neg))
    n_total = labels.size

    cuts = np.linspace(values.min(), values.max(), 1000)

    acc, fp, tp = [], [], []
    for cut in cuts:
        predicted_pos = (values < cut) if direction else (values > cut)
        n_tp = int(np.count_nonzero(predicted_pos & is_pos))
        n_fp = int(np.count_nonzero(predicted_pos & is_neg))
        n_tn = n_neg - n_fp

        # A prediction is correct only for true positives and true negatives.
        acc.append((n_tp + n_tn) / n_total)

        tpr = n_tp / n_pos if n_pos > 0 else 0
        tnr = n_tn / n_neg if n_neg > 0 else 0
        tp.append(tpr)
        fp.append(1 - tnr)

    return acc, fp, tp

def single_roc_analysis(X, y, feature_names, feature_labels, directions, colors):
    """
    Run the threshold-based ROC analysis once for every listed feature.

    Args:
        X: DataFrame holding the feature columns
        y: Target values (flattened to 1-D before use)
        feature_names: Column names in X to analyse
        feature_labels: Display name for each feature, same order as feature_names
        directions: Direction flag for each feature, passed on to threshold()
        colors: Plot colour for each feature

    Returns:
        Dict with keys 'features' (copy of feature_names), 'auc_scores'
        (feature -> AUC), 'roc_data' (feature -> dict with 'fp', 'tp', 'auc',
        'color', 'label') and 'n_features'.

    Raises:
        ValueError: If the four per-feature lists differ in length.
    """
    # All per-feature lists must line up with feature_names
    n_features = len(feature_names)
    companions = (feature_labels, directions, colors)
    if any(len(seq) != n_features for seq in companions):
        raise ValueError("All input lists (feature_names, feature_labels, directions, colors) must have the same length")

    roc_data = {}
    for name, display, flag, colour in zip(feature_names, feature_labels, directions, colors):
        column = np.array(X[[name]]).flatten()
        target = np.array(y).flatten()
        # threshold() returns (accuracy, fpr, tpr); accuracy is not needed here
        _, fp, tp = threshold(column, target, flag)
        roc_data[name] = {
            'fp': fp,
            'tp': tp,
            'auc': auc(fp, tp),
            'color': colour,
            'label': display,
        }

    # Flat feature -> AUC view of the same results
    feature_aucs = {name: entry['auc'] for name, entry in roc_data.items()}

    return {
        'features': feature_names.copy(),
        'auc_scores': feature_aucs,
        'roc_data': roc_data,
        'n_features': n_features,
    }

def plot_single_roc(results, figsize=(10, 8), save_path='single_roc_analysis.pdf'):
    """Plot the per-feature ROC curves and print an AUC summary, best feature first.

    Args:
        results: Dict as returned by single_roc_analysis. ``results['roc_data']``
            maps each feature to a dict with keys 'fp', 'tp', 'auc', 'color'
            and 'label'; ``results['auc_scores']`` maps each feature to its AUC.
        figsize: Figure size in inches, passed to matplotlib.
        save_path: File the figure is written to. Defaults to the previously
            hard-coded 'single_roc_analysis.pdf'; pass None to skip saving.
    """
    roc_data = results['roc_data']

    fig, ax = pl.subplots(figsize=figsize)

    # Draw curves in descending AUC order so the legend is ranked best to worst.
    ranked = sorted(roc_data.values(), key=lambda entry: entry['auc'], reverse=True)
    for entry in ranked:
        ax.plot(entry['fp'], entry['tp'],
                color=entry['color'],
                lw=2,
                label=f"{entry['label']} AUC: {entry['auc']:.2f}")

    # Chance diagonal; it carries no label, so it stays out of the legend.
    ax.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
    ax.set_xlim(0.0, 1.0)
    ax.set_ylim(0.0, 1.05)

    ax.set_xlabel('False Positive Rate', fontsize=20)
    ax.set_ylabel('True Positive Rate', fontsize=20)
    ax.tick_params(axis='both', labelsize=20)

    ax.legend(loc="lower right", prop={"size": 16})
    fig.tight_layout()
    if save_path is not None:
        fig.savefig(save_path)
    pl.show()

    # Text summary, sorted by AUC from high to low
    print("\n# ROC Analysis Summary")
    print("- **ROC AUC scores**:")
    ranked_scores = sorted(results['auc_scores'].items(),
                           key=lambda item: item[1],
                           reverse=True)
    for feature_name, auc_score in ranked_scores:
        print(f"  - {feature_name}: {auc_score:.4f}")

In [None]:
# Sanity check: the four per-feature lists must have equal lengths,
# otherwise single_roc_analysis raises ValueError.
print(len(feat), len(label), len(di_feat), len(color))

In [None]:
# Run the single-trial ROC analysis on the selected feature columns, using the
# integer-encoded labels in y_target as the ground truth.
results = single_roc_analysis(X_train, y_target, feat, label, di_feat, color)


In [None]:
# Draw the ROC curves (legend ordered by AUC) and print the AUC summary.
plot_single_roc(results)
