In [None]:
# import modules
import pandas as pd
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [None]:
# Load every result table; each file is read as tab-separated text.
def _load_tsv(filename):
    """Read one tab-separated file into a DataFrame."""
    return pd.read_table(filename, sep='\t')


# CPAT
CPAT_Human = _load_tsv('CPAT_human.TSV')
CPAT_Plants = _load_tsv('CPAT_plants.TSV')
CPAT_Sorghum = _load_tsv('CPAT_sorghum.TSV')

# RNAsamba
RNAsamba_Full = _load_tsv('RNAsamba_fl.TSV')
RNAsamba_Partial = _load_tsv('RNAsamba_pl.TSV')
RNAsamba_Sorghum = _load_tsv('RNAsamba_sorghum.TSV')

# CPC2, LGC, Pfamscan
CPC2 = _load_tsv('CPC2.TSV')
LGC = _load_tsv('LGC.TSV')
Pfamscan = _load_tsv('Pfamscan.TSV')

# FEELnc
FEELnc_sorghum = _load_tsv('FEELnc_sorghum.TSV')
FEELnc_arabidopsis = _load_tsv('FEELnc_arabidopsis.TSV')
FEELnc_human = _load_tsv('FEELnc_human.TSV')

# LncFinder
LncFinder_sorghum = _load_tsv('LncFinder_sorghum.TSV')
LncFinder_plants = _load_tsv('LncFinder_plants.TSV')
LncFinder_human = _load_tsv('LncFinder_human.TSV')

# CPPred
CPPred_sorghum = _load_tsv('CPPred_sorghum.TSV')
CPPred_plants = _load_tsv('CPPred_plants.TSV')
CPPred_human = _load_tsv('CPPred_human.TSV')

In [None]:
# Dataframes and corresponding approaches.
# The two lists are parallel: approaches[k] is the label reported for
# dataframes[k], so they must stay in the same order and have the same length.
dataframes = [
    CPAT_Human, CPAT_Plants, CPAT_Sorghum,
    RNAsamba_Full, RNAsamba_Partial, RNAsamba_Sorghum,
    CPC2, LGC, Pfamscan,
    FEELnc_sorghum, FEELnc_arabidopsis, FEELnc_human,
    LncFinder_sorghum, LncFinder_plants, LncFinder_human,
    CPPred_sorghum, CPPred_plants, CPPred_human,
]
approaches = [
    'CPAT-Human', 'CPAT-Plants', 'CPAT-Sorghum',
    'RNAsamba-full length', 'RNAsamba-partial length', 'RNAsamba-Sorghum',
    'CPC2', 'LGC', 'PfamScan',
    'FEELnc-Sorghum', 'FEELnc-Arabidopsis', 'FEELnc-Human',
    'LncFinder-Sorghum', 'LncFinder-Plants', 'LncFinder-Human',
    # Each CPPred table gets its own label; blank labels would make the
    # rows for different tables indistinguishable in the output files.
    'CPPred-Sorghum', 'CPPred-Plants', 'CPPred-Human',
]
# Fail immediately if the lists drift out of step when an entry is added or removed.
assert len(dataframes) == len(approaches), (
    'dataframes and approaches must have the same number of entries'
)

In [None]:
# Empty accumulators: one for the per-approach AUC records,
# one for the per-approach ROC curve tables.
auc_values, roc_curves = [], []

In [None]:
# Compute the ROC curve and AUC for every dataframe/approach pair.
#
# Results are collected in two lists:
#   auc_values - one {'Approach', 'AUC'} dict per approach
#   roc_curves - one DataFrame (FPR, TPR, Approach) per approach
if len(dataframes) != len(approaches):
    raise ValueError(
        f'{len(dataframes)} dataframes but {len(approaches)} approach labels'
    )

# Reset the accumulators in this cell so that re-running it does not append
# a second copy of every row to the results.
auc_values = []
roc_curves = []

for approach, df in zip(approaches, dataframes):
    y_true = df['Class']  # True class labels
    # NOTE(review): roc_curve receives the 'Label' column as its score
    # argument. If that column holds hard class predictions rather than
    # continuous scores, the resulting curve has only a few points -
    # confirm whether a score/probability column should be used instead.
    y_pred = df['Label']  # Predicted labels

    # Calculate ROC curve (the thresholds are not needed downstream)
    fpr, tpr, _ = roc_curve(y_true, y_pred)

    # Calculate AUC (Area Under Curve)
    auc_value = auc(fpr, tpr)
    auc_values.append({'Approach': approach, 'AUC': auc_value})
    print(f'{approach} AUC: {auc_value:.4f}')

    # One row per ROC point; the scalar label is broadcast to every row
    roc_curves.append(
        pd.DataFrame({'FPR': fpr, 'TPR': tpr, 'Approach': approach})
    )

In [None]:
# Stack the per-approach ROC tables into one long table with a fresh 0..n-1 index
roc_data = pd.concat(objs=roc_curves, axis=0, ignore_index=True)

In [None]:
# Save the combined ROC table as tab-separated text, without the index column
roc_data.to_csv(path_or_buf='ROC_values.TSV', sep='\t', index=False)

In [None]:
# Turn the collected AUC records (one dict per approach) into a table
auc_df = pd.DataFrame(data=auc_values)

In [None]:
# Save the AUC table as tab-separated text, without the index column
auc_df.to_csv(path_or_buf='AUC_values.TSV', sep='\t', index=False)