In [1]:
import os
import joblib
import pandas as pd

In [2]:
# Resolve the working directory plus the two directories above it. The data,
# feature and model folders used by this notebook are addressed relative to
# `grandparent_dir`.
cwd = os.getcwd()
parent_dir = os.path.split(cwd)[0]
grandparent_dir = os.path.split(parent_dir)[0]

In [3]:
# Load the FEMH demographics table and drop the ID, 'Disease category' and
# 'pathology' columns. The prediction loop takes 'pathology' from the feature
# tables, so keeping a second copy here would collide when the two are merged
# on 'filename'.
demographics = (
    pd.read_pickle(f'{grandparent_dir}/Audio Demographics/FEMH_demographics.pkl')
    .drop(columns=['ID', 'Disease category', 'pathology'])
)
demographics.head()

Unnamed: 0,Sex,Age,Narrow pitch range,Decreased volume,Fatigue,Dryness,Lumping,Heartburn,Choking,Eye dryness,...,Noise at work,Occupational vocal demand,Diabetes,Hypertension,CAD,Head and Neck Cancer,Head injury,CVA,Voice handicap index - 10,filename
0,1,97,0,0,0,0,0,0,1,0,...,1,2,0,0,0,0,0,0,12,Atrophy-00002mg
1,1,86,0,0,0,0,0,0,1,0,...,1,4,0,1,0,0,0,1,36,Atrophy-0001297
2,2,45,0,0,0,1,0,0,0,0,...,1,3,0,0,0,0,0,0,16,Atrophy-0001apo
3,1,75,1,1,0,0,1,0,0,0,...,1,3,0,0,1,0,0,0,19,Atrophy-0001qd3
4,1,64,0,0,0,1,0,0,0,0,...,1,2,0,0,1,0,0,0,34,Atrophy-0002ipt


In [4]:
# Root folder that is walked for pre-trained model pickles. Written with single
# '/' separators like the other paths in this notebook, and without a trailing
# separator so that paths built from it contain no empty components.
model_dir = f'{grandparent_dir}/Pre-Trained Models'

In [5]:
# Collection of recording names (no file extension) that make up the test set;
# the prediction loop keeps only feature rows whose 'filename' appears in it.
test_files_path = f'{grandparent_dir}/Predict Audio/FEMH_test_files.pkl'
test_files = joblib.load(test_files_path)

In [6]:
# Pathology labels treated as the 'Malignant' class; every other pathology
# value is relabelled 'Benign' in the prediction loop.
malignant_pathologies = [
    'Laryngeal cancer',
    'Dysplasia',
]

In [7]:
# Empty accumulator for the per-model predictions. Its two columns are the keys
# on which each model's output is merged.
key_columns = ['file', 'ground truth']
results = pd.DataFrame(columns=key_columns)

In [8]:
# Score every pre-trained model found under `model_dir` on the test recordings
# and collect the outputs side by side in `results`: the 'file' and
# 'ground truth' key columns plus one prediction/probability column pair per
# model.

POSITIVE_LABEL = 'Malignant'
NEGATIVE_LABEL = 'Benign'
RESULT_KEYS = ['file', 'ground truth']


def _load_test_table(feature):
    """Build the test table for one audio feature family.

    Reads ``{feature}_FEMH`` from the 'Audio Features' folder (a pickle for
    'MFCC', a CSV for everything else), keeps only the rows whose extension-less
    file name is listed in ``test_files``, collapses 'pathology' to
    'Malignant'/'Benign' using ``malignant_pathologies``, and inner-joins the
    demographics on 'filename'.

    Relies on the notebook-level variables ``grandparent_dir``, ``test_files``,
    ``malignant_pathologies`` and ``demographics``.

    Parameters
    ----------
    feature : str
        Feature family token taken from the model file name
        (e.g. 'MFCC', 'OpenSmile', 'FeatureStates').

    Returns
    -------
    pandas.DataFrame
        Feature rows for the test recordings with demographics columns attached.
    """
    features_dir = f"{grandparent_dir}/Audio Features"
    if feature == 'MFCC':
        table = pd.read_pickle(f"{features_dir}/{feature}_FEMH.pkl")
    else:
        table = pd.read_csv(f"{features_dir}/{feature}_FEMH.csv", index_col=0)

    # 'file' carries an extension; 'filename' is the join key shared with
    # `test_files` and `demographics`.
    table['filename'] = table['file'].str.split('.', expand=True)[0]
    table = table[table['filename'].isin(test_files)].reset_index(drop=True)

    # Replace specified pathologies with "Malignant", everything else with "Benign".
    table['pathology'] = (
        table['pathology']
        .isin(malignant_pathologies)
        .map({True: POSITIVE_LABEL, False: NEGATIVE_LABEL})
    )

    return pd.merge(table, demographics, on='filename', how='inner')


# Each feature family is shared by many models, so prepare its table only once.
test_tables = {}

# Start from a clean slate so that re-running this cell does not merge onto the
# columns produced by a previous run.
results = None

for subdir, dirs, files in os.walk(model_dir):
    # Sorting makes the traversal (and therefore the column order of `results`)
    # independent of the order in which the filesystem lists entries.
    dirs.sort()
    for file in sorted(files):
        if not file.endswith('.pkl'):
            continue

        # Model files are named '<algorithm>_<feature>_<inputs>.pkl'. Strip the
        # extension first so the feature token never carries '.pkl' with it.
        filename = os.path.splitext(file)[0]
        feature = filename.split('_')[1]

        # NOTE: joblib.load unpickles arbitrary objects; only point `model_dir`
        # at model files you trust.
        model = joblib.load(os.path.join(subdir, file))
        input_features = model.feature_names_in_

        if feature not in test_tables:
            test_tables[feature] = _load_test_table(feature)
        test_df = test_tables[feature]

        X_test = test_df[input_features]
        y_test = test_df['pathology']

        y_pred = model.predict(X_test)

        # predict_proba columns follow model.classes_; look the positive class up
        # instead of assuming it sits in column 1 (kept as the fallback).
        classes = list(getattr(model, 'classes_', []))
        positive_idx = classes.index(POSITIVE_LABEL) if POSITIVE_LABEL in classes else 1
        y_pred_prob = model.predict_proba(X_test)[:, positive_idx]

        results_temp = pd.DataFrame({
            'file': test_df['filename'],
            'ground truth': y_test,
            f'{filename} prediction': y_pred,
            f'{filename} probability': y_pred_prob,
        })

        # The first model seeds the table; every later one is joined on the keys.
        if results is None:
            results = results_temp
        else:
            results = pd.merge(results, results_temp, on=RESULT_KEYS)

        print(file, feature)

# No model files found: leave an empty table with the key columns.
if results is None:
    results = pd.DataFrame(columns=RESULT_KEYS)

LR_FeatureStates_Age_Sex.pkl FeatureStates
LR_FeatureStates_Age_Sex_Symptoms.pkl FeatureStates
LR_FeatureStates_Rec_Only.pkl FeatureStates
LR_FeatureStates_Symptoms.pkl FeatureStates
LR_MFCC_Age_Sex.pkl MFCC
LR_MFCC_Age_Sex_Symptoms.pkl MFCC
LR_MFCC_Rec_Only.pkl MFCC
LR_MFCC_Symptoms.pkl MFCC
LR_OpenSmile_Age_Sex.pkl OpenSmile
LR_OpenSmile_Age_Sex_Symptoms.pkl OpenSmile
LR_OpenSmile_Rec_Only.pkl OpenSmile
LR_OpenSmile_Symptoms.pkl OpenSmile
NN_FeatureStates_Age_Sex.pkl FeatureStates
NN_FeatureStates_Age_Sex_Symptoms.pkl FeatureStates
NN_FeatureStates_Rec_Only.pkl FeatureStates
NN_FeatureStates_Symptoms.pkl FeatureStates
NN_MFCC_Age_Sex.pkl MFCC
NN_MFCC_Age_Sex_Symptoms.pkl MFCC
NN_MFCC_Rec_Only.pkl MFCC
NN_MFCC_Symptoms.pkl MFCC
NN_OpenSmile_Age_Sex.pkl OpenSmile
NN_OpenSmile_Age_Sex_Symptoms.pkl OpenSmile
NN_OpenSmile_Rec_Only.pkl OpenSmile
NN_OpenSmile_Symptoms.pkl OpenSmile
SVM_FeatureStates_Age_Sex.pkl FeatureStates
SVM_FeatureStates_Age_Sex_Symptoms.pkl FeatureStates
SVM_FeatureSt

In [10]:
# Preview the first rows of the combined prediction table.
results.head(n=5)

Unnamed: 0,file,ground truth,LR_FeatureStates_Age_Sex prediction,LR_FeatureStates_Age_Sex probability,LR_FeatureStates_Age_Sex_Symptoms prediction,LR_FeatureStates_Age_Sex_Symptoms probability,LR_FeatureStates_Rec_Only prediction,LR_FeatureStates_Rec_Only probability,LR_FeatureStates_Symptoms prediction,LR_FeatureStates_Symptoms probability,...,SVM_MFCC_Symptoms prediction,SVM_MFCC_Symptoms probability,SVM_OpenSmile_Age_Sex prediction,SVM_OpenSmile_Age_Sex probability,SVM_OpenSmile_Age_Sex_Symptoms prediction,SVM_OpenSmile_Age_Sex_Symptoms probability,SVM_OpenSmile_Rec_Only prediction,SVM_OpenSmile_Rec_Only probability,SVM_OpenSmile_Symptoms prediction,SVM_OpenSmile_Symptoms probability
0,Atrophy-0001apo,Benign,Benign,0.001301,Benign,0.157229,Benign,2.6e-05,Benign,0.221742,...,Benign,0.008403,Benign,0.010395,Benign,0.017766,Benign,0.019887,Benign,0.025877
1,Atrophy-0400og6,Benign,Benign,0.002063,Benign,0.213735,Benign,0.000276,Benign,0.451704,...,Malignant,0.055249,Benign,0.008386,Benign,0.015901,Benign,0.014908,Benign,0.026531
2,Atrophy-0500cg6,Benign,Benign,0.070688,Malignant,0.711418,Benign,0.044419,Malignant,0.573204,...,Malignant,0.131172,Malignant,0.123806,Malignant,0.083921,Malignant,0.047273,Malignant,0.079512
3,Atrophy-0601ntf,Benign,Malignant,0.961759,Malignant,0.625363,Malignant,0.926107,Malignant,0.62538,...,Benign,0.02258,Benign,0.035384,Benign,0.018849,Benign,0.024361,Benign,0.039769
4,Atrophy-07017ip,Benign,Benign,0.000648,Benign,0.124255,Benign,2.9e-05,Benign,0.321406,...,Benign,0.014545,Benign,0.011126,Malignant,0.042882,Benign,0.025011,Malignant,0.060018


In [11]:
# Write the combined predictions (index column included) to the working directory.
output_csv = 'ModelPredictions.csv'
results.to_csv(output_csv)