In [15]:
from ipynb.fs.full.Final_Diagnostic_Model import aggregate_dataset_construct
from ipynb.fs.full.Final_Diagnostic_Model import zscores_for_train_and_test
import pandas as pd
import random as rng
import numpy as np

In [22]:
"""
# evaluate_pure_random_model evaluates a random baseline: each test patient's outcome is guessed with a
# coin weighted by the proportion of surviving patients in the train set (an even coin when that proportion is 0.5)
# 
# Input:
# train_flag - Boolean flags for the rows to be included in the train data set
# test_flag - Boolean flags for the rows to be included in the test data set
# MIMIC_zscores - Z-Scores generated from the base data set: patientdata (not used by this function)
# patientdata - The base data set to investigate
# test_name - The name of the test being conducted
# run_num - The number of the test run being calculated
# is_debug - Whether or not to print the per-run summary statistics
# 
# Output: 
# No return value; when test_name is 'Test_Proportional_Random' a CSV row is appended to test_proportional_random_runs.csv
# 
"""
def evaluate_pure_random_model(train_flag, test_flag, MIMIC_zscores, patientdata, test_name, run_num, is_debug):
    """
    Score a random-guess baseline on one train/test split.

    Every unique test-set ICU stay is guessed alive with probability equal to the
    share of surviving stays in the train set, and dead otherwise. The guesses are
    compared with the recorded 90-day mortality.

    Input:
    train_flag - Boolean flags for the rows of patientdata in the train data set
    test_flag - Boolean flags for the rows of patientdata in the test data set
    MIMIC_zscores - Accepted for signature compatibility; not used here
    patientdata - DataFrame with at least 'icustayid' and 'mortality_90d' columns
    test_name - The name of the test being conducted
    run_num - The number of the test run being calculated
    is_debug - Whether or not to print the run statistics

    Output:
    None. When test_name is 'Test_Proportional_Random', one CSV row is appended to
    'test_proportional_random_runs.csv' with the columns:
    run_num, total_patients, correct_guesses, overall_accuracy, dead_accuracy,
    live_accuracy, live_instead_dead, dead_instead_live, total_alive, total_dead,
    test_name

    Raises:
    ValueError - if the train set selects no rows (no proportion can be derived)
    """
    PATIENT_LIVED:int = 0
    PATIENT_DIED:int = 1

    def ratio(numerator, denominator):
        # NaN instead of ZeroDivisionError when a class (or the whole set) is empty
        return numerator / denominator if denominator else float('nan')

    # Grab the unique icustays with their corresponding mortality rates
    columns = ['icustayid', 'mortality_90d']
    train_set = patientdata[train_flag][columns].drop_duplicates()
    test_set = patientdata[test_flag][columns].drop_duplicates()

    # Count outcomes directly so a split that lacks one class yields 0 rather than a KeyError
    death_states = test_set['mortality_90d'].tolist()
    total_patients:int = len(death_states)
    total_alive:int = death_states.count(PATIENT_LIVED)
    total_dead:int = death_states.count(PATIENT_DIED)

    # The guessing probability comes from the train set only, keeping the 'model' blind to the test set
    train_states = train_set['mortality_90d'].tolist()
    total_patients_train:int = len(train_states)
    if total_patients_train == 0:
        raise ValueError("train_flag selects no rows; cannot derive the alive proportion")
    alive_proportion:float = train_states.count(PATIENT_LIVED) / total_patients_train
    if is_debug:
        print(alive_proportion)

    correct_guesses:int = 0
    dead_instead_live:int = 0
    live_instead_dead:int = 0
    for patient_status in death_states:
        # Exactly one uniform draw per patient, so a seeded run stays reproducible.
        # Suppose 60% of the train patients are alive: a draw below 0.6 guesses alive, anything else dead.
        if rng.random() < alive_proportion:
            random_guess = PATIENT_LIVED
        else:
            random_guess = PATIENT_DIED

        if random_guess == patient_status:
            correct_guesses += 1
        elif patient_status == PATIENT_LIVED:
            # The patient lived but was guessed dead
            live_instead_dead += 1
        else:
            # The patient died but was guessed alive
            dead_instead_live += 1

    # Grab all the calculated values
    overall_accuracy:float = ratio(correct_guesses, total_patients)
    dead_accuracy:float = ratio(total_dead - dead_instead_live, total_dead)
    live_accuracy:float = ratio(total_alive - live_instead_dead, total_alive)
    if is_debug:
        print("Test Name: " + test_name)
        print("Overall Accuracy: " + str(overall_accuracy))
        print("Accuracy for Dead: " + str(dead_accuracy))
        print("Accuracy for Living: " + str(live_accuracy))
        print("Living People Guessed Dead: " + str(live_instead_dead))
        print("Dead People Guessed Living: " + str(dead_instead_live))
        print("Total Patients: " + str(total_patients))
        print("Correct Guesses: " + str(correct_guesses))
        print("Alive People: " + str(total_alive))
        print("Dead People: " + str(total_dead))
        print("\n")
    # Construct a formatted CSV string with all the values that have been set.
    # Every field is comma-separated, including dead_accuracy and live_accuracy.
    total_string = (f'{run_num},{total_patients},{correct_guesses},'
                    f'{overall_accuracy},{dead_accuracy},'
                    f'{live_accuracy},{live_instead_dead},{dead_instead_live},'
                    f'{total_alive},{total_dead},'
                    f'{test_name}')
    if test_name == 'Test_Proportional_Random':
        with open('test_proportional_random_runs.csv', 'a') as f:
            f.write(total_string + "\n")
    

In [23]:
# Base data set for every experiment in this notebook, loaded from the local MIMIC patient CSV
patientdata:pd.DataFrame = pd.read_csv("G:/MIMIC-ALL/MIMIC-PATIENTS/patient_data_modified.csv")

# Run loop: score the proportional random baseline once per train/test split.
# evaluate_pure_random_model appends each run's results to its CSV file.
if __name__ == "__main__":
    # The Diagnostic Model notebook's helper supplies the train flags, test flags and z-scores
    train_flag_set, test_flag_set, MIMIC_zscores = aggregate_dataset_construct()
    # Seed once, before the first run, so the whole sequence of runs is repeatable
    rng.seed(123456789)
    for i in range(len(test_flag_set)):
        test_flag = test_flag_set[i]
        train_flag = train_flag_set[i]
        evaluate_pure_random_model(
            train_flag,
            test_flag,
            MIMIC_zscores,
            patientdata,
            test_name='Test_Proportional_Random',
            run_num=i,
            is_debug=True,
        )
    
        

0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.5025879917184265
Accuracy for Dead: 0.5175438596491229
Accuracy for Living: 0.49796747967479676
Living People Guessed Dead: 741
Dead People Guessed Living: 220
Total Patients: 1932
Correct Guesses: 971
Alive People: 1476
Dead People: 456


0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.4808488612836439
Accuracy for Dead: 0.44298245614035087
Accuracy for Living: 0.49254742547425473
Living People Guessed Dead: 749
Dead People Guessed Living: 254
Total Patients: 1932
Correct Guesses: 929
Alive People: 1476
Dead People: 456


0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.5108695652173914
Accuracy for Dead: 0.4934210526315789
Accuracy for Living: 0.516260162601626
Living People Guessed Dead: 714
Dead People Guessed Living: 231
Total Patients: 1932
Correct Guesses: 987
Alive People: 1476
Dead People: 456


0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.5134575569358178
Accuracy for Dead: 0.530

0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.5152770585189022
Accuracy for Dead: 0.512087912087912
Accuracy for Living: 0.516260162601626
Living People Guessed Dead: 714
Dead People Guessed Living: 222
Total Patients: 1931
Correct Guesses: 995
Alive People: 1476
Dead People: 455


0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.4989648033126294
Accuracy for Dead: 0.5285087719298246
Accuracy for Living: 0.4898373983739837
Living People Guessed Dead: 753
Dead People Guessed Living: 215
Total Patients: 1932
Correct Guesses: 964
Alive People: 1476
Dead People: 456


0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.4922360248447205
Accuracy for Dead: 0.4824561403508772
Accuracy for Living: 0.49525745257452575
Living People Guessed Dead: 745
Dead People Guessed Living: 236
Total Patients: 1932
Correct Guesses: 951
Alive People: 1476
Dead People: 456


0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.5015527950310559
Accuracy for Dead: 0.521929

0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.4940445365095805
Accuracy for Dead: 0.5296703296703297
Accuracy for Living: 0.4830623306233062
Living People Guessed Dead: 763
Dead People Guessed Living: 214
Total Patients: 1931
Correct Guesses: 954
Alive People: 1476
Dead People: 455


0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.48886587260486797
Accuracy for Dead: 0.5054945054945055
Accuracy for Living: 0.483739837398374
Living People Guessed Dead: 762
Dead People Guessed Living: 225
Total Patients: 1931
Correct Guesses: 944
Alive People: 1476
Dead People: 455


0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.4922360248447205
Accuracy for Dead: 0.49780701754385964
Accuracy for Living: 0.4905149051490515
Living People Guessed Dead: 752
Dead People Guessed Living: 229
Total Patients: 1932
Correct Guesses: 951
Alive People: 1476
Dead People: 456


0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.4994824016563147
Accuracy for Dead: 0.4254

0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.5137234593474883
Accuracy for Dead: 0.5186813186813187
Accuracy for Living: 0.5121951219512195
Living People Guessed Dead: 720
Dead People Guessed Living: 219
Total Patients: 1931
Correct Guesses: 992
Alive People: 1476
Dead People: 455


0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.5199378560331435
Accuracy for Dead: 0.5538461538461539
Accuracy for Living: 0.5094850948509485
Living People Guessed Dead: 724
Dead People Guessed Living: 203
Total Patients: 1931
Correct Guesses: 1004
Alive People: 1476
Dead People: 455


0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.5025879917184265
Accuracy for Dead: 0.4868421052631579
Accuracy for Living: 0.5074525745257452
Living People Guessed Dead: 727
Dead People Guessed Living: 234
Total Patients: 1932
Correct Guesses: 971
Alive People: 1476
Dead People: 456


0.5
Test Name: Test_Proportional_Random
Overall Accuracy: 0.505175983436853
Accuracy for Dead: 0.48464

In [20]:
# Calculate average accuracy
# Each row of the runs file is comma-separated; the fourth field (index 3) holds the overall accuracy
with open('test_proportional_random_runs.csv', 'r') as f:
    lines = f.readlines()
    all_accuracies = [float(row.split(',')[3]) for row in lines]
    avg_accuracy = np.average(all_accuracies)
    print("Average accuracy is:", avg_accuracy)

Average accuracy is: 0.5004221254394627


In [34]:
# Loaded-coin baseline over the entire data set, in contrast to the cross-validation
# splits above, whose life/death balance is 50-50
all_data = patientdata[['icustayid','mortality_90d']].drop_duplicates()
mortality_counts = all_data['mortality_90d'].value_counts()
total_alive:int = mortality_counts[0]
total_dead:int = mortality_counts[1]
alive_proportion = total_alive / (total_alive + total_dead)
death_states = all_data['mortality_90d'].tolist()
correct_guesses = 0
rng.seed(123456789)
for patient_status in death_states:
    # One uniform draw per patient: a draw below the alive proportion guesses alive (0),
    # anything else guesses dead (1). E.g. with 60% alive, draws under 0.6 guess alive.
    random_double = rng.random()
    random_guess = 0 if random_double < alive_proportion else 1
    # Count the guess when it matches the recorded outcome
    if random_guess == patient_status:
        correct_guesses += 1
print(correct_guesses/len(death_states))

0.6387271117737502


In [None]:
# NOTE(review): stray expression in a never-executed cell; no definition of `a` is visible in this notebook — presumably leftover typing, confirm and remove
a