# Log data analysis 

## Prepare the log data for analysis

### 1. Import the required libraries

In [None]:
import pandas as pd 
import numpy as np
import re
import math
from datetime import timedelta
from sklearn.metrics import confusion_matrix, roc_auc_score

### 2. Provide some general information about the log

In [None]:
### General input
# Use case to analyse; its first three characters are used in file names.
use_case = 'aki'

# HDZ: dates (YYYYMMDD) of the log deliveries, in ascending order.
date_logs = [
    '20210219', '20210226', '20210305', '20210319', '20210409',
    '20210416', '20210507', '20210521', '20210528', '20210604',
    '20210611', '20210618', '20210625',
]

customer = 'HDZ'
path = r'logs'
# Directory of the most recent delivery (last entry of date_logs).
path_log_file = '/'.join((path, customer, date_logs[-1]))

# Last outcome file path
path_outcome_file = f'{path_log_file}/{date_logs[-1]}_{customer.lower()}_{use_case[:3]}.txt'

In [None]:
# The date of the outcome file is taken from its file name, which starts with
# the delivery date as YYYYMMDD. The file name is isolated first so that the
# extraction does not depend on the length of the customer or use-case
# abbreviation (a fixed [-20:-12] slice only works for 3-character ones).
outcome_date = pd.to_datetime(path_outcome_file.rsplit('/', 1)[-1][:8], format="%Y%m%d")

# Define the code deadline as 60 days before the outcome date
code_deadline = outcome_date - timedelta(days=60)

### 3. Read in the log file and create a dataframe with all observation in the logs

In [None]:
# Collect the raw lines of every per-date model log, in the order of date_logs.
aggregated_log = []

for date_log in date_logs:
    log_path = f'{path}/{customer}/{date_log}/{customer}_{date_log}-{use_case[0:3]}-model.log'
    with open(log_path, "r") as log:
        # Iterating the file yields its lines (line endings included).
        aggregated_log.extend(log)

### 4. Extract the predictions from the RESPONSE line in the log

In [None]:
# Turn every log line that contains RESPONSE into one prediction row:
# case id, timestamp string, observation text and belief.
cases, dates, observations, beliefs = [], [], [], []

for line in aggregated_log:
    if re.search('RESPONSE', line) is None:
        continue

    # Text after the last " RESPONSE " marker, cut at the first literal \n"
    # sequence and stripped of newline characters.
    payload = re.sub('^.* RESPONSE ', '', line)
    payload = re.sub('\\\\n".*$', '', payload)
    payload = re.sub('\n', '', payload)
    payload_fields = payload.split(': ')

    # First 19 characters after the last :" in the line; parsed later with
    # the format %Y-%m-%dT%H:%M:%S.
    timestamp = re.sub('^.*:"', '', line)[:19]

    # Observation text: third '": "'-separated field of the line once all
    # backslashes are removed and '}n' is replaced by ': '.
    unescaped = line.replace("\\", "")
    observation = re.sub('}n', ': ', unescaped).split('": "')[2]

    # The case id is the first ': '-separated field, the raw belief the start
    # of the third one (up to the first comma).
    raw_belief = payload_fields[2].split(',')[0]
    cases.append(payload_fields[0])

    # The raw value is exponentiated (presumably it is a log-probability —
    # TODO confirm). Lines naming a positive class keep exp(value); all other
    # lines store the complement 1 - exp(value).
    names_positive_class = any(
        marker in line for marker in ('DELIRIUM 1', 'SEPSIS 1', 'AKI 1')
    )
    probability = math.exp(float(raw_belief))
    beliefs.append(probability if names_positive_class else 1 - probability)

    dates.append(timestamp)
    observations.append(observation)

prediction_data = pd.DataFrame(
    {'CASEID': cases, 'DATE': dates, 'OBS': observations, 'BELIEF': beliefs},
    columns=['CASEID', 'DATE', 'OBS', 'BELIEF'],
)
prediction_data = prediction_data.sort_values(['CASEID', 'DATE'])

In [None]:
# Remove exact duplicate rows, then convert the case id to int and the
# timestamp string to a real datetime.
prediction_data = prediction_data.drop_duplicates()
prediction_data['CASEID'] = prediction_data['CASEID'].astype(int)
prediction_data['DATETIME'] = pd.to_datetime(prediction_data['DATE'], format="%Y-%m-%dT%H:%M:%S")

# date only on day level
prediction_data['DATE'] = prediction_data['DATETIME'].dt.date

# For predictions that belong to the same medical case and day and have the
# same observations and belief, set DATETIME to the latest timestamp of the
# group. The aggregation is named as a string because passing the builtin
# `max` is deprecated in recent pandas versions.
# NOTE(review): the rows of such a group become identical but are not dropped
# here; later cells de-duplicate per CASEID. Confirm whether a
# drop_duplicates() is intended at this point.
prediction_data['DATETIME'] = prediction_data.groupby(['CASEID', 'DATE', 'OBS', 'BELIEF'])['DATETIME'].transform('max')

### 5. Merge the prediction_data with the outcome file

In [None]:
# Load the ';'-separated outcome file and keep only the rows whose CANCELDATE
# is missing.
outcome_file = pd.read_csv(path_outcome_file, sep=';')
outcome_file = outcome_file.loc[outcome_file['CANCELDATE'].isnull()]

In [None]:
## Assign label to the prediction data
# LABEL is 1 when the case id appears in the FALLID column of the outcome
# file and 0 otherwise.
case_in_outcome = prediction_data['CASEID'].isin(outcome_file['FALLID'])
prediction_data['LABEL'] = case_in_outcome.astype(int)

### 6. Log data analysis

In [None]:
# Check period of the logs: earliest and latest prediction timestamp, each
# truncated to the start of its day.
start_date = prediction_data['DATETIME'].min().floor('D')
end_date = prediction_data['DATETIME'].max().floor('D')

print(start_date, end_date, sep='\n')

### 6.1. Read all the discharged cases

In [None]:
# Include all the discharged cases from every log delivery.
dis_logs = date_logs
dtypes = {'FALLID': int, 'AUFNDAT': str, 'ENTLDAT': str}

# Scalar parser for a date string whose first 10 characters are dd.mm.YYYY.
# It is no longer passed to read_csv (the `date_parser` argument is deprecated
# and `pd.datetime` no longer exists in pandas 2.x) but is kept for any code
# that still refers to it.
parser = lambda x: pd.to_datetime(x[0:10], format='%d.%m.%Y')


def _read_case_file(file_path, date_columns):
    """Read a ';'-separated case file and parse the given date columns.

    The date columns are read as strings and only their first 10 characters
    (dd.mm.YYYY) are parsed, so anything after the date is ignored. Missing
    values become NaT.
    """
    cases = pd.read_csv(file_path, sep=';', dtype=dtypes)
    for column in date_columns:
        cases[column] = pd.to_datetime(cases[column].str[:10], format='%d.%m.%Y')
    return cases


discharge_frames = [
    _read_case_file(
        f'{path}/{customer}/{dis_log}/{dis_log}_{customer.lower()}_discharges.txt',
        ['AUFNDAT', 'ENTLDAT'],
    )
    for dis_log in dis_logs
]

# Later deliveries are placed first so that drop_duplicates (keep='first')
# retains the record from the most recent file for every FALLID.
if discharge_frames:
    discharged_df_total = pd.concat(discharge_frames[::-1])
else:
    discharged_df_total = pd.DataFrame(columns=['FALLID', 'AUFNDAT', 'ENTLDAT'])
discharged_file = discharged_df_total.drop_duplicates(subset="FALLID")

open_cases_file = _read_case_file(
    f'{path}/{customer}/{date_logs[-1]}/{date_logs[-1]}_{customer.lower()}_open_cases.txt',
    ['AUFNDAT'],
)

In [None]:
# A case whose discharge date (ENTLDAT) lies before the code deadline, i.e.
# more than 60 days before the outcome file, is considered as coded.
coded_cases = discharged_file.loc[discharged_file['ENTLDAT'] < code_deadline, 'FALLID']

# Coded cases keep their label; every other case gets -1, meaning unknown.
prediction_data['LABEL'] = prediction_data['LABEL'].where(
    prediction_data['CASEID'].isin(coded_cases), -1
)

### 6.2. Categorize the cases in ["admitted in the log", "discharged in the log", "admitted and discharged in the log"]

In [None]:
# cases_adm is defined as discharged cases that were admitted during the log period.
# open_cases is defined as cases that were admitted during the log period and are not yet closed.
# The cases admitted during the period are the union of the two sets above.
# cases_dis is defined as cases with a discharge date up to end_date.
cases_adm = discharged_file[discharged_file.AUFNDAT.between(start_date, end_date)].FALLID.unique().tolist()
cases_dis = discharged_file[discharged_file.ENTLDAT <= end_date].FALLID.unique().tolist()
open_cases = open_cases_file[open_cases_file.AUFNDAT.between(start_date, end_date)].FALLID.unique().tolist()

admission_df = prediction_data[prediction_data.CASEID.isin(cases_adm + open_cases)].reset_index(drop=True)

# The second reset_index() keeps the fresh 0..n-1 index as an extra 'index'
# column, as this cell has always done.
discharged_df = prediction_data[prediction_data.CASEID.isin(cases_dis)].reset_index(drop=True)
discharged_df = discharged_df.reset_index()

# For the admission df, only take the last instance of the first day.
# For the discharge df, only take the last instance overall.
# Aggregations are named as strings because passing the builtins min/max to
# transform is deprecated in recent pandas versions.
admission_df = admission_df[admission_df.groupby('CASEID')['DATE'].transform('min') == admission_df['DATE']]
admission_df = admission_df[admission_df.groupby('CASEID')['DATETIME'].transform('max') == admission_df['DATETIME']]
admission_df = admission_df.drop_duplicates(subset='CASEID', keep="last")

discharged_df = discharged_df[discharged_df.groupby('CASEID')['DATETIME'].transform('max') == discharged_df['DATETIME']]
discharged_df = discharged_df.drop_duplicates(subset='CASEID', keep="last")

### 6.3. Calculate the alert rates at admission and at discharge

In [None]:
# Comparison of alerts at admission and at discharge.
# Medium risk: 0.5 <= BELIEF < 0.75; high risk: BELIEF >= 0.75.


def _format_rate(count, total):
    """Format count/total as a percentage with one decimal.

    Returns "0.0%" when total is 0 so that an empty frame does not raise
    ZeroDivisionError.
    """
    return "{:.1%}".format(count / total) if total else "0.0%"


step = admission_df[admission_df['BELIEF'] >= 0.5]
med_A = len(step[step['BELIEF'] < 0.75])
high_A = len(step[step['BELIEF'] >= 0.75])
alert_rate_A_M = _format_rate(med_A, len(admission_df))
alert_rate_A_H = _format_rate(high_A, len(admission_df))
positive_A = len(step)

step = discharged_df[discharged_df['BELIEF'] >= 0.5]
med_D = len(step[step['BELIEF'] < 0.75])
high_D = len(step[step['BELIEF'] >= 0.75])
alert_rate_D_M = _format_rate(med_D, len(discharged_df))
alert_rate_D_H = _format_rate(high_D, len(discharged_df))
positive_D = len(step)

matrix_comp = pd.DataFrame(
    {
        'at admission': [len(admission_df), positive_A, med_A, high_A, alert_rate_A_M, alert_rate_A_H],
        'at discharge': [len(discharged_df), positive_D, med_D, high_D, alert_rate_D_M, alert_rate_D_H],
    },
    index=['amount of cases', 'amount of positive predicted cases', 'med risk', 'high risk',
           'alert rate medium risk', 'alert_rate_high_risk'],
)
# NOTE(review): index=False leaves the row labels above out of the CSV; the
# file format is kept unchanged here — confirm whether the labels should be
# written as well.
matrix_comp.to_csv(f'{path_log_file}/{use_case}/matrix_comparative.csv', index = False, sep = ";")

### 6.4. Calculate the alert rates per department

In [None]:
# Comparison of alert rates per department at admission and at discharge.
# Read the departments table (no header row): column 0 becomes the code 'F',
# prefixed with 'DEPARTMENT-', and column 1 the description.
departments = pd.read_csv(f'{path}/{customer}/departments.csv', sep=',', dtype ='str', header=None)
departments = departments.rename(columns={0: 'F', 1: 'DESCRIPTION'})
departments['F'] = 'DEPARTMENT-' + departments.F


def _add_department_columns(df):
    """Add OBS_SPLIT and DEPARTMENT columns to df in place.

    OBS_SPLIT is OBS split on single spaces; DEPARTMENT is the list of those
    tokens that start with 'DEPARTMENT'.
    """
    df['OBS_SPLIT'] = df['OBS'].map(lambda obs: obs.split(' '))
    df['DEPARTMENT'] = df['OBS_SPLIT'].apply(
        lambda tokens: [token for token in tokens if token.startswith('DEPARTMENT')]
    )


def _department_alert_rates(df_dep, dep_codes):
    """Return (medium_rates, high_rates, total_rates, case_counts).

    Each list has exactly one entry per code in dep_codes. A code without any
    rows in df_dep (including the NaN produced by exploding an empty
    department list) gets "0.0%" for all three rates, so the lists always
    stay aligned with dep_codes.
    """
    medium_rates, high_rates, total_rates, case_counts = [], [], [], []
    for dep in dep_codes:
        dep_df = df_dep[df_dep['DEPARTMENT'] == dep]
        n_rows = len(dep_df)
        if n_rows > 0:
            alerts = dep_df[dep_df['BELIEF'] >= 0.5]
            n_medium = len(alerts[alerts['BELIEF'] < 0.75])
            n_high = len(alerts[alerts['BELIEF'] >= 0.75])
            medium_rates.append("{:.1%}".format(n_medium / n_rows))
            high_rates.append("{:.1%}".format(n_high / n_rows))
            total_rates.append("{:.1%}".format(len(alerts) / n_rows))
        else:
            medium_rates.append("0.0%")
            high_rates.append("0.0%")
            total_rates.append("0.0%")
        case_counts.append(len(dep_df.CASEID.unique()))
    return medium_rates, high_rates, total_rates, case_counts


_add_department_columns(admission_df)
admission_df_dep = admission_df.explode('DEPARTMENT')
admission_df_dep = admission_df_dep.sort_values(by = ['DEPARTMENT']).reset_index(drop = True)
count = len(admission_df_dep.CASEID.unique().tolist())

_add_department_columns(discharged_df)
discharged_df_dep = discharged_df.explode('DEPARTMENT')
discharged_df_dep = discharged_df_dep.sort_values(by = ['DEPARTMENT']).reset_index(drop = True)
count_L = len(discharged_df_dep.CASEID.unique().tolist())

# NOTE(review): both the admission and the discharge statistics are reported
# for the departments seen at discharge; departments that only occur at
# admission do not appear in the result.
department_list = discharged_df_dep['DEPARTMENT'].unique().tolist()

(alert_rate_first_dep_M, alert_rate_first_dep_H,
 total_alert_rate_first, count_first) = _department_alert_rates(admission_df_dep, department_list)
(alert_rate_last_dep_M, alert_rate_last_dep_H,
 total_alert_rate_last, count_last) = _department_alert_rates(discharged_df_dep, department_list)

matrix_alert_rate = pd.DataFrame(
    {
        'department_code': department_list,
        'count_adm': count_first,
        'total_alerts_A': total_alert_rate_first,
        'AR_A_M': alert_rate_first_dep_M,
        'AR_A_H': alert_rate_first_dep_H,
        'count_dis': count_last,
        'total_alerts_D': total_alert_rate_last,
        'AR_D_M': alert_rate_last_dep_M,
        'AR_D_H': alert_rate_last_dep_H,
    },
    columns=['department_code', 'count_adm', 'total_alerts_A', 'AR_A_M', 'AR_A_H',
             'count_dis', 'total_alerts_D', 'AR_D_M', 'AR_D_H'],
)

# Share of each department's cases among all cases at admission / discharge.
# These are not written to the CSV below.
counts_F = count * np.ones(len(count_first))
counts_L = count_L * np.ones(len(count_last))
ratio_dis = count_last/counts_L
ratio_adm = count_first/counts_F

# Attach the department description via the code column, which is temporarily
# renamed to 'F' to match the departments table.
matrix_alert_rate = matrix_alert_rate.rename(columns={'department_code': 'F'})
matrix_alert_rate = matrix_alert_rate.merge(departments, on='F', how='left')
matrix_alert_rate = matrix_alert_rate.rename(columns={'F': 'department_code', 'DESCRIPTION': 'department'})
matrix_alert_rate = matrix_alert_rate.sort_values(by='count_dis', ascending = False).reset_index(drop=True)
matrix_alert_rate.to_csv(f'{path_log_file}/{use_case}/matrix_alert_rate.csv', index = False, sep = ";")

### 6.5. Calculate metrics per department
Calculate sensitivity, specificity, PPV and AUROC per department. Only include the responses of cases that were discharged more than 60 days before the last outcome file and admitted within the log period (start_date to end_date).

In [None]:
# Calculate metrics for each medical case, per department.
# Responses with an unknown label (-1) are excluded. A case counts as a
# positive prediction in a department when at least one of its responses there
# reaches the threshold (0.5 for *_M, 0.75 for *_H), because the aggregation
# below takes the maximum of the 0/1 prediction flags per case and department.


def _confusion_rates(y_true, y_pred):
    """Return (sensitivity, specificity, ppv) for 0/1 labels and predictions.

    labels=[0, 1] forces a 2x2 confusion matrix even when only one class is
    present, so unpacking into four counts cannot fail. A rate whose
    denominator is zero evaluates to NaN (NumPy scalar division).
    """
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    return tp / (tp + fn), tn / (tn + fp), tp / (tp + fp)


log_df = prediction_data.drop(prediction_data[prediction_data.LABEL == -1].index)
log_df['OBS_SPLIT'] = log_df.OBS.map(lambda obs: obs.split(' '))
log_df['DEPARTMENT'] = log_df.OBS_SPLIT.apply(lambda x: [f for f in x if f.startswith('DEPARTMENT')])
log_df['PREDICTION_M'] = np.where(log_df['BELIEF'] >= 0.5,1,0)
log_df['PREDICTION_H'] = np.where(log_df['BELIEF'] >= 0.75,1,0)

log_df_dep = log_df.explode('DEPARTMENT')
log_df_dep = log_df_dep.sort_values(by = ['DEPARTMENT']).reset_index(drop = True)
log_df_dep = log_df_dep.groupby(by=["CASEID","DEPARTMENT"])[["CASEID","LABEL","PREDICTION_M","PREDICTION_H",'DEPARTMENT','OBS_SPLIT','BELIEF']].max()
department_list = log_df_dep['DEPARTMENT'].unique().tolist()

total_cases = []
pos_cases = []
sensitivity_M = []
specificity_M = []
ppv_M = []
sensitivity_H = []
specificity_H = []
ppv_H = []
incidence = []
for dep in department_list:
    dep_df = log_df_dep[log_df_dep['DEPARTMENT'] == dep]

    sens, spec, ppv = _confusion_rates(dep_df.LABEL, dep_df.PREDICTION_M)
    sensitivity_M.append(sens)
    specificity_M.append(spec)
    ppv_M.append(ppv)

    sens, spec, ppv = _confusion_rates(dep_df.LABEL, dep_df.PREDICTION_H)
    sensitivity_H.append(sens)
    specificity_H.append(spec)
    ppv_H.append(ppv)

    n_cases = len(dep_df.CASEID.unique())
    n_positive = len(dep_df[dep_df["LABEL"]==1].CASEID.unique())
    incidence.append("{:.1%}".format(n_positive / n_cases))
    total_cases.append(n_cases)
    pos_cases.append(n_positive)

metrics_per_department_mc = pd.DataFrame(
    {
        'department_code': department_list,
        'total_cases': total_cases,
        'pos_cases': pos_cases,
        'incidence': incidence,
        'sensitivity_M': sensitivity_M,
        'specificity_M': specificity_M,
        'ppv_M': ppv_M,
        'sensitivity_H': sensitivity_H,
        'specificity_H': specificity_H,
        'ppv_H': ppv_H,
    },
    columns=['department_code', 'total_cases', 'pos_cases', 'incidence', 'sensitivity_M',
             'specificity_M', 'ppv_M', 'sensitivity_H', 'specificity_H', 'ppv_H'],
)
metrics_per_department_mc = metrics_per_department_mc.sort_values(by='total_cases', ascending = False).reset_index(drop=True)
metrics_per_department_mc.to_csv(f'{path_log_file}/{use_case}/metrics_per_department.csv', index = False, sep = ";")

### 6.6. Calculate metrics of all medical cases

In [None]:
# Metrics over all medical cases with a known label (LABEL != -1).
# Per case, the maximum over all of its responses is used, so a case is a
# positive prediction if any response reaches the threshold (0.5 for *_M,
# 0.75 for *_H).


def _confusion_rates(y_true, y_pred):
    """Return (sensitivity, specificity, ppv) for 0/1 labels and predictions.

    labels=[0, 1] forces a 2x2 confusion matrix even when only one class is
    present, so unpacking into four counts cannot fail. A rate whose
    denominator is zero evaluates to NaN (NumPy scalar division).
    """
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    return tp / (tp + fn), tn / (tn + fp), tp / (tp + fp)


log_df = prediction_data.drop(prediction_data[prediction_data.LABEL == -1].index)
log_df['OBS_SPLIT'] = log_df.OBS.map(lambda obs: obs.split(' '))
log_df['DEPARTMENT'] = log_df.OBS_SPLIT.apply(lambda x: [f for f in x if f.startswith('DEPARTMENT')])
log_df['PREDICTION_M'] = np.where(log_df['BELIEF'] >= 0.5,1,0)
log_df['PREDICTION_H'] = np.where(log_df['BELIEF'] >= 0.75,1,0)
log_df_all = log_df.groupby(by=["CASEID"])[["CASEID","LABEL","PREDICTION_M","PREDICTION_H",'DEPARTMENT','OBS_SPLIT','BELIEF']].max()

sensitivity_M, specificity_M, ppv_M = _confusion_rates(log_df_all.LABEL, log_df_all.PREDICTION_M)
sensitivity_H, specificity_H, ppv_H = _confusion_rates(log_df_all.LABEL, log_df_all.PREDICTION_H)

total_cases = len(log_df_all.CASEID.unique().tolist())
pos_cases = len(log_df_all[log_df_all["LABEL"]==1].CASEID.unique().tolist())
incidence = "{:.1%}".format(pos_cases / total_cases)

# roc_auc_score raises ValueError when only one class is present in the
# labels; report NaN in that case instead of aborting the cell.
if log_df_all.LABEL.nunique() > 1:
    auroc_score = roc_auc_score(log_df_all.LABEL, log_df_all.BELIEF)
else:
    auroc_score = np.nan

metrics_all = pd.DataFrame([{
    'total_cases': total_cases,
    'pos_cases': pos_cases,
    'incidence': incidence,
    'sensitivity_M': sensitivity_M,
    'specificity_M': specificity_M,
    'ppv_M': ppv_M,
    'sensitivity_H': sensitivity_H,
    'specificity_H': specificity_H,
    'ppv_H': ppv_H,
    'AUROC': auroc_score,
}])
metrics_all.to_csv(f'{path_log_file}/{use_case}/metrics_all_cases.csv', index = False, sep = ";")

### 6.7. Calculate metrics for all medical cases at discharge

In [None]:
# Metrics over all medical cases at discharge, restricted to cases with a
# known label (LABEL != -1). Thresholds: 0.5 for *_M, 0.75 for *_H.


def _confusion_rates(y_true, y_pred):
    """Return (sensitivity, specificity, ppv) for 0/1 labels and predictions.

    labels=[0, 1] forces a 2x2 confusion matrix even when only one class is
    present, so unpacking into four counts cannot fail. A rate whose
    denominator is zero evaluates to NaN (NumPy scalar division).
    """
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    return tp / (tp + fn), tn / (tn + fp), tp / (tp + fp)


# Note: this overwrites discharged_df with the labelled subset.
discharged_df = discharged_df.drop(discharged_df[discharged_df.LABEL == -1].index)
discharged_df['OBS_SPLIT'] = discharged_df.OBS.map(lambda obs: obs.split(' '))
discharged_df['DEPARTMENT'] = discharged_df.OBS_SPLIT.apply(lambda x: [f for f in x if f.startswith('DEPARTMENT')])
discharged_df['PREDICTION_M'] = np.where(discharged_df['BELIEF'] >= 0.5,1,0)
discharged_df['PREDICTION_H'] = np.where(discharged_df['BELIEF'] >= 0.75,1,0)
discharged_df_all = discharged_df.groupby(by=["CASEID"])[["CASEID","LABEL","PREDICTION_M","PREDICTION_H",'DEPARTMENT','OBS_SPLIT','BELIEF']].max()

sensitivity_M, specificity_M, ppv_M = _confusion_rates(discharged_df_all.LABEL, discharged_df_all.PREDICTION_M)
sensitivity_H, specificity_H, ppv_H = _confusion_rates(discharged_df_all.LABEL, discharged_df_all.PREDICTION_H)

total_cases = len(discharged_df_all.CASEID.unique().tolist())
pos_cases = len(discharged_df_all[discharged_df_all["LABEL"]==1].CASEID.unique().tolist())
incidence = "{:.1%}".format(pos_cases / total_cases)

# roc_auc_score raises ValueError when only one class is present in the
# labels; report NaN in that case instead of aborting the cell.
if discharged_df_all.LABEL.nunique() > 1:
    auroc_score = roc_auc_score(discharged_df_all.LABEL, discharged_df_all.BELIEF)
else:
    auroc_score = np.nan

metrics_dis_all = pd.DataFrame([{
    'total_cases': total_cases,
    'pos_cases': pos_cases,
    'incidence': incidence,
    'sensitivity_M': sensitivity_M,
    'specificity_M': specificity_M,
    'ppv_M': ppv_M,
    'sensitivity_H': sensitivity_H,
    'specificity_H': specificity_H,
    'ppv_H': ppv_H,
    'AUROC': auroc_score,
}])
metrics_dis_all.to_csv(f'{path_log_file}/{use_case}/metrics_dis_all_cases.csv', index = False, sep = ";")

### 6.8. Calculate metrics for all medical cases at admission

In [None]:
# Metrics over all medical cases at admission, restricted to cases with a
# known label (LABEL != -1). Thresholds: 0.5 for *_M, 0.75 for *_H.


def _confusion_rates(y_true, y_pred):
    """Return (sensitivity, specificity, ppv) for 0/1 labels and predictions.

    labels=[0, 1] forces a 2x2 confusion matrix even when only one class is
    present, so unpacking into four counts cannot fail. A rate whose
    denominator is zero evaluates to NaN (NumPy scalar division).
    """
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    return tp / (tp + fn), tn / (tn + fp), tp / (tp + fp)


# Note: this overwrites admission_df with the labelled subset.
admission_df = admission_df.drop(admission_df[admission_df.LABEL == -1].index)
admission_df['OBS_SPLIT'] = admission_df.OBS.map(lambda obs: obs.split(' '))
admission_df['DEPARTMENT'] = admission_df.OBS_SPLIT.apply(lambda x: [f for f in x if f.startswith('DEPARTMENT')])
admission_df['PREDICTION_M'] = np.where(admission_df['BELIEF'] >= 0.5,1,0)
admission_df['PREDICTION_H'] = np.where(admission_df['BELIEF'] >= 0.75,1,0)
admission_df_all = admission_df.groupby(by=["CASEID"])[["CASEID","LABEL","PREDICTION_M","PREDICTION_H",'DEPARTMENT','OBS_SPLIT','BELIEF']].max()

sensitivity_M, specificity_M, ppv_M = _confusion_rates(admission_df_all.LABEL, admission_df_all.PREDICTION_M)
sensitivity_H, specificity_H, ppv_H = _confusion_rates(admission_df_all.LABEL, admission_df_all.PREDICTION_H)

total_cases = len(admission_df_all.CASEID.unique().tolist())
pos_cases = len(admission_df_all[admission_df_all["LABEL"]==1].CASEID.unique().tolist())
incidence = "{:.1%}".format(pos_cases / total_cases)

# roc_auc_score raises ValueError when only one class is present in the
# labels; report NaN in that case instead of aborting the cell.
if admission_df_all.LABEL.nunique() > 1:
    auroc_score = roc_auc_score(admission_df_all.LABEL, admission_df_all.BELIEF)
else:
    auroc_score = np.nan

metrics_adm_all = pd.DataFrame([{
    'total_cases': total_cases,
    'pos_cases': pos_cases,
    'incidence': incidence,
    'sensitivity_M': sensitivity_M,
    'specificity_M': specificity_M,
    'ppv_M': ppv_M,
    'sensitivity_H': sensitivity_H,
    'specificity_H': specificity_H,
    'ppv_H': ppv_H,
    'AUROC': auroc_score,
}])
metrics_adm_all.to_csv(f'{path_log_file}/{use_case}/metrics_adm_all_cases.csv', index = False, sep = ";")