In [32]:
import os
import numpy as np
import pandas as pd
import csv

In [33]:
# Folder that holds the input CSV and receives the generated CSV files.
# Kept with a trailing "/" so that appending a file name directly yields a valid path.
directory = "/home/fehrdelt/data_ssd/data/clinical_data/"
# Sentinel returned by the get_* helpers when a field cannot be parsed or is rejected.
MISSING_VALUE = np.nan
# Alternative sentinel: uncomment to encode missing values as -1 instead of NaN.
#MISSING_VALUE = -1

In [34]:
def get_age(row):
    """Return the patient's age in years at entry, or MISSING_VALUE.

    The age is the year of the entry date (``row[8]``, a date optionally
    followed by a space and a time) minus the year of the birth date
    (``row[3]``). In both dates the year is the third '/'-separated component.
    Only the years are compared, so the result may be one more than the true
    age when the birthday falls later in the entry year.

    Args:
        row: indexable record of strings (one parsed CSV line).

    Returns:
        The year difference as an int, or MISSING_VALUE when either field is
        absent, empty or not in the expected format.
    """
    try:
        birth_date, entry_stamp = row[3], row[8]
        if not birth_date or not entry_stamp:
            return MISSING_VALUE
        birth_year = birth_date.split('/')[2]
        entry_year = entry_stamp.split(' ')[0].split('/')[2]
        return int(entry_year) - int(birth_year)
    except (LookupError, TypeError, ValueError, AttributeError):
        # Short row, non-string field, date without three '/' parts, or a
        # non-numeric year: treat like any other unusable field.
        return MISSING_VALUE

    
def get_hemocue(row):
    """Return the HemoCue (haemoglobin) reading stored in ``row[23]``, or MISSING_VALUE.

    Args:
        row: indexable record of strings (one parsed CSV line).

    Returns:
        The field parsed as a float when it is below 200; MISSING_VALUE when
        the field is absent, not parseable as a float, or not below 200.
    """
    try:
        hemocue = float(row[23])
    except (LookupError, TypeError, ValueError):
        # Only parsing/lookup failures mean "missing"; anything else propagates.
        return MISSING_VALUE

    # TODO(review): the 200 cut-off was marked "to be verified" by the original author.
    # A parsed NaN fails this comparison and is therefore reported as missing too.
    if hemocue < 200:
        return hemocue
    return MISSING_VALUE


def get_fracas_bassin(row):
    """Return ``row[27]`` (the 'fracas_du_bassin' feature) as an int, or MISSING_VALUE.

    MISSING_VALUE is returned when the field is absent or is not integer text
    (an empty string or a decimal such as "1.0" both count as unparseable).
    """
    try:
        return int(row[27])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE


def get_catecholamines(row):
    """Return ``row[35]`` (the 'catecholamines' feature) as an int, or MISSING_VALUE.

    MISSING_VALUE is returned when the field is absent or is not integer text.
    """
    try:
        return int(row[35])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE


def get_PAS(row):
    """Return the systolic blood pressure (PAS) stored in ``row[16]``, or MISSING_VALUE.

    This feeds the 'pression_arterielle_systolique_PAS_arrivee_du_smur' column.

    Returns:
        The field as an int when it is strictly positive; MISSING_VALUE when
        the field is absent, is not integer text, or is zero or negative.
    """
    try:
        systolic = int(row[16])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE

    # Zero and negative readings are treated as "not recorded".
    return systolic if systolic > 0 else MISSING_VALUE


def get_PAD(row):
    """Return the diastolic blood pressure (PAD) stored in ``row[17]``, or MISSING_VALUE.

    This feeds the 'pression_arterielle_diastolique_PAD_arrivee_du_smur' column.

    Returns:
        The field as an int when it is strictly positive; MISSING_VALUE when
        the field is absent, is not integer text, or is zero or negative.
    """
    try:
        diastolic = int(row[17])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE

    # Zero and negative readings are treated as "not recorded".
    return diastolic if diastolic > 0 else MISSING_VALUE


def get_glasgow(row):
    """Return ``row[21]`` (the 'score_glasgow_initial' feature) as an int, or MISSING_VALUE.

    The value is not range-checked. MISSING_VALUE is returned when the field
    is absent or is not integer text.
    """
    try:
        return int(row[21])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE


def get_glasgow_moteur(row):
    """Return ``row[22]`` (the 'score_glasgow_moteur_initial' feature) as an int, or MISSING_VALUE.

    The value is not range-checked. MISSING_VALUE is returned when the field
    is absent or is not integer text.
    """
    try:
        return int(row[22])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE


def get_anomalie_pupille(row):
    """Return ``row[26]`` (the 'anomalie_pupillaire_prehospitalier' feature) as an int, or MISSING_VALUE.

    MISSING_VALUE is returned when the field is absent or is not integer text.
    """
    try:
        return int(row[26])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE


def get_freq_cardiaque(row):
    """Return the heart rate stored in ``row[18]``, or MISSING_VALUE.

    This feeds the 'frequence_cardiaque_FC_arrivee_du_smur' column.

    Returns:
        The field as an int when it is strictly positive; MISSING_VALUE when
        the field is absent, is not integer text, or is zero or negative.
    """
    try:
        heart_rate = int(row[18])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE

    # Zero and negative readings are treated as "not recorded".
    return heart_rate if heart_rate > 0 else MISSING_VALUE


def get_ACR(row):
    """Return ``row[29]`` (the 'arret_cardio_respiratoire_massage' feature) as an int, or MISSING_VALUE.

    MISSING_VALUE is returned when the field is absent or is not integer text.
    """
    try:
        return int(row[29])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE


def get_penetrant(row):
    """Return ``row[14]`` (the 'penetrant_objet' feature) as an int, or MISSING_VALUE.

    MISSING_VALUE is returned when the field is absent or is not integer text.
    """
    try:
        return int(row[14])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE


def get_ischemie(row):
    """Return ``row[31]`` (the 'ischemie_du_membre' feature) as an int, or MISSING_VALUE.

    MISSING_VALUE is returned when the field is absent or is not integer text.
    """
    try:
        return int(row[31])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE


def get_hemorragie(row):
    """Return ``row[30]`` (the 'hemorragie_externe' feature) as an int, or MISSING_VALUE.

    MISSING_VALUE is returned when the field is absent or is not integer text.
    """
    try:
        return int(row[30])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE


def get_amputation(row):
    """Return ``row[28]`` (the 'amputation' feature) as an int, or MISSING_VALUE.

    MISSING_VALUE is returned when the field is absent or is not integer text.
    """
    try:
        return int(row[28])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE

def get_neurochir(row):
    """Return ``row[85]`` (the 'neurochir' outcome field) as an int, or MISSING_VALUE.

    MISSING_VALUE is returned when the field is absent or is not integer text.
    NOTE(review): presumably a neurosurgery indicator, judging by the name; confirm
    against the data dictionary.
    """
    try:
        return int(row[85])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE

def get_pic(row):
    """Return ``row[62]`` (the 'pic' outcome field) as an int, or MISSING_VALUE.

    MISSING_VALUE is returned when the field is absent or is not integer text.
    NOTE(review): presumably an intracranial-pressure (PIC) monitoring indicator;
    confirm against the data dictionary.
    """
    try:
        return int(row[62])
    except (LookupError, TypeError, ValueError):
        return MISSING_VALUE

In [35]:
# Build the feature table: one row per record of cleaned_data.csv.
# Glossary: hemocue = haemoglobin, catecholamines = vasopressor,
# ACR = "arret cardio respiratoire" (cardio-respiratory arrest).

# Output column -> extractor, in the order the columns appear in the table.
feature_getters = {
    'age': get_age,
    'hemocue_initial': get_hemocue,
    'fracas_du_bassin': get_fracas_bassin,
    'catecholamines': get_catecholamines,
    'pression_arterielle_systolique_PAS_arrivee_du_smur': get_PAS,
    'pression_arterielle_diastolique_PAD_arrivee_du_smur': get_PAD,
    'score_glasgow_initial': get_glasgow,
    'score_glasgow_moteur_initial': get_glasgow_moteur,
    'anomalie_pupillaire_prehospitalier': get_anomalie_pupille,
    'frequence_cardiaque_FC_arrivee_du_smur': get_freq_cardiaque,
    'arret_cardio_respiratoire_massage': get_ACR,
    'penetrant_objet': get_penetrant,
    'ischemie_du_membre': get_ischemie,
    'hemorragie_externe': get_hemorragie,
    'amputation': get_amputation,
}

records = []
# newline='' is what the csv module recommends so that it handles line endings itself.
with open(os.path.join(directory, "cleaned_data.csv"), newline='') as csv_file:
    for line_count, row in enumerate(csv.reader(csv_file, delimiter=',')):
        # The first two lines of the file are skipped.
        # NOTE(review): presumably both are header lines; confirm, otherwise the
        # first data record is being dropped.
        if line_count > 1:
            records.append([getter(row) for getter in feature_getters.values()])

# Build the frame once instead of growing it row by row. The float dtype matches
# what row-wise insertion produced (values such as 79.0) and lets NaN sit in any column.
df = pd.DataFrame(records, columns=list(feature_getters), dtype=float)
        

In [36]:
# Preview the first rows of the feature table as plain text.
print(df.head())

    age  hemocue_initial  fracas_du_bassin  catecholamines  \
0  79.0              NaN               0.0             0.0   
1  52.0              NaN               0.0             0.0   
2  23.0              NaN               0.0             0.0   
3  42.0             13.1               0.0             0.0   
4  34.0             15.8               0.0             0.0   

   pression_arterielle_systolique_PAS_arrivee_du_smur  \
0                                              190.0    
1                                               87.0    
2                                              100.0    
3                                              101.0    
4                                              110.0    

   pression_arterielle_diastolique_PAD_arrivee_du_smur  score_glasgow_initial  \
0                                              103.0                     15.0   
1                                               49.0                     15.0   
2                                        

In [37]:
# Write the feature table next to the input file, without the positional index column.
# os.path.join keeps the path valid whether or not `directory` ends with a separator.
df.to_csv(os.path.join(directory, "cleaned_dataframe.csv"), index=False)

In [38]:
# Build the outcome table: one combined label per record of cleaned_data.csv,
# derived from the 'neurochir' (row[85]) and 'pic' (row[62]) fields.
#   1             -> at least one of the two fields equals 1
#   MISSING_VALUE -> both fields are missing
#   0             -> otherwise (including one field missing and the other not equal to 1)
# The same lines are skipped as for the feature table, so rows line up by position.

def _is_missing(value):
    """Tell whether `value` is the missing marker, for either the NaN or a numeric sentinel."""
    # NaN never compares equal to itself, hence the explicit isnan test.
    return bool(np.isnan(value)) or value == MISSING_VALUE


outcomes = []
# newline='' is what the csv module recommends so that it handles line endings itself.
with open(os.path.join(directory, "cleaned_data.csv"), newline='') as csv_file:
    for line_count, row in enumerate(csv.reader(csv_file, delimiter=',')):
        # The first two lines of the file are skipped.
        # NOTE(review): presumably both are header lines; confirm.
        if line_count > 1:
            neurochir = get_neurochir(row)
            pic = get_pic(row)

            if _is_missing(neurochir) and _is_missing(pic):
                combined = MISSING_VALUE
            elif neurochir == 1 or pic == 1:
                combined = 1
            else:
                combined = 0

            outcomes.append(combined)

# Build the frame once instead of growing it row by row. The float dtype matches
# what row-wise insertion produced (values such as 0.0) and lets NaN be stored.
df_outcome = pd.DataFrame({'neurochir+pic': outcomes}, dtype=float)
        

In [39]:
# Preview the first rows of the outcome table as plain text.
print(df_outcome.head())

   neurochir+pic
0            0.0
1            0.0
2            0.0
3            0.0
4            0.0


In [40]:
# Write the outcome table next to the input file, without the positional index column.
# os.path.join keeps the path valid whether or not `directory` ends with a separator.
df_outcome.to_csv(os.path.join(directory, "cleaned_dataframe_outcome.csv"), index=False)