In [None]:
import os
import pandas as pd
import numpy as np
import joblib
from IPython.display import clear_output
from datetime import timedelta
from constants import *

In [None]:
# Folder that holds the raw patient files, one file per reporting date.
input_directory = 'in'
input_files = os.listdir(input_directory)

# Map 'DD-MM-20YY' -> file name. The key is assembled from the first six
# characters of each file name (read as YYMMDD) and is later parsed with
# dayfirst=True.
input_dates = {
    f'{file_name[4:6]}-{file_name[2:4]}-20{file_name[:2]}': file_name
    for file_name in input_files
}

In [None]:
def get_patients(patients, dtype):
    """Select the rows relevant to ``dtype`` and return their dates.

    Parameters
    ----------
    patients : pandas.DataFrame
        Patient records. Depending on ``dtype`` the columns ``result``,
        ``patient_type``, ``day_of_death``, ``onset_symptoms`` and
        ``admission_date`` are read. The input frame is never modified.
    dtype : str
        One of 'Defunciones', 'Confirmados', 'Negativos', 'Sospechosos',
        'Activos', 'Hospitalizados' or 'Ambulantes'.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        For 'Activos', the filtered DataFrame (``result == 1``) with
        ``onset_symptoms`` converted to datetimes. For every other type, a
        datetime Series holding one date per selected row.

    Raises
    ------
    ValueError
        If ``dtype`` is not one of the supported names (previously ``None``
        was returned silently and the caller failed later).
    """
    # Types selected purely by ``result`` code and dated by symptom onset.
    onset_result_codes = {'Confirmados': 1, 'Negativos': 2, 'Sospechosos': 3}
    # Types restricted to ``result == 1``, split by ``patient_type`` and dated
    # by admission.
    admission_patient_types = {'Hospitalizados': 2, 'Ambulantes': 1}

    if dtype == 'Defunciones':
        # '9999-99-99' is presumably the placeholder for "no death recorded";
        # it is not a parseable date, so it must be removed before conversion.
        deceased = patients[patients['day_of_death'] != '9999-99-99']
        deceased = deceased[deceased['result'] == 1]
        return pd.to_datetime(deceased['day_of_death'])

    if dtype in onset_result_codes:
        selected = patients[patients['result'] == onset_result_codes[dtype]]
        return pd.to_datetime(selected['onset_symptoms'])

    if dtype == 'Activos':
        confirmed = patients[patients['result'] == 1]
        # ``assign`` builds a new frame instead of writing into a filtered
        # slice, which avoids pandas' SettingWithCopyWarning.
        return confirmed.assign(
            onset_symptoms=pd.to_datetime(confirmed['onset_symptoms'])
        )

    if dtype in admission_patient_types:
        confirmed = patients[patients['result'] == 1]
        selected = confirmed[
            confirmed['patient_type'] == admission_patient_types[dtype]
        ]
        return pd.to_datetime(selected['admission_date'])

    raise ValueError(f'Unknown patient type: {dtype!r}')

In [None]:
def _count_events_per_day(event_dates, local_index):
    """Return, for each day in ``local_index``, how many ``event_dates`` equal it."""
    per_day = event_dates.value_counts()
    return per_day.reindex(local_index, fill_value=0).tolist()


def _count_active_per_day(patients, local_index, actives_window):
    """Return, for each day in ``local_index``, the number of active patients.

    A patient counts as active on up to ``actives_window`` consecutive days
    starting at ``onset_symptoms``; counting stops early once the day leaves
    ``local_index`` or is later than the patient's ``day_of_death``.
    """
    counts = dict.fromkeys(local_index, 0)

    # Parse the death dates once; the '9999-99-99' placeholder becomes NaT,
    # and ``day > NaT`` is always False, so such patients are never cut short.
    raw_deaths = patients['day_of_death']
    deaths = pd.to_datetime(raw_deaths.where(raw_deaths != '9999-99-99'))

    for onset, death in zip(patients['onset_symptoms'], deaths):
        day = onset
        for _ in range(actives_window):
            if day not in counts or day > death:
                break
            counts[day] += 1
            day = day + timedelta(days=1)

    return [counts[day] for day in local_index]


def create_data_from_patients_files(types = 'all', save = False, actives_window = 14, dtypes=dtypes):
    """Build daily counts per state, patient type and input file.

    Parameters
    ----------
    types : 'all', str or iterable of str
        'all' uses ``dtypes``; otherwise the type name(s) to process. A single
        type name may be given as a plain string.
    save : bool
        When true, the result is also dumped to 'raw_data.pkl' with joblib.
    actives_window : int
        Maximum number of days, counted from symptom onset, during which a
        patient is counted for the 'Activos' type.
    dtypes : iterable of str
        Type names processed when ``types`` is 'all'.

    Returns
    -------
    dict
        ``data[state][dtype][date]`` is a list with one count per day and
        ``data[state][dtype][date + '_localindex']`` is the matching
        ``DatetimeIndex``. The index starts at the earliest event date (never
        later than 08-01-2020) and ends at the file's date.
    """
    if types != 'all':
        dtypes = types
    # A bare string would otherwise be iterated character by character.
    selected_types = [dtypes] if isinstance(dtypes, str) else list(dtypes)

    base_date = pd.to_datetime('08-01-2020', dayfirst=True)

    # Create the nested dicts up front so the key order stays
    # state -> type -> date even though files are the outermost loop.
    data = {state: {dtype: {} for dtype in selected_types} for state in state_names}

    for today in input_dates.keys():
        # Each file is read once and shared by every state and type.
        # NOTE(review): 'ANSI' is only a valid codec name on Windows (alias of
        # 'mbcs'); confirm the real encoding (likely cp1252) before running
        # this on another platform.
        all_patients = change_df_names(
            pd.read_csv(os.path.join(input_directory, input_dates[today]), encoding='ANSI')
        )
        end_date = pd.to_datetime(today, dayfirst=True)

        for state in state_names:
            if state != 'Nacional':
                state_patients = all_patients[
                    all_patients['treated_at'] == inverse_dict_for_name_states[state]
                ]
            else:
                state_patients = all_patients

            for dtype in selected_types:
                patients = get_patients(state_patients, dtype)

                # 'Activos' yields a DataFrame, every other type a Series of dates.
                event_dates = patients['onset_symptoms'] if dtype == 'Activos' else patients
                earliest = event_dates.min()
                # ``min()`` is NaT/NaN when there are no usable dates.
                min_date = base_date if pd.isna(earliest) else min(earliest, base_date)

                local_index = pd.date_range(start=min_date, end=end_date)

                # The original compared against 'Actives', so this branch never
                # ran and every 'Activos' count came out as zero.
                if dtype == 'Activos':
                    today_result = _count_active_per_day(patients, local_index, actives_window)
                else:
                    today_result = _count_events_per_day(patients, local_index)

                data[state][dtype][today] = today_result
                data[state][dtype][today+'_localindex'] = local_index
                print(f' Done for: {state}-{dtype}-{today} ')
                clear_output(wait=True)

    if save:
        joblib.dump(data,'raw_data.pkl')

    return data

In [None]:
def make_csvs(input_data):
    """Write one CSV per patient type and input date into the 'out' folder.

    Parameters
    ----------
    input_data : dict
        Nested mapping ``input_data[state][dtype][date]`` -> list of daily
        counts whose last element corresponds to ``date``.

    Each output file has one row per state with the columns ``cve_ent``,
    ``poblacion``, ``nombre`` followed by one 'DD-MM-YYYY' column per day.
    Shorter series are left-padded with zeros so every row ends on ``date``.
    """
    # base.csv is the same for every file, so read it once instead of once
    # per (type, date) pair.
    # NOTE(review): 'ANSI' is only a valid codec name on Windows.
    base = pd.read_csv('base.csv',encoding='ANSI')
    # NOTE(review): 'poblacion' and 'cve_ent' are attached by position, so the
    # rows of base.csv are assumed to follow the order of input_data's keys.

    os.makedirs('out', exist_ok=True)

    for dtype in dtypes.keys():
        for date in input_dates.keys():
            today = pd.to_datetime(date, dayfirst=True)

            series_by_state = {
                state: input_data[state][dtype][date] for state in input_data.keys()
            }
            max_len = max(len(series) for series in series_by_state.values())

            # Series end on the same day but may start on different days:
            # pad on the left so all of them have ``max_len`` entries.
            padded = {
                state: [0] * (max_len - len(series)) + list(series)
                for state, series in series_by_state.items()
            }

            table = pd.DataFrame.from_dict(padded).T
            days = pd.date_range(
                start=today - timedelta(days=len(table.columns) - 1), end=today
            )
            day_labels = [day.strftime('%d-%m-%Y') for day in days]

            table.columns = day_labels
            table['nombre'] = table.index
            table['poblacion'] = base['poblacion'].values.copy()
            table['cve_ent'] = base['cve_ent'].values.copy()
            table = table[['cve_ent', 'poblacion', 'nombre'] + day_labels]
            table = table.reset_index(drop=True)

            # NOTE(review): the file name carries month and day only, so files
            # from different years overwrite each other.
            month_day = today.strftime('%m%d')
            table.to_csv(f'out/{dtypes[dtype]}{month_day}.csv', encoding='ANSI', index=False)