# Get the Data
Dataset page: https://datos.gob.mx/busca/dataset/informacion-referente-a-casos-covid-19-en-mexico

Direct download (.zip): http://187.191.75.115/gobmx/salud/datos_abiertos/datos_abiertos_covid19.zip

In [88]:
# imports
import requests 
import pathlib
import os
import sys
from datetime import date, datetime
import zipfile
import pandas as pd

# to enable local imports
# Resolve '../code' (relative to the current working directory) to an absolute
# path and register it on sys.path so modules stored there can be imported.
module_path = os.path.abspath('../code')
print(module_path)  # show the resolved directory as a quick sanity check
# Only append when missing, so re-running this cell does not add duplicates.
if module_path not in sys.path:
    sys.path.append(module_path)
    
# Project-local modules. NOTE(review): presumably located in '../code' — confirm.
import dataFunctions
import resolveFunctions

# Reference date and base directories. Every path below is anchored at the
# directory the notebook is executed from.
today = date.today()
currentDir = pathlib.Path().absolute()
dataDir = os.path.join(currentDir, "data")

covidDataUrl = "http://187.191.75.115/gobmx/salud/datos_abiertos/datos_abiertos_covid19.zip"
# The archive is stored under today's ISO date (e.g. 2020-06-15.zip).
# os.path.join is used instead of a hard-coded "\" separator: "\{" is an
# invalid escape sequence in a normal string literal, and a literal backslash
# only yields a valid path on Windows.
datasetZipTodayPath = os.path.join(dataDir, "{date}.zip".format(date=today))

# Get the catalogue workbook. sheet_name=None loads every sheet, so the
# result is a dict of DataFrames keyed by sheet name.
catalogeFilePath = os.path.join(currentDir, 'Catalogos_0412.xlsx')
df_cataloge = pd.read_excel(catalogeFilePath, sheet_name=None)

# Get the descriptors (default sheet only).
descriptorsFilePath = os.path.join(currentDir, "Descriptores_0419.xlsx")
df_descriptors = pd.read_excel(descriptorsFilePath)


print("Data Dir: {}".format(dataDir))
print("Dataset .zip path: {}".format(datasetZipTodayPath))

D:\OneDrive - bwedu\code
Data Dir: D:\OneDrive - bwedu\covid19mexico\data
Dataset .zip path: D:\OneDrive - bwedu\covid19mexico\data\2020-06-15.zip


### Functions

In [78]:
def download_url(url, save_path, chunk_size=128, timeout=60):
    """Download a zip archive and unpack it into ``dataDir``.

    The response body is streamed to a temporary ``<save_path>.part`` file,
    the archive is extracted from that file, and only then is it moved to
    ``save_path``. A file at ``save_path`` therefore always corresponds to a
    download that completed and was extracted; a failed attempt leaves
    nothing behind that a later "does the file exist?" check could mistake
    for a finished download.

    Parameters
    ----------
    url : str
        Address of the zip archive.
    save_path : str or path-like
        Final location of the downloaded archive.
    chunk_size : int, optional
        Number of bytes requested per streamed chunk.
    timeout : float, optional
        Seconds to wait for the server (connect / between reads) before
        ``requests`` gives up.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, timeouts, or a non-success HTTP status.
    zipfile.BadZipFile
        If the downloaded payload is not a valid zip archive.
    """
    # Make sure the destination directory exists before writing into it.
    target_dir = os.path.dirname(save_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    partial_path = "{}.part".format(save_path)
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            # Fail loudly on 4xx/5xx instead of saving an error page as a zip.
            r.raise_for_status()
            with open(partial_path, 'wb') as out_file:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    out_file.write(chunk)

        with zipfile.ZipFile(partial_path, 'r') as zip_ref:
            zip_ref.extractall(dataDir)

        # Publish the archive under its final name only after success.
        os.replace(partial_path, save_path)
    finally:
        # After a successful os.replace the partial file no longer exists;
        # on any failure this removes the incomplete or invalid download.
        if os.path.exists(partial_path):
            os.remove(partial_path)

def downloadTodaysData():
    """Fetch today's dataset archive unless it is already on disk.

    The check is a plain file-existence test on ``datasetZipTodayPath``;
    when the file is missing, ``download_url`` is called with
    ``covidDataUrl`` as the source and that path as the destination.
    """
    alreadyDownloaded = os.path.isfile(datasetZipTodayPath)
    if alreadyDownloaded:
        print("Todays data is already there!")
        return

    print("Downloading new covid-19 data...")
    download_url(url=covidDataUrl, save_path=datasetZipTodayPath)
        
def getCsvFromDate(csvDate : date = today, encoding : str = "ISO-8859-1"):
    """Load the extracted CSV snapshot for a given date from ``dataDir``.

    The file name is ``<YYMMDD>COVID19MEXICO.csv``. Year, month and day are
    all rendered as two digits, so dates with a single-digit day produce a
    fixed-width prefix just like single-digit months do.

    Parameters
    ----------
    csvDate : datetime.date
        Date of the snapshot to load. The default is the module-level
        ``today`` value as it was when this function was defined.
    encoding : str, optional
        Text encoding passed to ``pandas.read_csv``.
        NOTE(review): the notebook output shows "MÃ©xico", which suggests the
        file may actually be UTF-8 — confirm and override if so.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV.

    Raises
    ------
    TypeError
        If ``csvDate`` is not a ``datetime.date`` (or subclass) instance.
    FileNotFoundError
        If no CSV for that date exists in ``dataDir``.
    """
    if not isinstance(csvDate, date):
        raise TypeError("The given date is no from the type: datetime.date")

    # %y%m%d zero-pads every component (e.g. 2020-06-01 -> "200601").
    fileName = os.path.join(dataDir, "{}COVID19MEXICO.csv".format(csvDate.strftime("%y%m%d")))
    if not os.path.isfile(fileName):
        raise FileNotFoundError("Data from {} does not exists! Path: {}".format(csvDate, fileName))

    return pd.read_csv(fileName, encoding=encoding)

In [83]:
# Make sure today's archive is available, then load one specific snapshot.
downloadTodaysData()

snapshotDay = date(2020, 6, 13)
df = getCsvFromDate(csvDate=snapshotDay)
df.index.name = "Cases"  # label the row index

# Last expression of the cell: rich preview of the first rows.
df.head()

Todays data is already there!


Unnamed: 0_level_0,FECHA_ACTUALIZACION,ID_REGISTRO,ORIGEN,SECTOR,ENTIDAD_UM,SEXO,ENTIDAD_NAC,ENTIDAD_RES,MUNICIPIO_RES,TIPO_PACIENTE,...,CARDIOVASCULAR,OBESIDAD,RENAL_CRONICA,TABAQUISMO,OTRO_CASO,RESULTADO,MIGRANTE,PAIS_NACIONALIDAD,PAIS_ORIGEN,UCI
Cases,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2020-06-13,04f3dd,2,3,25,2,25,25,6,1,...,2,2,2,2,1,1,99,MÃ©xico,99,97
1,2020-06-13,1b7c4b,2,3,27,1,27,27,5,2,...,1,2,2,2,2,1,99,MÃ©xico,99,2
2,2020-06-13,03f6dd,2,4,9,1,9,15,58,1,...,2,1,2,2,99,1,99,MÃ©xico,99,97
3,2020-06-13,09e350,2,4,15,1,15,15,37,1,...,2,2,2,2,99,1,99,MÃ©xico,99,97
4,2020-06-13,187fc7,2,4,15,2,15,15,122,2,...,2,2,2,2,99,1,99,MÃ©xico,99,2
