In [11]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

# Dependencies
import numpy as np
import pandas as pd
import requests
import unidecode
import datetime
import dateutil
import subprocess
import sys
import json
import tempfile
import os

# Install missing dependencies
def install(package):
    """Install ``package`` with pip, using the interpreter that runs this notebook.

    The command is executed through ``subprocess.check_call``.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)
# PDFMiner (pdfminer.six) provides extract_text, used to read PDF reports.
try:
    from pdfminer.high_level import extract_text
except ImportError:
    # Only a missing package triggers an installation; any other failure is
    # surfaced as-is instead of being hidden behind a pip install.
    install('pdfminer.six')
    from pdfminer.high_level import extract_text

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

---

# Colombia Covid19 Pipeline
Dataset obtained from the [Instituto Nacional de Salud](https://www.ins.gov.co/Noticias/Paginas/Coronavirus.aspx) daily Covid19 report for Colombia.

You can get the official dataset here: 
[INS - Official Report](https://www.datos.gov.co/Salud-y-Protecci-n-Social/Casos-positivos-de-COVID-19-en-Colombia/gt2j-8ykr)

The number of new cases is increasing day by day around the world.
This dataset has information about reported cases from the 32 departments of Colombia.

You can also get the Google COVID-19 Community Mobility Reports - Colombia dataset.

You can view and contribute to the analysis here:
[colombia_covid_19_analysis](https://www.kaggle.com/sebaxtian/colombia-covid-19-analysis) Kaggle Notebook Kernel.

---

## Data Sources

In [12]:
# Input and output locations default to the working directory.
INPUT_DIR, OUTPUT_DIR = './', './'
# When the working directory is named "src", the sibling "../input" and
# "../output" directories are used instead.
if os.path.basename(os.path.abspath('.')) == 'src':
    INPUT_DIR, OUTPUT_DIR = '../input', '../output'
# Official daily report (CSV download)
URL_OFFICIAL_DATASET = 'https://www.datos.gov.co/api/views/gt2j-8ykr/rows.csv?accessType=DOWNLOAD'
# Official daily samples processed (JSON feed)
URL_SAMPLES_PROCESSED = 'https://infogram.com/api/live/flex/4524241a-91a7-4bbd-a58e-63c12fb2952f/96848e74-6055-4aa8-9944-502bf69ef6fc?'

---

## Official Covid19 Colombia Daily Report

In [13]:
# Download the official daily report and store it as a local CSV file.
# The timeout keeps the cell from waiting forever on a stalled connection, and
# raise_for_status() prevents an HTTP error page from being saved as the dataset.
with requests.get(URL_OFFICIAL_DATASET, timeout=60) as official_dataset:
    official_dataset.raise_for_status()
    with open(os.path.join(INPUT_DIR, 'covid19co_official.csv'), 'wb') as dataset_file:
        dataset_file.write(official_dataset.content)

In [14]:
# Load the downloaded official report into a dataframe
official_csv_path = os.path.join(INPUT_DIR, 'covid19co_official.csv')
covid19co = pd.read_csv(official_csv_path)
# Number of rows and columns
covid19co.shape

(4561, 16)

In [15]:
# Preview the last five rows of the dataframe
covid19co.tail(5)

Unnamed: 0,ID de caso,Fecha de notificación,Codigo DIVIPOLA,Ciudad de ubicación,Departamento o Distrito,atención,Edad,Sexo,Tipo,Estado,País de procedencia,FIS,Fecha de muerte,Fecha diagnostico,Fecha recuperado,fecha reporte web
4556,4557,2020-04-20T00:00:00.000,11001,Bogotá D.C.,Bogotá D.C.,Casa,20,F,Relacionado,Leve,COLOMBIA,2020-04-20T00:00:00.000,- -,2020-04-23T00:00:00.000,,2020-04-23T00:00:00.000
4557,4558,2020-04-20T00:00:00.000,11001,Bogotá D.C.,Bogotá D.C.,Casa,33,F,Relacionado,Leve,COLOMBIA,2020-04-20T00:00:00.000,- -,2020-04-23T00:00:00.000,,2020-04-23T00:00:00.000
4558,4559,2020-04-18T00:00:00.000,76001,Cali,Valle del Cauca,Casa,69,F,En estudio,Leve,COLOMBIA,2020-04-13T00:00:00.000,- -,2020-04-23T00:00:00.000,,2020-04-23T00:00:00.000
4559,4560,2020-04-23T00:00:00.000,5001,Medellín,Antioquia,Casa,8,F,Relacionado,Leve,COLOMBIA,Asintomático,- -,2020-04-23T00:00:00.000,,2020-04-23T00:00:00.000
4560,4561,2020-04-23T00:00:00.000,47001,Santa Marta,Santa Marta D.T. y C.,Casa,33,M,Relacionado,Leve,COLOMBIA,Asintomático,- -,2020-04-23T00:00:00.000,,2020-04-23T00:00:00.000


In [16]:
# List the column names
covid19co.columns.tolist()

['ID de caso',
 'Fecha de notificación',
 'Codigo DIVIPOLA',
 'Ciudad de ubicación',
 'Departamento o Distrito ',
 'atención',
 'Edad',
 'Sexo',
 'Tipo',
 'Estado',
 'País de procedencia',
 'FIS',
 'Fecha de muerte',
 'Fecha diagnostico',
 'Fecha recuperado',
 'fecha reporte web']

In [17]:
# Normalize the column names: strip accents, then convert to uppercase
covid19co.columns = list(map(lambda name: unidecode.unidecode(name).upper(), covid19co.columns))
# Preview the first rows
covid19co.head()

Unnamed: 0,ID DE CASO,FECHA DE NOTIFICACION,CODIGO DIVIPOLA,CIUDAD DE UBICACION,DEPARTAMENTO O DISTRITO,ATENCION,EDAD,SEXO,TIPO,ESTADO,PAIS DE PROCEDENCIA,FIS,FECHA DE MUERTE,FECHA DIAGNOSTICO,FECHA RECUPERADO,FECHA REPORTE WEB
0,1,2020-03-02T00:00:00.000,11001,Bogotá D.C.,Bogotá D.C.,Recuperado,19,F,Importado,Leve,ITALIA,2020-02-27T00:00:00.000,- -,2020-03-06T00:00:00.000,2020-03-13T00:00:00.000,2020-03-06T00:00:00.000
1,2,2020-03-06T00:00:00.000,76111,Guadalajara de Buga,Valle del Cauca,Recuperado,34,M,Importado,Leve,ESPAÑA,2020-03-04T00:00:00.000,- -,2020-03-09T00:00:00.000,2020-03-19T00:00:00.000,2020-03-09T00:00:00.000
2,3,2020-03-07T00:00:00.000,5001,Medellín,Antioquia,Recuperado,50,F,Importado,Leve,ESPAÑA,2020-02-29T00:00:00.000,- -,2020-03-09T00:00:00.000,2020-03-15T00:00:00.000,2020-03-09T00:00:00.000
3,4,2020-03-09T00:00:00.000,5001,Medellín,Antioquia,Recuperado,55,M,Relacionado,Leve,COLOMBIA,2020-03-06T00:00:00.000,- -,2020-03-11T00:00:00.000,2020-03-26T00:00:00.000,2020-03-11T00:00:00.000
4,5,2020-03-09T00:00:00.000,5001,Medellín,Antioquia,Recuperado,25,M,Relacionado,Leve,COLOMBIA,2020-03-08T00:00:00.000,- -,2020-03-11T00:00:00.000,2020-03-23T00:00:00.000,2020-03-11T00:00:00.000


In [18]:
# Convert the values of text (object dtype) columns to title case.
# Missing values are left untouched: calling str() on NaN would turn it into
# the literal text 'Nan', which NaN-based handling (isna/fillna) can no longer detect.
for attr in covid19co.columns:
    if covid19co[attr].dtypes == 'object':
        covid19co[attr] = covid19co[attr].map(lambda value: value if pd.isna(value) else str(value).title())
# Show dataframe
covid19co.head()

Unnamed: 0,ID DE CASO,FECHA DE NOTIFICACION,CODIGO DIVIPOLA,CIUDAD DE UBICACION,DEPARTAMENTO O DISTRITO,ATENCION,EDAD,SEXO,TIPO,ESTADO,PAIS DE PROCEDENCIA,FIS,FECHA DE MUERTE,FECHA DIAGNOSTICO,FECHA RECUPERADO,FECHA REPORTE WEB
0,1,2020-03-02T00:00:00.000,11001,Bogotá D.C.,Bogotá D.C.,Recuperado,19,F,Importado,Leve,Italia,2020-02-27T00:00:00.000,- -,2020-03-06T00:00:00.000,2020-03-13T00:00:00.000,2020-03-06T00:00:00.000
1,2,2020-03-06T00:00:00.000,76111,Guadalajara De Buga,Valle Del Cauca,Recuperado,34,M,Importado,Leve,España,2020-03-04T00:00:00.000,- -,2020-03-09T00:00:00.000,2020-03-19T00:00:00.000,2020-03-09T00:00:00.000
2,3,2020-03-07T00:00:00.000,5001,Medellín,Antioquia,Recuperado,50,F,Importado,Leve,España,2020-02-29T00:00:00.000,- -,2020-03-09T00:00:00.000,2020-03-15T00:00:00.000,2020-03-09T00:00:00.000
3,4,2020-03-09T00:00:00.000,5001,Medellín,Antioquia,Recuperado,55,M,Relacionado,Leve,Colombia,2020-03-06T00:00:00.000,- -,2020-03-11T00:00:00.000,2020-03-26T00:00:00.000,2020-03-11T00:00:00.000
4,5,2020-03-09T00:00:00.000,5001,Medellín,Antioquia,Recuperado,25,M,Relacionado,Leve,Colombia,2020-03-08T00:00:00.000,- -,2020-03-11T00:00:00.000,2020-03-23T00:00:00.000,2020-03-11T00:00:00.000


In [19]:
# Replace missing values with '-' (only when there is at least one)
has_missing_values = covid19co.isna().to_numpy().any()
if has_missing_values:
    covid19co.fillna(value='-', inplace=True)
# Preview the first rows
covid19co.head()

Unnamed: 0,ID DE CASO,FECHA DE NOTIFICACION,CODIGO DIVIPOLA,CIUDAD DE UBICACION,DEPARTAMENTO O DISTRITO,ATENCION,EDAD,SEXO,TIPO,ESTADO,PAIS DE PROCEDENCIA,FIS,FECHA DE MUERTE,FECHA DIAGNOSTICO,FECHA RECUPERADO,FECHA REPORTE WEB
0,1,2020-03-02T00:00:00.000,11001,Bogotá D.C.,Bogotá D.C.,Recuperado,19,F,Importado,Leve,Italia,2020-02-27T00:00:00.000,- -,2020-03-06T00:00:00.000,2020-03-13T00:00:00.000,2020-03-06T00:00:00.000
1,2,2020-03-06T00:00:00.000,76111,Guadalajara De Buga,Valle Del Cauca,Recuperado,34,M,Importado,Leve,España,2020-03-04T00:00:00.000,- -,2020-03-09T00:00:00.000,2020-03-19T00:00:00.000,2020-03-09T00:00:00.000
2,3,2020-03-07T00:00:00.000,5001,Medellín,Antioquia,Recuperado,50,F,Importado,Leve,España,2020-02-29T00:00:00.000,- -,2020-03-09T00:00:00.000,2020-03-15T00:00:00.000,2020-03-09T00:00:00.000
3,4,2020-03-09T00:00:00.000,5001,Medellín,Antioquia,Recuperado,55,M,Relacionado,Leve,Colombia,2020-03-06T00:00:00.000,- -,2020-03-11T00:00:00.000,2020-03-26T00:00:00.000,2020-03-11T00:00:00.000
4,5,2020-03-09T00:00:00.000,5001,Medellín,Antioquia,Recuperado,25,M,Relacionado,Leve,Colombia,2020-03-08T00:00:00.000,- -,2020-03-11T00:00:00.000,2020-03-23T00:00:00.000,2020-03-11T00:00:00.000


In [20]:
# Setup Date Format
def setup_date(value):
    """Convert an ISO-like timestamp string into a ``DD/MM/YYYY`` date string.

    Only the text before the first ``'T'`` is used, and it must be a valid
    ``YYYY-MM-DD`` calendar date; e.g. ``'2020-03-02T00:00:00.000'`` becomes
    ``'02/03/2020'``.

    Returns ``'-'`` for anything else: non-string values (such as NaN or
    None), placeholders like ``'- -'``, free text, or impossible dates.
    """
    # Non-string cells (NaN, None, numbers) have no .split(); treat them as missing.
    if not isinstance(value, str):
        return '-'
    parts = value.split('T')[0].split('-')
    # Require exactly the YYYY, MM and DD digit groups, in that order.
    if [len(part) for part in parts] != [4, 2, 2]:
        return '-'
    if not all(part.isdigit() for part in parts):
        return '-'
    year, month, day = parts
    try:
        # Building the date rejects values such as month 13 or day 32.
        datetime.date(int(year), int(month), int(day))
    except ValueError:
        return '-'
    return day + '/' + month + '/' + year
# Date columns: every column whose name contains 'FECHA' or 'FIS'
date_columns = [name for name in covid19co.columns if 'FECHA' in name or 'FIS' in name]
# Reformat the values of each date column with setup_date
for date_column in date_columns:
    covid19co[date_column] = covid19co[date_column].transform(setup_date)
# Preview the first rows
covid19co.head()

Unnamed: 0,ID DE CASO,FECHA DE NOTIFICACION,CODIGO DIVIPOLA,CIUDAD DE UBICACION,DEPARTAMENTO O DISTRITO,ATENCION,EDAD,SEXO,TIPO,ESTADO,PAIS DE PROCEDENCIA,FIS,FECHA DE MUERTE,FECHA DIAGNOSTICO,FECHA RECUPERADO,FECHA REPORTE WEB
0,1,02/03/2020,11001,Bogotá D.C.,Bogotá D.C.,Recuperado,19,F,Importado,Leve,Italia,27/02/2020,-,06/03/2020,13/03/2020,06/03/2020
1,2,06/03/2020,76111,Guadalajara De Buga,Valle Del Cauca,Recuperado,34,M,Importado,Leve,España,04/03/2020,-,09/03/2020,19/03/2020,09/03/2020
2,3,07/03/2020,5001,Medellín,Antioquia,Recuperado,50,F,Importado,Leve,España,29/02/2020,-,09/03/2020,15/03/2020,09/03/2020
3,4,09/03/2020,5001,Medellín,Antioquia,Recuperado,55,M,Relacionado,Leve,Colombia,06/03/2020,-,11/03/2020,26/03/2020,11/03/2020
4,5,09/03/2020,5001,Medellín,Antioquia,Recuperado,25,M,Relacionado,Leve,Colombia,08/03/2020,-,11/03/2020,23/03/2020,11/03/2020


In [21]:
# Add Day, Month, Year, Month Name and Day Name for each Date

# Month and weekday names in Spanish
nombre_mes = [
    'Enero', 'Febrero', 'Marzo', 'Abril', 'Mayo', 'Junio',
    'Julio', 'Agosto', 'Septiembre', 'Octubre', 'Noviembre', 'Diciembre',
]
nombre_dia = [
    'Lunes', 'Martes', 'Miércoles', 'Jueves', 'Viernes', 'Sábado', 'Domingo',
]

# Get day
def get_day(value):
    """Return the text before the first '/' of a date string such as '02/03/2020'.

    A value that is a substring of '-' (the placeholder '-' or the empty
    string) is returned unchanged.
    """
    if value in '-':
        return value
    day, _, _ = value.partition('/')
    return day
# Get month
def get_month(value):
    """Return the second '/'-separated field of a date string such as '02/03/2020'.

    A value that is a substring of '-' (the placeholder '-' or the empty
    string) is returned unchanged.
    """
    return value if value in '-' else value.split('/')[1]
# Get year
def get_year(value):
    """Return the third '/'-separated field of a date string such as '02/03/2020'.

    A value that is a substring of '-' (the placeholder '-' or the empty
    string) is returned unchanged.
    """
    if value in '-':
        return value
    date_fields = value.split('/')
    return date_fields[2]
# Get month name
def get_month_name(value):
    """Return the Spanish month name for a date string such as '02/03/2020'.

    The second '/'-separated field is read as a 1-based month number and
    looked up in ``nombre_mes``. A value that is a substring of '-' (the
    placeholder '-' or the empty string) is returned unchanged.
    """
    if value in '-':
        return value
    month_number = int(value.split('/')[1])
    return nombre_mes[month_number - 1]
# Get weekday
def get_weekday(value):
    """Return the Spanish weekday name for a 'DD/MM/YYYY' date string.

    A value that is a substring of '-' (the placeholder '-' or the empty
    string) is returned unchanged.
    """
    if value in '-':
        return value
    fields = value.split('/')
    parsed_date = datetime.date(int(fields[2]), int(fields[1]), int(fields[0]))
    # date.weekday() is used as the position inside nombre_dia.
    return nombre_dia[parsed_date.weekday()]

# Column-name suffix and extractor function for every derived date part
date_part_extractors = (
    (' DIA', get_day),
    (' MES', get_month),
    (' ANIO', get_year),
    (' NOMBRE MES', get_month_name),
    (' DIA SEMANA', get_weekday),
)
# Add the derived columns for each date column
for date_column in date_columns:
    for suffix, extractor in date_part_extractors:
        covid19co[date_column + suffix] = covid19co[date_column].transform(extractor)
# Preview the first rows
covid19co.head()

Unnamed: 0,ID DE CASO,FECHA DE NOTIFICACION,CODIGO DIVIPOLA,CIUDAD DE UBICACION,DEPARTAMENTO O DISTRITO,ATENCION,EDAD,SEXO,TIPO,ESTADO,...,FECHA RECUPERADO DIA,FECHA RECUPERADO MES,FECHA RECUPERADO ANIO,FECHA RECUPERADO NOMBRE MES,FECHA RECUPERADO DIA SEMANA,FECHA REPORTE WEB DIA,FECHA REPORTE WEB MES,FECHA REPORTE WEB ANIO,FECHA REPORTE WEB NOMBRE MES,FECHA REPORTE WEB DIA SEMANA
0,1,02/03/2020,11001,Bogotá D.C.,Bogotá D.C.,Recuperado,19,F,Importado,Leve,...,13,3,2020,Marzo,Viernes,6,3,2020,Marzo,Viernes
1,2,06/03/2020,76111,Guadalajara De Buga,Valle Del Cauca,Recuperado,34,M,Importado,Leve,...,19,3,2020,Marzo,Jueves,9,3,2020,Marzo,Lunes
2,3,07/03/2020,5001,Medellín,Antioquia,Recuperado,50,F,Importado,Leve,...,15,3,2020,Marzo,Domingo,9,3,2020,Marzo,Lunes
3,4,09/03/2020,5001,Medellín,Antioquia,Recuperado,55,M,Relacionado,Leve,...,26,3,2020,Marzo,Jueves,11,3,2020,Marzo,Miércoles
4,5,09/03/2020,5001,Medellín,Antioquia,Recuperado,25,M,Relacionado,Leve,...,23,3,2020,Marzo,Lunes,11,3,2020,Marzo,Miércoles


## Covid19 Colombia Dataset
> ***Output file***: covid19co.csv

In [22]:
# Write the dataframe to the output directory, without the index column
covid19co_output_path = os.path.join(OUTPUT_DIR, 'covid19co.csv')
covid19co.to_csv(covid19co_output_path, index=False)

---

## Official Covid19 Colombia Samples Processed

In [23]:
# Download the official samples-processed feed and store it as a local JSON file.
with requests.get(URL_SAMPLES_PROCESSED, timeout=60) as official_dataset:
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    official_dataset.raise_for_status()
    json_data = official_dataset.json()
# Drop the 'refreshed' entry when present; a missing key is not an error.
json_data.pop('refreshed', None)
# The file is opened only after the payload has been parsed, so a failed
# download cannot leave an empty or truncated file behind.
with open(os.path.join(INPUT_DIR, 'covid19co_samples_processed_official.json'), 'w') as json_file:
    json.dump(json_data, json_file, ensure_ascii=False, indent=4)

In [24]:
# Read the stored samples-processed JSON file
samples_json_path = os.path.join(INPUT_DIR, 'covid19co_samples_processed_official.json')
with open(samples_json_path) as samples_file:
    samples_payload = json.load(samples_file)
# First entry of 'data': its first row holds the column names, the rest are records
official_dataset = samples_payload['data'][0]
header_row = official_dataset[0]
record_rows = official_dataset[1:]
covid19co_samples_processed = pd.DataFrame(data=record_rows, columns=header_row)
# Number of rows and columns
covid19co_samples_processed.shape

(52, 2)

In [25]:
# Preview the first five rows of the dataframe
covid19co_samples_processed.head(5)

Unnamed: 0,Fecha,Muestras procesadas según fecha de resultado acumuladas
0,Sin fecha,36
1,3/4/20,26
2,3/5/20,27
3,3/6/20,72
4,3/7/20,140


In [26]:
# Normalize the column names: strip accents, then convert to uppercase
covid19co_samples_processed.columns = list(map(lambda name: unidecode.unidecode(name).upper(), covid19co_samples_processed.columns))
# Preview the first rows
covid19co_samples_processed.head()

Unnamed: 0,FECHA,MUESTRAS PROCESADAS SEGUN FECHA DE RESULTADO ACUMULADAS
0,Sin fecha,36
1,3/4/20,26
2,3/5/20,27
3,3/6/20,72
4,3/7/20,140


In [27]:
# Setup Date Format
def setup_date_samples(value):
    """Convert a ``M/D/YY`` date string into a zero-padded ``DD/MM/YYYY`` string.

    The input fields are month, day and year, separated by ``'/'``; e.g.
    ``'3/4/20'`` becomes ``'04/03/2020'``. A two-digit year is expanded by
    prefixing ``'20'``; a four-digit year is kept as given.

    Returns ``'-'`` for anything else: non-string values (such as NaN or
    None), text without exactly three fields (e.g. ``'Sin fecha'``), or
    impossible dates.
    """
    # Non-string cells (NaN, None, numbers) have no .split(); treat them as missing.
    if not isinstance(value, str):
        return '-'
    fields = value.split('/')
    if len(fields) != 3:
        return '-'
    month, day, year = fields
    # The century goes in front of the two-digit year: '21' -> '2021'.
    if len(year) == 2:
        year = '20' + year
    if len(year) != 4 or len(month) > 2 or len(day) > 2:
        return '-'
    try:
        # Building the date rejects non-numeric fields and values such as month 13.
        parsed = datetime.date(int(year), int(month), int(day))
    except ValueError:
        return '-'
    return '{:02d}/{:02d}/{:04d}'.format(parsed.day, parsed.month, parsed.year)
# Reformat the FECHA column with setup_date_samples
covid19co_samples_processed['FECHA'] = covid19co_samples_processed['FECHA'].transform(setup_date_samples)
# Preview the first rows
covid19co_samples_processed.head()

Unnamed: 0,FECHA,MUESTRAS PROCESADAS SEGUN FECHA DE RESULTADO ACUMULADAS
0,-,36
1,04/03/2020,26
2,05/03/2020,27
3,06/03/2020,72
4,07/03/2020,140


## Covid19 Colombia Samples Processed Dataset
> ***Output file***: covid19co_samples_processed.csv

In [28]:
# Write the dataframe to the output directory, without the index column
samples_output_path = os.path.join(OUTPUT_DIR, 'covid19co_samples_processed.csv')
covid19co_samples_processed.to_csv(samples_output_path, index=False)

---

## Google Community Mobility Reports - Colombia

In [29]:
# Google Community Mobility Reports - Colombia
# One candidate report per day, from 2020-03-29 up to today
report_dates = pd.date_range(start='2020-03-29', end=datetime.date.today().isoformat(), freq='D')
google_community_mobility_reports = pd.DataFrame(columns=['date', 'country', 'file', 'url'])
google_community_mobility_reports['date'] = report_dates.strftime('%Y-%m-%d').tolist()
google_community_mobility_reports['country'] = 'Colombia'
# The report file name is the date followed by a fixed suffix
google_community_mobility_reports['file'] = google_community_mobility_reports['date'] + '_CO_Mobility_Report_en.pdf'
# Get URL report
def get_report_url(file):
    """Return the response URL of a mobility report, or NaN when it is unavailable.

    ``file`` is the report file name appended to the gstatic mobility base
    URL. The report counts as available only when the GET request answers
    with HTTP status 200.
    """
    report_location = 'https://www.gstatic.com/covid19/mobility/' + file
    with requests.get(report_location) as community_mobility_report:
        is_available = community_mobility_report.status_code == 200
        return community_mobility_report.url if is_available else np.nan
# Resolve the URL of every candidate report
google_community_mobility_reports['url'] = google_community_mobility_reports['file'].transform(get_report_url)
# Keep only the rows without missing values and renumber them from zero
google_community_mobility_reports = (
    google_community_mobility_reports
    .dropna()
    .reset_index(drop=True)
)
# Preview the first rows
google_community_mobility_reports.head()

Unnamed: 0,date,country,file,url
0,2020-03-29,Colombia,2020-03-29_CO_Mobility_Report_en.pdf,https://www.gstatic.com/covid19/mobility/2020-...
1,2020-04-05,Colombia,2020-04-05_CO_Mobility_Report_en.pdf,https://www.gstatic.com/covid19/mobility/2020-...
2,2020-04-11,Colombia,2020-04-11_CO_Mobility_Report_en.pdf,https://www.gstatic.com/covid19/mobility/2020-...
3,2020-04-17,Colombia,2020-04-17_CO_Mobility_Report_en.pdf,https://www.gstatic.com/covid19/mobility/2020-...


In [30]:
# Get/Add Mobility Changes
def get_mobility_changes(URL):
    """Download a mobility report PDF and extract its category/percentage lines.

    The first two pages of the PDF are read. On each page, empty lines are
    dropped and only lines that are a target category name or end with '%'
    are kept, limited to the first six such lines per page.

    Parameters
    ----------
    URL : str
        Address of the mobility report PDF.

    Returns
    -------
    list of str
        Kept lines of page 1 followed by those of page 2. The list is empty
        when the request does not answer with HTTP status 200, and shorter
        when the PDF has fewer than two pages with text.
    """
    # Local import: the PDF is parsed from memory, so no temporary file has to
    # be created, flushed, reopened by name or cleaned up.
    import io

    # Target changes
    targets = ['Retail & recreation', 'Grocery & pharmacy', 'Parks', 'Transit stations', 'Workplaces', 'Residential']
    # Only the leading pages are used, so later pages are never parsed
    pages_to_read = 2
    # Lines kept from each page
    lines_per_page = 6
    # Mobility Changes
    mobility_changes = []
    # Get Mobility Report
    with requests.get(URL) as mobility_report:
        if mobility_report.status_code != 200:
            return mobility_changes
        pdf_stream = io.BytesIO(mobility_report.content)
    for page in range(pages_to_read):
        # Rewind so every extraction starts from the beginning of the PDF data
        pdf_stream.seek(0)
        text = extract_text(pdf_stream, maxpages=1, page_numbers=[page])
        if not text:
            break
        kept_lines = [
            line for line in text.split('\n')
            if line != '' and (line in targets or line[-1] == '%')
        ]
        mobility_changes += kept_lines[:lines_per_page]
    return mobility_changes
# Extract the mobility changes of every report
google_community_mobility_reports['mobility_changes'] = google_community_mobility_reports['url'].transform(get_mobility_changes)
# One column per category, read from the odd positions 1, 3, ..., 11 of the
# extracted list (presumably each category label is followed by its value)
mobility_categories = ['Retail & recreation', 'Grocery & pharmacy', 'Parks', 'Transit stations', 'Workplaces', 'Residential']
for position, category in enumerate(mobility_categories):
    google_community_mobility_reports[category] = google_community_mobility_reports['mobility_changes'].transform(
        lambda value, index=2 * position + 1: value[index]
    )
# Remove the helper column and put the columns in their final order
google_community_mobility_reports = google_community_mobility_reports.drop(columns=['mobility_changes'])
column_order = ['date', 'country', *mobility_categories, 'file', 'url']
google_community_mobility_reports = google_community_mobility_reports[column_order]
# Convert the date from YYYY-MM-DD to DD/MM/YYYY
parsed_dates = pd.to_datetime(google_community_mobility_reports['date'], format='%Y-%m-%d')
google_community_mobility_reports['date'] = parsed_dates.dt.strftime('%d/%m/%Y')
# Preview the first rows
google_community_mobility_reports.head()

Unnamed: 0,date,country,Retail & recreation,Grocery & pharmacy,Parks,Transit stations,Workplaces,Residential,file,url
0,29/03/2020,Colombia,-86%,-68%,-80%,-82%,-58%,+26%,2020-03-29_CO_Mobility_Report_en.pdf,https://www.gstatic.com/covid19/mobility/2020-...
1,05/04/2020,Colombia,-85%,-66%,-79%,-81%,-57%,+26%,2020-04-05_CO_Mobility_Report_en.pdf,https://www.gstatic.com/covid19/mobility/2020-...
2,11/04/2020,Colombia,-84%,-65%,-77%,-81%,-69%,+32%,2020-04-11_CO_Mobility_Report_en.pdf,https://www.gstatic.com/covid19/mobility/2020-...
3,17/04/2020,Colombia,-76%,-49%,-67%,-75%,-66%,+35%,2020-04-17_CO_Mobility_Report_en.pdf,https://www.gstatic.com/covid19/mobility/2020-...


## Google Community Mobility Reports - Colombia
> ***Output file***: google_community_mobility_reports.csv

In [31]:
# Write the dataframe to the output directory, without the index column
mobility_output_path = os.path.join(OUTPUT_DIR, 'google_community_mobility_reports.csv')
google_community_mobility_reports.to_csv(mobility_output_path, index=False)

---