<a href="https://colab.research.google.com/github/mopreni/data_science/blob/master/covid19_marisol.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Análisis básico del dataset sobre Covid19 publicado en el sitio de Kaggle.**
1. Curva de contagios en todos los países
2. Comparación de la curva de Argentina con el país de curva más alta
3. Geolocalización de contagios.
4. Impacto de la cuarentena en países que la implementaron.
5. La cuarentena en Argentina, ¿se parece a la de algún otro país?
6. Ver cómo impactó la cuarentena en países con más infectados.
7. Edad de los infectados
8. Velocidad de cambio de la cantidad de infectados en el tiempo
9. Asociar la velocidad de cambio a la estación del lugar (es esperable que en verano baje)
10. Comparación de las curvas de casos considerando el primer caso detectado para cada país

In [0]:
# Importar bibliotecas en esta celda
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns

In [0]:
# La siguiente línea es para ver las imagenes dentro del notebook
%matplotlib inline
# Acá configuramos el tamaño de las figuras
plt.rcParams['figure.figsize'] = (12,8)
# Seteamos opciones de pandas sobre las columnas y su ancho
pd.set_option('max_columns', 120)
pd.set_option('max_colwidth', 5000)

In [0]:
df = pd.read_csv("time_series_covid19_confirmed_global.csv", parse_dates = True)

In [0]:
df.info()

In [0]:
df.columns

Index(['Province/State', 'Country/Region', 'Lat', 'Long', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20', '1/28/20', '1/29/20',
       '1/30/20', '1/31/20', '2/1/20', '2/2/20', '2/3/20', '2/4/20', '2/5/20',
       '2/6/20', '2/7/20', '2/8/20', '2/9/20', '2/10/20', '2/11/20', '2/12/20',
       '2/13/20', '2/14/20', '2/15/20', '2/16/20', '2/17/20', '2/18/20',
       '2/19/20', '2/20/20', '2/21/20', '2/22/20', '2/23/20', '2/24/20',
       '2/25/20', '2/26/20', '2/27/20', '2/28/20', '2/29/20', '3/1/20',
       '3/2/20', '3/3/20', '3/4/20', '3/5/20', '3/6/20', '3/7/20', '3/8/20',
       '3/9/20', '3/10/20', '3/11/20', '3/12/20', '3/13/20', '3/14/20',
       '3/15/20', '3/16/20', '3/17/20', '3/18/20', '3/19/20', '3/20/20',
       '3/21/20', '3/22/20', '3/23/20', '3/24/20', '3/25/20', '3/26/20'],
      dtype='object')

In [0]:
df['Country/Region'].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Benin', 'Bhutan', 'Bolivia',
       'Bosnia and Herzegovina', 'Brazil', 'Brunei', 'Bulgaria',
       'Burkina Faso', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Diamond Princess', 'Cuba', 'Cyprus',
       'Czechia', 'Denmark', 'Djibouti', 'Dominican Republic', 'Ecuador',
       'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon',
       'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Guatemala',
       'Guinea', 'Guyana', 'Haiti', 'Holy See', 'Honduras', 'Hungary',
       'Iceland', 'India

In [0]:
df.describe()

Unnamed: 0,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20
count,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0,248.0
mean,22.273867,24.150757,2.237903,2.637097,3.794355,5.782258,8.540323,11.802419,22.491935,24.862903,33.201613,40.028226,48.540323,67.689516,80.165323,96.33871,111.431452,124.169355,138.673387,149.677419,161.895161,172.427419,180.653226,182.342742,243.419355,269.697581,278.346774,287.193548,295.395161,302.967742,304.995968,307.245968,309.754032,316.822581,318.379032,320.810484,324.217742,328.177419,333.653226,339.16129,346.818548,356.326613,364.137097,374.354839,383.548387,394.701613,410.487903,426.802419,442.826613,458.024194,478.306452,507.560484,517.548387,585.504032,629.439516,675.217742,732.153226,794.766129,866.21371,977.822581,1096.915323,1227.403226,1358.681452,1525.141129,1685.665323,1885.697581,2135.447581
std,24.606559,71.375287,28.249679,28.349563,35.289234,49.109818,68.626006,92.459715,226.958199,228.327056,314.657399,373.048807,459.063488,713.482561,862.519162,1063.035725,1252.7255,1408.239224,1588.629239,1725.024675,1885.61864,2018.686289,2122.665643,2123.007799,3063.268483,3456.478354,3573.45181,3696.109699,3810.758927,3918.176901,3940.346327,3966.405233,3980.430758,4070.610668,4070.641462,4083.578354,4115.278692,4140.90548,4167.423735,4188.604624,4217.412816,4255.908575,4271.262002,4284.007814,4295.78391,4310.301362,4328.031833,4345.874042,4362.764716,4382.783805,4399.25459,4433.44319,4443.987575,4540.003771,4624.205039,4722.379249,4839.493968,4978.900158,5165.948232,5465.975619,5830.445404,6272.70799,6724.478408,7350.711015,7984.45313,8831.851666,9978.870743
min,-41.4545,-135.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,8.390125,-15.212825,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,3.0,3.75,4.0,5.0,8.75,8.75,11.0,13.0
50%,24.487,20.97265,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,1.0,1.0,2.0,2.0,3.0,5.0,5.0,6.5,10.0,12.5,18.0,27.0,35.0,42.0,49.0,62.0,75.0,78.0,89.0,97.5,110.0
75%,41.435275,85.953175,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.25,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,4.0,6.0,7.0,8.0,11.0,15.0,16.5,21.25,22.25,32.75,34.5,42.5,60.25,67.5,82.75,105.25,116.25,136.0,163.5,177.25,220.5,254.5,305.25,328.75,381.5,411.75,467.0,524.5
max,71.7069,178.065,444.0,444.0,549.0,761.0,1058.0,1423.0,3554.0,3554.0,4903.0,5806.0,7153.0,11177.0,13522.0,16678.0,19665.0,22112.0,24953.0,27100.0,29631.0,31728.0,33366.0,33366.0,48206.0,54406.0,56249.0,58182.0,59989.0,61682.0,62031.0,62442.0,62662.0,64084.0,64084.0,64287.0,64786.0,65187.0,65596.0,65914.0,66337.0,66907.0,67103.0,67217.0,67332.0,67466.0,67592.0,67666.0,67707.0,67743.0,67760.0,67773.0,67781.0,67786.0,67790.0,67794.0,67798.0,67799.0,67800.0,67800.0,67800.0,67800.0,67800.0,67800.0,69176.0,74386.0,83836.0


In [0]:
suma = df[df['Country/Region'] =="Argentina"].sum()
suma

Province/State            0
Country/Region    Argentina
Lat                -38.4161
Long               -63.6167
1/22/20                   0
                    ...    
3/22/20                 266
3/23/20                 301
3/24/20                 387
3/25/20                 387
3/26/20                 502
Length: 69, dtype: object

In [0]:
df = pd.read_csv("covid_19_data.csv", parse_dates = True)


In [0]:
df.head(3)

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0


In [0]:
df.describe()

Unnamed: 0,SNo,Confirmed,Deaths,Recovered
count,8811.0,8811.0,8811.0,8811.0
mean,4406.0,733.346272,27.071388,250.276132
std,2543.660944,5204.411092,266.861329,2806.600404
min,1.0,0.0,0.0,0.0
25%,2203.5,2.0,0.0,0.0
50%,4406.0,20.0,0.0,0.0
75%,6608.5,147.0,1.0,10.0
max,8811.0,74386.0,7503.0,60811.0


In [0]:
df.columns

Index(['SNo', 'ObservationDate', 'Province/State', 'Country/Region',
       'Last Update', 'Confirmed', 'Deaths', 'Recovered'],
      dtype='object')

In [0]:
df.['Country/Region']

In [4]:
df = pd.read_csv("drive/My Drive/Colab Notebooks/DS-ejercicios/Covid19/covid_19_data.csv", parse_dates = True)
df.head(3)


Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0


In [5]:
df = pd.read_csv("drive/My Drive/Colab Notebooks/DS-ejercicios/Covid19/COVID19_line_list_data.csv", parse_dates = True)
df.head(3)


Unnamed: 0,id,case_in_country,reporting date,Unnamed: 3,summary,location,country,gender,age,symptom_onset,If_onset_approximated,hosp_visit_date,exposure_start,exposure_end,visiting Wuhan,from Wuhan,death,recovered,symptom,source,link,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26
0,1,,1/20/2020,,First confirmed imported COVID-19 pneumonia pa...,"Shenzhen, Guangdong",China,male,66.0,01/03/20,0.0,01/11/20,12/29/2019,01/04/20,1,0.0,0,0,,Shenzhen Municipal Health Commission,http://wjw.sz.gov.cn/wzx/202001/t20200120_1898...,,,,,,
1,2,,1/20/2020,,First confirmed imported COVID-19 pneumonia pa...,Shanghai,China,female,56.0,1/15/2020,0.0,1/15/2020,,01/12/20,0,1.0,0,0,,Official Weibo of Shanghai Municipal Health Co...,https://www.weibo.com/2372649470/IqogQhgfa?fro...,,,,,,
2,3,,1/21/2020,,First confirmed imported cases in Zhejiang: pa...,Zhejiang,China,male,46.0,01/04/20,0.0,1/17/2020,,01/03/20,0,1.0,0,0,,Health Commission of Zhejiang Province,http://www.zjwjw.gov.cn/art/2020/1/21/art_1202...,,,,,,


In [6]:
df = pd.read_csv("drive/My Drive/Colab Notebooks/DS-ejercicios/Covid19/COVID19_open_line_list.csv", parse_dates = True)
df.columns


Index(['ID', 'age', 'sex', 'city', 'province', 'country',
       'wuhan(0)_not_wuhan(1)', 'latitude', 'longitude', 'geo_resolution',
       'date_onset_symptoms', 'date_admission_hospital', 'date_confirmation',
       'symptoms', 'lives_in_Wuhan', 'travel_history_dates',
       'travel_history_location', 'reported_market_exposure',
       'additional_information', 'chronic_disease_binary', 'chronic_disease',
       'source', 'sequence_available', 'outcome', 'date_death_or_discharge',
       'notes_for_discussion', 'location', 'admin3', 'admin2', 'admin1',
       'country_new', 'admin_id', 'data_moderator_initials', 'Unnamed: 33',
       'Unnamed: 34', 'Unnamed: 35', 'Unnamed: 36', 'Unnamed: 37',
       'Unnamed: 38', 'Unnamed: 39', 'Unnamed: 40', 'Unnamed: 41',
       'Unnamed: 42', 'Unnamed: 43', 'Unnamed: 44'],
      dtype='object')

In [7]:
df.head(3)

Unnamed: 0,ID,age,sex,city,province,country,wuhan(0)_not_wuhan(1),latitude,longitude,geo_resolution,date_onset_symptoms,date_admission_hospital,date_confirmation,symptoms,lives_in_Wuhan,travel_history_dates,travel_history_location,reported_market_exposure,additional_information,chronic_disease_binary,chronic_disease,source,sequence_available,outcome,date_death_or_discharge,notes_for_discussion,location,admin3,admin2,admin1,country_new,admin_id,data_moderator_initials,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44
0,1.0,30,male,"Chaohu City, Hefei City",Anhui,China,1.0,31.64696,117.7166,admin3,18.01.2020,20.01.2020,22.01.2020,,yes,17.01.2020,Wuhan,,,,,http://ah.people.com.cn/GB/n2/2020/0127/c35826...,,,,,,Chaohu City,Hefei City,Anhui,China,340181,,,,,,,,,,,,,
1,2.0,47,male,"Baohe District, Hefei City",Anhui,China,1.0,31.77863,117.3319,admin3,10.01.2020,21.01.2020,23.01.2020,,no,10.01.2020,"Luzhou Hunan, via Wuhan",,,,,http://ah.people.com.cn/GB/n2/2020/0127/c35826...,,,,,,Baohe District,Hefei City,Anhui,China,340111,,,,,,,,,,,,,
2,3.0,49,male,"High-Tech Zone, Hefei City",Anhui,China,1.0,31.828313,117.224844,point,15.01.2020,20.01.2020,23.01.2020,,no,10.01.2020,"Yinzhou Hunan, via Wuhan",,,,,http://ah.people.com.cn/GB/n2/2020/0127/c35826...,,,,,High-Tech Zone,Shushan District,Hefei City,Anhui,China,340104,,,,,,,,,,,,,


In [8]:
df = pd.read_csv("drive/My Drive/Colab Notebooks/DS-ejercicios/Covid19/time_series_covid_19_confirmed.csv", parse_dates = True)
df.head(3)


Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,4,4,5,7,7,7,11,16,21,22,22,22,24,24,40,40,74,84,94
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,10,12,23,33,38,42,51,55,59,64,70,76,89,104,123,146,174
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,3,5,12,12,17,17,19,20,20,20,24,26,37,48,54,60,74,87,90,139,201,230,264,302,367
