In [1]:
import pandas as pd
import numpy as np
import datetime
from extract import get_full_data


def mean(arr):
    """Average of the strictly positive entries of ``arr``.

    Entries that are not greater than zero (zeros, negatives, and NaN, which
    fails the ``> 0`` comparison) are dropped before averaging.
    """
    return np.mean(arr[arr > 0])

def std(arr):
    """Standard deviation (via ``np.std``) of the strictly positive entries of ``arr``.

    Entries that are not greater than zero are dropped before the deviation
    is computed.
    """
    return np.std(arr[arr > 0])

def lwmean(arr):
    """Mean of the final 7 entries of ``arr`` (the last week when entries are daily)."""
    last_window = arr[-7:]
    return np.mean(last_window)


def lwmax(arr):
    """Largest of the final 7 entries of ``arr`` (the last week when entries are daily)."""
    last_window = arr[-7:]
    return np.max(last_window)


def blwmax(arr):
    """Largest value among the 7 entries that precede the final 7 of ``arr``.

    The window is ``arr[-14:-7]`` (the week before last when entries are daily).
    """
    window = arr[-14:-7]
    return np.max(window)


def blwmean(arr):
    """Mean of the 7 entries that precede the final 7 of ``arr``.

    The window is ``arr[-14:-7]`` (the week before last when entries are daily).
    """
    window = arr[-14:-7]
    return np.mean(window)


def bblwmax(arr):
    """Largest value in the 7-entry window ``arr[-21:-14]``.

    With daily entries this is the third week counting back from the end.
    """
    window = arr[-21:-14]
    return np.max(window)


def bblwmean(arr):
    """Mean of the 7-entry window ``arr[-21:-14]``.

    With daily entries this is the third week counting back from the end.
    """
    window = arr[-21:-14]
    return np.mean(window)


def bbblwmax(arr):
    """Largest value in the 7-entry window ``arr[-28:-21]``.

    With daily entries this is the fourth week counting back from the end.
    """
    window = arr[-28:-21]
    return np.max(window)


def bbblwmean(arr):
    """Mean of the 7-entry window ``arr[-28:-21]``.

    With daily entries this is the fourth week counting back from the end.
    """
    window = arr[-28:-21]
    return np.mean(window)


def max_full(arr):
    """Largest value over the whole of ``arr`` (no windowing)."""
    peak = np.max(arr)
    return peak


def summary_save(source, label, threshold=None):
    """Build today's per-country snapshot for the countries in ``source`` and save it.

    Reads the module-level ``all_countries`` frame and the ``today`` date
    string. Rows are kept when their ``iso_code`` appears in ``source`` and
    their ``date`` equals ``today``. Cases and tests per million inhabitants
    are then derived from ``total_cases``/``total_tests`` and ``population``.

    Args:
        source: DataFrame with an ``iso_code`` column naming the countries to keep.
        label: Base name of the CSV files to write.
        threshold: If given, keep only rows whose ``case_per_millon`` is
            strictly greater than this value.

    Returns:
        The snapshot DataFrame, re-indexed from 0.

    Side effects:
        Prints mean/std/max/min of ``case_per_millon`` and writes
        ``data/{label}.csv`` and ``data/date/{label}{today}.csv``, creating
        the directories when they do not exist.
    """
    import os  # local import: keeps this definition self-contained

    iso_codes = source['iso_code'].unique()
    selected = all_countries['iso_code'].isin(iso_codes) & (all_countries['date'] == today)

    # Work on an explicit copy so the derived columns are never written to a
    # slice of all_countries (avoids SettingWithCopyWarning / silent no-ops).
    full_df = all_countries[selected].copy()
    # The column names keep their historical spelling ("millon"): they end up
    # in the returned frame and in the CSV headers.
    full_df['case_per_millon'] = (1000000 * full_df['total_cases']) / full_df['population']
    full_df['tests_per_millon'] = (1000000 * full_df['total_tests']) / full_df['population']

    filtered = full_df[['iso_code', 'location', 'date', 'total_cases', 'new_cases', 'case_per_millon', 'tests_per_millon', 'total_tests', 'population']]
    if threshold is not None:
        filtered = filtered[filtered['case_per_millon'] > threshold]

    # Non-inplace reset: returns a fresh frame instead of mutating a slice.
    filtered = filtered.reset_index(drop=True)

    cap_mean = filtered['case_per_millon'].mean()
    cap_std = filtered['case_per_millon'].std()
    cap_max = filtered['case_per_millon'].max()
    cap_min = filtered['case_per_millon'].min()

    print(f"mean: {cap_mean}")
    print(f"std: {cap_std}")
    print(f"max: {cap_max}")
    print(f"min: {cap_min}")

    # Creating 'data/date' also creates its parent 'data'.
    os.makedirs('data/date', exist_ok=True)
    filtered.to_csv(f'data/{label}.csv', index=False)
    filtered.to_csv(f'data/date/{label}{today}.csv', index=False)
    return filtered

def get_summary(data, threshold=None):
    """Summarise each country's ``new_cases`` series and flag its trend.

    Args:
        data: DataFrame with ``iso_code``, ``new_cases`` and ``population`` columns.
        threshold: If given, keep only rows whose ``population`` is strictly
            greater than this value. ``None`` (the default) applies no
            population filter.

    Returns:
        DataFrame with one row per ``iso_code`` and the columns ``mean``,
        ``std``, ``lwmean``, ``blwmean``, ``bblwmean``, ``bbblwmean`` (named
        after the aggregation functions defined in this file), plus:
          * ``descending``: overall mean is above the last-window mean;
          * ``closing``: overall mean minus half the std is above the
            last-window mean.
    """
    # Bind `filtered` unconditionally so threshold=None means "no filter"
    # rather than leaving the name undefined.
    filtered = data
    if threshold is not None:
        filtered = filtered[filtered['population'] > threshold]

    daily_cases = filtered[['iso_code', 'new_cases']]
    summary = daily_cases.groupby('iso_code').agg([mean, std, lwmean, blwmean, bblwmean, bbblwmean])
    # agg() yields (column, function) pairs; keep only the function names.
    summary.columns = summary.columns.droplevel(0)
    summary['descending'] = summary['mean'] > summary['lwmean']
    summary['closing'] = (summary['mean'] - summary['std'] / 2) > summary['lwmean']
    return summary.reset_index()

In [2]:
# Snapshot date, as a date object and as the 'YYYY-MM-DD' string used to match the 'date' column.
today_dt = datetime.date.today()
today = today_dt.strftime('%Y-%m-%d')

# Load the per-country dataset; force=True is handed to get_full_data as-is.
all_countries = get_full_data(force=True)
all_countries['cases_per_million'] = (
    all_countries['total_cases'].div(all_countries['population']).mul(1000000)
)

# NOTE(review): the two bands overlap for populations between 500k and 1M,
# so such countries land in both frames — confirm this is intended.
exclude_small = all_countries[all_countries['population'].gt(500000)]
small = all_countries[all_countries['population'].lt(1000000)]

small.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,total_deaths,new_deaths,total_cases_per_million,new_cases_per_million,...,cvd_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,Entity,Year,Population,cases_per_million
0,ABW,North America,Aruba,2020-03-13,2.0,2.0,0.0,0.0,18.733,18.733,...,,11.62,,,,,Aruba,2019,106000.0,18.732555
1,ABW,North America,Aruba,2020-03-20,4.0,2.0,0.0,0.0,37.465,18.733,...,,11.62,,,,,Aruba,2019,106000.0,37.465111
2,ABW,North America,Aruba,2020-03-24,12.0,8.0,0.0,0.0,112.395,74.93,...,,11.62,,,,,Aruba,2019,106000.0,112.395332
3,ABW,North America,Aruba,2020-03-25,17.0,5.0,0.0,0.0,159.227,46.831,...,,11.62,,,,,Aruba,2019,106000.0,159.22672
4,ABW,North America,Aruba,2020-03-26,19.0,2.0,0.0,0.0,177.959,18.733,...,,11.62,,,,,Aruba,2019,106000.0,177.959275


### ¿Cuáles están descendiendo o cerrando su curva?

In [3]:
# Per-country new-case statistics and trend flags, restricted to rows whose
# population exceeds 500,000 (get_summary filters on the 'population' column).
summary = get_summary(all_countries, threshold=500000)
summary

Unnamed: 0,iso_code,mean,std,lwmean,blwmean,bblwmean,bbblwmean,descending,closing
0,AFG,270.808989,286.453082,650.142857,718.000000,646.714286,513.714286,False,False
1,AGO,3.729730,3.414189,7.428571,0.285714,3.428571,1.714286,False,False
2,ALB,15.092784,9.545423,33.142857,15.714286,19.000000,8.000000,False,False
3,ARE,423.232323,287.678055,518.857143,624.571429,741.714286,868.142857,False,False
4,ARG,325.612903,389.669119,1182.142857,829.428571,694.428571,506.857143,False,False
...,...,...,...,...,...,...,...,...,...
164,VNM,5.060606,5.760047,0.714286,0.285714,0.428571,0.857143,True,True
165,YEM,16.022727,15.188944,33.142857,23.285714,14.000000,12.571429,False,False
166,ZAF,670.775510,884.664926,2823.285714,2143.714286,1374.857143,998.285714,False,False
167,ZMB,24.672727,41.660120,29.000000,13.857143,19.571429,34.428571,False,False


In [4]:
# Bolivia's cumulative case count in today's snapshot.
# Both conditions are combined into one mask: indexing twice with a mask built
# on the unfiltered frame makes pandas reindex the second mask and warn.
bol_today = (all_countries['iso_code'] == 'BOL') & (all_countries['date'] == today)
BOL = all_countries.loc[bol_today, 'total_cases'].values[0]

# NOTE(review): 11.63 is presumably Bolivia's population in millions, hard-coded
# rather than read from the 'population' column — confirm the figure and source.
bol_case_per_million = BOL / 11.63

print(f"BOL: {BOL}")
print(f"bol_case_per_million: {bol_case_per_million}")

BOL: 17842.0
bol_case_per_million: 1534.1358555460017


  """Entry point for launching an IPython kernel.


In [18]:
# Compare against twice Bolivia's rate while its curve is not flagged as
# descending; otherwise compare against the rate itself.
bol_descending = summary.loc[summary['iso_code'] == 'BOL', 'descending'].values[0]
factor = 1 if bol_descending else 2

In [19]:
# Countries in `exclude_small` whose cases per million today exceed Bolivia's
# rate scaled by `factor`, while staying below 10,000 per million.
# All conditions go into a single mask: chaining [mask1][mask2] with masks built
# on the unfiltered frame makes pandas reindex the second mask and warn.
is_today = exclude_small['date'] == today
above_bolivia = exclude_small['cases_per_million'] > (bol_case_per_million * factor)
below_cap = exclude_small['cases_per_million'] < 10000
worse_columns = ['iso_code', 'location', 'date', 'total_cases', 'new_cases', 'cases_per_million', 'population']

worse = (
    exclude_small.loc[is_today & above_bolivia & below_cap, worse_columns]
    .sort_values(['cases_per_million'], ascending=False)
    .reset_index(drop=True)
)

# The 25 highest rates; `worse` is already indexed from 0, so no further reset
# is needed. copy() makes `worst` independent of `worse`.
worst = worse.head(25).copy()
worst

  """Entry point for launching an IPython kernel.


Unnamed: 0,iso_code,location,date,total_cases,new_cases,cases_per_million,population
0,CHL,Chile,2020-06-14,167355.0,6509.0,8754.612382,19116209.0
1,KWT,Kuwait,2020-06-14,35466.0,514.0,8304.759817,4270563.0
2,SGP,Singapore,2020-06-14,40197.0,347.0,6870.879195,5850343.0
3,PER,Peru,2020-06-14,225132.0,4383.0,6828.007143,32971846.0
4,LUX,Luxembourg,2020-06-14,4063.0,8.0,6490.664179,625976.0
5,USA,United States,2020-06-14,2074526.0,25540.0,6267.400031,331002647.0
6,BLR,Belarus,2020-06-14,53241.0,721.0,5634.373094,9449321.0
7,ARM,Armenia,2020-06-14,16004.0,723.0,5400.855957,2963234.0
8,BEL,Belgium,2020-06-14,59918.0,99.0,5169.972845,11589616.0
9,IRL,Ireland,2020-06-14,25295.0,45.0,5122.73087,4937796.0


In [20]:
# Save today's snapshot for the countries in `worst`, keeping only those whose
# cases per million exceed Bolivia's rate; written under the label 'worst'.
summary_save(worst, 'worst', threshold=bol_case_per_million)

mean: 5123.4682985366435
std: 1504.9402947318063
max: 8754.612381565821
min: 3541.9219588562223


Unnamed: 0,iso_code,location,date,total_cases,new_cases,case_per_millon,tests_per_millon,total_tests,population
0,ARE,United Arab Emirates,2020-06-14,41900.0,401.0,4236.431287,,,9890400.0
1,ARM,Armenia,2020-06-14,16004.0,723.0,5400.855957,,,2963234.0
2,BEL,Belgium,2020-06-14,59918.0,99.0,5169.972845,,,11589616.0
3,BLR,Belarus,2020-06-14,53241.0,721.0,5634.373094,,,9449321.0
4,BRA,Brazil,2020-06-14,850514.0,21704.0,4001.300173,,,212559409.0
5,CHE,Switzerland,2020-06-14,31011.0,31.0,3583.173746,,,8654618.0
6,CHL,Chile,2020-06-14,167355.0,6509.0,8754.612382,,,19116209.0
7,DJI,Djibouti,2020-06-14,4449.0,8.0,4503.027322,,,988002.0
8,GBR,United Kingdom,2020-06-14,294375.0,1425.0,4336.313565,,,67886004.0
9,IRL,Ireland,2020-06-14,25295.0,45.0,5122.73087,,,4937796.0


### Worse but small

In [21]:
# Small-population countries whose cases per million today exceed Bolivia's
# rate scaled by `factor`. A single combined mask avoids the reindexing
# warning that chained boolean indexing produces.
small_is_worse_today = (
    (small['cases_per_million'] > (bol_case_per_million * factor))
    & (small['date'] == today)
)
worse_small = (
    small.loc[
        small_is_worse_today,
        ['iso_code', 'location', 'date', 'total_cases', 'new_cases', 'cases_per_million', 'population'],
    ]
    .sort_values(['cases_per_million'], ascending=False)
    .reset_index(drop=True)
)
summary_save(worse_small, 'worse_small')

mean: 6917.518683936204
std: 5303.519127242972
max: 20449.05415758147
min: 3096.7716402995857


  """Entry point for launching an IPython kernel.


Unnamed: 0,iso_code,location,date,total_cases,new_cases,case_per_millon,tests_per_millon,total_tests,population
0,AND,Andorra,2020-06-14,853.0,0.0,11039.927522,,,77265.0
1,DJI,Djibouti,2020-06-14,4449.0,8.0,4503.027322,,,988002.0
2,FLK,Falkland Islands,2020-06-14,13.0,0.0,3732.414585,,,3483.0
3,FRO,Faeroe Islands,2020-06-14,187.0,0.0,3826.869948,,,48865.0
4,GGY,Guernsey,2020-06-14,252.0,0.0,3758.277158,,,67052.0
5,GIB,Gibraltar,2020-06-14,176.0,0.0,5223.947048,,,33691.0
6,IMN,Isle of Man,2020-06-14,336.0,0.0,3951.45357,,,85032.0
7,ISL,Iceland,2020-06-14,1808.0,1.0,5298.168498,,,341250.0
8,JEY,Jersey,2020-06-14,313.0,0.0,3096.77164,,,101073.0
9,LUX,Luxembourg,2020-06-14,4063.0,8.0,6490.664179,,,625976.0


### ¿Qué países están cerrando o en descenso de su curva de contagio?

In [22]:
# Countries flagged both 'descending' and 'closing'. The flags are combined
# into one mask so pandas does not have to reindex a second mask (and warn).
closing_countries = summary[summary['descending'] & summary['closing']]
summary_save(closing_countries, 'closing', threshold=1000)

mean: 2955.3362142380824
std: 1563.7025786956174
max: 6490.664178818357
min: 1118.8504610691416


  """Entry point for launching an IPython kernel.


Unnamed: 0,iso_code,location,date,total_cases,new_cases,case_per_millon,tests_per_millon,total_tests,population
0,AUT,Austria,2020-06-14,17014.0,20.0,1889.101084,,,9006400.0
1,BEL,Belgium,2020-06-14,59918.0,99.0,5169.972845,,,11589616.0
2,CAN,Canada,2020-06-14,98399.0,467.0,2607.13769,,,37742157.0
3,CHE,Switzerland,2020-06-14,31011.0,31.0,3583.173746,,,8654618.0
4,CYP,Cyprus,2020-06-14,980.0,0.0,1118.850461,,,875899.0
5,DEU,Germany,2020-06-14,186269.0,247.0,2223.206367,,,83783945.0
6,DNK,Denmark,2020-06-14,12139.0,40.0,2095.748371,,,5792203.0
7,EST,Estonia,2020-06-14,1973.0,3.0,1487.329057,,,1326539.0
8,FIN,Finland,2020-06-14,7087.0,14.0,1279.076105,,,5540718.0
9,FRA,France,2020-06-14,156813.0,526.0,2402.398694,,,65273512.0


In [23]:
# Countries flagged 'descending'. The selection is computed once and reused;
# a bare expression that is not the cell's last line is neither shown nor kept.
descending_countries = summary[summary['descending']]
summary_save(descending_countries, 'descending', threshold=1000)

mean: 2854.017028534325
std: 1436.9665832099552
max: 6490.664178818357
min: 1118.8504610691416


Unnamed: 0,iso_code,location,date,total_cases,new_cases,case_per_millon,tests_per_millon,total_tests,population
0,AUT,Austria,2020-06-14,17014.0,20.0,1889.101084,,,9006400.0
1,BEL,Belgium,2020-06-14,59918.0,99.0,5169.972845,,,11589616.0
2,CAN,Canada,2020-06-14,98399.0,467.0,2607.13769,,,37742157.0
3,CHE,Switzerland,2020-06-14,31011.0,31.0,3583.173746,,,8654618.0
4,CYP,Cyprus,2020-06-14,980.0,0.0,1118.850461,,,875899.0
5,DEU,Germany,2020-06-14,186269.0,247.0,2223.206367,,,83783945.0
6,DJI,Djibouti,2020-06-14,4449.0,8.0,4503.027322,,,988002.0
7,DNK,Denmark,2020-06-14,12139.0,40.0,2095.748371,,,5792203.0
8,ECU,Ecuador,2020-06-14,46356.0,578.0,2627.435377,,,17643060.0
9,EST,Estonia,2020-06-14,1973.0,3.0,1487.329057,,,1326539.0
