In [21]:
import pandas as pd
import numpy as np
import glob
import re
import matplotlib as plt
import plotly.express as px

Citation: 'Suggested citation: Pan American Health Organization / World Health Organization. Zika suspected and confirmed cases reported by countries and territories in the Americas Cumulative cases, 2015-2016. Updated as of 23 November 2016. Washington, D.C.: PAHO/WHO; 2016; Pan American Health Organization • www.paho.org • © PAHO/WHO, 2016'

In [2]:
pwd

'/Users/Lupine/Documents/Projects/DS4A/Projecting Infectious Disease 139/zika'

#### Writing Datasets

In [24]:
filepath = r'/Users/Lupine/Documents/Projects/DS4A/Projecting Infectious Disease 139/zika/paho zika'
all_files = glob.glob(filepath + "/*.xls")

li = []

for filename in all_files:
    df = pd.read_excel(filename, header=6)
#     len('/Users/Lupine/Documents/Projects/DS4A/Projecting Infectious Disease 139/paho zika/')
    date = filename[87:]
    date = date[:11]
    df = df.iloc[:,:12]
    df.columns=['Country/Territory', 'Suspected', 'Confirmed'
                    , 'Imported Cases' , 'Incidence Rate'
                    , 'Zika Case Deaths' , 'Zika Congenital Syndrome'
                    , 'Population x1000e', 'Congenital Suspected'
                    , 'Congenital Probable', 'GBS',
                    'Confirmed congenital syndrome']
    df['Date'] = date
    df = df.dropna(thresh=5)
    li.append(df)
    
zika16to18 = pd.concat(li).reset_index()

### Preparing Frame

In [26]:
Date = zika16to18['Date'].str.split(r"\-", n=2, expand=True)
Date.columns=['Year','Month','Day']
Date['Day'] = Date['Day'].str.replace('-','')
Date['Month'] = Date['Month'].str.replace('ago','aug').replace('June','jun')
zika16to18 = zika16to18.join(Date)
zika16to18['Date'] = pd.to_datetime(zika16to18['Year'].astype(str) + zika16to18['Month'] + zika16to18['Day'].astype(str), format='%Y%b%d')
zika16to18 = zika16to18.sort_values(by='Date').reset_index().iloc[:,1:]
zika16to18['Country/Territory'] = zika16to18['Country/Territory'].str.replace('\d+', '', regex=True).replace('[¹²³⁴⁵⁶⁷⁸⁹⁰]*', '', regex=True)
zika16to18.drop(['Congenital Suspected', 'Congenital Probable', 'GBS', 'Confirmed congenital syndrome'], axis=1)
zika16to18 = zika16to18[1:]
zika16to18 = zika16to18[zika16to18['Country/Territory'].notna()]

In [27]:
zika16to18

Unnamed: 0,index,Country/Territory,Suspected,Confirmed,Imported Cases,Incidence Rate,Zika Case Deaths,Zika Congenital Syndrome,Population x1000e,Congenital Suspected,Congenital Probable,GBS,Confirmed congenital syndrome,Date,Year,Month,Day
1,40,Paraguay,546,12,0.0,8.297398,0.0,2.0,6725,,,,,2016-11-17,2016,nov,17
2,41,Uruguay,0,0,1.0,0.000000,0.0,0.0,344,,,,,2016-11-17,2016,nov,17
3,42,Subtotal,2367,38,57.0,3.483084,0.0,3.0,69048,0,0,0,0,2016-11-17,2016,nov,17
4,44,Anguilla,40,5,1.0,264.705882,0.0,0.0,17,,,,,2016-11-17,2016,nov,17
5,45,Antigua and Barbuda,393,14,2.0,432.978723,0.0,0.0,94,,,,,2016-11-17,2016,nov,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3715,35,Subtotal,172905,17073,41.0,136.072772,0.0,276.0,139615,195,216,417,0,2017-12-21,2017,dec,21
3716,36,Brazil,231725,137288,0.0,176.095308,11.0,2952.0,209553,9289,2989,,,2017-12-21,2017,dec,21
3717,38,Argentina,539,278,41.0,1.854290,0.0,5.0,44060,,,,,2017-12-21,2017,dec,21
3718,3,Canada,0,0,544.0,0.000000,0.0,1.0,36284,0,0,0,0,2017-12-21,2017,dec,21


#### Making reports not cumulative

In [28]:
def noncumulative(df, cumulativecolumns, groupbythis):
    for cumulativecol in cumulativecolumns:
        non_cumulative_name = "Noncumulative " + cumulativecol
        df[non_cumulative_name] = df.groupby(groupbythis)[cumulativecol].shift(0) - df.groupby(groupbythis)[cumulativecol].shift(1)
    return df

In [29]:
zikacumulative = ['Suspected', 'Confirmed', 'Zika Case Deaths', 'Zika Congenital Syndrome']
noncumulative(zika16to18, cumulativecolumns=zikacumulative, groupbythis='Country/Territory')

Unnamed: 0,index,Country/Territory,Suspected,Confirmed,Imported Cases,Incidence Rate,Zika Case Deaths,Zika Congenital Syndrome,Population x1000e,Congenital Suspected,...,GBS,Confirmed congenital syndrome,Date,Year,Month,Day,Noncumulative Suspected,Noncumulative Confirmed,Noncumulative Zika Case Deaths,Noncumulative Zika Congenital Syndrome
1,40,Paraguay,546,12,0.0,8.297398,0.0,2.0,6725,,...,,,2016-11-17,2016,nov,17,,,,
2,41,Uruguay,0,0,1.0,0.000000,0.0,0.0,344,,...,,,2016-11-17,2016,nov,17,,,,
3,42,Subtotal,2367,38,57.0,3.483084,0.0,3.0,69048,0,...,0,0,2016-11-17,2016,nov,17,,,,
4,44,Anguilla,40,5,1.0,264.705882,0.0,0.0,17,,...,,,2016-11-17,2016,nov,17,,,,
5,45,Antigua and Barbuda,393,14,2.0,432.978723,0.0,0.0,94,,...,,,2016-11-17,2016,nov,17,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3715,35,Subtotal,172905,17073,41.0,136.072772,0.0,276.0,139615,195,...,417,0,2017-12-21,2017,dec,21,82718,-25163,-5.0,131.0
3716,36,Brazil,231725,137288,0.0,176.095308,11.0,2952.0,209553,9289,...,,,2017-12-21,2017,dec,21,0,0,0.0,0.0
3717,38,Argentina,539,278,41.0,1.854290,0.0,5.0,44060,,...,,,2017-12-21,2017,dec,21,0,0,0.0,0.0
3718,3,Canada,0,0,544.0,0.000000,0.0,1.0,36284,0,...,0,0,2017-12-21,2017,dec,21,0,0,0.0,0.0


In [42]:
zika16to18.to_csv('zika16to18.csv',index=False)

In [18]:
cdcdata = pd.read_csv('cdc_zika.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [19]:
cdcdata

Unnamed: 0,report_date,location,location_type,data_field,data_field_code,time_period,time_period_type,value,unit
0,2016-03-19,Argentina-Buenos_Aires,province,cumulative_confirmed_local_cases,AR0001,,,0,cases
1,2016-03-19,Argentina-Buenos_Aires,province,cumulative_probable_local_cases,AR0002,,,0,cases
2,2016-03-19,Argentina-Buenos_Aires,province,cumulative_confirmed_imported_cases,AR0003,,,2,cases
3,2016-03-19,Argentina-Buenos_Aires,province,cumulative_probable_imported_cases,AR0004,,,1,cases
4,2016-03-19,Argentina-Buenos_Aires,province,cumulative_cases_under_study,AR0005,,,127,cases
...,...,...,...,...,...,...,...,...,...
107614,2016-06-28,United_States_Virgin_Islands,territory,confirmed_conjunctivitis,VI0017,,,7.0,cases
107615,2016-06-28,United_States_Virgin_Islands,territory,confirmed_eyepain,VI0018,,,13.0,cases
107616,2016-06-28,United_States_Virgin_Islands,territory,confirmed_headache,VI0019,,,14.0,cases
107617,2016-06-28,United_States_Virgin_Islands,territory,confirmed_malaise,VI0020,,,5.0,cases


In [30]:
pd.get_dummies(cdcdata, columns=['data_field'])

Unnamed: 0,report_date,location,location_type,data_field_code,time_period,time_period_type,value,unit,data_field_GBS_reported_cumulative_2015-2016,data_field_GBS_reported_cumulative_2015-2016_flavi,...,data_field_zika_pending,data_field_zika_reported,data_field_zika_reported_local,data_field_zika_reported_travel,data_field_zika_suspected,data_field_zika_suspected_4weeks,data_field_zika_suspected_clinic,data_field_zika_suspected_cumulative,data_field_zika_suspected_pregnant,data_field_zika_suspected_pregnant_cumulative
0,2016-03-19,Argentina-Buenos_Aires,province,AR0001,,,0,cases,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2016-03-19,Argentina-Buenos_Aires,province,AR0002,,,0,cases,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2016-03-19,Argentina-Buenos_Aires,province,AR0003,,,2,cases,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2016-03-19,Argentina-Buenos_Aires,province,AR0004,,,1,cases,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2016-03-19,Argentina-Buenos_Aires,province,AR0005,,,127,cases,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107614,2016-06-28,United_States_Virgin_Islands,territory,VI0017,,,7.0,cases,0,0,...,0,0,0,0,0,0,0,0,0,0
107615,2016-06-28,United_States_Virgin_Islands,territory,VI0018,,,13.0,cases,0,0,...,0,0,0,0,0,0,0,0,0,0
107616,2016-06-28,United_States_Virgin_Islands,territory,VI0019,,,14.0,cases,0,0,...,0,0,0,0,0,0,0,0,0,0
107617,2016-06-28,United_States_Virgin_Islands,territory,VI0020,,,5.0,cases,0,0,...,0,0,0,0,0,0,0,0,0,0


In [38]:
cdcdata['data_field'].value_counts()

zika_confirmed_laboratory       28963
zika_suspected                  28963
zika_suspected_clinic           16170
zika_confirmed_clinic           12793
yearly_reported_travel_cases     1035
                                ...  
Zika_confirmed_F                    1
confirmed_age_35-49                 1
confirmed_age_25-34                 1
confirmed_age_60_plus               1
zika_confirmed_2weeks               1
Name: data_field, Length: 148, dtype: int64

In [40]:
cdcdata[cdcdata['data_field'] == 'zika_confirmed_laboratory']['location'].value_counts()

Colombia-Cundinamarca-Tocancipa             25
Colombia-Guajira-Dibulla                    25
Colombia-Choco-Atrato                       25
Colombia-Bolivar-Cantagallo                 25
Colombia-Bolivar-El_Carmen_De_Bolivar       25
                                            ..
Colombia-Bogota-Usaquen_Santa_Barbara        4
Colombia-Bogota-Fontibon_Fontibon            4
Colombia-Bogota-Tunjuelito_Tunjuelito        4
Colombia-Bogota-Puente_Aranda_San_Rafael     4
Colombia-Bogota-Tunjuelito_Venecia           4
Name: location, Length: 1181, dtype: int64

In [35]:
cdcdata

Unnamed: 0,report_date,location,location_type,data_field,data_field_code,time_period,time_period_type,value,unit
0,2016-03-19,Argentina-Buenos_Aires,province,cumulative_confirmed_local_cases,AR0001,,,0,cases
1,2016-03-19,Argentina-Buenos_Aires,province,cumulative_probable_local_cases,AR0002,,,0,cases
2,2016-03-19,Argentina-Buenos_Aires,province,cumulative_confirmed_imported_cases,AR0003,,,2,cases
3,2016-03-19,Argentina-Buenos_Aires,province,cumulative_probable_imported_cases,AR0004,,,1,cases
4,2016-03-19,Argentina-Buenos_Aires,province,cumulative_cases_under_study,AR0005,,,127,cases
...,...,...,...,...,...,...,...,...,...
107614,2016-06-28,United_States_Virgin_Islands,territory,confirmed_conjunctivitis,VI0017,,,7.0,cases
107615,2016-06-28,United_States_Virgin_Islands,territory,confirmed_eyepain,VI0018,,,13.0,cases
107616,2016-06-28,United_States_Virgin_Islands,territory,confirmed_headache,VI0019,,,14.0,cases
107617,2016-06-28,United_States_Virgin_Islands,territory,confirmed_malaise,VI0020,,,5.0,cases


In [33]:
cdcdata

Unnamed: 0,report_date,location,location_type,data_field,data_field_code,time_period,time_period_type,value,unit
0,2016-03-19,Argentina-Buenos_Aires,province,cumulative_confirmed_local_cases,AR0001,,,0,cases
1,2016-03-19,Argentina-Buenos_Aires,province,cumulative_probable_local_cases,AR0002,,,0,cases
2,2016-03-19,Argentina-Buenos_Aires,province,cumulative_confirmed_imported_cases,AR0003,,,2,cases
3,2016-03-19,Argentina-Buenos_Aires,province,cumulative_probable_imported_cases,AR0004,,,1,cases
4,2016-03-19,Argentina-Buenos_Aires,province,cumulative_cases_under_study,AR0005,,,127,cases
...,...,...,...,...,...,...,...,...,...
107614,2016-06-28,United_States_Virgin_Islands,territory,confirmed_conjunctivitis,VI0017,,,7.0,cases
107615,2016-06-28,United_States_Virgin_Islands,territory,confirmed_eyepain,VI0018,,,13.0,cases
107616,2016-06-28,United_States_Virgin_Islands,territory,confirmed_headache,VI0019,,,14.0,cases
107617,2016-06-28,United_States_Virgin_Islands,territory,confirmed_malaise,VI0020,,,5.0,cases


In [None]:
cdcdata

#### Exploratory Data Analysis Visualization

- Proportion of Infections across countries

In [105]:
maxcases = zika16to18.groupby('Country/Territory').max().reset_index()

In [113]:
maxcases = maxcases[maxcases['Country/Territory'] != 'Subtotal ']

In [114]:
maxcases

Unnamed: 0,Country/Territory,index,Suspected,Confirmed,Imported Cases,Incidence Rate,Zika Case Deaths,Zika Congenital Syndrome,Population x1000e,Date,Year,Month,Day
0,Anguilla,44,58,23,1.0,388.235294,0.0,0.0,17,2017-12-21,2017,sep,9
1,Antigua and Barbuda,45,540,25,2.0,601.06383,0.0,0.0,94,2017-12-21,2017,sep,9
2,Argentina,38,2251,278,41.0,5.288243,0.0,5.0,44060,2017-12-21,2017,sep,9
3,Aruba,46,1208,703,7.0,1676.315789,0.0,0.0,114,2017-12-21,2017,sep,9
4,Bahamas,47,531,25,3.0,140.759494,0.0,0.0,395,2017-12-21,2017,sep,9
5,Barbados,48,715,150,0.0,296.232877,0.0,1.0,292,2017-12-21,2017,sep,9
6,Belize,10,2005,355,0.0,636.118598,0.0,0.0,371,2017-12-21,2017,sep,9
7,Belize,10,816,73,0.0,242.234332,0.0,0.0,371,2017-05-04,2017,nov,9
8,Bermuda,2,0,0,6.0,0.0,0.0,0.0,71,2017-12-21,2017,sep,9
9,Bolivia (Plurinational State of),30,2636,806,4.0,31.373621,0.0,14.0,10971,2017-12-21,2017,sep,9


In [119]:
# Too many categories, truncating too just those with > 0 value
confirmedfig = px.bar(maxcases[maxcases['Confirmed']>0], x="Country/Territory", y="Confirmed")
confirmedfig.update_xaxes(categoryorder='total descending')
confirmedfig.show()

- Proportion of Infections across months

In [55]:
# needs to be averaged confirmed
fig = px.bar(zika16to18[zika16to18['Country/Territory']=='TOTAL'], x="Month", y="Confirmed")
fig.show()

- Infections over time

In [52]:
fig = px.scatter(zika16to18, x="Date", y="Confirmed", color="Country/Territory")
fig.show()

Stretch Goal: Visualizations with geomapping