In [1]:
# utility libraries
from datetime import timedelta
import math

# storing and analysis
import numpy as np
import pandas as pd

# visualization
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import plotly.express as px

In [2]:
# dealing with geographic data
import geopandas as gpd
from geopandas.tools import geocode

# Explicitly register pandas' datetime converters with matplotlib so that
# pandas date values can be used on matplotlib axes.
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [3]:
# offline plotly: set up in-notebook rendering of plotly figures.
# connected=True loads plotly.js from the online CDN instead of embedding it.
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=True)

In [4]:
# theme: apply seaborn's 'whitegrid' style to subsequent matplotlib/seaborn plots
sns.set_style('whitegrid')

In [5]:
# Location of the Zika CSV that is loaded with pd.read_csv in the next cell
# (a path relative to the notebook's working directory).
zika_url = 'cdc_zika.csv'

In [6]:
# Load the raw records and preview the first 10 rows.
df = pd.read_csv(zika_url)
df.head(10)

Unnamed: 0,report_date,location,location_type,data_field,data_field_code,time_period,time_period_type,value,unit
0,2016-03-19,Argentina-Buenos_Aires,province,cumulative_confirmed_local_cases,AR0001,,,0,cases
1,2016-03-19,Argentina-Buenos_Aires,province,cumulative_probable_local_cases,AR0002,,,0,cases
2,2016-03-19,Argentina-Buenos_Aires,province,cumulative_confirmed_imported_cases,AR0003,,,2,cases
3,2016-03-19,Argentina-Buenos_Aires,province,cumulative_probable_imported_cases,AR0004,,,1,cases
4,2016-03-19,Argentina-Buenos_Aires,province,cumulative_cases_under_study,AR0005,,,127,cases
5,2016-03-19,Argentina-Buenos_Aires,province,cumulative_cases_discarded,AR0006,,,0,cases
6,2016-03-19,Argentina-CABA,province,cumulative_confirmed_local_cases,AR0001,,,0,cases
7,2016-03-19,Argentina-CABA,province,cumulative_probable_local_cases,AR0002,,,0,cases
8,2016-03-19,Argentina-CABA,province,cumulative_confirmed_imported_cases,AR0003,,,9,cases
9,2016-03-19,Argentina-CABA,province,cumulative_probable_imported_cases,AR0004,,,0,cases


In [7]:
# Remove both time-period columns in a single call.
df.drop(columns=['time_period', 'time_period_type'], inplace=True)

In [8]:
# Discard every row that has a missing value in any of the remaining columns.
df.dropna(inplace=True)

In [9]:
# Turn 'value' into numbers (entries that cannot be parsed become NaN),
# discard the rows that are now incomplete, and store the counts as integers.
df['value'] = pd.to_numeric(df['value'], errors='coerce')
df = df.dropna().astype({'value': int})

# Parse the report dates; entries that cannot be parsed become NaT.
df['report_date'] = pd.to_datetime(df['report_date'], errors='coerce')

In [10]:
# Discard rows with missing values again; this removes any row whose
# report_date was coerced to NaT by pd.to_datetime(errors='coerce').
df.dropna(inplace=True)

In [11]:
# Index labels of the rows whose 'value' equals 0
indexNames = df.index[df['value'] == 0]
# Remove those rows from the DataFrame in place
df.drop(index=indexNames, inplace=True)

In [12]:
# Preview the last rows of the frame after cleaning.
df.tail()

Unnamed: 0,report_date,location,location_type,data_field,data_field_code,value,unit
107614,2016-06-28,United_States_Virgin_Islands,territory,confirmed_conjunctivitis,VI0017,7,cases
107615,2016-06-28,United_States_Virgin_Islands,territory,confirmed_eyepain,VI0018,13,cases
107616,2016-06-28,United_States_Virgin_Islands,territory,confirmed_headache,VI0019,14,cases
107617,2016-06-28,United_States_Virgin_Islands,territory,confirmed_malaise,VI0020,5,cases
107618,2016-06-28,United_States_Virgin_Islands,territory,zika_no_specimen,VI0021,2,cases


In [13]:
# Break 'location' on '-' into at most three parts and store them as separate
# columns, then discard the original combined column.
df[['Country', 'State/Province', 'City/Mun']] = df['location'].str.split(pat='-', n=2, expand=True)
df.drop(columns='location', inplace=True)

In [14]:
# Replace the underscore-separated country names with display names
# (exact whole-value matches only).
df['Country'] = df['Country'].replace({
    'Dominican_Republic': 'Dominican Republic',
    'El_Salvador': 'El Salvador',
    'Puerto_Rico': 'Puerto Rico',
    'United_States': 'United States',
    'United_States_Virgin_Islands': 'U.S. Virgin Islands',
})

In [15]:
# Total 'value' per (Country, State/Province, City/Mun), largest first.
# 'value' is selected *before* aggregating: summing every column and then
# picking 'value' fails on pandas >= 2.0, where the datetime 'report_date'
# column can no longer be silently skipped by sum().
# NOTE(review): groupby drops keys containing NaN by default, so locations
# that have no third '-'-separated part (no City/Mun) are left out of this
# table — confirm that is intended.
df_loc = (
    df.groupby(['Country', 'State/Province', 'City/Mun'])[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .reset_index()
)
df_loc.rename(columns={'value': 'Cases'}).head(3)

Unnamed: 0,Country,State/Province,City/Mun,Cases
0,Colombia,Norte_Santander,Cucuta,135383
1,Colombia,Valle_Del_Cauca,Cali,123757
2,Colombia,Barranquilla,Barranquilla,92682


In [16]:
# Total 'value' per Country, largest first.
# 'value' is selected *before* aggregating: summing every column and then
# picking 'value' fails on pandas >= 2.0, where the datetime 'report_date'
# column can no longer be silently skipped by sum().
# NOTE(review): 'Country' is simply the first '-'-separated part of the
# original location, so non-country names (the displayed output includes
# 'Sudeste' and 'Nordeste') show up as countries here — verify before using
# these totals.
df_country = (
    df.groupby(['Country'])[['value']]
    .sum()
    .sort_values('value', ascending=False)
    .reset_index()
)
df_country.rename(columns={'value': 'Cases'}).head(5)

Unnamed: 0,Country,Cases
0,Brazil,2041135
1,Colombia,1536401
2,Sudeste,366093
3,El Salvador,354031
4,Nordeste,337035


In [17]:
# Rows of the per-location totals that belong to Brazil.
df_br = df_loc.loc[df_loc['Country'] == 'Brazil']

In [18]:
# Country lookup table. The columns used further down are 'name' (merge key),
# 'country' (two-letter code), 'latitude' and 'longitude'.
world_map = pd.read_csv('countries.csv')



In [19]:
# Attach the per-country totals to the country lookup table (only names
# present in both frames survive the default inner merge), largest total first.
zika_map = (
    world_map
    .merge(df_country, left_on='name', right_on='Country')
    .sort_values('value', ascending=False)
)
# Label of the form "<name><br>Cases <total>" for use in the plot.
zika_map['text'] = zika_map['name'] + '<br>Cases ' + zika_map['value'].astype(str)

In [20]:
# Preview the merged frame. The index is not reset after sorting, so it still
# shows each row's position from the merge.
zika_map.head()

Unnamed: 0,country,latitude,longitude,name,Country,value,text
1,BR,-14.235004,-51.92528,Brazil,Brazil,2041135,Brazil<br>Cases 2041135
2,CO,4.570868,-74.297333,Colombia,Colombia,1536401,Colombia<br>Cases 1536401
11,SV,13.794185,-88.89653,El Salvador,El Salvador,354031,El Salvador<br>Cases 354031
3,DO,18.735693,-70.162651,Dominican Republic,Dominican Republic,94983,Dominican Republic<br>Cases 94983
0,AR,-38.416097,-63.616672,Argentina,Argentina,21826,Argentina<br>Cases 21826


In [21]:
import plotly.graph_objects as go
%matplotlib inline

In [22]:
## Bubble Map with go.Scattergeo
# One trace (and legend entry) per country. `countries` holds the two-letter
# codes matched against zika_map['country']; `colors` holds the bubble colour
# for the country at the same position.
countries = ['BR', 'CO', 'SV', 'DO', 'AR']
colors = ["crimson", "orange", "royalblue", "lightseagreen", "green"]
scale = 1000  # the case total is divided by this before being used as marker area

fig = go.Figure()

for ct, color in zip(countries, colors):
    df_sub = zika_map[zika_map['country'] == ct]
    fig.add_trace(go.Scattergeo(
        # Bubbles are positioned by explicit coordinates only. The previous
        # `locations=np.array(<name>)` passed a 0-d array to an array-valued
        # property and duplicated the position already given by lon/lat.
        lon=df_sub['longitude'],
        lat=df_sub['latitude'],
        mode='markers+text',
        # Country name drawn on the bubble ...
        text=df_sub['Country'],
        textposition='middle center',
        # ... and the "<name><br>Cases <total>" label shown on hover.
        # (Previously this label was assigned to `text` and then overwritten
        # by the country name in a second loop, so it was never displayed.)
        hovertext=df_sub['text'],
        marker=dict(
            size=df_sub['value'] / scale,
            color=color,
            line_color='rgb(40,40,40)',
            line_width=0.5,
            sizemode='area',
        ),
        name=ct,
    ))

fig.update_layout(
    title_text='Most affected countries by Zika',
    showlegend=True,
    geo=go.layout.Geo(
        scope="world",
        landcolor='rgb(217, 217, 217)',
        showframe=True,
        showcoastlines=True,
        showcountries=True,
        countrycolor="white",
        coastlinecolor="black",
        # Restrict the visible map to this longitude/latitude window.
        lonaxis_range=[-34, -110],
        lataxis_range=[40, -56],
    ),
)
fig.show()