In [1]:
import pandas as pd
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go

In [57]:
# RSV subtypes shown on the map, in the order they appear in the legend.
SUBTYPE_LIST = ['A', 'B']

# Marker colour used for each subtype.
CMAP = dict(zip(SUBTYPE_LIST, ['royalblue', 'salmon']))

In [90]:
# Load the two metadata exports. Only the collection date, country and subtype
# columns are used in this notebook.
rsv_df_1 = pd.read_csv('../../data/RSVG_gb_metadata.csv',
                       usecols=['collection_date', 'country', 'subtype'],
                       parse_dates=['collection_date'])

rsv_df_2 = pd.read_csv('../../data/RSVG_gb_metadata_5000-10000.csv',
                       usecols=['collection_date', 'country', 'subtype'],
                       parse_dates=['collection_date'])

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# stacks the two frames the same way and works on every pandas version.
rsv_df = pd.concat([rsv_df_1, rsv_df_2], ignore_index=True)

# Collection year, taken from the parsed dates (missing dates give a missing year).
rsv_df['year'] = rsv_df['collection_date'].dt.year

# Change US states to USA: any country value containing 'USA' becomes 'United States'.
# na=False matters here: for a missing country str.contains would otherwise return
# NaN, which np.where treats as true, relabelling unknown countries as 'United States'.
rsv_df['country'] = np.where(rsv_df['country'].str.contains('USA', na=False),
                             'United States', rsv_df['country'])

# Use lat and long so datapoints can be jittered to show multiple subtypes.
# Lat and long data from https://worldmap.harvard.edu/data/geonode:country_centroids_az8
lat_lon = (
    pd.read_csv('country_centroids_az8.csv',
                usecols=['name', 'brk_a3', 'Longitude', 'Latitude'])
    .rename(columns={'name': 'country', 'brk_a3': 'country_code'})
)

# Count the number of rows (sequences) per country, subtype and collection year.
df_count_time = (
    rsv_df.groupby(['country', 'subtype', 'year'])
    .size()
    .reset_index(name='count')
)

# Attach centroid coordinates to the counts for plotting. The left join keeps
# every counted row; a country with no match in the centroid table ends up
# with NaN coordinates.
df_countries_time = df_count_time.merge(lat_lon, how='left', on='country')

In [91]:
# Every calendar year from the earliest to the latest collection year, inclusive.
# range() excludes its stop value, so the +1 is needed for the most recent year
# to be part of the range.
YEAR_RANGE = list(range(int(df_countries_time.year.min()),
                        int(df_countries_time.year.max()) + 1))

In [92]:
#Jitter points for countries that have multiple subtypes, so markers on map don't overlap
# Offset in degrees, keyed by subtype; the same offset is added to latitude and longitude.
JITTER_DICT = {'A': 1.0, 'B': -1.0}

# Number of distinct subtypes recorded for each country (Series indexed by country).
# df_countries_time holds one row per (country, subtype, year), so a plain row
# count per country would also flag a single-subtype country that was sampled in
# several years and move its marker off the centroid for no reason.
country_group = df_countries_time.groupby('country')['subtype'].nunique()

# True for the rows whose country has more than one subtype.
has_multiple_subtypes = df_countries_time['country'].map(country_group) > 1

# Per-row offset: the subtype's jitter where needed, 0.0 everywhere else.
subtype_offset = (
    df_countries_time['subtype']
    .map(JITTER_DICT)
    .where(has_multiple_subtypes, 0.0)
)

#With data separated by year
df_countries_time['adj_lon'] = df_countries_time['Longitude'] + subtype_offset
df_countries_time['adj_lat'] = df_countries_time['Latitude'] + subtype_offset

In [93]:
scale_markers = 5
map_list = []

# One scattergeo trace per row of df_countries_time, so that the slider can
# switch each row on or off individually.
for record in df_countries_time.to_dict('records'):
    country_name = record['country']
    subtype_name = record['subtype']
    sequence_count = record['count']

    map_list.append(dict(
        type='scattergeo',
        lat=[record['adj_lat']],
        lon=[record['adj_lon']],
        marker=dict(
            # Marker diameter grows with the sequence count but is capped at 60
            size=np.min([sequence_count * scale_markers, 60]),
            color=CMAP[subtype_name],
            line=dict(width=0.5, color='rgb(40,40,40)'),
            opacity=0.5,
            sizemode='diameter',
        ),
        hovertext=(country_name + ', subtype ' + subtype_name +
                   ' : ' + str(sequence_count) + ' sequences'),
        name=country_name + ' ' + subtype_name,
        legendgroup=subtype_name,
        showlegend=False,
        hoverinfo='text',
    ))

# Work around for showing legend: the per-row traces above are all hidden from
# the legend, so add one extra trace per subtype that carries the legend entry.
# It is positioned at lat/lon 180 (presumably to keep it off the visible map).
map_list.extend(
    dict(
        type='scattergeo',
        lat=[180.0],
        lon=[180.0],
        marker=dict(
            size=scale_markers * 10,
            color=CMAP[legend_subtype],
            opacity=0.5,
            sizemode='diameter',
        ),
        legendgroup=legend_subtype,
        name='Subtype ' + legend_subtype,
        showlegend=True,
        hovertext=None,
    )
    for legend_subtype in SUBTYPE_LIST
)

# One slider step per year. Each step restyles 'visible' for every trace:
# row traces are shown only when their year matches the step, and the trailing
# legend traces are always shown.
row_years = df_countries_time['year']
legend_visibility = [True] * len(SUBTYPE_LIST)
steps = [
    dict(
        method='restyle',
        label=slider_year,
        args=['visible', (row_years == slider_year).tolist() + legend_visibility],
    )
    for slider_year in YEAR_RANGE
]

geo_settings = dict(
    scope='world',
    showland=True,
    landcolor='rgb(217, 217, 217)',
    countrywidth=1,
)

layout = dict(
    title='Global distribution of RSV',
    sliders=[dict(steps=steps)],
    geo=geo_settings,
)

fig = dict(data=map_list, layout=layout)
py.iplot(fig)



Consider using IPython.display.IFrame instead

