In [2]:
import numpy as np
import pandas as pd
import feather
import plotly
from tqdm import tqdm
import pickle

In [3]:
import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.grid_objs import Grid, Column
from plotly.graph_objs import *
from IPython.display import display, HTML
from datetime import datetime, timedelta

In [4]:
init_notebook_mode(connected=True)

# Get dataframe

In [5]:
antiflu = feather.read_dataframe('..\\..\\Data\\Datathon_2017\\antibiotic_sample_data')

In [6]:
postcode_populations_lat_lon = feather.read_dataframe('..\\..\\Data\\Datathon_2017\\postcode_populations_lat_lon')

In [7]:
antiflu.columns

Index(['Patient_ID', 'Store_ID', 'Prescriber_ID', 'Drug_ID',
       'SourceSystem_Code', 'Prescription_Week', 'Dispense_Week', 'Drug_Code',
       'NHS_Code', 'IsDeferredScript', 'Script_Qty', 'Dispensed_Qty',
       'MaxDispense_Qty', 'PatientPrice_Amt', 'WholeSalePrice_Amt',
       'GovernmentReclaim_Amt', 'RepeatsTotal_Qty', 'RepeatsLeft_Qty',
       'StreamlinedApproval_Code', 'MasterProductID',
       'MasterProductFullName.x', 'BrandName.x', 'FormCode.x',
       'StrengthCode.x', 'PackSizeNumber.x', 'GenericIngredientName.x',
       'EthicalSubCategoryName.x', 'EthicalCategoryName.x',
       'ManufacturerCode.x', 'ManufacturerName.x', 'ManufacturerGroupID.x',
       'ManufacturerGroupCode.x', 'ChemistListPrice.x', 'ATCLevel5Code.x',
       'ATCLevel4Code.x', 'ATCLevel3Code.x', 'ATCLevel2Code.x',
       'ATCLevel1Code.x', 'ATCLevel1Name', 'ATCLevel2Name', 'ATCLevel3Name',
       'ATCLevel4Name', 'ATCLevel5Name', 'gender', 'year_of_birth', 'postcode',
       'lat', 'long', 'MasterP

In [8]:
antiflu[['Patient_ID','lat','long','total_pop']].head(2)

Unnamed: 0,Patient_ID,lat,long,total_pop
0,25.0,-36.040463,146.932638,14377.0
1,25.0,-36.040463,146.932638,14377.0


In [9]:
len(antiflu)

484964

# init_params

In [10]:
grid_res = 50
lat_spacing = [ -39, -32 ]
lon_spacing = [ 141, 149 ]

In [11]:
lat_grid = np.linspace(lat_spacing[0],lat_spacing[1],grid_res)
lon_grid = np.linspace(lon_spacing[0],lon_spacing[1],grid_res)

In [12]:
def between(x,num1,num2):
    if (x < num2 and x >= num1):
        return(True)
    else:
        return(False)

In [13]:
def getRecordsBetweenDates(date_1,date_2):
    return antiflu[antiflu.apply( lambda x: between(x['Prescription_Week'],date_1,date_2),axis=1 ) ]

In [14]:
def addDaysToDate(date,days_num):
    return datetime.strftime((datetime.strptime(date,'%Y-%m-%d') + timedelta(days=days_num)),'%Y-%m-%d')

def addMonthsToDate(date,months_num):
    curr_date = datetime.strptime(date,'%Y-%m-%d')
    if curr_date.month == 12:
        return datetime.strftime(datetime(curr_date.year+1, 1,curr_date.day),'%Y-%m-%d')
    else:
        return datetime.strftime(datetime(curr_date.year, curr_date.month+1,curr_date.day),'%Y-%m-%d')

# 1 years worth of data here

In [15]:
date_list = [['2011-01-01','2013-02-01']]

for i in range(48):
    date_list.append([date_list[-1][1], addMonthsToDate(date_list[-1][1],1)])

In [16]:
date_list

[['2011-01-01', '2013-02-01'],
 ['2013-02-01', '2013-03-01'],
 ['2013-03-01', '2013-04-01'],
 ['2013-04-01', '2013-05-01'],
 ['2013-05-01', '2013-06-01'],
 ['2013-06-01', '2013-07-01'],
 ['2013-07-01', '2013-08-01'],
 ['2013-08-01', '2013-09-01'],
 ['2013-09-01', '2013-10-01'],
 ['2013-10-01', '2013-11-01'],
 ['2013-11-01', '2013-12-01'],
 ['2013-12-01', '2014-01-01'],
 ['2014-01-01', '2014-02-01'],
 ['2014-02-01', '2014-03-01'],
 ['2014-03-01', '2014-04-01'],
 ['2014-04-01', '2014-05-01'],
 ['2014-05-01', '2014-06-01'],
 ['2014-06-01', '2014-07-01'],
 ['2014-07-01', '2014-08-01'],
 ['2014-08-01', '2014-09-01'],
 ['2014-09-01', '2014-10-01'],
 ['2014-10-01', '2014-11-01'],
 ['2014-11-01', '2014-12-01'],
 ['2014-12-01', '2015-01-01'],
 ['2015-01-01', '2015-02-01'],
 ['2015-02-01', '2015-03-01'],
 ['2015-03-01', '2015-04-01'],
 ['2015-04-01', '2015-05-01'],
 ['2015-05-01', '2015-06-01'],
 ['2015-06-01', '2015-07-01'],
 ['2015-07-01', '2015-08-01'],
 ['2015-08-01', '2015-09-01'],
 ['2015-

In [17]:
getRecordsBetweenDates('2013-05-02','2013-05-10')

Unnamed: 0,Patient_ID,Store_ID,Prescriber_ID,Drug_ID,SourceSystem_Code,Prescription_Week,Dispense_Week,Drug_Code,NHS_Code,IsDeferredScript,...,ManufacturerGroupCode.y,ChemistListPrice.y,ATCLevel5Code.y,ATCLevel4Code.y,ATCLevel3Code.y,ATCLevel2Code.y,ATCLevel1Code.y,Prescription_Year,total_pop,working_age_pct
362,1434.0,2414,39276,7439.0,False,2013-05-05,2013-07-14,RES5,2951H,0,...,ALPHAPHARM,2.4500,J01EE01,J01EE,J01E,J01,J,2013.0,27535.0,68.9
363,1434.0,2414,39276,7439.0,False,2013-05-05,2013-08-11,RES5,2951H,0,...,ALPHAPHARM,2.4500,J01EE01,J01EE,J01E,J01,J,2013.0,27535.0,68.9
364,1434.0,2414,39276,7439.0,False,2013-05-05,2013-09-15,RES5,2951H,0,...,ALPHAPHARM,2.4500,J01EE01,J01EE,J01E,J01,J,2013.0,27535.0,68.9
1110,5241.0,555,326,2718.0,False,2013-05-05,2013-06-02,DOXY11,2711Q,0,...,AMNEAL PHARMACEUTICALS PT,1.6800,J01AA02,J01AA,J01A,J01,J,2013.0,22536.0,64.8
1111,5241.0,555,326,2718.0,False,2013-05-05,2013-06-30,DOXY11,2711Q,0,...,AMNEAL PHARMACEUTICALS PT,1.6800,J01AA02,J01AA,J01A,J01,J,2013.0,22536.0,64.8
1112,5241.0,555,326,2718.0,False,2013-05-05,2013-07-28,DOXY11,2711Q,0,...,AMNEAL PHARMACEUTICALS PT,1.6800,J01AA02,J01AA,J01A,J01,J,2013.0,22536.0,64.8
1410,6298.0,131,9484,2718.0,False,2013-05-05,2013-05-26,DOXY11,2711Q,0,...,AMNEAL PHARMACEUTICALS PT,1.6800,J01AA02,J01AA,J01A,J01,J,2013.0,30408.0,67.8
1411,6298.0,131,9484,2718.0,False,2013-05-05,2013-06-09,DOXY11,2711Q,0,...,AMNEAL PHARMACEUTICALS PT,1.6800,J01AA02,J01AA,J01A,J01,J,2013.0,30408.0,67.8
1412,6298.0,131,9484,2718.0,False,2013-05-05,2013-06-23,DOXY11,2711Q,0,...,AMNEAL PHARMACEUTICALS PT,1.6800,J01AA02,J01AA,J01A,J01,J,2013.0,30408.0,67.8
1490,6568.0,1768,35320,2705.0,False,2013-05-05,2013-05-05,DOX7,2711Q,0,...,ALPHAPHARM,2.3300,J01AA02,J01AA,J01A,J01,J,2013.0,28746.0,88.9


# Generate Data

In [18]:
pop_matrix = np.zeros((grid_res,grid_res))

tmp_pop_data = postcode_populations_lat_lon[postcode_populations_lat_lon['YEAR'] == int(date_list[-1][0][0:4])-2]

In [19]:
for i in tqdm(range(grid_res-1)):
    for j in range(grid_res-1):
        pop_matrix[i,j] = tmp_pop_data[tmp_pop_data.apply(lambda x: ( between(x['lat'], lat_grid[i], lat_grid[i+1]) and between(x['long'], lon_grid[j], lon_grid[j+1]) ) ,axis=1) ]['total_pop'].sum()        

100%|██████████████████████████████████████████████████████████████████████████████████| 49/49 [01:08<00:00,  1.47s/it]


In [20]:
np.sum(pop_matrix)

9055154.0

In [21]:
index_matrix = np.array(range(grid_res*grid_res)).reshape(grid_res,grid_res)

In [22]:
def getFirstLatCoord(lat):
    try:
        return next(x[0] for x in enumerate(lat_grid) if x[1] > lat)
    except:
        return -1

def getFirstLonCoord(lon):
    try:
        return next(x[0] for x in enumerate(lon_grid) if x[1] > lon)
    except:
        return -1

In [23]:
antiflu['lat_index'] = antiflu.apply(lambda x: getFirstLatCoord(x['lat']), axis = 1)
antiflu['lon_index'] = antiflu.apply(lambda x: getFirstLonCoord(x['long']), axis = 1)

In [24]:
antiflu = antiflu[antiflu['lat_index'] != -1]
antiflu = antiflu[antiflu['lon_index'] != -1]

In [29]:
antiflu['index_matrix_coords'] = antiflu['lat_index'] + grid_res*antiflu['lon_index']

In [30]:
def GenerateCountMatrixEfficiently(df):

    count_matrix = []

    for x in index_matrix:
        for y in x:
            count_matrix.append(df[df['index_matrix_coords'] == y]['Patient_ID'].count())
        
    return np.array(count_matrix).reshape(grid_res,grid_res).T

In [31]:
#df_agg = pd.DataFrame(tmp_df.groupby('index_matrix_coords').size().rename('vol'))

In [32]:
def GenerateCountMatrixEfficientlyV2(df):

    count_matrix = []
    df_agg = pd.DataFrame(df.groupby('index_matrix_coords').size().rename('vol'))

    for x in index_matrix:
        for y in x:
            try:
                count_matrix.append(int(df_agg[df_agg.index == y]['vol']))
            except:
                count_matrix.append(0)
        
    return np.array(count_matrix).reshape(grid_res,grid_res)

In [33]:
def generateCountMatrix(tmp_df):
    
    count_matrix = np.zeros((grid_res,grid_res))
    for i in range(grid_res-1):
        for j in range(grid_res-1):
            count_matrix[i,j] = tmp_df[tmp_df.apply(lambda x: ( between(x['lat'], lat_grid[i], lat_grid[i+1]) and between(x['long'], lon_grid[j], lon_grid[j+1]) ) ,axis=1) ]['Patient_ID'].count()
    return count_matrix

# test

In [34]:
tmp_df = getRecordsBetweenDates(date_list[0][0],date_list[0][1])

In [35]:
len(tmp_df)

144630

In [36]:
count_matrix = GenerateCountMatrixEfficientlyV2(tmp_df)

In [37]:
np.sum(count_matrix)

144630

In [38]:
def div0( a, b ):
    """ ignore / 0, div0( [-1, 0, 1], 0 ) -> [0, 0, 0] """
    with np.errstate(divide='ignore', invalid='ignore'):
        c = np.true_divide( a, b )
        c[ ~ np.isfinite( c )] = 0  # -inf inf NaN
    return c.T

In [39]:
antiflu_ratio_matrix = div0(count_matrix, pop_matrix)

In [40]:
trace_heat = Heatmap(z = antiflu_ratio_matrix)

In [41]:
#iplot([trace_heat])

In [42]:
grid_res

50

In [43]:
#scale needs to be ebtween 0 and 1
colscl = [
        # Let first 10% (0.1) of the values have color rgb(0, 0, 0)
        [0, 'rgb(255, 255, 255)'],
        [0.2, 'rgb(255, 255, 255)'],

        # Let values between 10-20% of the min and max of z
        # have color rgb(20, 20, 20)
        [0.2, 'rgb(255, 255, 255)'],
        [0.4, 'rgb(255, 216, 32)'],

        # Values between 20-30% of the min and max of z
        # have color rgb(40, 40, 40)
        [0.4, 'rgb(255, 216, 32)'],
        [0.8, 'rgb(234, 35, 0)'],

        [0.8, 'rgb(234, 35, 0)'],
        [1, 'rgb(0, 0, 0)']
    ]

In [44]:
tmp_df

Unnamed: 0,Patient_ID,Store_ID,Prescriber_ID,Drug_ID,SourceSystem_Code,Prescription_Week,Dispense_Week,Drug_Code,NHS_Code,IsDeferredScript,...,ATCLevel4Code.y,ATCLevel3Code.y,ATCLevel2Code.y,ATCLevel1Code.y,Prescription_Year,total_pop,working_age_pct,lat_index,lon_index,index_matrix_coords
0,25.0,2420,64352,223.0,False,2011-05-29,2011-10-23,AKAM1,1616C,0,...,J01AA,J01A,J01,J,2011.0,14377.0,65.6,21,37,1871
1,25.0,2420,64352,223.0,False,2011-05-29,2011-11-27,AKAM1,1616C,0,...,J01AA,J01A,J01,J,2011.0,14377.0,65.6,21,37,1871
2,25.0,2420,64352,223.0,False,2011-05-29,2011-12-25,AKAM1,1616C,0,...,J01AA,J01A,J01,J,2011.0,14377.0,65.6,21,37,1871
3,25.0,2420,64352,223.0,False,2011-08-21,2012-02-26,AKAM1,1616C,0,...,J01AA,J01A,J01,J,2011.0,14377.0,65.6,21,37,1871
4,25.0,2420,64352,223.0,False,2011-08-21,2012-05-06,AKAM1,1616C,0,...,J01AA,J01A,J01,J,2011.0,14377.0,65.6,21,37,1871
5,25.0,2420,64352,223.0,False,2012-03-11,2012-10-28,AKAM1,1616C,0,...,J01AA,J01A,J01,J,2012.0,14260.0,64.9,21,37,1871
6,25.0,2420,64352,223.0,False,2012-03-11,2013-01-06,AKAM1,1616C,0,...,J01AA,J01A,J01,J,2012.0,14260.0,64.9,21,37,1871
31,51.0,1016,22067,2705.0,False,2011-09-04,2011-10-02,DOX7,2711Q,0,...,J01AA,J01A,J01,J,2011.0,22263.0,67.3,29,0,29
32,51.0,1016,22067,2705.0,False,2011-09-04,2011-10-30,DOX7,2711Q,0,...,J01AA,J01A,J01,J,2011.0,22263.0,67.3,29,0,29
33,51.0,1016,22067,2705.0,False,2011-09-04,2011-11-27,DOX7,2711Q,0,...,J01AA,J01A,J01,J,2011.0,22263.0,67.3,29,0,29


In [45]:

data_pts = []
slider_steps = []
original_dfs = []

for date_item in tqdm(date_list):

    tmp_df = getRecordsBetweenDates(date_item[0],date_item[1])

    x = np.transpose(np.array(tmp_df[['long']]))[0]
    y = np.transpose(np.array(tmp_df[['lat']]))[0]

    xv, yv = np.meshgrid(lat_grid[:(grid_res)], lon_grid[:(grid_res)])

    #H, xedges, yedges = np.histogram2d(x,y,bins=(lon_grid,lat_grid))
    count_matrix = GenerateCountMatrixEfficientlyV2(tmp_df)

    H = div0(count_matrix.T, pop_matrix)*1000

    H = H.reshape((grid_res)*(grid_res),1)
    xv = xv.reshape((grid_res)*(grid_res),1)
    yv = yv.reshape((grid_res)*(grid_res),1)
    
    perc = np.percentile(H,99.5)
    H = np.array([np.min([i[0],perc]) for i in H])
    H = H.reshape((grid_res)*(grid_res),1)
    
    df = pd.DataFrame(np.concatenate((H,xv,yv), axis = 1), columns=['density','lat','lon'])
    df_reduced = df[df['density'] > 0]
    df_reduced = df_reduced[df_reduced['density'] < 0.95*perc]
    #df_reduced = df
    
    
    data_pt = [ dict(
        lat = df_reduced['lat'],
        lon = df_reduced['lon'],
        text = df_reduced['density'].astype(str),
        marker = dict(
            symbol = "square-dot",
            color = df_reduced['density'],
            colorscale= colscl,
            reversescale = False,
            opacity = 0.7,
            size = 8,
            colorbar = dict(
                thickness = 10,
                titleside = "right",
                outlinecolor = "rgb(212,212,212)",
                ticks = "outside",
                ticklen = 3,
                showticksuffix = "last",
                dtick = 0.5
            ),
        ),
        type = 'scattergeo'
    ) ]
    
    
    slider_step = {
        'args': [
            [date_item[0]],
        {'frame': {'duration': 300, 'redraw': False},
         'mode': 'immediate',
       'transition': {'duration': 300}}
                        ],
                        'label': date_item[0],
                        'method': 'animate'
                    }
    
    if (np.sum(H) > 0):
        original_dfs.append(df_reduced)
    
    if (np.sum(H) > 0):
        data_pts.append({'data' : data_pt, 'name' : date_item[0] })
        slider_steps.append(slider_step)

100%|██████████████████████████████████████████████████████████████████████████████████| 49/49 [05:23<00:00,  7.01s/it]


# total histogram

In [46]:
data = [ dict(
    lat = df_reduced['lat'],
    lon = df_reduced['lon'],
    text = df_reduced['density'].astype(str),
    marker = dict(
        color = df_reduced['density'],
        colorscale= colscl,
        reversescale = False,
        opacity = 0.7,
        size = 10,
        colorbar = dict(
            thickness = 10,
            titleside = "right",
            outlinecolor = "rgb(212,212,212)",
            ticks = "outside",
            ticklen = 3,
            showticksuffix = "last",
            dtick = 0.1
        ),
    ),
    type = 'scattergeo'
) ]

figure = {'data': data_pts[0]['data'],
         'layout': dict(
                            geo = dict(
                            scope = 'world',
                            showland = True,
                            landcolor = "rgb(212, 212, 212)",
                            subunitcolor = "rgb(255, 255, 255)",
                            countrycolor = "rgb(255, 255, 255)",
                            showlakes = True,
                            lakecolor = "rgb(255, 255, 255)",
                            showsubunits = True,
                            showcountries = True,
                            resolution = grid_res,
                            lonaxis = dict(
                                showgrid = True,
                                gridwidth = 0.5,
                                range= lon_spacing,
                                dtick = (lon_spacing[1]-lon_spacing[0])/grid_res*10
                            ),
                            lataxis = dict (
                                showgrid = True,
                                gridwidth = 0.5,
                                range= lat_spacing,
                                dtick = (lat_spacing[1]-lat_spacing[0])/grid_res*10
                            )
                            ),
        updatemenus = [{'type': 'buttons',
                                  'buttons': [{'label': 'Play',
                                               'method': 'animate',
                                               'args': [None, {'frame': {'duration': 500, 'redraw': False},
                                                 'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'quadratic-in-out'}}]},
                                              {
                                                'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                                                'transition': {'duration': 0}}],
                                                'label': 'Pause',
                                                'method': 'animate'
                                            }
                                             ],
                        'direction': 'left',
                        'pad': {'r': 10, 't': 87},
                        'showactive': False,
                        'type': 'buttons',
                        'x': 0.1,
                        'xanchor': 'right',
                        'y': 0,
                        'yanchor': 'top'
                       }],
        sliders = [{
                'active': 0,
                'yanchor': 'top',
                'xanchor': 'left',
                'currentvalue': {
                    'font': {'size': 20},
                    'prefix': 'density as at: ',
                    'visible': True,
                    'xanchor': 'right'
                },
                'transition': {'duration': 300, 'easing': 'cubic-in-out'},
                'pad': {'b': 10, 't': 50},
                'len': 0.9,
                'x': 0.1,
                'y': 0,
                'steps': slider_steps
            }],
        
        title = 'East Australia antibiotic density scatter map'
        ),
          
        'frames' : data_pts
         }


plot(figure)

'file://C:\\Users\\Stoja\\Documents\\.Repositories\\iSel_MelbDatathon2017\\steve\\temp-plot.html'

In [47]:
data = [ dict(
    lat = df_reduced['lat'],
    lon = df_reduced['lon'],
    text = df_reduced['density'].astype(str),
    marker = dict(
        color = df_reduced['density'],
        colorscale= colscl,
        reversescale = False,
        opacity = 0.0,
        size = 0,
        colorbar = dict(
            thickness = 10,
            titleside = "right",
            outlinecolor = "rgb(212,212,212)",
            ticks = "outside",
            ticklen = 3,
            showticksuffix = "last",
            dtick = 0.1
        ),
    ),
    type = 'scattergeo'
) ]

figure = {'data': data,
         'layout': dict(
                            geo = dict(
                            scope = 'world',
                            showland = True,
                            landcolor = "rgb(212, 212, 212)",
                            subunitcolor = "rgb(255, 255, 255)",
                            countrycolor = "rgb(255, 255, 255)",
                            showlakes = True,
                            lakecolor = "rgb(255, 255, 255)",
                            showsubunits = True,
                            showcountries = True,
                            resolution = grid_res,
                            lonaxis = dict(
                                showgrid = True,
                                gridwidth = 0.5,
                                range= lon_spacing,
                                dtick = (lon_spacing[1]-lon_spacing[0])/grid_res*10
                            ),
                            lataxis = dict (
                                showgrid = True,
                                gridwidth = 0.5,
                                range= lat_spacing,
                                dtick = (lat_spacing[1]-lat_spacing[0])/grid_res*10
                            )
                            ),

        
        title = 'East Australia antibiotic density scatter map'
        )
         }


plot(figure)

'file://C:\\Users\\Stoja\\Documents\\.Repositories\\iSel_MelbDatathon2017\\steve\\temp-plot.html'

In [85]:
for i in range(len(slider_steps)):
    slider_steps[i]['args'] = [None]

In [86]:
slider_steps[0]

{'args': [None], 'label': '2011-01-01', 'method': 'animate'}

In [87]:
len(data_pts)

36

In [88]:
pickle.dump( data_pts, open( '..\\..\\Data\\Datathon_2017\\density_grid.pkl', 'wb' ) )

In [89]:
import plotly.graph_objs as go

In [263]:

tmp_df = getRecordsBetweenDates(date_item[0],date_item[1])

x = np.transpose(np.array(tmp_df[['long']]))[0]
y = np.transpose(np.array(tmp_df[['lat']]))[0]

xv, yv = np.meshgrid(lat_grid[:(grid_res)], lon_grid[:(grid_res)])

#H, xedges, yedges = np.histogram2d(x,y,bins=(lon_grid,lat_grid))
count_matrix = GenerateCountMatrixEfficientlyV2(tmp_df)

H = div0(count_matrix.T, pop_matrix)*1000

H = H.reshape((grid_res)*(grid_res),1)
xv = xv.reshape((grid_res)*(grid_res),1)
yv = yv.reshape((grid_res)*(grid_res),1)

perc = np.percentile(H,95)
H = np.array([np.min([i[0],perc]) for i in H])
H = H.reshape((grid_res)*(grid_res),1)

df = pd.DataFrame(np.concatenate((H,xv,yv), axis = 1), columns=['density','lat','lon'])
#df_reduced = df[df['density'] > 0]
#df_reduced = df_reduced[df_reduced['density'] < 0.95*perc]
df_reduced = df

trace1 = go.Scatter(
    x=x, y=y, mode='markers', name='points',
    marker=dict(color='rgb(102,0,0)', size=2, opacity=0.4)
)

trace2 = go.Histogram2dcontour(
    x=x, y=y, name='density', ncontours=120,
    colorscale='Hot', reversescale=True, showscale=True,
    contours=dict(
            start=0.5,
            end=5,
            size=2)
)

trace3 = go.Contour(
    z = df_reduced['density'],
    x = df_reduced['lon'],
    y = df_reduced['lat'],
    contours=dict(
            start=0.5,
            end=5,
            size=2,
        )
)

data = [trace2]


In [236]:
trace2 = go.Histogram2dcontour(
    x=x, y=y, name='density', #ncontours=10,
    colorscale='Hot', reversescale=True, showscale=True,
    contours=dict(
            start=100,
            end=500,
            size=100)
)

data = [trace2]

In [341]:
trace3 = go.Contour(
    z = df_reduced['density'],
    x = df_reduced['lon'],
    y = df_reduced['lat'],
    colorscale='Hot',
    opacity = 0.5,
    reversescale=True,
    contours=dict(
            start=1,
            end=6,
            size=0.5,
        )
)

data = [trace3]

In [324]:
lat_spacing = [ -42, -25 ]
lon_spacing = [ 135, 155 ]

In [337]:
layout = go.Layout(
    images= [dict(
                  source= "aus_capture.png",
                  xref= "x",
                  yref= "y",
                  x= 135,
                  y= -26,
                  sizex= 20,
                  sizey= 16,
                  sizing= "stretch",
                  opacity= 0.8,
                  layer= "below")],
    showlegend=False,
    autosize=False,
    width=600,
    height=550,
    xaxis=dict(
        domain=[135, 155],
        showgrid=False,
        zeroline=False
    ),
    yaxis=dict(
        domain=[-42, -25],
        showgrid=False,
        zeroline=False
    ),
    margin=dict(
        t=50
    ),
    hovermode='closest',
    bargap=0
)

fig = go.Figure(data=data, layout = layout)

In [328]:
iplot(fig)

In [165]:

contour_pts = []
slider_steps = []
original_dfs = []

for date_item in tqdm(date_list):

    tmp_df = getRecordsBetweenDates(date_item[0],date_item[1])

    x = np.transpose(np.array(tmp_df[['long']]))[0]
    y = np.transpose(np.array(tmp_df[['lat']]))[0]

    xv, yv = np.meshgrid(lat_grid[:(grid_res)], lon_grid[:(grid_res)])

    #H, xedges, yedges = np.histogram2d(x,y,bins=(lon_grid,lat_grid))
    count_matrix = GenerateCountMatrixEfficientlyV2(tmp_df)

    H = div0(count_matrix.T, pop_matrix)*1000

    H = H.reshape((grid_res)*(grid_res),1)
    xv = xv.reshape((grid_res)*(grid_res),1)
    yv = yv.reshape((grid_res)*(grid_res),1)

    perc = np.percentile(H,99)
    H = np.array([np.min([i[0],perc]) for i in H])
    H = H.reshape((grid_res)*(grid_res),1)

    df = pd.DataFrame(np.concatenate((H,xv,yv), axis = 1), columns=['density','lat','lon'])
    #df_reduced = df[df['density'] > 0]
    #df_reduced = df_reduced[df_reduced['density'] < 0.95*perc]
    df_reduced = df
    
    
    trace3 = go.Contour(
        z = df_reduced['density'],
        x = df_reduced['lon'],
        y = df_reduced['lat'],
        colorscale='Hot',
        opacity = 0.5,
        reversescale=True,
        contours=dict(
                start=0.01,
                end=3.51,
                size=0.25,
            )
    )
    
    
    slider_step = {
        'args': [
            [date_item[0]],
        {'frame': {'duration': 300, 'redraw': False},
         'mode': 'immediate',
       'transition': {'duration': 300}}
                        ],
                        'label': date_item[0],
                        'method': 'animate'
                    }
    
    if (np.sum(H) > 0):
        original_dfs.append(df_reduced)
    
    if (np.sum(H) > 0):
        contour_pts.append({'data' : [trace3], 'name' : date_item[0] })
        slider_steps.append(slider_step)

100%|██████████████████████████████████████████████████████████████████████████████████| 49/49 [05:21<00:00,  6.65s/it]


In [180]:


figure = {'data': contour_pts[0]['data'],
         'layout': dict(
    images= [dict(
                  #source= "http://i.imgur.com/4xIGswl.png",
                   source= "http://i.imgur.com/2mf9rwz.png",
                  xref= "x",
                  yref= "y",
                  x= lon_spacing[0],
                  y= lat_spacing[1]-1,
                  sizex= lon_spacing[1]-lon_spacing[0]+0.15,
                  sizey= lat_spacing[1]-lat_spacing[0]-1,
                  sizing= "stretch",
                  opacity= 0.8,
                  layer= "below")],
    showlegend=True,
    xaxis=dict(
        domain=[135, 155],
        showgrid=False,
        zeroline=False
    ),
    yaxis=dict(
        domain=[-42, -25],
        showgrid=False,
        zeroline=False
    ),

    updatemenus = [{'type': 'buttons',
                                  'buttons': [{'label': 'Play',
                                               'method': 'animate',
                                               'args': [None, {'frame': {'duration': 500, 'redraw': False},
                                                 'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'quadratic-in-out'}}]},
                                              {
                                                'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                                                'transition': {'duration': 0}}],
                                                'label': 'Pause',
                                                'method': 'animate'
                                            }
                                             ],
                        'direction': 'left',
                        'pad': {'r': 10, 't': 87},
                        'showactive': False,
                        'type': 'buttons',
                        'x': 0.1,
                        'xanchor': 'right',
                        'y': 0,
                        'yanchor': 'top'
                       }],
    sliders = [{
                'active': 0,
                'yanchor': 'top',
                'xanchor': 'left',
                'currentvalue': {
                    'font': {'size': 20},
                    'prefix': 'density as at: ',
                    'visible': True,
                    'xanchor': 'right'
                },
                'transition': {'duration': 300, 'easing': 'cubic-in-out'},
                'pad': {'b': 10, 't': 50},
                'len': 0.9,
                'x': 0.1,
                'y': 0,
                'steps': slider_steps
            }],
        
    title = 'East Australia antibiotic density contour map'
    ),
          
        'frames' : contour_pts
         }






#fig = go.Figure(data=data, layout = layout)

In [181]:
plot(figure)

'file://C:\\Users\\Stoja\\Documents\\.Repositories\\iSel_MelbDatathon2017\\steve\\temp-plot.html'

In [145]:
date_item = date_list[1]

tmp_df = getRecordsBetweenDates(date_item[0],date_item[1])

x = np.transpose(np.array(tmp_df[['long']]))[0]
y = np.transpose(np.array(tmp_df[['lat']]))[0]

xv, yv = np.meshgrid(lat_grid[:(grid_res)], lon_grid[:(grid_res)])

#H, xedges, yedges = np.histogram2d(x,y,bins=(lon_grid,lat_grid))
count_matrix = GenerateCountMatrixEfficientlyV2(tmp_df)

H = div0(count_matrix.T, pop_matrix)*1000

H = H.reshape((grid_res)*(grid_res),1)
xv = xv.reshape((grid_res)*(grid_res),1)
yv = yv.reshape((grid_res)*(grid_res),1)

perc = np.percentile(H,99)
H = np.array([np.min([i[0],perc]) for i in H])
H = H.reshape((grid_res)*(grid_res),1)

df = pd.DataFrame(np.concatenate((H,xv,yv), axis = 1), columns=['density','lat','lon'])
#df_reduced = df[df['density'] > 0]
#df_reduced = df_reduced[df_reduced['density'] < 0.95*perc]
df_reduced = df


trace3 = go.Contour(
    z = df_reduced['density'],
    x = df_reduced['lon'],
    y = df_reduced['lat'],
    colorscale='Hot',
    opacity = 0.5,
    reversescale=True,
    contours=dict(
            start=0.01,
            end=3.51,
            size=0.25,
        )
)

data = [trace3]

In [150]:
layout = go.Layout(
    images= [dict(
                  source= "http://i.imgur.com/4xIGswl.png",
                  xref= "x",
                  yref= "y",
                  x= 135,
                  y= -26,
                  sizex= 20,
                  sizey= 16,
                  sizing= "stretch",
                  opacity= 0.8,
                  layer= "below")],
    showlegend=False,
    autosize=False,
    width=600,
    height=550,
    xaxis=dict(
        domain=[135, 155],
        showgrid=False,
        zeroline=False
    ),
    yaxis=dict(
        domain=[-42, -25],
        showgrid=False,
        zeroline=False
    ),
    margin=dict(
        t=50
    ),
    hovermode='closest',
    bargap=0
)

fig = go.Figure(data=data, layout = layout)

In [151]:
plot(fig)

'file://C:\\Users\\Stoja\\Documents\\.Repositories\\iSel_MelbDatathon2017\\steve\\temp-plot.html'