# Geographic Analysis

In [2]:
import io, os, sys, types
from sklearn import cross_validation
from sklearn import neighbors
from sklearn import grid_search
from sklearn import metrics
from sklearn import linear_model

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="whitegrid", font_scale=1)
%matplotlib inline

In [3]:
import nbformat

from IPython import get_ipython
from IPython.core.interactiveshell import InteractiveShell

In [4]:
def find_notebook(fullname, path=None):
    """find a notebook, given its fully qualified name and an optional path
    
    This turns "foo.bar" into "foo/bar.ipynb"
    and tries turning "Foo_Bar" into "Foo Bar" if Foo_Bar
    does not exist.
    """
    name = fullname.rsplit('.', 1)[-1]
    if not path:
        path = ['']
    for d in path:
        nb_path = os.path.join(d, name + ".ipynb")
        if os.path.isfile(nb_path):
            return nb_path
        # let import Notebook_Name find "Notebook Name.ipynb"
        nb_path = nb_path.replace("_", " ")
        if os.path.isfile(nb_path):
            return nb_path

In [5]:
class NotebookLoader(object):
    """Module Loader for IPython Notebooks"""
    def __init__(self, path=None):
        self.shell = InteractiveShell.instance()
        self.path = path
    
    def load_module(self, fullname):
        """import a notebook as a module"""
        path = find_notebook(fullname, self.path)
        
        print ("importing notebook from %s" % path)
                                       
        # load the notebook object
        nb = nbformat.read(path, as_version=4)
        
        
        # create the module and add it to sys.modules
        # if name in sys.modules:
        #    return sys.modules[name]
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        sys.modules[fullname] = mod
        
        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__
        
        try:
          for cell in nb.cells:
            if cell.cell_type == 'code':
                # transform the input to executable Python
                code = self.shell.input_transformer_manager.transform_cell(cell.source)
                # run the code in themodule
                exec(code, mod.__dict__)
        finally:
            self.shell.user_ns = save_user_ns
        return mod

In [6]:
class NotebookFinder(object):
    """Module finder that locates IPython Notebooks"""
    def __init__(self):
        self.loaders = {}
    
    def find_module(self, fullname, path=None):
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return
        
        key = path
        if path:
            # lists aren't hashable
            key = os.path.sep.join(path)
        
        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]

In [7]:
sys.meta_path.append(NotebookFinder())

In [8]:
# Load data
df = pd.read_csv("data/prediction-1.csv")

# Check head
df.head()

Unnamed: 0,Id,Block,Latitude,Longitude,AddressAccuracy,Station,Tmax,Tmin,Tavg,Depart,...,Trap_T233,Trap_T234,Trap_T235,Trap_T236,Trap_T237,Trap_T238,Trap_T900,Trap_T903,WnvPresent,WnvPresent_Probability
0,1,41,41.95469,-87.800991,9,1,86,61,74,7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,1,41,41.95469,-87.800991,9,2,86,66,76,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,2,41,41.95469,-87.800991,9,1,86,61,74,7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,2,41,41.95469,-87.800991,9,2,86,66,76,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,3,41,41.95469,-87.800991,9,1,86,61,74,7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [9]:
def process_presence(presence):
    presence = int(presence)
    if presence == 0:
        return 0.1
    else:
        return 0.9

In [10]:
import io, os, sys, types

import gmaps
gmaps.configure(api_key="AIzaSyAMy1_SO9GgGXqqvF5PmKAyVVoA2RK9hRc")

In [11]:
predicted_data = df

In [12]:
joined_data_list = []
current_tuple = ()
for index, row in predicted_data.iterrows():
    #print row
    presence = process_presence(row['WnvPresent'])
    print presence
    current_tuple = (row['Latitude'], row['Longitude'], presence)
    joined_data_list.append(current_tuple)

0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1
0.1


In [15]:
heat_map = gmaps.Map()
heat_map.add_layer(gmaps.WeightedHeatmap(data=joined_data_list))
heat_map

In [16]:
predicted_data.columns.values

array(['Id', 'Block', 'Latitude', 'Longitude', 'AddressAccuracy',
       'Station', 'Tmax', 'Tmin', 'Tavg', 'Depart', 'DewPoint', 'WetBulb',
       'Heat', 'Cool', 'Sunrise', 'Sunset', 'CodeSum', 'Depth', 'Water1',
       'SnowFall', 'PrecipTotal', 'StnPressure', 'SeaLevel', 'ResultSpeed',
       'ResultDir', 'AvgSpeed', 'Species_CULEX ERRATICUS',
       'Species_CULEX PIPIENS', 'Species_CULEX PIPIENS/RESTUANS',
       'Species_CULEX RESTUANS', 'Species_CULEX SALINARIUS',
       'Species_CULEX TARSALIS', 'Species_CULEX TERRITANS',
       'Species_UNSPECIFIED CULEX', 'Street_  W ARMITAGE AVENUE',
       'Street_ E 105TH ST', 'Street_ E 111TH ST', 'Street_ E 115TH ST',
       'Street_ E 118TH ST', 'Street_ E 130TH ST', 'Street_ E 136TH ST',
       'Street_ E 138TH ST', 'Street_ E 67TH ST', 'Street_ E 91ST PL',
       'Street_ E 91ST ST', 'Street_ E RANDOLPH ST',
       'Street_ N ASHLAND AVE', 'Street_ N ASHLAND AVE OVERPASS',
       'Street_ N AUSTIN AVE', 'Street_ N AVONDALE AVE',
    

In [14]:
import plotly.plotly as py
import plotly.graph_objs as go

# Create random data with numpy
import numpy as np



# Create traces
trace0 = go.Scatter(
    x = predicted_data['Date'],
    y = predicted_data['Tavg'],
    mode = 'markers',
    name = 'markers',
     marker = dict(
        size = 10,
        color = predicted_data['WnvPresent'],
        line = dict(
            width = 2,
        )
    )
)

layout = go.Layout(
    title='The Impact of Temperature on the Presence of West Nile Virus',
    xaxis=dict(
        title='Date',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    ),
    yaxis=dict(
        title='Temperature (F)',
        titlefont=dict(
            family='Courier New, monospace',
            size=18,
            color='#7f7f7f'
        )
    )
)


fig = go.Figure(data=[trace0], layout=layout)
py.plot(fig, filename='styling-names')



KeyError: 'Date'