# Weather and Motor Vehicle Collisions

In [4]:
import pandas as pd
import numpy as np
import datetime
from datetime import date
from dateutil.rrule import rrule, DAILY
from __future__ import division
import geoplotlib as glp
from geoplotlib.utils import BoundingBox, DataAccessObject

pd.set_option('display.max_columns', None)
%matplotlib inline  

## Read weather data from csv file

In [116]:
weather = pd.read_csv('datasets/weather_data_nyc_kjfk_clean2.csv')
incidents = pd.read_csv("datasets/NYPD_Motor_Vehicle_Collisions_weather4.csv")
weather['date'] = weather.Year.astype('str') +'/'+ weather.Month.astype('str') +'/'+ weather.Day.astype('str') +'/'+ weather.Hour.astype('str')



In [105]:
conditions = list(weather.Conditions.unique())
condic = {}
for cond in conditions:
    condic[cond] = 0;
condic

{'Blowing Snow': 0,
 'Clear': 0,
 'Fog': 0,
 'Haze': 0,
 'Heavy Rain': 0,
 'Heavy Snow': 0,
 'Heavy Thunderstorms and Rain': 0,
 'Ice Pellets': 0,
 'Light Drizzle': 0,
 'Light Freezing Drizzle': 0,
 'Light Freezing Rain': 0,
 'Light Ice Pellets': 0,
 'Light Rain': 0,
 'Light Rain Showers': 0,
 'Light Snow': 0,
 'Light Thunderstorms and Rain': 0,
 'Mist': 0,
 'Mostly Cloudy': 0,
 'Overcast': 0,
 'Partly Cloudy': 0,
 'Patches of Fog': 0,
 'Rain': 0,
 'Scattered Clouds': 0,
 'Shallow Fog': 0,
 'Snow': 0,
 'Squalls': 0,
 'Thunderstorm': 0,
 'Thunderstorms and Rain': 0,
 'Thunderstorms with Small Hail': 0,
 'Unknown': 0}

*** Frequency of measured weather conditions from 7/1/2012 to 3/1/2016, on hourly basis ***

In [126]:
for d in weather.date.unique():
    condi = weather[weather.date == d]['Conditions'].iloc[0]
    condic[condi] += 1

condic

{'Blowing Snow': 9,
 'Clear': 7312,
 'Fog': 729,
 'Haze': 377,
 'Heavy Rain': 326,
 'Heavy Snow': 34,
 'Heavy Thunderstorms and Rain': 67,
 'Ice Pellets': 7,
 'Light Drizzle': 984,
 'Light Freezing Drizzle': 27,
 'Light Freezing Rain': 128,
 'Light Ice Pellets': 40,
 'Light Rain': 6061,
 'Light Rain Showers': 3,
 'Light Snow': 1901,
 'Light Thunderstorms and Rain': 179,
 'Mist': 50,
 'Mostly Cloudy': 36032,
 'Overcast': 19924,
 'Partly Cloudy': 14058,
 'Patches of Fog': 24,
 'Rain': 989,
 'Scattered Clouds': 20917,
 'Shallow Fog': 28,
 'Snow': 107,
 'Squalls': 7,
 'Thunderstorm': 102,
 'Thunderstorms and Rain': 48,
 'Thunderstorms with Small Hail': 4,
 'Unknown': 28}

## Get frequency of collision per hour of specific weather condition

In [110]:
conditionCount = {}
for c in incidents.Conditions.unique():
    if (pd.notnull(c)):
        mask = ((incidents.Conditions == c))
        filtered_incidents = incidents[mask]
        conditionCount[c] = filtered_incidents.size
    
conditionCount

{'Blowing Snow': 884,
 'Clear': 1043732,
 'Fog': 116518,
 'Haze': 112200,
 'Heavy Rain': 82246,
 'Heavy Snow': 5644,
 'Heavy Thunderstorms and Rain': 21522,
 'Ice Pellets': 1496,
 'Light Drizzle': 229636,
 'Light Freezing Drizzle': 4760,
 'Light Freezing Rain': 30770,
 'Light Ice Pellets': 13090,
 'Light Rain': 1548632,
 'Light Rain Showers': 1326,
 'Light Snow': 479026,
 'Light Thunderstorms and Rain': 52258,
 'Mist': 9826,
 'Mostly Cloudy': 8756700,
 'Overcast': 3677100,
 'Partly Cloudy': 2912236,
 'Patches of Fog': 2346,
 'Rain': 267410,
 'Scattered Clouds': 5272788,
 'Shallow Fog': 3366,
 'Snow': 45628,
 'Squalls': 1564,
 'Thunderstorm': 29104,
 'Thunderstorms and Rain': 8670,
 'Thunderstorms with Small Hail': 1190,
 'Unknown': 6834}

In [140]:
# Calculate ratios
ratios = {}

for k,v in conditionCount.iteritems():
    conditionCountValue = conditionCount[k]
    weatherConditionCountValue = condic[k]
    ratio =  conditionCountValue / weatherConditionCountValue
    ratios[k] = ratio
    #print "%s: %s" % (k, ratio)
    
# Normalize ratios
#factor = 1.0/sum(ratios.itervalues())
#for k in ratios:
#    ratios[k] = ratios[k]*factor

reference = ratios["Mostly Cloudy"]

for k in ratios:
    ratios[k] = (ratios[k]/reference)*100

ratios

{'Blowing Snow': 40.416402424556189,
 'Clear': 58.735505884222093,
 'Fog': 65.767811542713218,
 'Haze': 122.46145194953262,
 'Heavy Rain': 103.81140835122848,
 'Heavy Snow': 68.305548893989737,
 'Heavy Thunderstorms and Rain': 132.17694868696702,
 'Ice Pellets': 87.938985495188177,
 'Light Drizzle': 96.02686983188994,
 'Light Freezing Drizzle': 72.542260762023915,
 'Light Freezing Rain': 98.915744515628035,
 'Light Ice Pellets': 134.6565715395069,
 'Light Rain': 105.13609507635935,
 'Light Rain Showers': 181.87381091050281,
 'Light Snow': 103.68713316546247,
 'Light Thunderstorms and Rain': 120.12894171441462,
 'Mist': 80.863894389438954,
 'Mostly Cloudy': 100.0,
 'Overcast': 75.941086320212492,
 'Partly Cloudy': 85.241468858291512,
 'Patches of Fog': 40.222092797515046,
 'Rain': 111.25748796850779,
 'Scattered Clouds': 103.72628205847508,
 'Shallow Fog': 49.465679341043348,
 'Snow': 175.46704115161378,
 'Squalls': 91.936212108605815,
 'Thunderstorm': 117.4087346450908,
 'Thunderstorms

In [None]:
# Plot ratios

df = pd.DataFrame(pd.Series(ratios, name="Collision Frequency (Normalized)").sort_values())
df.plot(kind='barh', figsize=(8,8))