In [None]:
%pylab inline
import numpy as np
import pandas as pd

In [None]:
# Apply matplotlib's 'bmh' style sheet to subsequent figures.
# NOTE(review): the following cell calls `sns.set()`, which presumably replaces
# these settings with seaborn's theme — confirm which style is intended.
plt.style.use('bmh')

In [None]:
import seaborn as sns
# Switch plotting defaults to seaborn's theme (called with no arguments, so
# seaborn's own defaults are used).
sns.set()

# Loading data

In [None]:
# Read the zipped accidents CSV into the working frame `d`.
accidents_path = 'data/accidents_2005_to_2007.csv.zip'
d = pd.read_csv(accidents_path)

In [None]:
# Preview the first rows of the freshly loaded table.
d.head()

In [None]:
# Column dtypes and non-null counts; memory_usage="deep" asks pandas to
# measure the actual memory held by object columns rather than estimate it.
d.info(memory_usage="deep")

In [None]:
# Peek at the raw Date values before they are combined with Time and parsed.
d.Date.head()

In [None]:
# Peek at the raw Time values before they are combined with Date and parsed.
d.Time.head()

In [None]:
# Join Date and Time into one space-separated string column, 'dt'.
# Missing values are substituted with '00:00' before joining.
# NOTE(review): na_rep applies to a missing Date as well as a missing Time.
d['dt'] = d['Date'].str.cat(d['Time'], sep=' ', na_rep='00:00')

In [None]:
# Parse the combined strings into timestamps; dayfirst=True tells pandas to
# read ambiguous dates as day/month rather than month/day.
d['date_time'] = pd.to_datetime(d['dt'], dayfirst=True)

In [None]:
# Re-check the schema now that the 'dt' and 'date_time' columns were added.
d.info()

In [None]:
# List every column name, as input for choosing which ones to keep.
d.columns

In [None]:
# Columns retained for the analysis; every other column is dropped from `d`.
# Each name appears exactly once ('Weather_Conditions' used to be listed twice)
# and the Python 2 `u''` prefixes are gone.
COLS = [
    'Accident_Index',
    'Longitude',
    'Latitude',
    'Accident_Severity',
    'Number_of_Vehicles',
    'Number_of_Casualties',
    'Weather_Conditions',
    'Day_of_Week',
    'Road_Surface_Conditions',
    'Special_Conditions_at_Site',
    'Urban_or_Rural_Area',
    'Carriageway_Hazards',
    'date_time',
]

In [None]:
# Drop, in place, every column that is not named in COLS.
unwanted = [name for name in d.columns if name not in COLS]
d.drop(columns=unwanted, inplace=True)

In [None]:
# Index the frame by timestamp so the time-based resample/Grouper calls below
# can operate on it directly.
d = d.set_index('date_time')

In [None]:
# Confirm the frame is now indexed by 'date_time'.
d.index

# Resampling time series

In [None]:
# Number of rows falling into each calendar-day bin.
daily = d.resample('D').size()
daily

In [None]:
# Inspect the index produced by the daily resample.
daily.index

In [None]:
# Sanity-check the resampled index: ascending order and no repeated days.
# `Index.is_monotonic` was removed in pandas 2.0; `is_monotonic_increasing`
# is the equivalent attribute and exists in older releases too.
daily.index.is_monotonic_increasing, daily.index.is_unique

In [None]:
# Daily counts (thin line) overlaid with weekly counts divided by 7 (red line).
plt.figure(figsize=(8, 3), dpi=150)
ax = plt.gca()

daily.plot(ax=ax, linewidth=0.5)

weekly_per_day = d.resample('W').size() / 7.
weekly_per_day.plot(ax=ax, linewidth=1, color='firebrick')

plt.ylabel('accidents')
plt.xlabel('month');

In [None]:
# Mean Number_of_Vehicles per day, with the per-week mean drawn on top in red.
plt.figure(figsize=(12, 5))
ax = plt.gca()

vehicles_daily = d.resample('D').Number_of_Vehicles.mean()
vehicles_weekly = d.resample('W').Number_of_Vehicles.mean()

vehicles_daily.plot(ax=ax)
vehicles_weekly.plot(ax=ax, color='firebrick')

plt.ylabel('vehicles involved')
plt.xlabel('month');

In [None]:
# Stacked bars: rows per month, split by Urban_or_Rural_Area.
plt.figure(figsize=(12, 6))
ax = plt.gca()

monthly_by_area = (
    d.groupby([pd.Grouper(freq='1M'), 'Urban_or_Rural_Area'])
    .size()
    .unstack()  # one column per Urban_or_Rural_Area value
)
monthly_by_area.plot(kind='bar', stacked=True, alpha=0.6, linewidth=2, ax=ax);

In [None]:
# How often each Weather_Conditions value occurs.
d.Weather_Conditions.value_counts()

In [None]:
# Stacked bars: rows per month, split by Accident_Severity.
plt.figure(figsize=(12, 6))
ax = plt.gca()

monthly_by_severity = (
    d.groupby([pd.Grouper(freq='1M'), 'Accident_Severity'])
    .size()
    .unstack()  # one column per Accident_Severity value
)
monthly_by_severity.plot(kind='bar', stacked=True, alpha=0.6, linewidth=2, ax=ax);

In [None]:
# Weekly row counts per Accident_Severity, drawn on the 12x6 figure created here.
plt.figure(figsize=(12, 6))
ax = plt.gca()

weekly_by_severity = d.groupby('Accident_Severity').resample('W').size().T

# Pass the axes explicitly, as the other plotting cells do. Without `ax=`,
# DataFrame.plot opens its own default-sized figure and the figure created
# above is left blank.
weekly_by_severity.plot(alpha=0.6, linewidth=2, ax=ax)

plt.legend(loc=0);

In [None]:
# Earliest timestamp present in the index.
d.index.min()

In [None]:
# Weekly counts per Accident_Severity; 'W' is pandas' alias for weeks ending
# on Sunday.
d.groupby('Accident_Severity').resample('W').size().T

In [None]:
# Same counts with the weekly bins anchored to end on Monday ('W-MON').
d.groupby('Accident_Severity').resample('W-MON').size().T

In [None]:
# Weekly counts per Weather_Conditions value.
d.groupby('Weather_Conditions').resample('W').size()

# GeoPandas

In [None]:
import geopandas as gpd
from shapely.geometry import Polygon, box

In [None]:
# Build a GeoDataFrame of accident points from the Longitude/Latitude columns.
# "Number_of_Casualties" is selected once only; selecting it twice produced two
# identically named columns.
# NOTE(review): no CRS is attached; the coordinates are presumably WGS84
# lon/lat — consider passing crs="EPSG:4326" after confirming.
gd = gpd.GeoDataFrame(
    d[["Accident_Index", "Accident_Severity", "Number_of_Vehicles", "Number_of_Casualties"]],
    geometry=gpd.points_from_xy(d["Longitude"], d["Latitude"]),
)

In [None]:
# IPython help lookup for GeoDataFrame.plot.
# NOTE(review): interactive introspection left over from development —
# consider removing this cell from the finished notebook.
?gd.plot

In [None]:
# Scatter the first 1000 points as a quick visual check of the geometry.
first_points = gd.iloc[:1000]
first_points.plot(markersize=1, figsize=(12, 12))

In [None]:
# Keep only the rows whose point lies within a small rectangle.
# shapely's box takes (minx, miny, maxx, maxy).
bbox = box(-0.2, 51.5, -0.18, 51.52)
inside_bbox = gd.within(bbox)
gd[inside_bbox]