In [1]:
import seaborn as sns
import metapack as mp
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display 

%matplotlib inline
sns.set_context('notebook')
mp.jupyter.init()


In [2]:
%%time
# ZIP Package
pkg = mp.open_package('http://library.metatab.org/cityiq.io-pedestrians-san_diego-1.zip')

resource = pkg.resource('pedestrians') # Get a resource
ped = resource.dataframe() # Create a pandas Dataframe

obj = mp.open_package('http://library.metatab.org/cityiq.io-objects-san_diego-5.csv') 
walkways = obj.resource('walkways').dataframe()

ped = ped.merge(walkways[['locationuid','community_name','roadsegid']], on='locationuid', how='left')

tod_intervals = {
    pd.Interval(0, 6, closed='left'): 'night',
    pd.Interval(6, 11, closed='left'): 'morning',
    pd.Interval(11,13, closed='left'): 'lunch',
    pd.Interval(13, 17, closed='left'): 'afternoon',
    pd.Interval(17, 24, closed='left'): 'evening'
}

ped['tod']  = pd.cut(ped.timestamp.dt.hour, pd.IntervalIndex(list(tod_intervals.keys())))
ped['dow'] = ped.timestamp.dt.dayofweek

# There are a lot of really large values that I suspect are probably wrong, so let's clip at the 99% percentile
ped['clipped_count'] = ped['count'].clip(0,ped['count'].quantile(.99))


CPU times: user 2min, sys: 9.97 s, total: 2min 10s
Wall time: 1min 59s


In [3]:
def augment(df):
    df['is_workweek'] = df.dow.apply(lambda v: v < 5)
    df['is_party_day'] = df.dow.apply(lambda v: (v==4 or v==5))
    df['is_dark'] = df.tod.apply(lambda v: (v.left == 17 or v.left == 0))
    df['is_evening'] = df.tod.apply(lambda v: v.left == 17 )
    df['is_party_time'] = (df.is_evening & df.is_party_day)
    df['is_work_time'] = (df.is_workweek & ~df.is_dark)
    df['tod_span'] = df.tod.apply(lambda v: v.length).astype(int)
    return df

In [4]:
roads_pkg = mp.open_package('http://library.metatab.org/sangis.org-roads-sandiego-1.zip')
roads_pkg
roads = roads_pkg.resource('all_roads').dataframe()

In [14]:
t = ped[['roadsegid','tod','dow', 'clipped_count']]
t = t.groupby(['roadsegid','tod','dow'])[['clipped_count']].sum()
t = augment(t.reset_index().copy())
t.head()

t2 = t.groupby(['roadsegid','is_party_time']).max()
t2['rate'] = t2['clipped_count'] / t2.tod_span
party_time = t2.loc[(slice(None), True),:].reset_index()[['roadsegid','rate']].rename(columns={'rate':'party_time_rate'}).set_index('roadsegid')

t2 = t.groupby(['roadsegid','is_work_time']).max()
t2['rate'] = t2['clipped_count'] / t2.tod_span
work_time = t2.loc[(slice(None), True),:].reset_index()[['roadsegid','rate']].rename(columns={'rate':'work_time_rate'}).set_index('roadsegid')

pw_time = party_time.join(work_time)

pw_time['pw_ratio'] = (pw_time.party_time_rate - pw_time.work_time_rate) / (pw_time.party_time_rate + pw_time.work_time_rate)

pw_time = pw_time.join(roads[['roadsegid','geometry']].set_index('roadsegid'))
pw_time.to_csv('party_work_segments.csv')

pw_time.head()

Unnamed: 0_level_0,party_time_rate,work_time_rate,pw_ratio,geometry
roadsegid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
232.0,2268.0,2303.0,-0.007657,LINESTRING (-117.163852180505 32.7198913484443...
235.0,2261.0,2261.0,0.0,LINESTRING (-117.1638103956177 32.713979602404...
236.0,1170.428571,1170.428571,0.0,LINESTRING (-117.1638139829518 32.714639219362...
237.0,3680.571429,3433.714286,0.034699,LINESTRING (-117.1637680999948 32.711467773428...
239.0,3996.0,3377.857143,0.083829,LINESTRING (-117.1637602392135 32.710400888587...


In [15]:
pw_time.party_time_rate.describe()

count     1341.000000
mean      1029.831256
std       1974.472004
min          0.142857
25%         82.285714
50%        297.142857
75%       1132.142857
max      28244.714286
Name: party_time_rate, dtype: float64

In [16]:
pw_time.work_time_rate.describe()

count     1346.000000
mean      1024.234133
std       1682.579363
min          0.428571
25%        115.464286
50%        397.357143
75%       1204.285714
max      21830.857143
Name: work_time_rate, dtype: float64

In [17]:
len(ped)

31502953

In [18]:
ped.locationuid.nunique()

2691