# San Diego CityIQ Pedestrian Events

This dataset contains scraped pedestrian events from the San Diego CityIQ system, starting from October 2018.

Before building this package, the pedestrian events must be scraped and cached with:

```
    $ ciq_events -t 20180801 -s ped
```

The dataset includes only the ``locationUid`` to identify the locations. Refer to the [San Diego City IQ Assets and Locations](https://data.sandiegodata.org/dataset/sandiego-gov-cityiq_objects) dataset for complete metadata for the locations.

In [1]:
# Plotting, data-packaging and dataframe libraries used throughout the notebook.
import seaborn as sns
import metapack as mp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display 

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
sns.set_context('notebook')
# NOTE(review): presumably sets up metapack's Jupyter integration — confirm.
mp.jupyter.init()
# Reference to this notebook's package; passed to mp.open_package() in the next cell.
this_package_name = 'cityiq.io-events-sandiego-pedevt.ipynb'

# Automatically re-import edited modules before each cell executes.
%load_ext autoreload
%autoreload 2
# Last expression of the cell: displays the installed pandas version.
pd.__version__

'0.24.2'

In [2]:
# Open the metapack package named by this_package_name (set in the setup cell).
pkg = mp.open_package(this_package_name)
# Bare expression so the notebook displays the package object.
pkg

In [3]:
%matplotlib inline
# CityIQ client library and the event scrapers used to read the cached events.
import cityiq as ciq
from cityiq.scrape import LocationEventScraper, PedLocationEventScraper
# Date handling for building the scrape window.
from datetime import datetime, timezone, timedelta
from dateutil.relativedelta import relativedelta
from dateutil.parser import parse as parse_dt
from cityiq import Config, CityIq
import gc

import warnings
# NOTE: this silences every warning category for the rest of the session,
# not only the tqdm warning named in the trailing comment.
warnings.simplefilter("ignore") # tqdm autonotebook experimental warnings

In [4]:
# Time window covered by the scraper. Both endpoints are timezone-aware, so
# they compare correctly even though start_time carries the machine's local
# zone and end_time is in UTC.
tz = datetime.now(timezone.utc).astimezone().tzinfo
start_time = parse_dt('2018-08-01').replace(tzinfo=tz)
# datetime.utcnow() is deprecated and returns a naive value; request an aware
# UTC timestamp directly instead. Only the day is reset, so end_time is the
# first of the current month at the current UTC time of day.
end_time = datetime.now(timezone.utc).replace(day=1)
# Uncomment to limit the window to a single month while testing.
#end_time = start_time +  relativedelta(months=1)

config = Config()

# NOTE(review): the meaning of the second positional argument (None) is not
# visible in this notebook — confirm against PedLocationEventScraper.
s = PedLocationEventScraper(config, None, start_time, end_time)
# Uncomment to (re)build the scraper's CSV cache — presumably what
# cached_dataframe() reads later; confirm against the cityiq package.
#s.cache_csv_files()

In [8]:
%%time 
df = s.cached_dataframe()

df['time'] =  pd.to_datetime(df['time']/1000,unit='s')\
                .dt.tz_localize('UTC', ambiguous=True, nonexistent='shift_forward')\
                .dt.tz_convert('America/Los_Angeles')\
                .dt.tz_localize(None)\
                .apply(lambda v: v.replace(nanosecond=0, microsecond=0,  second=0, minute=int(v.minute/15)*15))

df.drop(columns=['direction','speed', 'index'], inplace=True)
df = df.groupby(['time','location_uid']).sum()
gc.collect() 

len(df)




KeyboardInterrupt: 

In [None]:
# Write the aggregated frame to pedestrians.csv in the working directory and
# report how long the write took. The (time, location_uid) group keys are the
# frame's index, which to_csv writes out by default.
%time df.to_csv('pedestrians.csv')

In [7]:
if False: # Save for later
    # Disabled: look up the community name for each location from the
    # separate assets-and-locations package.
    import metapack as mp

    alpkg = mp.open_package(
        'http://library.metatab.org/sandiego.gov-cityiq_objects-4.csv'
    )
    locations = alpkg.resource('locations').dataframe()

    # Keep only the join key and the community name, renaming the key to
    # match the 'location_uid' column used by the events frame.
    loc = (
        locations[['locationuid', 'community_name']]
        .rename(columns={'locationuid': 'location_uid'})
    )
    loc.head()