# Load data

In [None]:
import json
import pandas as pd

In [17]:
# Read the raw census-tract GeoJSON. The encoding is pinned to UTF-8 so the
# file decodes the same way on every platform instead of depending on the
# locale's default encoding.
with open('data/unprocessed/sf_censustracts.json', 'r', encoding='utf-8') as f:
    zones = json.load(f)

In [51]:
# Load the raw bike-trip table. Later cells read its start_station_id,
# start_station_latitude and start_station_longitude columns.
bikes = pd.read_csv('data/unprocessed/bikes_2018_q1.csv')

Bounding box from data_loader:

In [53]:
# Latitude/longitude limits of the bounding box used to filter both the
# zones and the bike trips below.
SF_BBOX = dict(
    lat_lower=37.6373674248,
    lon_lower=-122.5859431448,
    lat_upper=37.8138042743,
    lon_upper=-122.3485049903,
)

# Make and export filtered zones

In [67]:
# Distinct geometry types across all features, to see how the coordinates
# are nested before writing the filter.
{feature['geometry']['type'] for feature in zones['features']}

{'MultiPolygon'}

In [68]:
# Distinct lengths of the first entry of each feature's coordinates, i.e.
# how many rings the first polygon of each MultiPolygon has.
{len(feature['geometry']['coordinates'][0]) for feature in zones['features']}

{1, 2, 3}

First we filter the zones down to those in SF: a zone is kept if at least one vertex of one of its exterior rings lies inside the bounding box.

In [79]:
def in_bbox(coordinates):
    """Return whether a position lies strictly inside ``SF_BBOX``.

    Parameters
    ----------
    coordinates : sequence
        Position given as ``(longitude, latitude)``. Any further elements
        (for example an altitude) are ignored.

    Returns
    -------
    bool
        True when the latitude and the longitude are both strictly between
        the box's lower and upper bounds; a point exactly on an edge counts
        as outside.
    """
    # Slice instead of unpacking the whole sequence so that positions with
    # an optional third element do not raise ValueError.
    longitude, latitude = coordinates[:2]
    return (
        SF_BBOX['lat_lower'] < latitude < SF_BBOX['lat_upper'] and
        SF_BBOX['lon_lower'] < longitude < SF_BBOX['lon_upper']
    )

In [80]:
def feature_in_bbox(feature):
    """Return whether any exterior-ring vertex of ``feature`` is in the bbox.

    ``feature['geometry']['coordinates']`` is walked as a list of polygons;
    only the first ring of each polygon is inspected, and every vertex of
    that ring is checked with ``in_bbox``. Returns True as soon as one
    vertex is inside, and False if none are (or there are no polygons).
    """
    polygons = feature['geometry']['coordinates']
    return any(
        in_bbox(vertex)
        for polygon in polygons
        for vertex in polygon[0]  # first ring only; later rings are skipped
    )

In [81]:
features = zones['features']
# Keep only the features that feature_in_bbox accepts.
sf_features = [feature for feature in features if feature_in_bbox(feature)]

In [82]:
# Number of features in the source file, before the bounding-box filter.
len(features)

2710

In [100]:
# Number of features that passed the bounding-box filter.
len(sf_features)

235

Then we create the new, slimmed-down GeoJSON zones, keeping only each zone's geometry, name, and id.

In [88]:
def parse_feature(feature):
    """Build a slimmed-down GeoJSON Feature from a source feature.

    The geometry is passed through unchanged. Of the source properties only
    ``DISPLAY_NAME`` (exposed as ``name``) and ``MOVEMENT_ID`` (converted
    with ``int`` and exposed as ``id``) are kept.
    """
    source_properties = feature['properties']
    geometry = feature['geometry']
    slim_properties = {
        'name': source_properties['DISPLAY_NAME'],
        'id': int(source_properties['MOVEMENT_ID']),
    }
    return {
        'geometry': geometry,
        'properties': slim_properties,
        'type': 'Feature',
    }

In [96]:
# Wrap the slimmed-down features in a GeoJSON FeatureCollection.
sf_zones = {
    'type': 'FeatureCollection',
    'features': [parse_feature(feature) for feature in sf_features],
}

In [99]:
# Serialise the filtered zones and write them out in one go.
with open('data/sf_zones.json', 'w') as zones_file:
    zones_file.write(json.dumps(sf_zones))

# Make and export station csv

Again we start by filtering to the SF bounding box.

In [161]:
# Number of rows in the raw bike table, before filtering.
len(bikes)

312902

In [157]:
# Boolean mask: True for rows whose start station lies strictly inside
# SF_BBOX. This is the same strict-inequality test that in_bbox applies,
# done on whole columns instead of calling a Python function once per row.
# Missing coordinates compare as False, so those rows are excluded.
start_latitudes = bikes['start_station_latitude']
start_longitudes = bikes['start_station_longitude']
bikes_in_bbox = (
    (start_latitudes > SF_BBOX['lat_lower']) &
    (start_latitudes < SF_BBOX['lat_upper']) &
    (start_longitudes > SF_BBOX['lon_lower']) &
    (start_longitudes < SF_BBOX['lon_upper'])
)

In [159]:
# Keep only the rows flagged by the bounding-box mask.
bikes_in_sf = bikes.loc[bikes_in_bbox]

In [162]:
# Number of rows left after the bounding-box filter.
len(bikes_in_sf)

233027

Now we can create the station CSV, with one row per start station.

In [163]:
# For every start station, the array of distinct latitudes recorded for it.
stations_grouped = bikes_in_sf.groupby('start_station_id')
bikes_latitudes = stations_grouped['start_station_latitude'].unique()
len(bikes_latitudes)

124

In [164]:
# For every start station, the array of distinct longitudes recorded for it.
stations_grouped = bikes_in_sf.groupby('start_station_id')
bikes_longitudes = stations_grouped['start_station_longitude'].unique()
len(bikes_longitudes)

124

In [165]:
# One record per station: its id plus the first of the distinct latitudes
# and longitudes recorded for it. All records are collected first and the
# DataFrame is built once, rather than growing it one row at a time.
station_ids = bikes_latitudes.index
station_records = [
    {
        'id': int(station_id),
        'latitude': bikes_latitudes[int(station_id)][0],
        'longitude': bikes_longitudes[int(station_id)][0],
    }
    for station_id in station_ids
]
# Passing columns= keeps the expected layout even when there are no stations.
sf_bikes = pd.DataFrame(station_records, columns=['id', 'latitude', 'longitude'])
sf_bikes['id'] = sf_bikes['id'].astype('int')

In [166]:
# Number of station rows; should equal the number of grouped station ids.
len(sf_bikes)

124

In [168]:
# Write the station table without the DataFrame's row index.
sf_bikes.to_csv('data/sf_stations.csv', index=False)