In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon
import pandas as pd
import geopandas as gpd
import folium
from lovelyrita.data import read_data, column_map
from lovelyrita.clean import get_datetime, clean, impute_missing_times

# The seaborn style sheets were renamed to 'seaborn-v0_8' in Matplotlib 3.6
# and the old name was later removed, so use whichever name this
# installation actually provides.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

# Load neighborhood shapes

In [3]:
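# Disabled alternative: take the Chinatown boundary from the Zillow
# neighborhoods shapefile instead of the hand-drawn polygon in the next cell.
# neighborhoods = gpd.read_file("/data/lovely-rita/ZillowNeighborhoods/ZillowNeighborhoods-CA.shp")
# neighborhood = neighborhoods.loc[(neighborhoods.Name == 'Chinatown') &
#                                  (neighborhoods.City == 'Oakland')]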

In [4]:
# Hand-drawn outline of the Chinatown neighborhood, given as
# (longitude, latitude) corner points.
boundary_corners = [
    (-122.272536, 37.802353),
    (-122.274339, 37.799497),
    (-122.269414, 37.797573),
    (-122.267536, 37.800388),
]
geometry = Polygon(boundary_corners)

# Single-row GeoDataFrame holding the boundary, tagged as EPSG:4326.
neighborhood = gpd.GeoDataFrame(
    {'geometry': [geometry], 'name': ['Chinatown']},
    crs={'init': 'epsg:4326'},
)

# Load citations

In [5]:
# One citation export per year, 2012 through 2016, in chronological order.
data_paths = [
    "/data/lovely-rita/new/2012complete-output.csv",
    "/data/lovely-rita/new/2013complete-output.csv",
    "/data/lovely-rita/new/2014complete-output.csv",
    "/data/lovely-rita/new/2015complete-output.csv",
    "/data/lovely-rita/new/2016complete-output-2.csv",
]

In [6]:
# Extend the shared column mapping so the bracketed coordinate columns in
# the CSV files are read under plain names.
column_map.update({'[latitude]': 'latitude',
                   '[longitude]': 'longitude'})

# Read and clean each yearly file, printing its path as a progress marker.
citations = []
for path in data_paths:
    print(path)
    citations.append(clean(read_data(path, column_map=column_map)))

/data/lovely-rita/new/2012complete-output.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  datetimes[j] = interpolated_datetimes[i]


/data/lovely-rita/new/2013complete-output.csv
/data/lovely-rita/new/2014complete-output.csv
/data/lovely-rita/new/2015complete-output.csv
/data/lovely-rita/new/2016complete-output-2.csv


In [7]:
# Stack the per-year frames into a single table with a fresh 0..n-1 index.
citations = pd.concat(citations).reset_index(drop=True)

citations['latitude'] = citations['latitude'].astype('float32')
citations['longitude'] = citations['longitude'].astype('float32')

# A citation has a usable location only when both coordinates are present
# and non-zero. Checking latitude alone would let NaN coordinates or a zero
# longitude through as bogus points that the later isnull() filter on the
# geometry column cannot remove.
has_location = (citations['latitude'].notnull()
                & citations['longitude'].notnull()
                & (citations['latitude'] != 0)
                & (citations['longitude'] != 0))

# shapely points are (x, y), i.e. (longitude, latitude). Rows without a
# usable location get None so they can be dropped afterwards.
citation_points = [Point(lon, lat) if located else None
                   for lat, lon, located in zip(citations['latitude'],
                                                citations['longitude'],
                                                has_location)]

citations['geometry'] = citation_points

In [8]:
# The raw coordinate columns are no longer needed once the geometry
# column has been built from them.
citations = citations.drop(['latitude', 'longitude'], axis=1)


In [None]:
# Keep only rows that have a geometry, then promote the table to a
# GeoDataFrame.
has_geometry = citations.geometry.notnull()
citations = gpd.GeoDataFrame(citations.loc[has_geometry])

# Select Chinatown citations

In [None]:
# The neighborhood table has a single row; take its polygon.
neighb = neighborhood.geometry.iloc[0]

# Boolean mask, one entry per citation: True when the citation's point
# lies inside the neighborhood polygon.
selected_indices = []
for point in citations.geometry.values:
    selected_indices.append(neighb.contains(point))

selected_citations = citations.loc[selected_indices]

In [None]:
# Show the neighborhood boundary on an interactive map centered on the
# polygon's centroid (folium expects [latitude, longitude]).
# The map is bound to `chinatown_map` rather than `map` so the Python
# builtin `map` stays usable in the rest of the notebook.
center = neighb.centroid
chinatown_map = folium.Map([center.y, center.x], zoom_start=16)
chinatown_map.choropleth(neighborhood.to_crs({'init': 'epsg:4326'}).to_json(),
                         fill_opacity=0.1, line_weight=3)
chinatown_map

In [None]:
# Violation descriptions ranked from most to fewest citations over the
# whole selection; used to keep a fixed bar order across the yearly charts.
citations_per_type = selected_citations.groupby('violation_desc_long')['street'].count()
order = citations_per_type.sort_values(ascending=False).index

In [None]:
# One bar chart per year. Every chart uses the same left-to-right ordering
# of violation types (`order`) and the same y-range so the figures can be
# compared side by side.
for year in [2012, 2013, 2014, 2015, 2016]:
    year_citations = selected_citations.loc[selected_citations.ticket_issue_datetime.dt.year == year]
    counts = year_citations.groupby('violation_desc_long').street.count()
    # reindex (not counts[order]): a violation type with no citations in
    # this year becomes a zero-height bar instead of raising KeyError.
    counts = counts.reindex(order, fill_value=0)

    fig, ax = plt.subplots(figsize=(10, 6))
    # Draw explicitly on the axes created above rather than relying on
    # pandas picking up the implicit "current" axes.
    counts.plot(kind='bar', ax=ax,
                title='Citations by type in Chinatown ({})'.format(year),
                ylim=[0, 900])
    ax.set_xlabel('Citation description')
    ax.set_ylabel('Number of citations')
    # Extra bottom margin leaves room for the long rotated tick labels.
    fig.subplots_adjust(top=0.95, bottom=0.35)
    fig.savefig('/data/lovely-rita/figures/chinatown_citations_{}.png'.format(year))
    