In [1]:
import numpy as np
import json
import bokeh
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.tile_providers import CARTODBPOSITRON, CARTODBPOSITRON_RETINA
from datetime import datetime

import math

In [10]:
def coords_to_plot(lat, lon):
    """Project a latitude/longitude pair onto spherical (Web) Mercator axes.

    Parameters
    ----------
    lat : float
        Latitude in degrees.
    lon : float
        Longitude in degrees.

    Returns
    -------
    tuple of (int, int)
        ``(x, y)`` plot coordinates, each truncated towards zero by ``int``.
    """
    r_major = 6378137.000  # sphere radius used by the projection
    x = r_major * math.radians(lon)
    # The y scale used to be derived as ``x / lon``, which equals
    # ``r_major * pi / 180`` but raises ZeroDivisionError at lon == 0.
    # Multiplying by the radius directly is the same projection for every lon.
    y = r_major * math.log(math.tan(math.pi / 4.0 + math.radians(lat) / 2.0))
    return (int(x), int(y))

# Code violation cases

In [22]:
# Load the Seattle code-violation cases and summarise the raw file.
data = pd.read_csv('./seattle-code-violation-cases/code-violation-cases.csv')
data.head()  # not the last expression of the cell, so nothing is displayed

created = data['Date Case Created']
print('len:', data.shape[0])
print('start:', created.min())
print('end:', created.max(), '\n')
print('nulls:')
print(data.isna().sum())

# Keep only tenant-relocation cases that have both coordinates.
# data.loc[data['Case Type'] == 'TENANT RELOCATION ORDINANCE', 'Date Case Created'].sort_values()
is_tenant_relocation = data['Case Type'].eq('TENANT RELOCATION ORDINANCE')
data = data[is_tenant_relocation].dropna(subset=['Latitude', 'Longitude'])

print('nulls:')
print(data.isna().sum())

len: 41485
start: 2003-09-17T00:00:00
end: 2018-04-27T00:00:00 

nulls:
Case Number                           0
Case Type                             0
Address                              45
Description                          61
Case Group                         1931
Date Case Created                     0
Last Inspection Date               2763
Last Inspection Result             2763
Status                                0
Permit and Complaint Status URL       0
Latitude                             87
Longitude                            87
Location                             10
dtype: int64
nulls:
Case Number                           0
Case Type                             0
Address                               0
Description                           2
Case Group                         1871
Date Case Created                     0
Last Inspection Date               2403
Last Inspection Result             2403
Status                                0
Permit and Complaint Status 

In [23]:
# Year each case was created: the leading 'YYYY' of the timestamp string
# (values look like '2003-09-17T00:00:00').
# Do not sort here: sorting yields a plain list that is assigned by position,
# which pairs each case with some other case's year. The Series result below
# stays aligned with the frame's index.
data['year'] = data['Date Case Created'].str[:4].astype(int)

In [24]:
# Print the number of cases for each year, in the order the years first appear.
years = data['year']
for yr in years.unique():
    n_cases = int((years == yr).sum())
    print(yr, n_cases)

2003 13
2004 207
2005 216
2006 227
2007 289
2008 120
2009 92
2010 61
2011 58
2012 108
2013 103
2014 184
2015 241
2016 210
2017 199
2018 75


In [37]:
def plot_by_year(df, year_col, lat_col, lon_col, max_years=3, max_points=100):
    """Draw the rows of ``df`` as circles on a map tile, one batch per year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with one row per point to plot.
    year_col : str
        Column whose distinct values define the per-year batches.
    lat_col, lon_col : str
        Columns passed to ``coords_to_plot`` as latitude and longitude.
    max_years : int or None, optional
        Only the first ``max_years`` distinct years (in order of appearance)
        are drawn; ``None`` draws every year. Defaults to 3.
    max_points : int or None, optional
        At most this many rows are drawn per year; ``None`` draws every row.
        Defaults to 100.

    Returns
    -------
    None
        The figure is rendered with ``output_notebook()`` and ``show()``.
    """
    # Fixed viewport in mercator plot coordinates.
    p = figure(x_range=(-13630000, -13610000), y_range=(6038000, 6046000),
               x_axis_type="mercator", y_axis_type="mercator")
    p.add_tile(CARTODBPOSITRON_RETINA)

    for year in df[year_col].unique()[:max_years]:
        print(year)
        # Filter on the column the caller named; a hard-coded 'year' here
        # silently ignored ``year_col``.
        rows = df.loc[df[year_col] == year, [lat_col, lon_col]].iloc[:max_points]
        # Project only the rows that are actually drawn.
        merc = [coords_to_plot(lat, lon)
                for lat, lon in zip(rows[lat_col], rows[lon_col])]

        # One glyph call per year instead of one renderer per point.
        p.circle(x=[pt[0] for pt in merc],
                 y=[pt[1] for pt in merc],
                 size=20,
                 fill_alpha=.2,
                 #line_color = colors_dict[year],
                 #fill_color = colors_dict[year]
                )

    output_notebook()
    show(p)

In [38]:
# Map the filtered cases by creation year.
plot_by_year(df=data,
             year_col='year',
             lat_col='Latitude',
             lon_col='Longitude')

2003
2004
2005


In [None]:
# Bubble map: one batch of circles per year, sized by price / 100000.
# NOTE(review): this cell reads 'price', 'lat' and 'long' columns. Those match
# the King County house-sales cell further down, not the code-violation frame
# loaded above (which is plotted via 'Latitude'/'Longitude') -- confirm which
# frame `data` is expected to hold when this cell runs.
p = figure(x_range=(-13630000, -13610000), y_range=(6038000, 6046000),
           x_axis_type="mercator", y_axis_type="mercator")
p.add_tile(CARTODBPOSITRON_RETINA)

colors_dict = {2014: '#008000', 2015: '#FF0000'}

for year in data['year'].unique():

    # First 500 rows of the year; only these are projected and drawn.
    sales = data.loc[data['year'] == year, ['price', 'lat', 'long']].iloc[:500]
    merc = [coords_to_plot(lat, lon)
            for lat, lon in zip(sales['lat'], sales['long'])]

    # One glyph call per year rather than one renderer (and three .loc
    # lookups) per point.
    p.circle(x = [pt[0] for pt in merc],
             y = [pt[1] for pt in merc],
             size=(sales['price'] / 100000).tolist(),
             fill_alpha=.2,
             line_color = colors_dict[year],
             fill_color = colors_dict[year]
            )

output_notebook()
show(p)

# SPD Reports

In [None]:
# Load the SPD reports and store the parsed start timestamps in a new column.
data = pd.read_csv('./SPD_Reports.csv')
offence_start = pd.to_datetime(data['Offense Start Date'])
data['Offence Start Date'] = offence_start

print('len:', data.shape[0])
print('start:', offence_start.min())
print('end:', offence_start.max(), '\n')
print('nulls:')
print(data.isna().sum())

# 911 Incidents

In [None]:
# https://www.kaggle.com/sohier/seattle-police-department-911-incident-response (380 MB)

# NOTE: machine-specific absolute path; point it at your local copy of the file.
data = pd.read_csv('/Users/sam/Downloads/Seattle_Police_Department_911_Incident_Response.csv')

# Reassign instead of inplace=True so the statement reads as a plain transform.
data = data.drop(columns=['Initial Type Description',
                          'Initial Type Subgroup',
                          'Initial Type Group',
                          'At Scene Time'])

print('len:', len(data))

# Downsample
# Seeded and without replacement: np.random.choice defaults to replace=True,
# which can pick the same row more than once and differs on every run.
data = data.sample(n=min(5000, len(data)), random_state=0)

print('Downsample len:', len(data))
data['Event Clearance Date'] = pd.to_datetime(data['Event Clearance Date'])

print('start:', data['Event Clearance Date'].min())
print('end:', data['Event Clearance Date'].max(), '\n')
print('nulls:')
print(data.isnull().sum())

# King County House Sales

In [None]:
# Load the King County house sales, then derive the sale year and a parsed date.
data = pd.read_csv('./kc_house_data.csv')

print(data.shape[0])

# The year is the first four characters of the raw value, read before the
# column is replaced by its parsed form.
raw_dates = data['date']
data['year'] = [int(str(value)[:4]) for value in raw_dates]
data['date'] = pd.to_datetime(raw_dates)

sale_dates = data['date']
print('start:', sale_dates.min())
print('end:', sale_dates.max(), '\n')

print('nulls:')
print(data.isna().sum())

In [None]:
# p = figure(x_range=(-13630000, -13610000), y_range=(6038000, 6046000),
#            x_axis_type="mercator", y_axis_type="mercator")
# p.add_tile(CARTODBPOSITRON_RETINA)

# colors_dict = {2014: '#008000', 2015: '#FF0000'}

# for year in data['year'].unique():
    
#     coords = data.loc[data['year'] == year, ['price', 'lat', 'long']]
#     coords['merc'] = coords.apply(lambda x: coords_to_plot(x['lat'], x['long']), axis=1).values
    
#     for coord in coords.index[:500]:
        
#         x, y, price = coords.loc[coord]['merc'][0], coords.loc[coord]['merc'][1], coords.loc[coord]['price']

#         p.circle(x = x,
#                  y = y,
#                  size=price/100000,
#                  fill_alpha=.2,
#                  line_color = colors_dict[year],
#                  fill_color = colors_dict[year]
#                 )

# output_notebook()
# show(p)

In [None]:
def normalize(coords_price):
    """Min-max scale a numeric Series so its values span 0 to 1.

    Parameters
    ----------
    coords_price : pandas.Series
        Numeric values to scale.

    Returns
    -------
    pandas.Series
        ``(value - min) / (max - min)`` for each element, on the same index.
        When every value is identical the range is zero, and 0.0 is returned
        for each element instead of the NaN that 0/0 would produce.
    """
    cord_min = coords_price.min()
    cord_max = coords_price.max()
    span = cord_max - cord_min
    if span == 0:
        # Constant input: multiplying by 0.0 gives float zeros and keeps any
        # missing values missing.
        return coords_price * 0.0
    # Vectorised arithmetic instead of a Python-level call per element.
    return (coords_price - cord_min) / span

In [None]:
def make_json(row):
    """Build one output record from a row.

    Reads ``row['lat']``, ``row['long']`` and ``row['normed']`` and returns a
    dict with keys ``'lat'``, ``'long'`` and ``'value'``, where ``'value'`` is
    the normed figure rounded to 6 decimal places.
    """
    lat = row['lat']
    lon = row['long']
    value = round(row['normed'], 6)
    return dict(lat=lat, long=lon, value=value)

In [None]:
# Make sample data
# Prices are normalised over every 2014 sale first; only then are the first
# 2000 rows kept for export.
coords = data.loc[data['year'] == 2014, ['price', 'lat', 'long']].copy()  # own copy: a column is added next
coords['normed'] = normalize(coords['price'])
coords = coords.iloc[:2000]

# The records get their own name. Rebinding `data` to this list meant a second
# run of the cell failed, because `data` was no longer a DataFrame.
records = [make_json(row) for _, row in coords.iterrows()]

with open('data.json', 'w') as fp:
    json.dump(records, fp)

# Not used

In [None]:
# # No lat/long
# data = pd.read_csv('/Users/sam/Downloads/seattle-use-of-force/use-of-force.csv')
# data

In [None]:
# # No lat/long
# data = pd.read_csv('/Users/sam/Downloads/seattle-rent-and-income-restricted-housing/rent-and-income-restricted-housing.csv')
# data

# Land use permits

In [None]:
# NOTE: machine-specific absolute path; point it at your local copy of the file.
data = pd.read_csv('/Users/sam/Downloads/seattle-land-use-permits/land-use-permits.csv')
# A bare `len(data)` in the middle of a cell displays nothing, so print it.
print('len:', len(data))

# Try to parse every column whose name contains 'Date'.
date_cols = [col for col in data.columns if 'Date' in col]
for col in date_cols:
    try:
        data[col] = pd.to_datetime(data[col])
    except (ValueError, TypeError, OverflowError) as err:
        # Still best effort (the column stays as loaded), but report the
        # failure instead of hiding it behind a bare `except: pass`.
        print(col, 'left unparsed:', err)

    print(col, 'min:', data[col].min())
    print(col, 'max:', data[col].max())

data.dtypes

In [None]:
# Count of missing values in each column.
data.isna().sum()

In [None]:
# Applied
# Decisions/Applied
# Issues/Decisions
# Is there Issues/Applied?

# Find denied permits

In [None]:
# Or find permits approved

In [None]:
# for col in ['Permit Type', 'Category', 'Value', 'Applicant Name', 'Contractor']:
#     print(col)
#     print(data[col].unique())

In [None]:
# Distinct values of the 'Category' column, in order of first appearance.
data.loc[:, 'Category'].unique()

In [None]:
# Rows whose 'Category' is exactly 'COMMERCIAL'.
data[data['Category'].eq('COMMERCIAL')]

In [None]:
# Rows whose 'Category' is exactly 'MULTIFAMILY'.
data[data['Category'].eq('MULTIFAMILY')]

In [None]:
# Display whatever `data` currently holds as the cell's output.
data

In [None]:
# p = figure(x_range=(-13630000, -13610000), y_range=(6038000, 6046000),
#            x_axis_type="mercator", y_axis_type="mercator")
# p.add_tile(CARTODBPOSITRON_RETINA)

# colors_dict = {2014: '#008000', 2015: '#FF0000'}

# for year in data['year'].unique():
    
#     coords = data.loc[data['year'] == year, ['price', 'lat', 'long']]
#     coords['merc'] = coords.apply(lambda x: coords_to_plot(x['lat'], x['long']), axis=1).values
    
#     for coord in coords.index[:500]:
        
#         x, y, price = coords.loc[coord]['merc'][0], coords.loc[coord]['merc'][1], coords.loc[coord]['price']

#         p.circle(x = x,
#                  y = y,
#                  size=price/100000,
#                  fill_alpha=.2,
#                  line_color = colors_dict[year],
#                  fill_color = colors_dict[year]
#                 )

# output_notebook()
# show(p)