In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import geojson as gj

In [2]:
# Read the LAPD division boundaries (GeoJSON) with geopandas into lapd_df.
# lapd_df is merged with the per-area crime counts further down the notebook.
lapd_file = "../static/data/LAPD_Divisions.geojson"
lapd_df = gpd.read_file(lapd_file)

In [3]:
# Preview the division table (at most the first 100 rows are displayed)
lapd_df.head(100)

Unnamed: 0,FID,APREC,PREC,AREA,PERIMETER,SHAPE_Length,SHAPE_Area,geometry
0,1,MISSION,19,699582600.0,190817.495739,0.576177,0.006363,"MULTIPOLYGON (((-118.50736 34.33464, -118.5038..."
1,2,DEVONSHIRE,17,1346816000.0,175591.702871,0.534007,0.012249,"MULTIPOLYGON (((-118.50736 34.33464, -118.5066..."
2,3,FOOTHILL,16,1297385000.0,203717.879169,0.627755,0.011797,"MULTIPOLYGON (((-118.41581 34.29384, -118.4156..."
3,4,TOPANGA,21,909032600.0,155505.795458,0.470165,0.00826,"MULTIPOLYGON (((-118.63166 34.23858, -118.6315..."
4,5,WEST VALLEY,10,936405700.0,136316.899694,0.407795,0.008507,"MULTIPOLYGON (((-118.56229 34.22035, -118.5618..."
5,6,NORTH HOLLYWOOD,15,634601100.0,187366.252732,0.566582,0.005765,"MULTIPOLYGON (((-118.34264 34.11645, -118.3427..."
6,7,VAN NUYS,9,489695000.0,116288.308461,0.347249,0.004449,"MULTIPOLYGON (((-118.47325 34.21651, -118.4731..."
7,8,NORTHEAST,11,815602200.0,178460.722082,0.541347,0.007404,"MULTIPOLYGON (((-118.32904 34.15022, -118.3290..."
8,9,HOLLYWOOD,6,371835300.0,115808.747099,0.353721,0.003375,"MULTIPOLYGON (((-118.34695 34.11831, -118.3469..."
9,10,WEST LOS ANGELES,8,1803659000.0,295206.415546,0.897951,0.016368,"MULTIPOLYGON (((-118.51758 34.02506, -118.5176..."


In [4]:
# Read a single year of crime records into crime_df
year = '2013'  # available years: 2013, 2014, 2015, 2016, 2017, 2018, 2019
crime_year = f"crime_{year}"
file_to_load = f'../static/data/{crime_year}.json'

# Swap rows and columns of the frame as read from the file; the cells below
# address fields such as 'LAPD_Area' and 'Crime_Type' as columns.
crime_df = pd.read_json(file_to_load).transpose()

In [5]:
# # Sanity Checks
# crime_df.head()
# crime_df.groupby('Year').LON.count()
# crime_df.groupby('Year').LON.value_counts()

In [6]:
# Count crimes per LAPD area: one row per area, with the number of non-null
# Crime_Type entries stored in the Crime_Counts column.
grouped_df = (
    crime_df.groupby('LAPD_Area')['Crime_Type']
    .count()
    .reset_index(name='Crime_Counts')
)

In [7]:
# # Sanity check data types
# grouped_df.dtypes
# grouped_df.head(100)

Unnamed: 0,LAPD_Area,Crime_Counts
0,1,6330
1,2,5947
2,3,10126
3,4,5353
4,5,6249
5,6,6684
6,7,6222
7,8,6367
8,9,6800
9,10,6082


In [8]:
# Build one table of crime counts per (Year, LAPD_Area) across all yearly files
year = [2013,2014,2015,2016,2017,2018,2019]

# Per-year aggregates are collected in a list and concatenated once after the
# loop. DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# growing a frame inside a loop re-copies every accumulated row on each pass.
yearly_counts = []

for y in year:
    crime_year = f"crime_{y}"
    file_to_load = f'../static/data/{crime_year}.json'

    # Swap rows and columns of the frame as read, so that fields such as
    # 'Year' and 'LAPD_Area' are addressable as columns.
    crime_df = pd.read_json(file_to_load).T

    # Aggregate by Year and Area: count of non-null Crime_Type per group
    grouped_df = (
        crime_df.groupby(['Year','LAPD_Area'])['Crime_Type']
        .count()
        .reset_index(name='Crime_Counts')
    )
    yearly_counts.append(grouped_df)

# Columns come out as Year, LAPD_Area, Crime_Counts with a fresh 0..n-1 index
yearArea_df = pd.concat(yearly_counts, ignore_index=True)
    

In [9]:
# yearArea_df.head(100)

In [10]:
# Attach each LAPD division's attributes and geometry to the yearly crime
# counts: LAPD_Area (crime data) is matched against PREC (division data).
# Inner join, so rows without a matching division are dropped.
final_df = pd.merge(
    yearArea_df,
    lapd_df,
    how='inner',
    left_on='LAPD_Area',
    right_on='PREC',
)
# final_df = lapd_df.merge(grouped_df, left_on='PREC', right_on='LAPD_Area').head()
# final_df.head(100)

Unnamed: 0,Year,LAPD_Area,Crime_Counts,FID,APREC,PREC,AREA,PERIMETER,SHAPE_Length,SHAPE_Area,geometry
0,2013,1,6330,21,CENTRAL,1,1.367477e+08,64187.406589,0.190474,0.001240,"MULTIPOLYGON (((-118.25196 34.07419, -118.2518..."
1,2014,1,6987,21,CENTRAL,1,1.367477e+08,64187.406589,0.190474,0.001240,"MULTIPOLYGON (((-118.25196 34.07419, -118.2518..."
2,2015,1,8880,21,CENTRAL,1,1.367477e+08,64187.406589,0.190474,0.001240,"MULTIPOLYGON (((-118.25196 34.07419, -118.2518..."
3,2016,1,9279,21,CENTRAL,1,1.367477e+08,64187.406589,0.190474,0.001240,"MULTIPOLYGON (((-118.25196 34.07419, -118.2518..."
4,2017,1,10306,21,CENTRAL,1,1.367477e+08,64187.406589,0.190474,0.001240,"MULTIPOLYGON (((-118.25196 34.07419, -118.2518..."
...,...,...,...,...,...,...,...,...,...,...,...
95,2017,14,10006,17,PACIFIC,14,7.176129e+08,246934.321606,0.757182,0.006504,"MULTIPOLYGON (((-118.42224 33.91633, -118.4275..."
96,2018,14,10291,17,PACIFIC,14,7.176129e+08,246934.321606,0.757182,0.006504,"MULTIPOLYGON (((-118.42224 33.91633, -118.4275..."
97,2019,14,10496,17,PACIFIC,14,7.176129e+08,246934.321606,0.757182,0.006504,"MULTIPOLYGON (((-118.42224 33.91633, -118.4275..."
98,2013,15,8220,6,NORTH HOLLYWOOD,15,6.346011e+08,187366.252732,0.566582,0.005765,"MULTIPOLYGON (((-118.34264 34.11645, -118.3427..."


In [11]:
# Export helper: DataFrame rows -> GeoJSON FeatureCollection file
def data2geojson(df, filename):
    """Write one GeoJSON Feature per row of ``df`` to ``filename``.

    Each feature takes its geometry from the row's ``geometry`` value and
    carries a single property, ``crime_counts``, taken from the row's
    ``Crime_Counts`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``geometry`` and ``Crime_Counts`` columns. The geometry
        values are passed straight to ``gj.Feature`` (presumably shapely
        geometries coming from geopandas -- confirm against the caller).
    filename : str
        Path of the file to write; it is opened in ``'w'`` mode with UTF-8
        encoding, so existing content is overwritten.

    Returns
    -------
    None
    """
    features = []
    # Iterate explicitly rather than calling DataFrame.apply for its side
    # effects: on pandas < 1.1, apply may invoke the function twice for the
    # first row, which would append a duplicate first feature.
    for geometry, crime_count in zip(df["geometry"], df["Crime_Counts"]):
        # numpy scalars are not JSON serialisable; unwrap to a Python scalar
        if hasattr(crime_count, "item"):
            crime_count = crime_count.item()
        features.append(
            gj.Feature(geometry=geometry,
                       properties=dict(crime_counts=crime_count)))
    with open(filename, 'w', encoding='utf8') as fp:
        gj.dump(gj.FeatureCollection(features), fp, sort_keys=True, ensure_ascii=False)

In [13]:
# Destination of the exported file: <outputFilePath><outputFileName>.geojson
outputFilePath = '../static/data/'
outputFileName = 'crime'
dispFileName = f'{outputFilePath}{outputFileName}.geojson'

# Write the merged counts + division geometries out as GeoJSON
data2geojson(df=final_df, filename=dispFileName)