### Import Dependencies

In [2]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist, euclidean

### Calculate the geometric median of crime locations for each LAPD precinct

In [9]:
# https://stackoverflow.com/questions/30299267/geometric-median-of-multidimensional-points
# https://www.pnas.org/content/pnas/97/4/1423.full.pdf

def geometric_median(X, eps=1e-5):
    """Return the geometric median of a set of points.

    Runs the modified Weiszfeld iteration of Vardi & Zhang (PNAS 97(4),
    2000), starting from the centroid, until two successive estimates are
    closer than ``eps``.

    Args:
        X: Array-like of shape (n_points, n_dims). Anything ``np.asarray``
            can convert to a float array is accepted (ndarray, nested
            lists, object-dtype arrays of numbers, ...).
        eps: Convergence tolerance on the Euclidean distance between
            successive estimates.

    Returns:
        1-D ``numpy.ndarray`` of length n_dims with the estimated median.

    Raises:
        ValueError: If ``X`` is empty or contains NaN/infinite values.
    """
    # Boolean-mask indexing below needs a real ndarray, not a list.
    X = np.asarray(X, dtype=float)
    if X.size == 0:
        raise ValueError("geometric_median requires at least one point")
    # A single NaN/inf makes every distance NaN, so the convergence test
    # below could never succeed and the loop would spin forever.
    if not np.isfinite(X).all():
        raise ValueError("geometric_median requires finite coordinates")

    y = np.mean(X, 0)

    while True:
        D = cdist(X, [y])
        nonzeros = (D != 0)[:, 0]

        # Number of data points that coincide exactly with the estimate.
        num_zeros = len(X) - np.sum(nonzeros)
        if num_zeros == len(X):
            # Every point equals y, so y is the median.
            return y

        # Inverse-distance weights over the points that differ from y.
        Dinv = 1 / D[nonzeros]
        Dinvs = np.sum(Dinv)
        W = Dinv / Dinvs
        T = np.sum(W * X[nonzeros], 0)

        if num_zeros == 0:
            # Plain Weiszfeld step.
            y1 = T
        else:
            # y sits on one or more data points: blend the Weiszfeld step
            # with y so the iteration cannot get stuck on a non-optimal
            # data point.
            R = (T - y) * Dinvs
            r = np.linalg.norm(R)
            rinv = 0 if r == 0 else num_zeros / r
            y1 = max(0, 1 - rinv) * T + min(1, rinv) * y

        if euclidean(y, y1) < eps:
            return y1

        y = y1

In [108]:
def jsonFileLoader(file):
    """Build a GeoJSON FeatureCollection of per-precinct crime medians.

    The JSON is read with ``pd.read_json`` and transposed, after which the
    frame must expose ``LAPD_Area``, ``LAPD_Area_Name``, ``LAT`` and ``LON``
    columns. Rows are grouped by ``LAPD_Area`` and the geometric median of
    each group's (LAT, LON) pairs becomes one Point feature.

    Args:
        file: Path or buffer accepted by ``pd.read_json``.

    Returns:
        dict with ``"type": "FeatureCollection"`` and a ``"features"`` list
        holding one Point feature per ``LAPD_Area`` group. Coordinates are
        built-in floats in GeoJSON order ``[LON, LAT]``; each feature's
        ``name`` property is the first ``LAPD_Area_Name`` of the group.
    """
    # read json to df and transpose
    crime_df = pd.read_json(file).transpose()

    features = []
    # group by precinct; iterating the groupby yields (key, sub-frame) pairs
    for _, area_df in crime_df.groupby('LAPD_Area'):
        coords = area_df[['LAT', 'LON']].to_numpy()
        lat, lon = geometric_median(coords)
        area_name = str(area_df["LAPD_Area_Name"].values[0])

        features.append({
            "type": "Feature",
            # Plain floats keep NumPy scalar types out of the output, so the
            # dict serializes and prints cleanly.
            "geometry": {"type": 'Point', 'coordinates': [float(lon), float(lat)]},
            'properties': {'name': area_name},
        })

    return {"type": "FeatureCollection", 'features': features}


In [None]:
import json

# Write one GeoJSON file of per-precinct medians for each year 2013-2019.
for n in range(3, 10):
    file_to_load = f'../static/data/crime_201{n}.json'

    fileName = f'crime_201{n}'
    # Compute first so a failing load does not leave a truncated output file.
    geojson = jsonFileLoader(file_to_load)
    # json.dump emits valid, double-quoted JSON; str(dict) would write a
    # Python repr that GeoJSON parsers reject. The with-block guarantees the
    # file is closed even if serialization raises.
    with open(f"../static/data/stats/{fileName}.geojson", "w") as f:
        json.dump(geojson, f)

In [89]:
# CSV file to load (path relative to the working directory).
file_to_load = "LA_Certificated_Dispensary_Locations.csv"

In [91]:
def csvFileLoader(file):
    """Return the ``LOCATION`` column of a CSV file.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        ``pandas.Series`` named ``LOCATION`` with one entry per CSV row.

    Raises:
        KeyError: If the CSV has no ``LOCATION`` column.
    """
    # Read from the argument; the previous version ignored it and always
    # read the module-level ``file_to_load``.
    disp_df = pd.read_csv(file)['LOCATION']
    return disp_df

In [92]:
# Display the LOCATION column of the CSV named by file_to_load (cell output follows).
csvFileLoader(file_to_load)

0      (33.7258, -118.2855)
1      (33.7744, -118.2629)
2      (33.7805, -118.2523)
3       (33.7816, -118.243)
4      (33.7871, -118.3081)
               ...         
415     (34.307, -118.4695)
416    (34.3075, -118.4614)
417    (34.3099, -118.4274)
418    (34.3106, -118.4755)
419    (34.3106, -118.4793)
Name: LOCATION, Length: 420, dtype: object