# Constants

In [312]:
R_MIN = 0.25
MIN_LOG_LIKELIHOOD = 100
ALPHA_P = 0.1
NUM_MONTE_CARLO_STIMULATIONS = 9

# Cleaning

In [313]:
import pandas as pd
import ast
import math
from math import radians, cos, sin, asin, sqrt
import numpy as np
from math import pi
from bounding import MinimumBoundingBox

In [314]:
data = pd.read_csv('Crimes_-_2018.csv').dropna()

In [315]:
filtered_data = data[['Primary Type', 'Location']]

In [316]:
filtered_data['Primary Type'].value_counts()

THEFT                                64024
BATTERY                              49713
CRIMINAL DAMAGE                      27699
ASSAULT                              20342
DECEPTIVE PRACTICE                   17221
OTHER OFFENSE                        16949
NARCOTICS                            12796
BURGLARY                             11689
MOTOR VEHICLE THEFT                   9934
ROBBERY                               9677
CRIMINAL TRESPASS                     6881
WEAPONS VIOLATION                     5444
OFFENSE INVOLVING CHILDREN            2179
PUBLIC PEACE VIOLATION                1364
CRIM SEXUAL ASSAULT                   1342
INTERFERENCE WITH PUBLIC OFFICER      1305
SEX OFFENSE                           1054
PROSTITUTION                           717
HOMICIDE                               601
ARSON                                  373
LIQUOR LAW VIOLATION                   265
CRIMINAL SEXUAL ASSAULT                224
GAMBLING                               201
STALKING   

In [317]:
filtered_data['row_num'] = [i for i in range(1, len(filtered_data) + 1)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['row_num'] = [i for i in range(1, len(filtered_data) + 1)]


In [318]:
filtered_data

Unnamed: 0,Primary Type,Location,row_num
152,OTHER OFFENSE,"(41.896063478, -87.667902742)",1
154,ASSAULT,"(41.769399886, -87.690705813)",2
155,BATTERY,"(41.838706452, -87.72595418)",3
156,OFFENSE INVOLVING CHILDREN,"(41.896551396, -87.773787663)",4
425,BATTERY,"(42.004384227, -87.674295522)",5
...,...,...,...
268672,ASSAULT,"(41.897598907, -87.769139392)",262826
268673,ROBBERY,"(41.732730806, -87.585114125)",262827
268674,ROBBERY,"(41.734234372, -87.589291682)",262828
268681,OFFENSE INVOLVING CHILDREN,"(41.732935859, -87.608977015)",262829


In [319]:
robery_data = filtered_data[filtered_data['Primary Type'] == 'DECEPTIVE PRACTICE']

In [320]:
def calculate_distance(lat1, lat2, lon1, lon2):
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * asin(sqrt(a))
    # Radius of earth in kilometers. Use 3956 for miles
    r = 6371
    return c*r

def step_1(data_array):
    possible_circles = set()
    for i in data_array:
        for j in data_array:
            if i[1] != j[1]:
                if type(i[0]) != tuple:
                    loc = ast.literal_eval(i[0])
                    possible_circles.add((loc[0],loc[1],calculate_distance(loc[0], ast.literal_eval(j[0])[0], loc[1], ast.literal_eval(j[0])[1])))
                else:
                    possible_circles.add((i[0][0], i[0][1], calculate_distance(i[0][0], j[0][0], i[0][1], j[0][1])))
    return possible_circles

In [321]:
def helper_to_remove_overlaps(c1, l1, l2, c2):
    d = calculate_distance(c1[0],c2[0],c1[1],c2[1])
    if d < c1[2] + c2[2]:
        if l1 > l2:
            return c1, l1
        else:
            return c2, l2

def helper_for_Area_s(circles):
    lat_min = float('inf')
    long_min = float('inf')
    lat_max = float('-inf')
    long_max = float('-inf')
    
    
    for i in circles:
        if i[0] < lat_min:
            lat_min = i[0]
        if i[0] > lat_max:
            lat_max = i[0]
        if i[1] < long_min:
            long_min = i[1]
        if i[1] > long_max:
            long_max = i[1]
    return calculate_distance(lat_min, lat_min, long_min, long_max)*calculate_distance(lat_max, lat_max, long_max, long_min)
    
def step_2(step_1_circles, data_array,  monte_flag = False):
    circle_likelihood = {}
#     area_s = MinimumBoundingBox([ast.literal_eval(i[1]) for i in data_array]).area
#     area_s = 607.4
    area_s = helper_for_Area_s(step_1_circles)
    for i in step_1_circles:
        B = len(data_array)*(pi*i[2]*i[2])/area_s
        curr_c = 0
        curr_i = 0
        for j in data_array:
            if type(j[0]) != tuple:
                if calculate_distance(i[0],ast.literal_eval(j[0])[0] ,i[1], ast.literal_eval(j[0])[1]) < i[2]:
                    curr_c += 1
            else:
                if calculate_distance(i[0],j[0][0] ,i[1], j[0][1]) < i[2]:
                    curr_c += 1
        if curr_c >= B:
            curr_i = 1
        if curr_i == 0:
            likelihood = 0
        else:
            likelihood = math.log(((curr_c/B)**curr_c)*(((len(data_array)-curr_c)/(len(data_array)-B))**(len(data_array)-curr_c))*curr_i)
        circle_likelihood[i] = likelihood
    final_circles = {}
    max_like = float('-inf')
    max_like_circle = None
    f = list(circle_likelihood.items())
    for i in range(len(f)):
        for j in range(i+1,len(f)):
            if i != j:
                x = helper_to_remove_overlaps(f[i][0],f[i][1],f[j][1],f[j][0])
                if x != None:
                    if f[i][0] in final_circles or f[j][0] in final_circles:
                        pass
                    final_circles[x[0]] = x[1]
                    if x[1] > max_like:
                        max_like_circle = x[0]
    if monte_flag:
        return max_like_circle, max_like
    fin = {'Circle': [], 'Likelihood': []}
    # TODO : CHANGE IT TO FINAL CIRCLES DOWN
    for i, j in final_circles.items():
        fin['Circle'].append(i)
        fin['Likelihood'].append(j)
    pd.DataFrame(fin).to_csv('circle_hood.csv')
    return final_circles

In [322]:
def step_3(step2_candidate_circles):
    # Generate Monte Carlo Samples : For now lt samples be monte_datasets: an array
    monte_datasets = []
    likehood_orderslist = {}
    for i in range(NUM_MONTE_CARLO_STIMULATIONS):
        lat_data = np.random.uniform(data['Latitude'].min(), data['Latitude'].max(), 10)
        long_data = np.random.uniform(data['Longitude'].min(), data['Longitude'].max(), 10)
        curr_data = [(lat_data[i], long_data[i]) for i in range(10)]
        dic = {'Location': curr_data, 'row_num': [i for i in range(10)]}
        monte_datasets.append(pd.DataFrame(dic))
        
        
    for i in monte_datasets:
        possible_circles = step_1(i.values)
        final_circle, final_likelihood = step_2(possible_circles,i.values ,monte_flag=True)
        likehood_orderslist[final_circle] = final_likelihood
    final_solution_circles = []   
    sorted_likelihood_orders_list = sorted(likehood_orderslist.items(), key=lambda x: [x[1],x[0]], reverse=True)
    for i,j in step2_candidate_circles.items():
        for k in range(len(sorted_likelihood_orders_list)):
            if j > sorted_likelihood_orders_list[k][1]:
                p = (k+1)/(NUM_MONTE_CARLO_STIMULATIONS+1)
                print(p)
                if p <= ALPHA_P:
                    final_solution_circles.append(i)
    
    return final_solution_circles

In [323]:
def circular_satscan():
    solution = step_3(step_2(step_1(robery_data[['Location', 'row_num']].values[:10]), robery_data[['Location', 'row_num']].values[:10]))
    print(solution)
    return solution

In [324]:
pd.DataFrame(circular_satscan(), columns=['Lat', 'Long', 'R']).to_csv('Answers.csv')

0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
0.1
0.2
0.3
0.4
0.5
0.6
0.7


In [325]:
import gmplot
gmap3 = gmplot.GoogleMapPlotter('41.8781', '-87.6298',3)
df_ans =  pd.read_csv('Answers.csv')
print(df_ans)
gmap3.scatter(df_ans['Lat'].values, df_ans['Long'].values)
for i in range(len(df_ans)):
    gmap3.circle(df_ans.values[i][1], df_ans.values[i][2], df_ans.values[i][3], face_alpha=0.5, ew=3, color='red')
gmap3.draw("map_DECEPTIVE_PRACTICE.html")

    Unnamed: 0        Lat       Long            R
0            0  41.896970 -87.631251  1288.160026
1            1  41.878639 -87.627691   798.740407
2            2  41.896970 -87.631251   117.338378
3            3  41.693309 -87.621014  1327.072848
4            4  41.756605 -87.576096   442.106008
..         ...        ...        ...          ...
84          84  41.962393 -87.652340   394.569673
85          85  41.901581 -87.626877    32.395993
86          86  41.896970 -87.631251    32.395993
87          87  41.962393 -87.652340   421.414670
88          88  41.962393 -87.652340   413.018262

[89 rows x 4 columns]


In [326]:
data['Latitude'].values

array([41.89606348, 41.76939989, 41.83870645, ..., 41.73423437,
       41.73293586, 41.86565067])

In [327]:
circles = pd.read_csv('circle_hood.csv')

In [328]:
radians(42.4)

0.7400196028455958

In [329]:
data['Latitude'].min()

41.644589713

In [330]:
data['Latitude'].max()

42.022671246

In [331]:
data['Longitude'].min()

-87.934272688

In [332]:
data['Longitude'].max()

-87.524529378

In [333]:
np.random.uniform(data['Latitude'].min(), data['Latitude'].max(), 100)

array([41.97627406, 41.8845652 , 41.87371512, 41.82107279, 41.83186642,
       41.91547344, 41.77371105, 41.65613567, 41.68501024, 41.74537573,
       41.77733002, 41.8045801 , 41.71356969, 41.80872648, 41.97418559,
       41.95426912, 41.72701703, 41.98372163, 41.98057252, 41.99308277,
       41.79558796, 41.74201512, 41.66765307, 41.94081583, 41.93695216,
       41.92094375, 41.69778402, 41.89402146, 41.89707173, 41.83410282,
       41.73019443, 42.01517911, 41.67600767, 41.99577463, 41.94275525,
       41.70710168, 41.85695839, 41.87523027, 41.79298233, 41.78313292,
       41.74081471, 41.90535585, 41.89377611, 41.91649728, 41.88908838,
       41.75696048, 41.69224118, 41.95527297, 41.99607722, 41.75669145,
       41.91715297, 41.83780173, 41.74110376, 41.89517993, 41.781604  ,
       41.75850713, 41.81314802, 41.75001949, 41.75256402, 42.00809143,
       41.87383882, 41.93216489, 41.82224249, 41.8935136 , 41.76549369,
       41.8507861 , 41.86424542, 41.90718631, 41.69049288, 41.84

In [334]:
x = np.random.uniform(data['Longitude'].min(), data['Longitude'].max(), 100)

In [335]:
dic = {}

In [336]:
dic['Location'] = x

In [337]:
pd.DataFrame(dic)

Unnamed: 0,Location
0,-87.706915
1,-87.659140
2,-87.901197
3,-87.689250
4,-87.645801
...,...
95,-87.709535
96,-87.566052
97,-87.620796
98,-87.856966


In [338]:
robery_data

Unnamed: 0,Primary Type,Location,row_num
444,DECEPTIVE PRACTICE,"(41.896970377, -87.631250927)",20
445,DECEPTIVE PRACTICE,"(41.695014818, -87.648357778)",21
804,DECEPTIVE PRACTICE,"(41.693309015, -87.621013915)",55
920,DECEPTIVE PRACTICE,"(41.867414944, -87.627231186)",83
1038,DECEPTIVE PRACTICE,"(41.901581306, -87.626876922)",117
...,...,...,...
266952,DECEPTIVE PRACTICE,"(41.763132564, -87.629020819)",261555
266973,DECEPTIVE PRACTICE,"(41.819240331, -87.724212138)",261576
267014,DECEPTIVE PRACTICE,"(41.881858277, -87.724091496)",261617
267799,DECEPTIVE PRACTICE,"(41.677466116, -87.665367764)",261997


In [None]:
gmap3.