<a href="https://colab.research.google.com/github/osgoodeb/SafeR/blob/main/SafeR_coordinate_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import math
import csv

Data dimensions: (3361, 5)
Data preview: 
         lat      long category  quantity    year
0  28.53632  77.24920   murder       0.0  2021.0
1  28.61268  77.08600   murder       1.0  2021.0
2  28.52989  77.20418   murder       1.0  2021.0
3  28.64361  77.23608   murder       0.0  2021.0
4  28.70257  77.19369   murder       0.0  2021.0


In [None]:
# Load the crime records, discard every column whose header contains
# 'Unnamed' (presumably a stray index column written by an earlier export),
# then report the table's shape and its first rows.
data = pd.read_csv('MainDataset.csv')
unnamed_columns = [name for name in data.columns if 'Unnamed' in name]
data = data.drop(columns=unnamed_columns)
print('Data dimensions:', data.shape)
print('Data preview: \n', data.head())

In [None]:
# Work out how many grid zones are needed to cover the data's bounding box.
# 110.574 and 111.320 * cos(latitude) are used as the kilometres spanned by
# one degree of latitude and longitude respectively, with the cosine taken at
# the mid-latitude of the data, so each zone is about one kilometre on a side.
lat_min, lat_max = data['lat'].min(), data['lat'].max()
long_min, long_max = data['long'].min(), data['long'].max()
lat_avg_rad = (lat_min + lat_max) / 2 * math.pi / 180
lat_zones = math.ceil(110.574 * (lat_max - lat_min))
long_zones = math.ceil(111.320 * math.cos(lat_avg_rad) * (long_max - long_min))
total_zones = lat_zones * long_zones
print('Vertical zones:', lat_zones)
print('Horizontal zones:', long_zones)
print('Total zones:', total_zones)

Vertical zones: 43
Horizontal zones: 41
Total zones: 1763


In [None]:
# Determining the cutoff lat/longs for the zones
# lat_per_km / long_per_km are the degrees spanned by one kilometre. One
# degree of longitude covers 111.320 * cos(latitude) km, so its reciprocal
# must have the cosine in the denominator. Multiplying by the cosine instead
# makes every zone too narrow, and the grid then stops short of long_max,
# leaving the easternmost points outside every zone.
lat_per_km = 1 / 110.574
long_per_km = 1 / (111.320 * math.cos(lat_avg_rad))
# zones + 1 cutoffs: the lower edge of every zone plus the upper edge of the last one.
lat_cutoffs = [lat_min + lat_per_km * zone_num for zone_num in range(lat_zones + 1)]
long_cutoffs = [long_min + long_per_km * zone_num for zone_num in range(long_zones + 1)]
print('Latitude cutoffs:', lat_cutoffs)
print('Longitude cutoffs:', long_cutoffs)

Latitude cutoffs: [28.46549, 28.47453371732957, 28.483577434659143, 28.49262115198871, 28.501664869318283, 28.510708586647855, 28.519752303977427, 28.528796021307, 28.537839738636567, 28.54688345596614, 28.55592717329571, 28.564970890625283, 28.574014607954854, 28.583058325284423, 28.592102042613995, 28.601145759943567, 28.61018947727314, 28.61923319460271, 28.62827691193228, 28.63732062926185, 28.646364346591422, 28.655408063920994, 28.664451781250563, 28.673495498580134, 28.682539215909706, 28.691582933239278, 28.70062665056885, 28.70967036789842, 28.71871408522799, 28.727757802557562, 28.736801519887134, 28.745845237216706, 28.754888954546274, 28.763932671875846, 28.772976389205418, 28.78202010653499, 28.79106382386456, 28.80010754119413, 28.809151258523702, 28.818194975853274, 28.827238693182846, 28.836282410512418, 28.845326127841986, 28.854369845171558]
Longitude cutoffs: [76.91555, 76.92343256063715, 76.93131512127431, 76.93919768191147, 76.94708024254862, 76.95496280318578, 76.

In [None]:
# Gather the distinct years and crime categories that occur in the data,
# leaving out missing values, and list the manually chosen crime scores.
years = data['year'].dropna().unique()
print('Years:', years)
crimes = data['category'].dropna().unique()
print('Crime types:', crimes)
# The scores are paired with `crimes` by position, i.e. in the order in which
# each category first appears in the dataset.
crime_scores = [50, 25, 10, 5]
print('Crime scores:', crime_scores)

In [None]:
# Creating new dataframes for each zone/year combination and adding the corresponding data to them
#
# Produces four dicts, one entry per (lat zone, long zone, year):
#   region_dataframes        'lat{i}_long{j}_year{y}_data'  -> rows of `data` inside the zone for that year
#   region_scores            'lat{i}_long{j}_year{y}_score' -> sum of crime score * quantity over those rows
#   region_bottom_left_cords 'lat{i}_long{j}_year{y}_score' -> 'lat,long' of the zone's lower cutoffs
#   region_top_right_cords   'lat{i}_long{j}_year{y}_score' -> 'lat,long' of the zone's upper cutoffs
region_dataframes = {}
region_scores = {}
region_bottom_left_cords = {}
region_top_right_cords = {}
column_names = [column for column in data.columns]


def _zone_masks(values, cutoffs):
  """Return one boolean row mask per zone bounded by consecutive cutoffs.

  Zones are half-open, [lower, upper), so a point lying exactly on a shared
  edge is assigned to exactly one zone. The final zone also includes its
  upper edge so a point sitting on the last cutoff is not lost. Combining the
  two bounds with `&` (not `and`, which would silently keep only one of the
  two conditions) is what restricts a row to a single zone.
  """
  last_zone = len(cutoffs) - 2
  masks = []
  for zone in range(len(cutoffs) - 1):
    if zone == last_zone:
      under_upper = values <= cutoffs[zone + 1]
    else:
      under_upper = values < cutoffs[zone + 1]
    masks.append((values >= cutoffs[zone]) & under_upper)
  return masks


# Zone membership depends only on the coordinates, so compute it once instead
# of rescanning the whole table for every zone/year combination.
lat_masks = _zone_masks(data['lat'].to_numpy(), lat_cutoffs)
long_masks = _zone_masks(data['long'].to_numpy(), long_cutoffs)

# Score contributed by each row: the score of its crime type times its
# quantity. Rows whose category has no score contribute nothing. zip() stops
# at the shorter sequence, so only as many categories as there are scores are
# weighted. NOTE: a missing quantity on a scored row still makes its region's
# score NaN.
crime_weights = data['category'].map(dict(zip(crimes, crime_scores)))
row_scores = np.where(crime_weights.notna(), crime_weights * data['quantity'], 0)

for year in years:
  year_mask = (data['year'] == year).to_numpy()
  for long in range(len(long_cutoffs) - 1):
    long_year_mask = long_masks[long] & year_mask
    for lat in range(len(lat_cutoffs) - 1):
      data_key = 'lat{0}_long{1}_year{2}_data'.format(lat, long, year)
      score_key = 'lat{0}_long{1}_year{2}_score'.format(lat, long, year)
      region_mask = lat_masks[lat] & long_year_mask
      # Select all matching rows at once rather than appending them one by one.
      region_dataframes[data_key] = data.loc[region_mask, column_names].reset_index(drop=True)
      # Empty regions keep a plain integer 0 score.
      region_scores[score_key] = row_scores[region_mask].sum() if region_mask.any() else 0
      region_bottom_left_cords[score_key] = str(lat_cutoffs[lat]) + ',' + str(long_cutoffs[long])
      region_top_right_cords[score_key] = str(lat_cutoffs[lat + 1]) + ',' + str(long_cutoffs[long + 1])

In [None]:
# Exporting to csv
# The file holds four wide rows with one column per zone/year combination:
# the region keys, their scores, their bottom-left corners and their
# top-right corners.
# newline='' lets the csv writer control line endings itself; without it,
# platforms that translate '\n' (e.g. Windows) get a blank line after every row.
with open('Region Data.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows([region_scores.keys(), region_scores.values(), region_bottom_left_cords.values(), region_top_right_cords.values()])