In [1]:
from geopy.geocoders import Nominatim
from shapely.geometry import Point, LineString
import geopandas as gpd
import folium 
import matplotlib.pyplot as plt
import random
import pandas as pd

In [2]:
df = pd.read_csv('/Users/pauliuskomskis/Desktop/github/greenzone/data/2019/alaska-clean.csv')

In [3]:
df.head().shape

(5, 13)

In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,city,population,violent_crime,murder,rape,robbery,assault,property_crime,burglary,larceny,gta,arson
0,0,Anchorage,287731,3581,32,540,621,2388,12261,1692,9038,1531,93
1,1,Bethel,6544,130,1,47,3,79,132,20,84,28,12
2,2,Bristol Bay Borough,852,2,0,0,0,2,20,5,8,7,0
3,3,Cordova,2150,0,0,0,0,0,7,1,6,0,0
4,4,Craig,1313,7,0,0,0,7,20,5,12,3,0


In [5]:
city_list = df['city']
city_list.shape

(26,)

In [6]:
def get_coordinates(city_list):
    """Takes a list of cities and returns a dictionary of the cities and their corresponding coordinates."""
    geolocator = Nominatim(user_agent='location script')
    dicto = {}
    
    for city in city_list:
        try:
            location = geolocator.geocode(city)
        except:
            raise Exception("There was a problem with the getCoordinates function")
        coordinate_values = (location.longitude, location.latitude)  #in geopandas, the x value corresponds to the longitude while the y value, the latitude(Just in case you were wondering why it was *location.longitude, location.latitude* and not the other way round )
        dicto[city] = coordinate_values #adding the coordinate pair to the dictionary at the end of every loop
    return dicto #finally retruns the dict 

In [7]:
#getting coordinates for each city in the list
city_coords_dict = get_coordinates(city_list)
city_coords_dict

{'Anchorage': (-149.894852, 61.2163129),
 'Bethel': (-161.7558333, 60.7922222),
 'Bristol Bay Borough': (-156.8753867, 58.7370341),
 'Cordova': (-4.7760138, 37.8845813),
 'Craig': (-107.5464541, 40.5152491),
 'Dillingham': (-158.4575, 59.0397222),
 'Fairbanks': (-147.716675, 64.837845),
 'Haines': (-135.3430573, 59.0831232),
 'Juneau': (-134.419734, 58.3019496),
 'Kenai': (-151.258333, 60.5544444),
 'Ketchikan': (-131.6466819, 55.3430696),
 'Kodiak': (-152.4072222, 57.79),
 'Kotzebue': (-162.5977621, 66.8982057),
 'Nome': (-165.39879944316317, 64.4989922),
 'North Pole': (0.0, 90.0),
 'North Slope Borough': (-153.8220681, 69.5335129),
 'Palmer': (-149.1110902, 61.5995703),
 'Petersburg': (30.380619357025516, 59.917857350000006),
 'Seward': (-100.8928434, 37.1850331),
 'Sitka': (-135.337612, 57.0524973),
 'Skagway': (-135.3116103, 59.4556412),
 'Soldotna': (-151.065349, 60.4847228),
 'Unalaska': (-166.5272262, 53.8722824),
 'Valdez': (-146.3493638, 61.1299396),
 'Wasilla': (-149.4425638

In [8]:
len(city_coords_dict)

26

In [9]:
df.head()

Unnamed: 0.1,Unnamed: 0,city,population,violent_crime,murder,rape,robbery,assault,property_crime,burglary,larceny,gta,arson
0,0,Anchorage,287731,3581,32,540,621,2388,12261,1692,9038,1531,93
1,1,Bethel,6544,130,1,47,3,79,132,20,84,28,12
2,2,Bristol Bay Borough,852,2,0,0,0,2,20,5,8,7,0
3,3,Cordova,2150,0,0,0,0,0,7,1,6,0,0
4,4,Craig,1313,7,0,0,0,7,20,5,12,3,0


In [10]:
# crime rate per 100,000 is made as follows:
#(crime volume / population) * 100,000


#### 1. Add a total crime column

In [11]:
df['total_crime'] = df.sum(axis=1)

In [12]:
df.head()

Unnamed: 0.1,Unnamed: 0,city,population,violent_crime,murder,rape,robbery,assault,property_crime,burglary,larceny,gta,arson,total_crime
0,0,Anchorage,287731,3581,32,540,621,2388,12261,1692,9038,1531,93,319508
1,1,Bethel,6544,130,1,47,3,79,132,20,84,28,12,7081
2,2,Bristol Bay Borough,852,2,0,0,0,2,20,5,8,7,0,898
3,3,Cordova,2150,0,0,0,0,0,7,1,6,0,0,2167
4,4,Craig,1313,7,0,0,0,7,20,5,12,3,0,1371


#### 2. Making a new data frame that has only the neccesary columns for further operations

In [13]:
new_df = df.filter(['city','total_crime','population'], axis=1)
new_df

Unnamed: 0,city,total_crime,population
0,Anchorage,319508,287731
1,Bethel,7081,6544
2,Bristol Bay Borough,898,852
3,Cordova,2167,2150
4,Craig,1371,1313
5,Dillingham,2624,2405
6,Fairbanks,34722,31493
7,Haines,2492,2441
8,Juneau,34996,31810
9,Kenai,8915,7862


### 3.  Add a column of the crime rate severity via (total crime / population) 

In [14]:
new_df['crime_index'] = (new_df['total_crime'] / new_df['population'])

In [15]:
new_df.round(5)

Unnamed: 0,city,total_crime,population,crime_index
0,Anchorage,319508,287731,1.11044
1,Bethel,7081,6544,1.08206
2,Bristol Bay Borough,898,852,1.05399
3,Cordova,2167,2150,1.00791
4,Craig,1371,1313,1.04417
5,Dillingham,2624,2405,1.09106
6,Fairbanks,34722,31493,1.10253
7,Haines,2492,2441,1.02089
8,Juneau,34996,31810,1.10016
9,Kenai,8915,7862,1.13394


#### Next step is to create a final dataframe with following columns:
#### LATITUDE, LONGTITUDE, CRIME_INDEX

In [16]:
#lets make the coordinates dictionary into dataframe and orient it for
#latitudes and longitudes
coords_df = pd.DataFrame.from_dict(city_coords_dict, orient='index')
coords_df

Unnamed: 0,0,1
Anchorage,-149.894852,61.216313
Bethel,-161.755833,60.792222
Bristol Bay Borough,-156.875387,58.737034
Cordova,-4.776014,37.884581
Craig,-107.546454,40.515249
Dillingham,-158.4575,59.039722
Fairbanks,-147.716675,64.837845
Haines,-135.343057,59.083123
Juneau,-134.419734,58.30195
Kenai,-151.258333,60.554444


In [17]:
coords_df.rename(columns={0: 'latitude', 1: 'longitude'}, inplace=True)

In [18]:
coords_df

Unnamed: 0,latitude,longitude
Anchorage,-149.894852,61.216313
Bethel,-161.755833,60.792222
Bristol Bay Borough,-156.875387,58.737034
Cordova,-4.776014,37.884581
Craig,-107.546454,40.515249
Dillingham,-158.4575,59.039722
Fairbanks,-147.716675,64.837845
Haines,-135.343057,59.083123
Juneau,-134.419734,58.30195
Kenai,-151.258333,60.554444


### join original dataframe row of 'crime_index' to new df of 'coordinates'

In [49]:
coords_df['crime_index'] = new_df['crime_index'].values

In [50]:
coords_df

Unnamed: 0,latitude,longitude,crime_index
Anchorage,-149.894852,61.216313,1.11044
Bethel,-161.755833,60.792222,1.08206
Bristol Bay Borough,-156.875387,58.737034,1.053991
Cordova,-4.776014,37.884581,1.007907
Craig,-107.546454,40.515249,1.044174
Dillingham,-158.4575,59.039722,1.09106
Fairbanks,-147.716675,64.837845,1.102531
Haines,-135.343057,59.083123,1.020893
Juneau,-134.419734,58.30195,1.100157
Kenai,-151.258333,60.554444,1.133935
