In [18]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import requests
from IPython.display import display, HTML
from ipywidgets import interact_manual, Button, Output
import json
import warnings

warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) 

In [19]:
### The 'geocode' function uses the Nominatim API to geocode a given location
### and retrieve geolocation information in JSON format
def geocode(location):
    query_string = {'q' : location, 'format': 'json'}
    url = 'https://nominatim.openstreetmap.org/search' # define the URL for the OpenStreetMap Nominatim API
    response = requests.get(url, params = query_string) # send GET request to the API with query_string parameters
    response.raise_for_status() 
    geodata = response.json() # parse the response as JSON file, which contains geolocation data
    return geodata


### The 'GetOffenders' function retrieves information about offenders based on
### a specified latitude and longitude using the authentified FamilyWatchdog API
def GetOffenders(lat, lon):
    apikey = '3CA27D62-D057-4D9B-BF1F-F22C2906FE51'
    lite = 0
    miles = 1 # set radius miles for search
    type = 'searchbyspecificlatlong'
    url = 'https://services.familywatchdog.us/rest/json.asp?' # URL for the API endpoint
    # query_string with defined parameters for the API request
    query_string = {
        'key' : apikey, 
        'lite' : lite, 
        'type' : type,
        'lat' : lat, 
        'long' : lon, 
        'miles' : miles 
    }
    response = requests.get(url, params = query_string)
    response.raise_for_status()
    dataset = response.json()
    return dataset

### call the GetOffenders function with New York City's latitude and longitude
fofo = GetOffenders(40.7127281, -74.0060152)


### The 'write_json' function writes data in JSON format
### by using the json.dump function converts python objects to json 
### It returns the file saving it into Jupyter NB folder
def write_json(data, filename):
    with open(filename, 'w') as f:
        file = json.dump(data, f, indent=4)
    return file


### The 'getOffenderInfo' function processes offender information from JSON file saved in Jupyter NB folder
### executed in previous function above
def getOffenderInfo(file):
    new_dict = [] # create an empty list to store offender information
    myjson = open(file, 'r') # open the JSON file in read mode
# citation; recieved tons of guidance on extracting data from a json file here: 
# https://www.youtube.com/watch?v=aj4L7U7alNU
    jsondata = myjson.read() 
    obj = json.loads(jsondata) # convert the JSON data into a Python object
    list= (obj['offenders'])  # access the 'offenders' list from parsed 'obj' data
    ## Iterate over each offender in the list
    for off in list:
        ## check if there are convictions and indexed for first key
        if off['convictions'] != []:
            off_charges = off['convictions'][0]['charge']
        else: 
            off_charges = []
        ## extract values in the description key of the first marking if markings exist
        if off['markings'] != []:
            off_markings = off['markings'][0]['description']
        else:
            off_markings = []
        ## extract personally identifiable infomation of offenders in the list
        name = off['name']
        dob = off['dob']
        sex = off['sex']
        race = off['race']
        hair = off['hair']
        height = off['height']
        weight = off['weight']
        convictlevel = off['convictiontype']
        homeaddress = off['street1'], off['city'], off['state'], off['zipcode']
        photo = off['photo']
        latitude = off['latitude']
        longitude = off['longitude']
        ## create a dictionary for each offender and insert elements into the list
        new_dict.append({'name': name, 
                         'DOB': dob, 
                         'sex': sex, 
                         'race': race, 
                         'hair': hair, 
                         'height': height, 
                         'weight': weight, 
                         'convictlevel': convictlevel, 
                         'homeaddress': homeaddress,
                         'charges': off_charges, 
                         'markings': off_markings, 
                         'photo': photo, 
                         'latitude': latitude, 
                         'longitude': longitude})
        ## write (using 'w') the collected offender information to a new JSON file 
        ## saves JSON file into Jupyter NB folder
        with open('offenderinformation_data.json', 'w') as f:
            newfile = json.dump(new_dict, f, indent=4)
    return newfile
    

### 'GetLocation' defines an interactive widget for the user to input a location
display(HTML('<h2>*** Run for a saved dataset of offenders within your city ***</h2>'))
@interact_manual(location="") 
def GetLocation(location):
    loco = geocode(location)
    for coord in loco:
        ## extract latitude and longitude from the coordinates
        lat = coord['lat']
        lon = coord['lon']
        fofo = GetOffenders(lat, lon)
        beo = write_json(fofo, 'original_offenders_dataset.json')
        ## process and extract location of offender from the saved JSON file
        ric = getOffenderInfo('original_offenders_dataset.json')
        return ric

  @interact_manual(location="")


interactive(children=(Text(value='', description='location'), Button(description='Run Interact', style=ButtonS…

In [20]:
import pandas as pd
# citation; recieved guidance on json pandas dataframe from: https://www.youtube.com/watch?v=RRSJjxJhVEM&t=352s

## define the variable to the JSON file containing offender information
off_file = 'offenderinformation_data.json'
with open(off_file) as f:
    data = json.load(f)

## created an empty DataFrame with specified column names 
all_offenders = pd.DataFrame(columns=['Name', 
                                      'DOB', 
                                      'Sex', 
                                      'Race', 
                                      'Hair', 
                                      'Height', 
                                      'Weight', 
                                      'Conviction Level', 
                                      'Home Address', 
                                      'Charge(s)', 
                                      'Marking(s)', 
                                      'Photo', 
                                      'Latitude', 
                                      'Longitude'])

## iterate over each entry in the JSON data
for i in range(0, len(data)):
    currentitem = data[i]
    ## extract information for each offender in JSON file and populate the curated dataframe
    all_offenders.loc[i] = [data[i]['name'], 
                            data[i]['DOB'], 
                            data[i]['sex'], 
                            data[i]['race'], 
                            data[i]['hair'], 
                            data[i]['height'], 
                            data[i]['weight'], 
                            data[i]['convictlevel'], 
                            data[i]['homeaddress'], 
                            data[i]['charges'], 
                            data[i]['markings'], 
                            data[i]['photo'], 
                            data[i]['latitude'], 
                            data[i]['longitude']]

display(HTML('<h2>************************************ A dataframe of each offender ************************************</h2>'))    
    
# display the dataframe of each offender from user input location containing offender information 
all_offenders

Unnamed: 0,Name,DOB,Sex,Race,Hair,Height,Weight,Conviction Level,Home Address,Charge(s),Marking(s),Photo,Latitude,Longitude
0,"GILLIAM, ERIC, M",1977-08-17,M,W,Black,,,3,"[100 BLOCK OF 2ND AVENUE, , AR, ]",SEXUAL ASSAULT - 2ND DEGREE,Tattoo Back,http://photo.familywatchdog.us/OffenderPhoto/O...,40.7271708,-73.9885098
1,"DOLLMAN, DANIEL, SCOTT",1970-09-21,M,W,Blonde,,,5,"[OUT OF STATE, HEMAN, AR, ]",LEWD OR LASCIVIOUS ACTS W / CHILD UNDER 14,"Tattoo Left Shld, Tattoo Right Shld, Tattoo Ri...",http://photo.familywatchdog.us/OffenderPhoto/O...,40.71304703,-74.0072326
2,ABRAHAM MALDONADO JR,07/23/1981,Male,White,Unknown/bald,5ft 06in,210,1,"[Last Reported Address - Out of State, New Yor...","SEX OFFENSE, OTHER STATE (Gross Sexual Imposit...",Tattoo - Right Arm,http://photo.familywatchdog.us/OffenderPhoto/O...,40.71648,-73.98845
3,MICHAEL BERTOLLINI,06/05/1983,Male,White,Brown,5ft 07in,135,5,"[Last Reported Address - Out of State, New Yor...","SEX OFFENSE, OTHER STATE (SEXUAL ABUSE 1ST DEG...",Tattoo - Left Arm,http://photo.familywatchdog.us/OffenderPhoto/O...,40.71648,-73.98845
4,Irving Hiram Lamboy,03/09/1971,Male,White,Black,5ft 07in,148,5,"[Last Reported Address - Out of State, NEW YOR...","LEWD,LASCIVIOUS CHILD U/16; F.S. 800.04 (PRINC...",[],http://photo.familywatchdog.us/OffenderPhoto/O...,40.7217861,-74.0094471
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461,"VIERA, JORGE",04/12/1983,MALE,WHITE,BROWN,5ft 11in,215,3,"[000 HOMELESS, NEW YORK CITY, NY, ]",SEXUAL BATTERY,"TATTOO, ARM, LE",http://photo.familywatchdog.us/OffenderPhoto/O...,40.71455,-74.00714
462,"BAYER, BRAD LEE",09/04/1980,MALE,WHITE,BROWN,5ft 09in,130,5,"[HOMELESS, NEW YORK CITY, NY, ]",SOLICITATION OF A MINOR: CLASS B OR C FELONY,"SCAR, GROIN AREA",http://photo.familywatchdog.us/OffenderPhoto/O...,40.71455,-74.00714
463,KEVIN DENNIS EALEY,11/6/1960,Male,Black,Gray or Partially Gray,6FT 1IN,185,1,"[8 EAST THIRD STREET, NEW YORK, NY, 10028]",Rape,[],http://photo.familywatchdog.us/OffenderPhoto/O...,40.725928,-73.99095
464,"DERUSHIA, ELVIS ELLSWORTH",09/06/1963,MALE,WHITE,BROWN,5ft 03in,132,1,"[18 WILLOW STREET, MECINA, NY, ]",SODOMY 2ND/D FELONY,"TATTOO, ARM, LE",http://photo.familywatchdog.us/OffenderPhoto/O...,40.7143528,-74.0059731


In [21]:
## created a column 'SexCat' for sex category and initializing all its values NaN if null
## np.nan from numpy
all_offenders['SexCat'] = np.nan 
    
## map variations of sex categories to 'Male' or 'Female' in the 'SexCat' column
## Accessed data values in sex dictionary key in JSON file; variation in data is 'Male', 'M', 'MALE', 'Female'
all_offenders['SexCat'][ all_offenders['Sex'] == 'Male'] = 'Male'
all_offenders['SexCat'][ all_offenders['Sex'] == 'M'] = 'Male'
all_offenders['SexCat'][ all_offenders['Sex'] == 'MALE'] = 'Male'
all_offenders['SexCat'][ all_offenders['Sex'] == 'Female'] = 'Female'

all_offenders['SexCat'].value_counts() # generate a count of each value in the 'SexCat' column; Male or Female

### filtered the offenders in the dateframe by risk level
risk1 = all_offenders[ all_offenders['Conviction Level'] == '1']
risk1['SexCat'].value_counts()

risk2 = all_offenders[ all_offenders['Conviction Level'] == '2']
risk2['SexCat'].value_counts()

risk3 = all_offenders[ all_offenders['Conviction Level'] == '3']
risk3['SexCat'].value_counts()

risk4 = all_offenders[ all_offenders['Conviction Level'] == '4']
risk4['SexCat'].value_counts()

### generated a count using value.counts() of offenders by male or female and risk level
sex_series = all_offenders['SexCat'].value_counts()
risk1_series = risk1['SexCat'].value_counts()
risk2_series = risk2['SexCat'].value_counts()
risk3_series = risk3['SexCat'].value_counts()
risk4_series = risk4['SexCat'].value_counts()

## created a dataframe summarizing sex category counts for all offenders in each risk level
sex_df = pd.DataFrame( {'All' :sex_series, 
                        'Risk Level 1' :risk1_series, 
                        'Risk Level 2' :risk2_series,
                        'Risk Level 3' :risk3_series,
                        'Risk Level 4' :risk4_series})

display(HTML('<h2>***************** How do female and male offenders compare on their risk level? *****************</h2>'))

# display the dataframe summary
sex_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_offenders['SexCat'][ all_offenders['Sex'] == 'Male'] = 'Male'


Unnamed: 0_level_0,All,Risk Level 1,Risk Level 2,Risk Level 3,Risk Level 4
SexCat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,5,2,,,
Male,459,211,1.0,12.0,77.0


In [22]:
## created a column 'RaceCat' for race category and initializing all its values NaN if null
all_offenders['RaceCat'] = np.nan
    
all_offenders['RaceCat'][ all_offenders['Race'] == 'White'] = 'White'
all_offenders['RaceCat'][ all_offenders['Race'] == 'W'] = 'White'
all_offenders['RaceCat'][ all_offenders['Race'] == 'Black'] = 'Black'
all_offenders['RaceCat'][ all_offenders['Race'] == 'Asian'] = 'Asian'

risk_1 = all_offenders[ all_offenders['Conviction Level'] == '1']
risk_1['RaceCat'].value_counts()

risk_2 = all_offenders[ all_offenders['Conviction Level'] == '2']
risk_2['RaceCat'].value_counts()

risk_3 = all_offenders[ all_offenders['Conviction Level'] == '3']
risk_3['RaceCat'].value_counts()

risk_4 = all_offenders[ all_offenders['Conviction Level'] == '4']
risk_4['RaceCat'].value_counts()

### generated a count using value.counts() of offenders by ethnic background and risk level
race_series = all_offenders['RaceCat'].value_counts()
risk1_seq = risk_1['RaceCat'].value_counts()
risk2_seq = risk_2['RaceCat'].value_counts()
risk3_seq = risk_3['RaceCat'].value_counts()
risk4_seq = risk_4['RaceCat'].value_counts()

## created a dataframe summarizing race category counts for all offenders in each risk level
race_df = pd.DataFrame( {'All' :race_series, 
                         'Risk Level 1' :risk1_seq, 
                         'Risk Level 2' :risk2_seq,
                         'Risk Level 3' :risk3_seq,
                         'Risk Level 4' :risk4_seq})

display(HTML('<h2>************* How does race/ethnic background come into play for conviction level? ***********</h2>'))

# display the dataframe summary
race_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_offenders['RaceCat'][ all_offenders['Race'] == 'White'] = 'White'


Unnamed: 0_level_0,All,Risk Level 1,Risk Level 2,Risk Level 3,Risk Level 4
RaceCat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Asian,7,3,,,1
Black,191,99,1.0,4.0,45
White,232,96,,7.0,22


In [14]:
pip install folium

Note: you may need to restart the kernel to use updated packages.


In [15]:
!pip install folium



In [23]:
import pandas as pd
import folium

# citation; recieved guidance with this part of the program from:
# https://www.youtube.com/watch?v=FdqDgoG-SFM&t=891s

### 'set_marker_color' function defines the allocated marker color based on risk level
def set_marker_color(row):
    if row['Conviction Level'] == '1':
        return 'green'
    elif row['Conviction Level'] == '2':
        return 'blue'
    elif row['Conviction Level'] == '3':
        return 'pink'
    elif row['Conviction Level'] == '4':
        return 'purple'
    
# applied the 'set_marker_color' function to create a new 'color' column in all_offenders dataframe
all_offenders['color'] = all_offenders.apply(set_marker_color, axis=1)

# define the center coordinates of the US
CENTER_US = (39.8333333,-98.585522)

# created a folium Map object centered at 'CENTER_US' with a zoom level of 4
off_map = folium.Map(
    location=CENTER_US,
    zoom_start=4
)

### iterate over each row in the all_offenders dataframe (*refer to 3rd chunk)
for _, indiv in all_offenders.iterrows():
    ## created a folium Marker for each offender with specific location, popup, tooltip, and icon
    folium.Marker(
        location=[indiv['Latitude'], indiv['Longitude']],
        popup=(indiv['Name'], indiv['Charge(s)'], indiv['Home Address']),
        tooltip=(indiv['Name'], indiv['Charge(s)'], indiv['Home Address']),
        icon=folium.Icon(color=indiv['color'], prefix='fa', icon='circle'),
    ).add_to(off_map)
    

### displayed a header representing a map key indicating offenders in the area and the corresponding risk levels
display(HTML('<h2>**************************************** Offenders in your area ****************************************</h2>'))    
display(HTML('<body> Risk Level 1: green </body>'))
display(HTML('<body> Risk Level 2: blue </body>'))
display(HTML('<body> Risk Level 3: pink </body>'))
display(HTML('<body> Risk Level 4: purple </body>'))
            
## save the map into HTML file in Jupyter NB folder
off_map.save('nearest_offenders_in_the_area.html')

## display the offenders map of user input location with a radius of 
off_map

  icon=folium.Icon(color=indiv['color'], prefix='fa', icon='circle'),
