In [5]:
import requests
import pandas as pd
import os
from dotenv import load_dotenv

## Testing API calls

In [6]:
# Load variables from a local .env file into the environment, then read the
# API key that the request cells append to every URL.
load_dotenv()
api_key = os.getenv("DATA_GOV_API_KEY")
if not api_key:
    # Fail here with a clear message; otherwise a missing key only surfaces
    # later as a TypeError when None is concatenated onto the request URL.
    raise RuntimeError("DATA_GOV_API_KEY is not set; add it to the environment or a .env file")

In [7]:
# URL pieces for a one-agency test call: violent-crime summary for agency
# AZ0072300, years 2018 through 2022. The API key is appended last.
base = "https://api.usa.gov/crime/fbi/cde/"
query = (
    "summarized/agency/AZ0072300/violent-crime"
    "?from=2018&to=2022&API_KEY="
)
key = api_key

In [8]:
# Issue the single test request and load the JSON payload into a DataFrame.
# The timeout keeps a stalled connection from hanging the cell indefinitely.
response = requests.get(base + query + key, timeout=30)
if not response.ok:
    # Report only the status code: the request URL embeds the API key, so it
    # should not be echoed into the notebook's saved output.
    raise RuntimeError(f'FBI CDE request failed with HTTP {response.status_code}')
data = response.json()
df = pd.DataFrame(data)

In [9]:
# Display the DataFrame built from the single-agency test response
df

Unnamed: 0,ori,data_year,offense,cleared,actual
0,AZ0072300,2018,violent-crime,3932,12111
1,AZ0072300,2019,violent-crime,3749,11803
2,AZ0072300,2020,violent-crime,3818,13646
3,AZ0072300,2022,violent-crime,3707,13515


Crime rate (per 100,000 residents) = incident count / population × 100,000

## Getting crime count for each city

In [10]:
# Load the cleansed agency list; its 'ori' column drives the per-agency requests
agencies = pd.read_csv(filepath_or_buffer='../cleansed data/agencies.csv')

In [12]:
# Display the loaded agency table
agencies

Unnamed: 0,ori,agency_name,state_name,agency_type_name
0,AL0470100,Huntsville,Alabama,City
1,AK0010100,Anchorage,Alaska,Municipality
2,AZ0072300,Phoenix,Arizona,City
3,AR0600200,Little Rock,Arkansas,City
4,CA0194200,Los Angeles,California,City
...,...,...,...,...
227,VT0020100,Bennington,Vermont,City
228,WA0170200,Bellevue,Washington,City
229,WV0350100,Wheeling,West Virginia,City
230,WI0520200,Racine,Wisconsin,City


In [14]:
# Fetch the 2018-2022 violent-crime summary for every agency in `agencies`
# and stack the per-agency results into one `crime_count` DataFrame.
base = 'https://api.usa.gov/crime/fbi/cde/'
key = api_key

dfs = []  # one DataFrame per agency; concatenated once after the loop

# A single Session reuses the HTTP connection instead of reconnecting per agency.
with requests.Session() as session:
    for ori in agencies['ori']:
        query = 'summarized/agency/' + ori + '/violent-crime?from=2018&to=2022&API_KEY='

        # The timeout keeps one stalled request from hanging the whole run.
        response = session.get(base + query + key, timeout=30)
        if not response.ok:
            # Report only the agency and status code: the URL embeds the API
            # key, so it should not be echoed into the saved notebook output.
            raise RuntimeError(f'Request for agency {ori} failed with HTTP {response.status_code}')
        data = response.json()
        df = pd.DataFrame(data)

        dfs.append(df)

# Row-wise concatenation; each agency's rows keep their own 0..n index.
crime_count = pd.concat(dfs, axis=0)


In [15]:
# Display the stacked per-agency results (the index restarts for each agency)
crime_count

Unnamed: 0,ori,data_year,offense,cleared,actual
0,AL0470100,2018,violent-crime,986,1423
1,AL0470100,2019,violent-crime,597,872
2,AL0470100,2020,violent-crime,485,701
3,AL0470100,2021,violent-crime,26,28
4,AL0470100,2022,violent-crime,82,654
...,...,...,...,...,...
0,WY0190200,2018,violent-crime,14,43
1,WY0190200,2019,violent-crime,18,44
2,WY0190200,2020,violent-crime,19,58
3,WY0190200,2021,violent-crime,14,50


In [37]:
# Attach agency name, state and type to each crime row by matching on 'ori'
# (inner join: only ORIs present in both tables are kept).
crime_data = crime_count.merge(agencies, how='inner', on='ori')

In [38]:
# Load the cleansed city population table used as the crime-rate denominator
city_population = pd.read_csv(filepath_or_buffer='../cleansed data/city_population.csv')

In [43]:
# Pair each crime row with its city's population by matching
# (agency_name, state_name) against (City, State). Because this is an inner
# join, rows with no matching City/State pair are dropped.
final_crime_data = pd.merge(
    crime_data,
    city_population,
    how='inner',
    left_on=['agency_name', 'state_name'],
    right_on=['City', 'State'],
)

In [44]:
# Keep only the columns needed for the crime-rate calculation. The explicit
# .copy() makes the result an independent DataFrame, so the column assignments
# performed on it afterwards do not raise pandas' SettingWithCopyWarning.
final_crime_data = final_crime_data[['ori', 'agency_name', 'state_name',
                         'data_year', 'offense', 'Population', 'cleared', 'actual']].copy()

In [45]:
# Display the merged table after column selection
final_crime_data

Unnamed: 0,ori,agency_name,state_name,data_year,offense,Population,cleared,actual
0,AL0470100,Huntsville,Alabama,2018,violent-crime,215006,986,1423
1,AL0470100,Huntsville,Alabama,2019,violent-crime,215006,597,872
2,AL0470100,Huntsville,Alabama,2020,violent-crime,215006,485,701
3,AL0470100,Huntsville,Alabama,2021,violent-crime,215006,26,28
4,AL0470100,Huntsville,Alabama,2022,violent-crime,215006,82,654
...,...,...,...,...,...,...,...,...
1111,WY0190200,Rock Springs,Wyoming,2018,violent-crime,23082,14,43
1112,WY0190200,Rock Springs,Wyoming,2019,violent-crime,23082,18,44
1113,WY0190200,Rock Springs,Wyoming,2020,violent-crime,23082,19,58
1114,WY0190200,Rock Springs,Wyoming,2021,violent-crime,23082,14,50


In [48]:
# List the distinct states still present after the joins
final_crime_data['state_name'].unique()

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
       'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky',
       'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan',
       'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska',
       'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
       'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
       'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee',
       'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
       'West Virginia', 'Wisconsin', 'Wyoming', 'North Carolina'],
      dtype=object)

In [49]:
# Inspect the column dtypes before doing arithmetic on them
final_crime_data.dtypes

ori            object
agency_name    object
state_name     object
data_year       int64
offense        object
Population     object
cleared         int64
actual          int64
dtype: object

In [54]:
# Remove every comma from the Population strings (not only a trailing one);
# presumably these are thousands separators - confirm against the source file.
# regex=False makes the match literal regardless of the installed pandas version.
final_crime_data['Population'] = final_crime_data['Population'].str.replace(',', '', regex=False)

In [56]:
# Remove the literal text "[2]" from Population, e.g. 99600[2] -> 99600.
# regex=False is required: read as a regular expression, "[2]" is a character
# class matching the digit 2, and pandas versions before 2.0 treat the pattern
# as a regex by default, which would strip every 2 out of the figures.
final_crime_data['Population'] = final_crime_data['Population'].str.replace('[2]', '', regex=False)

In [57]:
# Convert the cleaned Population strings to integers, parsing as float first
# and then truncating to int.
final_crime_data['Population'] = final_crime_data['Population'].astype(float).astype(int)

In [58]:
# Add crime_rate: actual incidents divided by population, scaled by 100000
final_crime_data['crime_rate'] = (
    final_crime_data['actual']
    .div(final_crime_data['Population'])
    .mul(100000)
)

In [59]:
# Display the table with the new crime_rate column
final_crime_data

Unnamed: 0,ori,agency_name,state_name,data_year,offense,Population,cleared,actual,crime_rate
0,AL0470100,Huntsville,Alabama,2018,violent-crime,215006,986,1423,661.841995
1,AL0470100,Huntsville,Alabama,2019,violent-crime,215006,597,872,405.570077
2,AL0470100,Huntsville,Alabama,2020,violent-crime,215006,485,701,326.037413
3,AL0470100,Huntsville,Alabama,2021,violent-crime,215006,26,28,13.022892
4,AL0470100,Huntsville,Alabama,2022,violent-crime,215006,82,654,304.177558
...,...,...,...,...,...,...,...,...,...
1111,WY0190200,Rock Springs,Wyoming,2018,violent-crime,23082,14,43,186.292349
1112,WY0190200,Rock Springs,Wyoming,2019,violent-crime,23082,18,44,190.624729
1113,WY0190200,Rock Springs,Wyoming,2020,violent-crime,23082,19,58,251.278052
1114,WY0190200,Rock Springs,Wyoming,2021,violent-crime,23082,14,50,216.619010


In [None]:
# Write the finished table to disk, leaving the row index out of the file
final_crime_data.to_csv(path_or_buf='../cleansed data/fbi_crime_data.csv', index=False)