In [1]:
#Description:
#   Initial and simple analysis of FBI Crime Data prior to 7/11 class
#   Objective is to look at vicitim age, victim location (w/in US), and other related data to determine if there
#   are other angles the group should further explore. 
#
#Modification History:
#   DD-MMM-YYY  Author          Description
#   10-07-2019  Stacey Smith    INITIAL CREATION


In [33]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import csv

from pprint import pprint

# Import DATA.GOV API key
from api_keys import dg_key

# Output File (CSV)
output_data_file = "output_data/fbi_crime_data.csv"

In [34]:
#API Call to National-level victim-data-controller for homicide only 
# 
#FORMAT:  https://api.usa.gov/crime/fbi/sapi/api/data/nibrs/{offense}/victim/national/{variable}?api_key=

age_query_url = "https://api.usa.gov/crime/fbi/sapi/api/data/nibrs/homicide/victim/national/age?api_key=" + dg_key
#count_query_url = "https://api.usa.gov/crime/fbi/sapi/api/data/nibrs/homicide/victim/national/count?api_key=" + dg_key
ethnicity_query_url = "https://api.usa.gov/crime/fbi/sapi/api/data/nibrs/homicide/victim/national/ethnicity?api_key=" + dg_key
sex_query_url = "https://api.usa.gov/crime/fbi/sapi/api/data/nibrs/homicide/victim/national/sex?api_key=" + dg_key
race_query_url = "https://api.usa.gov/crime/fbi/sapi/api/data/nibrs/homicide/victim/national/race?api_key=" + dg_key
relationship_query_url = "https://api.usa.gov/crime/fbi/sapi/api/data/nibrs/homicide/victim/national/relationship?api_key=" + dg_key



age_response = requests.get(age_query_url).json()
#count_response = requests.get(count_query_url).json()  ........... can't find any documentation on what the count means
ethnicity_response = requests.get(ethnicity_query_url).json()
sex_response = requests.get(sex_query_url).json()
race_response = requests.get(race_query_url).json()
relationship_response = requests.get(relationship_query_url).json()


In [66]:
pprint(race_response)

{'pagination': {'count': 27, 'page': 0, 'pages': 1, 'per_page': 0},
 'results': [{'american_indian': 17,
              'asian': 25,
              'black': 1607,
              'data_year': 2008,
              'native_hawaiian': 0,
              'unknown': 93,
              'white': 1508},
             {'american_indian': 28,
              'asian': 36,
              'black': 1815,
              'data_year': 2011,
              'native_hawaiian': 0,
              'unknown': 76,
              'white': 1581},
             {'american_indian': 6,
              'asian': 26,
              'black': 954,
              'data_year': 2003,
              'native_hawaiian': 0,
              'unknown': 52,
              'white': 1094},
             {'american_indian': 14,
              'asian': 35,
              'black': 1802,
              'data_year': 2006,
              'native_hawaiian': 0,
              'unknown': 66,
              'white': 1485},
             {'american_indian': 27,
             

In [62]:
#Age data looks pretty straightforward - count of victims by age, by year 

age_data = pd.DataFrame(age_response['results'])
age_data.sort_values(by='data_year', ascending=False)

age_data.head()

Unnamed: 0,data_year,range_0_9,range_10_19,range_20_29,range_30_39,range_40_49,range_50_59,range_60_69,range_70_79,range_80_89,range_90_99,unknown
0,2008,181,352,963,644,483,305,116,61,40,8,97
1,2011,191,334,1097,700,536,341,165,66,49,10,47
2,2003,112,211,655,395,359,173,69,55,31,7,65
3,2006,159,388,1007,651,535,324,112,73,36,6,111
4,2009,168,371,995,686,519,331,166,81,43,11,79


In [63]:
#Ethnicity data counts by hispanic vs. non-hispanic.  Not sure what multiple means or what ethnicities are encompassed by "unknown"
#Federal policy classifies "hispanic" as ethnicity, not race

eth_data = pd.DataFrame(ethnicity_response['results'])
eth_data.sort_values(by='data_year', ascending=False)

eth_data.head()

Unnamed: 0,data_year,hispanic,multiple,not_hispanic,unknown
0,2008,276,0,1908,545
1,2011,227,0,2236,604
2,2003,146,0,1469,214
3,2006,236,0,1937,624
4,2009,262,0,2136,477


In [65]:
# Sex data - male, female and unknown by year

sex_data = pd.DataFrame(sex_response['results'])
sex_data.sort_values(by='data_year', ascending=False)

sex_data.head()

Unnamed: 0,data_year,female_count,male_count,unknown
0,2008,773,2449,28
1,2011,904,2619,13
2,2003,634,1491,7
3,2006,829,2548,25
4,2009,863,2579,8


In [67]:
# Race data - american_indian, asian, black, native_hawaiian, and white by year.  Not sure what "unknown" encompasses.

race_data = pd.DataFrame(race_response['results'])
race_data.sort_values(by='data_year', ascending=False)

race_data.head()

Unnamed: 0,american_indian,asian,black,data_year,native_hawaiian,unknown,white
0,17,25,1607,2008,0,93,1508
1,28,36,1815,2011,0,76,1581
2,6,26,954,2003,0,52,1094
3,14,35,1802,2006,0,66,1485
4,27,31,1710,2009,0,54,1628
