# fbiPy - Prisoners in the US
----

## Dataset: FBI

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import scipy.stats as st
from datetime import date
import json
# Import API key
from config import apikey
import ctypes  
import datetime

In [None]:
def append_dict_to_df(current_dict, current_index, current_df):
    """Add one record to a DataFrame as a new row.

    Args:
        current_dict: Mapping of column name to value for the new row.
        current_index: Index label given to the new row.
        current_df: DataFrame to extend; may be empty.

    Returns:
        A ``(dataframe, next_index)`` tuple: a new DataFrame holding the
        previous rows plus the new one (``current_df`` is not modified in
        place), and ``current_index + 1``.
    """
    single_df = pd.DataFrame(current_dict, index=[current_index])
    if current_df.empty:
        # Nothing to keep from an empty frame: the new row becomes the frame.
        current_df = single_df.copy()
    else:
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent and keeps the existing index labels.
        current_df = pd.concat([current_df, single_df])
    current_index += 1
    return current_df, current_index

def get_offender_data(crime_type, variable, agency, year):
    """Fetch one agency's NIBRS offender data and keep a single year.

    Requests ``api/nibrs/{crime_type}/offender/agencies/{agency}/{variable}``
    under the module-level ``base_url`` using the module-level ``apikey``.
    Failed requests are retried a bounded number of times with a pause
    between attempts.

    Args:
        crime_type: Crime type segment of the endpoint path.
        variable: Breakdown variable segment of the endpoint path.
        agency: Agency identifier segment of the endpoint path.
        year: Value of the ``data_year`` column to keep.

    Returns:
        A DataFrame with one row per record in the response's ``data`` list
        whose ``data_year`` equals ``year``. An empty DataFrame is returned
        when the endpoint answers 404, when ``data`` is empty, or when every
        attempt failed with a connection-level error.
    """
    endpoint = f"api/nibrs/{crime_type}/offender/agencies/{agency}/{variable}"
    max_attempts = 5
    wait_time = 5
    # Without a timeout a stalled connection would block the crawl forever.
    request_timeout = 30
    # Build query URL
    query_url = f"{base_url}{endpoint}?api_key={apikey}"

    for current_attempt in range(1, max_attempts + 1):
        # Only the network call is retried; parsing errors still propagate.
        try:
            response = requests.get(query_url, timeout=request_timeout)
        except (ConnectionAbortedError, requests.exceptions.RequestException) as err:
            # requests raises its own exception hierarchy for connection
            # failures and timeouts, so both families are handled here.
            print(err)
            if current_attempt == max_attempts:
                print(f"Attempt # {current_attempt} failed. No attempts left, giving up.")
                break
            print(f"Attempt # {current_attempt} failed. Waiting {wait_time} seconds before next attempt.")
            time.sleep(wait_time)
            continue

        if response.status_code == 404:
            # The API has nothing for this combination.
            return pd.DataFrame()

        records = response.json()['data']
        if len(records) == 0:
            return pd.DataFrame()

        # Build the frame in one step instead of appending row by row.
        df = pd.DataFrame(records)
        return df.loc[df['data_year'] == year]

    return pd.DataFrame()

In [None]:
# Config information.
# Root URL shared by every API request in this notebook. Endpoint paths are
# concatenated directly after it when query URLs are built, so it has to end
# with a trailing slash.
base_url = "https://api.usa.gov/crime/fbi/sapi/"

In [None]:
# Record when the crawl starts so the elapsed time can be reported later.
start_time = datetime.datetime.now()
print(f'Process Start Time: {start_time}')

# AGENCIES
endpoint = "api/agencies"

# Build query URL
query_url = f"{base_url}{endpoint}?api_key={apikey}"

# Call API and get response
response = requests.get(query_url)
# Stop with a clear HTTP error instead of walking an error payload as if it
# were the state -> agency mapping.
response.raise_for_status()
data_json = response.json()
agencies_df = pd.DataFrame()
agencies_idx = 0

crime_type_list = ["property-crime", "violent-crime"]
variable = "age"
year_list = [2017, 2018, 2019]
# Output column for each row position of the per-year offender frame.
# NOTE(review): rows are read by position, so this assumes the API always
# returns these eleven age groups in this order - confirm against the API.
age_columns = [
    'age 0-9',
    'age 10-19',
    'age 20-29',
    'age 30-39',
    'age 40-49',
    'age 50-59',
    'age 60-69',
    'age 70-79',
    'age 80-89',
    'age 90-Older',
    'age Unknown',
]
agency_crimes_stat = pd.DataFrame()
crimes_idx = 0

# The response is walked as {state: {agency key: agency record}}.
for state in data_json:
    for agency in data_json[state]:
        agency_dict = data_json[state][agency]
        agencies_df, agencies_idx = append_dict_to_df(agency_dict, agencies_idx, agencies_df)
        current_agency = agency_dict['ori']
        print(f"Agency {current_agency}")
        for crime_type in crime_type_list:
            for year in year_list:
                sub_df = get_offender_data(crime_type, variable, current_agency, year)
                if len(sub_df) == 0:
                    print(f"          CrimeType: {crime_type}, Year: {year}, No Data")
                    continue

                print(f"          CrimeType: {crime_type}, Year: {year}, Processing Data")
                # The first column of the offender frame holds the counts.
                counts = sub_df.iloc[:, 0]
                # skipna=False keeps a missing count visible in the total,
                # exactly as adding the values one by one would.
                total_crimes = counts.sum(skipna=False)
                print(f"                    Total crimes {total_crimes}, Year: {year}")
                my_dict = {
                    'year': year,
                    "crime_type": crime_type,
                    'agency': current_agency,
                    'agency_name': agency_dict['agency_name'],
                    'county': agency_dict['county_name'],
                    'state': agency_dict['state_abbr'],
                    'state_name': agency_dict['state_name'],
                    'latitude': agency_dict['latitude'],
                    'longitude': agency_dict['longitude'],
                }
                # One column per age group, taken from the matching row.
                my_dict.update(
                    {label: counts.iloc[position] for position, label in enumerate(age_columns)}
                )
                my_dict['total_crimes'] = total_crimes
                agency_crimes_stat, crimes_idx = append_dict_to_df(my_dict, crimes_idx, agency_crimes_stat)

# Count the agencies only after they have been collected; taken before the
# loop the frame is still empty and the count is always 0.
total_agencies = len(agencies_df)

In [None]:
# Local import keeps this cell self-contained.
from pathlib import Path

# Output File (CSV)
output_data_file = "output_data/agencies.csv"
# to_csv does not create missing folders; make sure the target exists so the
# collected data is not lost to an OSError at the very end of the run.
Path(output_data_file).parent.mkdir(parents=True, exist_ok=True)

# Export file as a CSV
agencies_df.to_csv(output_data_file, index=False, header=True)
print(agencies_df)

# Output File (CSV)
output_data_file = "output_data/agency_crimes_stat.csv"
Path(output_data_file).parent.mkdir(parents=True, exist_ok=True)
# Export file as a CSV
agency_crimes_stat.to_csv(output_data_file, index=False, header=True)

# Report how long the whole collection took, measured from start_time.
end_time = datetime.datetime.now()
print(f'Process End Time: {end_time}')
elapsed_time = end_time - start_time
print(f'Elapsed Time: {elapsed_time}')
# Show DataFrame
agency_crimes_stat