# Juvenile Arrests across the Country in 2019

---




In [None]:
import os

# The presence of COLAB_GPU in the environment is used as the signal that the
# notebook is running on Colab; anywhere else the data is read from ./data.
if 'COLAB_GPU' not in os.environ:
    data_path = 'data'
else:
    # On Colab the data sits on a shared Google Drive, so mount it first.
    from google.colab import drive
    drive.mount('/drive')
    data_path = '/drive/Shared drives/Capstone/notebooks/data'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

# Every input and output location used by this notebook, rooted at data_path.

# Directory holding one juvenile CSV per state.
RAW_JV_DATA_PATH = '{}/raw/crime_data/juvenile'.format(data_path)
# State abbreviation -> state name table.
STATE_NAME_ABBREVIATIONS = '{}/raw/crime_data/state_name_abbreviations.csv'.format(data_path)
# County FIPS reference table.
STATE_COUNTY_FIPS = '{}/processed/state_county_fips.csv'.format(data_path)
# Mapping from county names used in the juvenile data to the names in the FIPS table.
JV_FIPS_LOOKUP = '{}/Juvenile_Fips_Lookup.csv'.format(RAW_JV_DATA_PATH)
# Where the per-FIPS totals are written at the end of the notebook.
JV_ARRESTS_FIPS_OUTPUT = '{}/processed/juvenile_arrests_fips.csv'.format(data_path)

In [None]:
path = RAW_JV_DATA_PATH

states = ['AL','AK','AZ','AR','CA','CO','CT','DE','FL','GA','HI','ID','IL','IN','IA','KS','KY','LA','ME','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','OH','OK','OR','PA','RI','SC','SD','TN','TX','UT','VT','VA','WA','WV','WI','WY']

# states whose file is for 2018 instead of 2019
s_2018_only = ['AZ', 'KS', 'MA', 'NC', 'OK']

# these dont have data in the last 5 years
no_juvy_data = ['ID','KY','LA','NV','NH','ND']

# Column names applied to every state file (the first 5 lines of each raw CSV
# are skipped on read and the parsed header is replaced with these).
JV_COLUMNS = ['Reporting County', 'Total', '10 Through Upper Age',
          '0 Through Upper Age', 'Delinquency-Petition', 'Delinquency-Non-petition', 'Status-Petition',
          'Status-Non-petition', 'Dependency-Petition', 'Dependency-Non-petition', 'All Reported Cases']

# footer rows of each state file, keyed by state abbreviation
state_footers = {}

# Per-state frames are collected here and concatenated once after the loop.
# DataFrame.append was removed in pandas 2.0, and calling it inside the loop
# re-copied the whole accumulated frame on every iteration.
state_frames = []

for s in states:

  if s in no_juvy_data:
    continue

  print(s)

  y = 2019

  if s in s_2018_only:
    y = 2018
    print(f'{s} is only 2018 data')

  state_path = path + f'/{s}_juvy_{y}.csv'

  # Loading is best-effort: a state that cannot be read or parsed is reported
  # and skipped so the remaining states are still processed.
  try:
    state_df = pd.read_csv(state_path, skiprows=5)

    state_df.columns = JV_COLUMNS

    state_df['STATE']=s

    # everything from the 'Number of Reported Cases' row down is footer text, not county data
    footer_start = state_df[state_df['Reporting County']=='Number of Reported Cases'].index[0]

    # save the footers to help build a readme
    footer_df = state_df.iloc[footer_start:]

    state_footers[s] = footer_df

    state_df = state_df.iloc[:footer_start]

    state_frames.append(state_df)
  except Exception as e:
    print(f'problem parsing {s} - {e}')

# Row labels are kept as-is (no ignore_index), matching what chained appends
# produced. Stays None when no state could be loaded.
all_states_df = pd.concat(state_frames) if state_frames else None

all_states_df

# State name lookup

In [None]:
# get a lookup of the state names
df_states = pd.read_csv(STATE_NAME_ABBREVIATIONS)

# abbreviation -> full state name, built by pairing the two columns row by row
state_name_abbreviations = dict(zip(df_states['Abbreviation'], df_states['Name']))

state_name_abbreviations

# Let's use the footers to help build a readme about the data 

In [None]:
#let's build a readme from this
for s in states:

  y = 2018 if s in s_2018_only else 2019

  print(f"## {state_name_abbreviations[s]} [{y}]")

  if s in no_juvy_data:
    print('No data present for this state in the previous five years')
    continue

  # A state whose file failed to load has no saved footer; say so instead of
  # stopping the whole readme with a KeyError.
  if s not in state_footers:
    print('No footer information was parsed for this state')
    print()
    continue

  # only the first column carries the footer text; renumber it from 0
  footer_lines = state_footers[s]['Reporting County'].reset_index(drop=True)

  # na=False: empty footer cells are NaN and would otherwise put NA into the
  # mask, which cannot be used for selection
  is_source = footer_lines.str.contains('Source:', regex=False, na=False)

  if not is_source.any():
    print('No source information found in the footer')
    print()
    continue

  # position of the first line mentioning 'Source:'
  source_start = is_source[is_source].index[0]

  # print that line and everything below it
  for line in footer_lines.iloc[source_start:]:
    if pd.isna(line):
      # nothing to print for an empty cell
      continue

    print(f'{line}<br>')

  print()

This helped build the JuvenileCrime.md file about the states. Interestingly, this little side project helped track down some invalid data that was automatically pulled.

# Back to processing data
Need to get the FIPS code

In [None]:
# Load the county FIPS reference table. Later cells match rows on its
# 'State_Abbreviation_County' column and read the code from 'FIPS'.
state_county_fips_path = STATE_COUNTY_FIPS

state_county_fips_df = pd.read_csv(state_county_fips_path)

# display the table for a visual check
state_county_fips_df

# County cleanup
It turns out there are some discrepancies between how counties are named in the juvy data vs. what we know in the FIPS codes. And, in CT, we need to map the court locations to the counties.

In [None]:
# bring in the mapped names
juvy_fips_lookup_path = JV_FIPS_LOOKUP

juvy_fips_lookup_df = pd.read_csv(juvy_fips_lookup_path)

# name as it appears in the juvenile data -> name to look up in the FIPS table
juvy_fips_lookup = dict(
    zip(juvy_fips_lookup_df['JuvenileName'], juvy_fips_lookup_df['ActualName'])
)

juvy_fips_lookup

In [None]:

def set_fips_value(row):
  """Return a copy of ``row`` with a ``FIPS`` entry for its state and county.

  The lookup key is ``'<STATE>-<REPORTING COUNTY upper-cased>'``. If that key
  is present in ``juvy_fips_lookup`` the mapped name is used instead. The key
  is then matched against ``State_Abbreviation_County`` in
  ``state_county_fips_df`` and the first matching ``FIPS`` value is taken.

  When nothing matches, the key is printed and ``FIPS`` is set to NaN, so the
  column is always present in the result.
  """
  # apply() does not support functions that mutate the row it passes in,
  # so work on a copy
  row = row.copy()

  county = row['Reporting County'].upper()
  state = row['STATE']

  state_county = state+'-'+county

  # use the mapped name when the juvenile data spells the county differently
  state_county = juvy_fips_lookup.get(state_county, state_county)

  FIPS = state_county_fips_df[state_county_fips_df['State_Abbreviation_County']==state_county]['FIPS']

  try:
    row['FIPS'] = FIPS.values[0]
  except IndexError as e:
    # no row in the FIPS table matched this key
    print(f'{state_county}-{e}')
    row['FIPS'] = float('nan')

  return row

# The FIPS column is created by set_fips_value itself, so no column needs to
# be pre-initialised on any frame before the apply.
state_fips_df = all_states_df.apply(set_fips_value,axis=1)

In [None]:
# '*' denotes case counts greater than zero and less than five, so the
# midpoint of that range is used as the estimate.
SMALL_COUNT_ESTIMATE = 2.5

def get_value(s):
  """Convert one case-count cell from the juvenile data to a number.

  Handles the markers used in the source files:
    '*'   case counts greater than zero and less than five -> 2.5
    '--'  data not available or not reliable for publication -> 0

  Thousands separators are removed and surrounding whitespace is ignored.
  Missing cells (None / NaN) are counted as 0, the same as '--'. Values that
  are already numeric are returned as floats. Anything that still cannot be
  converted is reported with a printed message and counted as 0, so the
  caller always receives a number.
  """
  if isinstance(s, str):
    # get rid of the comma in the string so the conversion will work
    s = s.replace(',','').strip()

    if s == '*':
      return SMALL_COUNT_ESTIMATE

    if s == '--':
      return 0
  elif pd.isna(s):
    # an empty cell carries no count
    return 0

  try:
    return float(s)
  except (TypeError, ValueError) as e:
    print(f'could not convert {s!r} to a number, counting it as 0 - {e}')
    return 0

def add_up_arrests(row):
  """Store the sum of the delinquency and status counts on the row.

  The four counted columns are converted with get_value and their sum is
  written to row['Total_Juvenile_Arrests']; the row is then returned.

  Dependency cases are left out on purpose. They cover neglect or inadequate
  care on the part of parents or guardians (abandonment or desertion, abuse or
  cruel treatment, improper or inadequate conditions in the home, insufficient
  care or support resulting from death, absence, or physical or mental
  incapacity of parents), so they are not counted against overpolicing.
  """
  counted_columns = (
      'Delinquency-Petition',
      'Delinquency-Non-petition',
      'Status-Petition',
      'Status-Non-petition',
  )

  # convert all four cells first, then add them in column order
  counts = [get_value(row[column]) for column in counted_columns]

  row['Total_Juvenile_Arrests'] = (counts[0]+counts[1]+counts[2]+counts[3])

  return row

# start every row's total at 0, then let add_up_arrests fill it in row by row
state_fips_df = (
    state_fips_df
    .assign(Total_Juvenile_Arrests=0)
    .apply(add_up_arrests, axis=1)
)

 

# Clean up CT as a last step

In [None]:
# This only gets joined into the larger data on the FIPS code, so one total
# per FIPS is all that is needed here.
state_fips_grouped_df = (
    state_fips_df
    .groupby('FIPS')['Total_Juvenile_Arrests']
    .sum()
)
state_fips_grouped_df

In [None]:
# lets just make sure that is accurate

# grouped totals as a two-column frame: FIPS, Total_Juvenile_Arrests
double_check_df = state_fips_grouped_df.reset_index()

for _, grouped_row in double_check_df.iterrows():
  fips_code = grouped_row['FIPS']
  arrests = grouped_row['Total_Juvenile_Arrests']

  same_fips = state_fips_df['FIPS'] == fips_code
  same_total = state_fips_df['Total_Juvenile_Arrests'] == arrests

  # Anything other than exactly one source row holding the grouped total means
  # the grouped value differs from a single county row, so flag it for a look.
  if (same_fips & same_total).sum() != 1:
    print(f'Need to check {fips_code}')

Need to check 9001.0
Need to check 9003.0
Need to check 9009.0


In [None]:
# show the source rows behind the three FIPS codes flagged by the check above
state_fips_df[state_fips_df['FIPS'].isin([9001, 9003, 9009])]

Looks like the cities we would expect from CT

In [None]:
# Write the per-FIPS totals to the processed-data location. The grouped result
# is indexed by FIPS, so the code is written alongside each total.
state_fips_grouped_df.to_csv(JV_ARRESTS_FIPS_OUTPUT)