# Juvenile Arrests across the Country in 2019


In [None]:
import pandas as pd
# root folder (a relative path) under which the raw/ and processed/ data files are read and written
data_path = 'data'

Mounted at /drive


In [None]:
import pandas as pd
path = f'{data_path}/raw/crime_data/juvenile'

states = ['AL','AK','AZ','AR','CA','CO','CT','DE','FL','GA','HI','ID','IL','IN','IA','KS','KY','LA','ME','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','OH','OK','OR','PA','RI','SC','SD','TN','TX','UT','VT','VA','WA','WV','WI','WY']

# states whose file on disk is the 2018 extract instead of 2019
s_2018_only = ['AZ', 'KS', 'MA', 'NC', 'OK']

# these don't have data in the last 5 years
no_juvy_data = ['ID','KY','LA','NV','NH','ND']

# footer rows of each state's file, keyed by state abbreviation
state_footers = {}

# one cleaned frame per state; combined once after the loop instead of
# growing a DataFrame inside it (DataFrame.append was removed in pandas 2.0)
state_frames = []

for s in states:

  if s in no_juvy_data:
    continue

  print(s)

  y = 2019

  if s in s_2018_only:
    y = 2018
    print(f'{s} is only 2018 data')

  state_path = path + f'/{s}_juvy_{y}.csv'

  try:
    # the first five lines of each file are skipped before the table is read
    state_df = pd.read_csv(state_path, skiprows=5)

    state_df.columns = ['Reporting County', 'Total', '10 Through Upper Age',
          '0 Through Upper Age', 'Delinquency-Petition', 'Delinquency-Non-petition', 'Status-Petition',
          'Status-Non-petition', 'Dependency-Petition', 'Dependency-Non-petition', 'All Reported Cases']

    state_df['STATE']=s

    # the row labelled 'Number of Reported Cases' marks where the footer begins
    footer_start = state_df[state_df['Reporting County']=='Number of Reported Cases'].index[0]

    # save the footers to help build a readme
    footer_df = state_df.iloc[footer_start:]

    state_footers[s] = footer_df

    # keep only the county rows above the footer
    state_df = state_df.iloc[:footer_start]

    state_frames.append(state_df)
  except Exception as e:
    # best effort: report the state that failed and carry on with the rest
    print(f'problem parsing {s} - {e}')

# each state's own row index is kept (no ignore_index), exactly as repeated append did;
# stays None when no state could be loaded
all_states_df = pd.concat(state_frames) if state_frames else None

all_states_df

AL
AK
AZ
AZ is only 2018 data
AR
CA
CO
CT
DE
FL
GA
HI
IL
IN
IA
KS
KS is only 2018 data
ME
MD
MA
MA is only 2018 data
MI
MN
MS
MO
MT
NE
NJ
NM
NY
NC
NC is only 2018 data
OH
OK
OK is only 2018 data
OR
PA
RI
SC
SD
TN
TX
UT
VT
VA
WA
WV
WI
WY


Unnamed: 0,Reporting County,Total,10 Through Upper Age,0 Through Upper Age,Delinquency-Petition,Delinquency-Non-petition,Status-Petition,Status-Non-petition,Dependency-Petition,Dependency-Non-petition,All Reported Cases,STATE
0,Autauga,55900,6200,13000,45,5,8,*,16,--,--,AL
1,Baldwin,223200,22400,47600,399,181,85,66,76,--,--,AL
2,Barbour,24700,2400,5100,52,5,18,62,*,--,--,AL
3,Bibb,22400,2100,4600,68,*,30,*,0,--,--,AL
4,Blount,57800,6200,13300,28,12,22,34,22,--,--,AL
...,...,...,...,...,...,...,...,...,...,...,...,...
18,Sweetwater,42300,5200,11000,69,--,34,--,56,--,--,WY
19,Teton,23500,1900,4200,12,--,*,--,6,--,--,WY
20,Uinta,20200,2700,5800,16,--,*,--,11,--,--,WY
21,Washakie,7800,900,1800,10,--,*,--,12,--,--,WY


# State name lookup

In [None]:
# build a lookup from state abbreviation to full state name
# NOTE(review): this is an absolute Colab path, unlike the other cells which build paths from data_path
df_states = pd.read_csv('/content/drive/Shareddrives/Capstone/notebooks/data/raw/crime_data/state_name_abbreviations.csv')

# pair the two columns up row by row; a repeated abbreviation keeps its last name
state_name_abbreviations = dict(zip(df_states['Abbreviation'], df_states['Name']))

state_name_abbreviations

{'AK': 'Alaska',
 'AL': 'Alabama',
 'AR': 'Arkansas',
 'AZ': 'Arizona',
 'CA': 'California',
 'CO': 'Colorado',
 'CT': 'Connecticut',
 'DE': 'Delaware',
 'FL': 'Florida',
 'GA': 'Georgia',
 'HI': 'Hawaii',
 'IA': 'Iowa',
 'ID': 'Idaho',
 'IL': 'Illinois',
 'IN': 'Indiana',
 'KS': 'Kansas',
 'KY': 'Kentucky',
 'LA': 'Louisiana',
 'MA': 'Massachusetts',
 'MD': 'Maryland',
 'ME': 'Maine',
 'MI': 'Michigan',
 'MN': 'Minnesota',
 'MO': 'Missouri',
 'MS': 'Mississippi',
 'MT': 'Montana',
 'NC': 'North Carolina',
 'ND': 'North Dakota',
 'NE': 'Nebraska',
 'NH': 'New Hampshire',
 'NJ': 'New Jersey',
 'NM': 'New Mexico',
 'NV': 'Nevada',
 'NY': 'New York',
 'OH': 'Ohio',
 'OK': 'Oklahoma',
 'OR': 'Oregon',
 'PA': 'Pennsylvania',
 'RI': 'Rhode Island',
 'SC': 'South Carolina',
 'SD': 'South Dakota',
 'TN': 'Tennessee',
 'TX': 'Texas',
 'UT': 'Utah',
 'VA': 'Virginia',
 'VT': 'Vermont',
 'WA': 'Washington',
 'WI': 'Wisconsin',
 'WV': 'West Virginia',
 'WY': 'Wyoming'}

# Let's use the footers to help build a readme about the data 

In [None]:
# print one markdown section per state, using the footer rows saved while loading
for s in states:

  y = 2018 if s in s_2018_only else 2019

  print(f"## {state_name_abbreviations[s]} [{y}]")

  if s in no_juvy_data:
    print('No data present for this state in the previous five years')
    continue

  # renumber the footer rows from zero so index labels and positions agree
  footer_rows = state_footers[s].reset_index()

  # first footer row whose text contains 'Source:'
  is_source = footer_rows['Reporting County'].str.contains('Source:')
  source_start = footer_rows[is_source].index[0]

  # print that row and everything below it, one line each
  for line in footer_rows.iloc[source_start:]['Reporting County']:
    print(line+"<br>")

  print()

## Alabama [2019]
Source: State of Alabama, Administrative Office of Courts<br>
Mode: Automated data file<br>
1. Delinquency figures are cases disposed.<br>
2. Status figures are cases disposed.<br>
3. Dependency figures are cases disposed.<br>
Suggested citation:Hockenberry, S., Smith, J., and Kang, W. (2021).<br>
Easy Access to State and County Juvenile Court Case Counts, 2019. Online. Available: https://www.ojjdp.gov/ojstatbb/ezaco/.<br>

## Alaska [2019]
Source: Alaska Division of Juvenile Justice<br>
Mode: Automated data file<br>
1. Delinquency figures are cases disposed.<br>
Suggested citation:Hockenberry, S., Smith, J., and Kang, W. (2021).<br>
Easy Access to State and County Juvenile Court Case Counts, 2019. Online. Available: https://www.ojjdp.gov/ojstatbb/ezaco/.<br>

## Arizona [2018]
Source: Supreme Court, State of Arizona, Administrative Office of the Courts<br>
Mode: Automated data file<br>
1. Delinquency figures are cases disposed.<br>
2. Status figures are cases dispose

This helped build the JuvenileCrime.md file about the states. Interestingly, this little side project helped track down some invalid data that was automatically pulled.

# Back to processing data
Need to get the FIPS code

In [None]:
# county/FIPS reference table; its State_Abbreviation_County and FIPS columns are used for the lookup further down
state_county_fips_path = f'{data_path}/processed/state_county_fips.csv'

state_county_fips_df = pd.read_csv(state_county_fips_path)

state_county_fips_df

Unnamed: 0.1,Unnamed: 0,NAME,state,county,County_Name,State_Name,State_Abbreviation,State_Abbreviation_County,FIPS
0,1,"Sebastian County, Arkansas",5,131,SEBASTIAN,Arkansas,AR,AR-SEBASTIAN,5131
1,2,"Sevier County, Arkansas",5,133,SEVIER,Arkansas,AR,AR-SEVIER,5133
2,3,"Sharp County, Arkansas",5,135,SHARP,Arkansas,AR,AR-SHARP,5135
3,4,"Stone County, Arkansas",5,137,STONE,Arkansas,AR,AR-STONE,5137
4,5,"Union County, Arkansas",5,139,UNION,Arkansas,AR,AR-UNION,5139
...,...,...,...,...,...,...,...,...,...
3216,3217,"Eau Claire County, Wisconsin",55,35,EAU CLAIRE,Wisconsin,WI,WI-EAU CLAIRE,55035
3217,3218,"Florence County, Wisconsin",55,37,FLORENCE,Wisconsin,WI,WI-FLORENCE,55037
3218,3219,"Fond du Lac County, Wisconsin",55,39,FOND DU LAC,Wisconsin,WI,WI-FOND DU LAC,55039
3219,3220,"Forest County, Wisconsin",55,41,FOREST,Wisconsin,WI,WI-FOREST,55041


# County cleanup
It turns out there are some discrepancies between how counties are named in the juvy data vs. what we know in the FIPS codes. And, in CT, we need to map the court locations to the counties.

In [None]:
# for i in range(len(all_states_df)):
#   county = all_states_df.iloc[i]['Reporting County'].upper()
#   state= all_states_df.iloc[i]['STATE']

#   state_county = state+'-'+county

#   #FIPS = state_county_fips_df[state_county_fips_df['State_Abbreviation_County']==state_county]['FIPS']

#   #if len(FIPS)==0:
#   #print(f'{state_county} not found')

#  # print(state_county)


In [None]:
# bring in the mapped names: JuvenileName (as spelled in the juvenile data) -> ActualName
juvy_fips_lookup_path = f'{data_path}/raw/crime_data/juvenile/Juvenile_Fips_Lookup.csv'

juvy_fips_lookup_df = pd.read_csv(juvy_fips_lookup_path)

# pair the two columns up row by row; a repeated JuvenileName keeps its last mapping
juvy_fips_lookup = dict(zip(juvy_fips_lookup_df['JuvenileName'], juvy_fips_lookup_df['ActualName']))

juvy_fips_lookup

{'AK-ALEUTIANS EAST': 'AK-ALEUTIANS EAST BOROUGH',
 'AK-ALEUTIANS WEST': 'AK-ALEUTIANS WEST CENSUS AREA',
 'AK-ANCHORAGE': 'AK-ANCHORAGE MUNICIPALITY',
 'AK-BETHEL': 'AK-BETHEL CENSUS AREA',
 'AK-BRISTOL BAY': 'AK-BRISTOL BAY BOROUGH',
 'AK-DENALI': 'AK-DENALI BOROUGH',
 'AK-DILLINGHAM': 'AK-DILLINGHAM CENSUS AREA',
 'AK-FAIRBANKS NORTH STAR': 'AK-FAIRBANKS NORTH STAR BOROUGH',
 'AK-HAINES': 'AK-HAINES BOROUGH',
 'AK-HOONAH-ANGOON': 'AK-HOONAH-ANGOON CENSUS AREA',
 'AK-JUNEAU': 'AK-JUNEAU CITY AND BOROUGH',
 'AK-KENAI PENINSULA': 'AK-KENAI PENINSULA BOROUGH',
 'AK-KETCHIKAN GATEWAY': 'AK-KETCHIKAN GATEWAY BOROUGH',
 'AK-KODIAK ISLAND': 'AK-KODIAK ISLAND BOROUGH',
 'AK-LAKE AND PENINSULA': 'AK-LAKE AND PENINSULA BOROUGH',
 'AK-MATANUSKA-SUSITNA': 'AK-MATANUSKA-SUSITNA BOROUGH',
 'AK-NOME': 'AK-NOME CENSUS AREA',
 'AK-NORTH SLOPE': 'AK-NORTH SLOPE BOROUGH',
 'AK-NORTHWEST ARCTIC': 'AK-NORTHWEST ARCTIC BOROUGH',
 'AK-PETERSBURG': 'AK-PETERSBURG CENSUS AREA',
 'AK-PRINCE OF WALES-HYDER': '

In [None]:

def set_fips_value(row):
  """Return a copy of ``row`` with its ``FIPS`` entry filled in.

  The lookup key is ``'<STATE>-<REPORTING COUNTY upper-cased>'``. If that key
  appears in ``juvy_fips_lookup`` it is first replaced by the mapped name, and
  the result is then looked up in ``fips_by_state_county``.

  Args:
    row: one row of the combined state data; must have 'Reporting County'
      and 'STATE' entries.

  Returns:
    A copy of ``row`` with 'FIPS' set to the matching code, or to NaN when
    no match exists (the unmatched key is printed so it can be investigated).
  """
  county = row['Reporting County'].upper()
  state = row['STATE']

  state_county = state+'-'+county

  # use the mapped name when the juvenile data spells this county differently
  state_county = juvy_fips_lookup.get(state_county, state_county)

  # work on a copy: pandas advises against mutating the object handed to an apply function
  row = row.copy()

  fips_code = fips_by_state_county.get(state_county)

  if fips_code is None:
    print(f'{state_county}-no matching FIPS code')
    # explicit missing value, so every returned row has a FIPS entry
    row['FIPS'] = float('nan')
  else:
    row['FIPS'] = fips_code

  return row

# built once so each row is a dictionary lookup instead of a scan of the whole FIPS table;
# drop_duplicates keeps the first row for a repeated key
fips_by_state_county = (
    state_county_fips_df
    .drop_duplicates(subset='State_Abbreviation_County')
    .set_index('State_Abbreviation_County')['FIPS']
    .to_dict()
)

state_fips_df = all_states_df.apply(set_fips_value,axis=1)

# IN 46102



Unnamed: 0,Reporting County,Total,10 Through Upper Age,0 Through Upper Age,Delinquency-Petition,Delinquency-Non-petition,Status-Petition,Status-Non-petition,Dependency-Petition,Dependency-Non-petition,All Reported Cases,STATE,FIPS
0,Autauga,55900,6200,13000,45,5,8,*,16,--,--,AL,1001
1,Baldwin,223200,22400,47600,399,181,85,66,76,--,--,AL,1003
2,Barbour,24700,2400,5100,52,5,18,62,*,--,--,AL,1005
3,Bibb,22400,2100,4600,68,*,30,*,0,--,--,AL,1007
4,Blount,57800,6200,13300,28,12,22,34,22,--,--,AL,1009
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18,Sweetwater,42300,5200,11000,69,--,34,--,56,--,--,WY,56037
19,Teton,23500,1900,4200,12,--,*,--,6,--,--,WY,56039
20,Uinta,20200,2700,5800,16,--,*,--,11,--,--,WY,56041
21,Washakie,7800,900,1800,10,--,*,--,12,--,--,WY,56043


In [None]:
def get_value(s):
  """Convert one case-count cell into a number.

  Markers used by the data:
    '*'   denotes case counts greater than zero and less than five; counted as 2.5.
    '--'  data are either not available or are not reliable for publication; counted as 0.

  Args:
    s: the cell value. Usually text such as '1,234', '*' or '--', but numbers
      and missing values (None/NaN) are accepted as well.

  Returns:
    The count as a float.

  Raises:
    ValueError: if the text is neither a marker nor a number.
  """
  # NOTE(review): a missing cell is treated like '--' (not available) - confirm this is the intended policy
  if s is None or (isinstance(s, float) and s != s):
    return 0.0

  # a cell that is already numeric needs no text clean-up
  if isinstance(s, (int, float)):
    return float(s)

  # get rid of the comma in the string so the conversion will work
  s = str(s).replace(',','').strip()

  if s == '*':
    return 2.5

  if s == '--':
    return 0.0

  try:
    return float(s)
  except ValueError as e:
    # fail here, naming the offending value, instead of returning None to the caller
    raise ValueError(f'cannot convert case count {s!r} to a number') from e

def add_up_arrests(row):
  """Set ``row['Total_Juvenile_Arrests']`` to the sum of the delinquency and status counts.

  Dependency cases cover neglect or inadequate care on the part of parents or
  guardians (abandonment or desertion; abuse or cruel treatment; improper or
  inadequate conditions in the home; insufficient care or support resulting
  from death, absence, or physical or mental incapacity of parents), so they
  are not counted against overpolicing and are left out of the total.

  Args:
    row: a row holding the four petition / non-petition count cells.

  Returns:
    The same ``row`` with 'Total_Juvenile_Arrests' filled in.
  """
  counted_columns = ('Delinquency-Petition', 'Delinquency-Non-petition',
                     'Status-Petition', 'Status-Non-petition')

  # convert all four cells first, then add them up in column order
  dp, dnp, sp, snp = [get_value(row[column]) for column in counted_columns]

  row['Total_Juvenile_Arrests'] = dp + dnp + sp + snp

  return row

# give every row a zero total first, then let add_up_arrests fill in the real value row by row
state_fips_df = (
    state_fips_df
    .assign(Total_Juvenile_Arrests=0)
    .apply(add_up_arrests, axis=1)
)

 

Unnamed: 0,Reporting County,Total,10 Through Upper Age,0 Through Upper Age,Delinquency-Petition,Delinquency-Non-petition,Status-Petition,Status-Non-petition,Dependency-Petition,Dependency-Non-petition,All Reported Cases,STATE,FIPS,Total_Juvenile_Arrests
0,Autauga,55900,6200,13000,45,5,8,*,16,--,--,AL,1001,60.5
1,Baldwin,223200,22400,47600,399,181,85,66,76,--,--,AL,1003,731.0
2,Barbour,24700,2400,5100,52,5,18,62,*,--,--,AL,1005,137.0
3,Bibb,22400,2100,4600,68,*,30,*,0,--,--,AL,1007,103.0
4,Blount,57800,6200,13300,28,12,22,34,22,--,--,AL,1009,96.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18,Sweetwater,42300,5200,11000,69,--,34,--,56,--,--,WY,56037,103.0
19,Teton,23500,1900,4200,12,--,*,--,6,--,--,WY,56039,14.5
20,Uinta,20200,2700,5800,16,--,*,--,11,--,--,WY,56041,18.5
21,Washakie,7800,900,1800,10,--,*,--,12,--,--,WY,56043,12.5


# Clean up CT as a last step

In [None]:
# since this is just combined with the larger data, it doesn't have to be too rich. Just enough to lookup for the join on the FIPS code
# rows that share a FIPS code are summed into a single total for that code
state_fips_grouped_df = state_fips_df.groupby('FIPS')['Total_Juvenile_Arrests'].sum()
state_fips_grouped_df 

FIPS
1001      60.5
1003     731.0
1005     137.0
1007     103.0
1009      96.0
         ...  
56037    103.0
56039     14.5
56041     18.5
56043     12.5
56045      7.5
Name: Total_Juvenile_Arrests, Length: 2832, dtype: float64

In [None]:
# sanity check: report every FIPS code whose grouped total is not matched by exactly one source row

double_check_df = state_fips_grouped_df.reset_index()

for _, grouped_row in double_check_df.iterrows():
  fips_code = grouped_row['FIPS']
  arrests = grouped_row['Total_Juvenile_Arrests']

  same_fips = state_fips_df['FIPS']==fips_code
  same_total = state_fips_df['Total_Juvenile_Arrests']==arrests
  check_df = state_fips_df[same_fips & same_total]

  # anything other than one matching row means the total was built from several rows (or none)
  if len(check_df) != 1:
    print(f'Need to check {fips_code}')

Need to check 9001.0
Need to check 9003.0
Need to check 9009.0


In [None]:
# show the source rows behind the three FIPS codes flagged by the check
state_fips_df[state_fips_df['FIPS'].isin([9001, 9003, 9009])]

Unnamed: 0,Reporting County,Total,10 Through Upper Age,0 Through Upper Age,Delinquency-Petition,Delinquency-Non-petition,Status-Petition,Status-Non-petition,Dependency-Petition,Dependency-Non-petition,All Reported Cases,STATE,FIPS,Total_Juvenile_Arrests
0,Bridgeport,--,--,--,713,602,10,90,--,--,--,CT,9001,1415.0
1,Hartford,--,--,--,530,406,*,27,--,--,--,CT,9003,965.5
3,New Britain,--,--,--,463,356,14,45,--,--,--,CT,9003,878.0
4,New Haven,--,--,--,667,409,*,18,--,--,--,CT,9009,1096.5
6,Stamford,--,--,--,209,170,0,18,--,--,--,CT,9001,397.0
8,Waterbury,--,--,--,762,423,0,37,--,--,--,CT,9009,1222.0


Looks like the cities we would expect from CT

In [None]:
# save the per-FIPS totals; the FIPS index is written out as the first column
state_fips_grouped_df.to_csv(f'{data_path}/processed/juvenile_arrests_fips.csv')