In [169]:
# Import required libraries and dependencies
import pandas as pd
import numpy as np
from prophet import Prophet
import datetime as dt
import scipy.stats as sts
import hvplot.pandas
from methods.dataframe_methods import clean_columns 


US crime rates 1960-2014

In [170]:
# Load the US Crime Rates 1960-2014 data set into a pandas DataFrame.
us_crime_1960_2014_df = pd.read_csv("resources/US_Crime_Rates_1960_2014.csv", low_memory=False)

# Normalise the column labels to snake_case: lower-case, spaces replaced by underscores.
us_crime_1960_2014_df.columns = (
    us_crime_1960_2014_df.columns
    .str.lower()
    .str.replace(' ', '_')
)

# Show every column when a frame is rendered (no column truncation).
pd.set_option("display.max_columns", None)

# Preview the first rows, then print the column/dtype summary.
# `DataFrame.info()` prints its report itself and returns None, so it must not
# be wrapped in `display()` -- doing so renders a stray "None" under the report.
display(us_crime_1960_2014_df.head())
us_crime_1960_2014_df.info()

Unnamed: 0,year,population,total,violent,property,murder,forcible_rape,robbery,aggravated_assault,burglary,larceny_theft,vehicle_theft
0,1960,179323175,3384200,288460,3095700,9110,17190,107840,154320,912100,1855400,328200
1,1961,182992000,3488000,289390,3198600,8740,17220,106670,156760,949600,1913000,336000
2,1962,185771000,3752200,301510,3450700,8530,17550,110860,164570,994300,2089600,366800
3,1963,188483000,4109500,316970,3792500,8640,17650,116470,174210,1086400,2297800,408300
4,1964,191141000,4564600,364220,4200400,9360,21420,130390,203050,1213200,2514400,472800


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype
---  ------              --------------  -----
 0   year                55 non-null     int64
 1   population          55 non-null     int64
 2   total               55 non-null     int64
 3   violent             55 non-null     int64
 4   property            55 non-null     int64
 5   murder              55 non-null     int64
 6   forcible_rape       55 non-null     int64
 7   robbery             55 non-null     int64
 8   aggravated_assault  55 non-null     int64
 9   burglary            55 non-null     int64
 10  larceny_theft       55 non-null     int64
 11  vehicle_theft       55 non-null     int64
dtypes: int64(12)
memory usage: 5.3 KB


None

In [171]:
# Work on an independent (deep) copy so the frame loaded from the CSV stays untouched.
us_crime_1960_2014_df_copy = us_crime_1960_2014_df.copy(deep=True)

In [267]:
# Run the project's `clean_columns` helper on the working copy, then give the
# two category totals clearer names ("violent" / "property" ->
# "total_violent" / "total_nonviolent").
# NOTE(review): `clean_columns` lives in methods/dataframe_methods and is not
# shown here -- confirm what it changes and that it returns a new DataFrame.
cleaned_crime_1960_2014_copy = (
    clean_columns(us_crime_1960_2014_df_copy)
    .rename(columns={"violent": "total_violent", "property": "total_nonviolent"})
)

# Only the last expression of a cell is rendered, so preview once, at the end.
cleaned_crime_1960_2014_copy.head()

Unnamed: 0,year,population,total,total_violent,total_nonviolent,murder,forcible_rape,robbery,aggravated_assault,burglary,larceny_theft,vehicle_theft
0,1960,179323175,3384200,288460,3095700,9110,17190,107840,154320,912100,1855400,328200
1,1961,182992000,3488000,289390,3198600,8740,17220,106670,156760,949600,1913000,336000
2,1962,185771000,3752200,301510,3450700,8530,17550,110860,164570,994300,2089600,366800
3,1963,188483000,4109500,316970,3792500,8640,17650,116470,174210,1086400,2297800,408300
4,1964,191141000,4564600,364220,4200400,9360,21420,130390,203050,1213200,2514400,472800


In [283]:
# Index the cleaned frame by `year` (set_index returns a new frame; the
# cleaned frame itself keeps its original index).
crime_rates_df = cleaned_crime_1960_2014_copy.set_index(keys='year')

# Preview the most recent years
crime_rates_df.tail()

Unnamed: 0_level_0,population,total,total_violent,total_nonviolent,murder,forcible_rape,robbery,aggravated_assault,burglary,larceny_theft,vehicle_theft
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2010,309330219,10363873,1251248,9112625,14772,85593,369089,781844,2168457,6204601,739565
2011,311587816,10258774,1206031,9052743,14661,84175,354772,752423,2185140,6151095,716508
2012,313873685,10219059,1217067,9001992,14866,85141,355051,762009,2109932,6168874,723186
2013,316497531,9850445,1199684,8650761,14319,82109,345095,726575,1931835,6018632,700294
2014,318857056,9475816,1197987,8277829,14249,84041,325802,741291,1729806,5858496,689527


In [269]:
# Express every column in millions. `year` is the index, so it is not scaled.
# The scaled frame is rebuilt from the unscaled `cleaned_crime_1960_2014_copy`
# instead of from `crime_rates_df` itself: a self-referential
# `crime_rates_df = crime_rates_df / 1000000` divides again on every re-run of
# this cell, silently shrinking the values by another factor of a million.
crime_rates_df = cleaned_crime_1960_2014_copy.set_index('year') / 1_000_000
crime_rates_df.head()

Unnamed: 0_level_0,population,total,total_violent,total_nonviolent,murder,forcible_rape,robbery,aggravated_assault,burglary,larceny_theft,vehicle_theft
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1960,179.323175,3.3842,0.28846,3.0957,0.00911,0.01719,0.10784,0.15432,0.9121,1.8554,0.3282
1961,182.992,3.488,0.28939,3.1986,0.00874,0.01722,0.10667,0.15676,0.9496,1.913,0.336
1962,185.771,3.7522,0.30151,3.4507,0.00853,0.01755,0.11086,0.16457,0.9943,2.0896,0.3668
1963,188.483,4.1095,0.31697,3.7925,0.00864,0.01765,0.11647,0.17421,1.0864,2.2978,0.4083
1964,191.141,4.5646,0.36422,4.2004,0.00936,0.02142,0.13039,0.20305,1.2132,2.5144,0.4728


In [270]:
# Keep the overall total and the individual offence columns only: drop the
# population figure and the violent / non-violent totals.
crime_rates = crime_rates_df.drop(
    ["population", "total_violent", "total_nonviolent"],
    axis="columns",
)
crime_rates.head()

Unnamed: 0_level_0,total,murder,forcible_rape,robbery,aggravated_assault,burglary,larceny_theft,vehicle_theft
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1960,3.3842,0.00911,0.01719,0.10784,0.15432,0.9121,1.8554,0.3282
1961,3.488,0.00874,0.01722,0.10667,0.15676,0.9496,1.913,0.336
1962,3.7522,0.00853,0.01755,0.11086,0.16457,0.9943,2.0896,0.3668
1963,4.1095,0.00864,0.01765,0.11647,0.17421,1.0864,2.2978,0.4083
1964,4.5646,0.00936,0.02142,0.13039,0.20305,1.2132,2.5144,0.4728


In [271]:
# Line chart of the crime series in `crime_rates`, plotted against the `year` index
crime_rates.hvplot.line(
    xlabel="Year", ylabel="Number of Crimes (in Millions)",
    title="US Crime Rates 1960-2014",
    width=1000, height=500,
    grid=True,
)

In [272]:
# Select the non-violent (property) crime columns: the category total plus its
# three component offences.
nonviolent_crimes = crime_rates_df[["total_nonviolent", "burglary", "larceny_theft", "vehicle_theft"]]

# Preview the selection (a single call -- only the last expression of a cell is rendered)
nonviolent_crimes.head()


Unnamed: 0_level_0,total_nonviolent,burglary,larceny_theft,vehicle_theft
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1960,3.0957,0.9121,1.8554,0.3282
1961,3.1986,0.9496,1.913,0.336
1962,3.4507,0.9943,2.0896,0.3668
1963,3.7925,1.0864,2.2978,0.4083
1964,4.2004,1.2132,2.5144,0.4728


In [273]:
# Line chart of the non-violent crime columns, plotted against the `year` index
nonviolent_crimes.hvplot.line(
    xlabel="Year", ylabel="Number of Crimes (in Millions)",
    title="Non-Violent Crimes 1960-2014",
    width=1000, height=400,
    grid=True,
)

In [274]:
# Single-column frame holding only the non-violent crime total
# (list selector, so the result is a DataFrame rather than a Series).
total_nonviolent_crimes = crime_rates_df.loc[:, ["total_nonviolent"]]
total_nonviolent_crimes.head()

Unnamed: 0_level_0,total_nonviolent
year,Unnamed: 1_level_1
1960,3.0957
1961,3.1986
1962,3.4507
1963,3.7925
1964,4.2004


In [275]:
# Line chart of the non-violent crime total by year.
# The y-axis label is spelled "Number of Crimes" (plural) to match every other
# chart in this notebook.
total_nonviolent_crimes.hvplot.line(
    title="Total Non-Violent Crimes 1960-2014",
    x='year',
    xlabel="Year",
    ylabel="Number of Crimes (in Millions)",
    width=1000,
    height=400,
    grid=True
)

In [276]:
# Select the violent crime columns: the category total plus its four
# component offences.
violent_crimes = crime_rates_df.loc[
    :,
    ["total_violent", "murder", "forcible_rape", "robbery", "aggravated_assault"],
]
violent_crimes.head()

Unnamed: 0_level_0,total_violent,murder,forcible_rape,robbery,aggravated_assault
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1960,0.28846,0.00911,0.01719,0.10784,0.15432
1961,0.28939,0.00874,0.01722,0.10667,0.15676
1962,0.30151,0.00853,0.01755,0.11086,0.16457
1963,0.31697,0.00864,0.01765,0.11647,0.17421
1964,0.36422,0.00936,0.02142,0.13039,0.20305


In [277]:
# Line chart of the violent crime columns by year.
# The chart kind is spelled out with `.hvplot.line(...)`, as in the other
# plotting cells, rather than relying on the default kind of a bare `.hvplot(...)`.
violent_crimes.hvplot.line(
    title="Violent Crimes 1960-2014",
    x='year',
    xlabel="Year",
    ylabel="Number of Crimes (in Millions)",
    width=1000,
    height=400,
    grid=True
)

In [278]:
# Single-column frame holding only the violent crime total
# (list selector, so the result is a DataFrame rather than a Series).
total_violent_crimes = crime_rates_df.loc[:, ["total_violent"]]
total_violent_crimes.head()

Unnamed: 0_level_0,total_violent
year,Unnamed: 1_level_1
1960,0.28846
1961,0.28939
1962,0.30151
1963,0.31697
1964,0.36422


In [279]:

# Line chart of the violent crime total by year
total_violent_crimes.hvplot.line(
    x='year',
    xlabel="Year", ylabel="Number of Crimes (in Millions)",
    title="Total Violent Crimes 1960-2014",
    width=1000, height=400,
    grid=True,
)
    

In [280]:
# Select the two category totals (violent and non-violent) side by side
# for comparison.
total_crimes = crime_rates_df.loc[:, ["total_violent", "total_nonviolent"]]
total_crimes.head()


Unnamed: 0_level_0,total_violent,total_nonviolent
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1960,0.28846,3.0957
1961,0.28939,3.1986
1962,0.30151,3.4507
1963,0.31697,3.7925
1964,0.36422,4.2004


In [281]:
# Line chart comparing the violent and non-violent crime totals by year
total_crimes.hvplot.line(
    x='year',
    xlabel="Year", ylabel="Number of Crimes (in Millions)",
    title="Total Violent vs Non-Violent Crimes 1960-2014",
    width=1000, height=400,
    grid=True,
)

In [187]:
# Important considerations:
# Rape is much less likely to be reported than other violent crimes. There is a small rise in 1980, which
# parallels the introduction of the term 'date rape'.

# While I could not find exact data on how likely minorities are to report crimes, I would imagine that it was
# considerably less likely for minorities to report crimes in the 1960s, which may contribute in some way to the
# substantial rise in crime over the years. Consider that minorities becoming more likely to report crime may not
# have been an overnight change but rather a change that took a long time to occur, and at a different pace in
# different areas of the country.
# Again, I could not find data on this, nor have I read any articles; I am just speculating that this may be one
# of many different factors contributing to the crime rate rise.



