In [3]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st
from scipy.stats import linregress
import random
import os

# Location of the raw FBI crime extract, relative to the notebook's working directory
file_to_load = "fullFBIdataset.csv"

# Parse the CSV; `crime_data` keeps the frame exactly as it was read from disk
crime_data = pd.read_csv(file_to_load)

# Working frame: discard every row that has a missing value in any column
crimeDF = crime_data.dropna()
crimeDF

Unnamed: 0,key,id,name,type,variable,value,year,crime_type,place_name
0,1600000US0100124--2014--Violent crime--count,1600000US0100124,"Abbeville, Alabama",place,count,8.000000,2014,Violent crime,"Abbeville, AL"
1,1600000US0100124--2014--Violent crime--rate,1600000US0100124,"Abbeville, Alabama",place,rate,302.686341,2014,Violent crime,"Abbeville, AL"
2,1600000US0100124--2014--Murder and nonnegligen...,1600000US0100124,"Abbeville, Alabama",place,count,0.000000,2014,Murder and nonnegligent manslaughter,"Abbeville, AL"
3,1600000US0100124--2014--Murder and nonnegligen...,1600000US0100124,"Abbeville, Alabama",place,rate,0.000000,2014,Murder and nonnegligent manslaughter,"Abbeville, AL"
4,1600000US0100124--2014--Rape (revised definiti...,1600000US0100124,"Abbeville, Alabama",place,count,1.000000,2014,Rape (revised definition),"Abbeville, AL"
...,...,...,...,...,...,...,...,...,...
2512718,0500000US53031--2018--All Crimes--count,0500000US53031,"Jefferson County, Washington",county,count,537.000000,2018,All Crimes,Jefferson
2512903,0500000US53007--2018--All Crimes--rate,0500000US53007,"Chelan County, Washington",county,rate,1273.809681,2018,All Crimes,Chelan
2512904,0500000US53007--2018--All Crimes--count,0500000US53007,"Chelan County, Washington",county,count,965.000000,2018,All Crimes,Chelan
2512999,0500000US53073--2018--All Crimes--rate,0500000US53073,"Whatcom County, Washington",county,rate,1315.425345,2018,All Crimes,Whatcom


In [6]:
# Keep only the rows whose "name" label is exactly "New York, New York".
NYcrimeDF = crimeDF.loc[crimeDF["name"] == "New York, New York"]

# Save the subset for reuse outside the notebook.  to_csv does not create missing
# parent folders, so make sure the output directory exists before writing;
# exist_ok=True keeps the cell safe to re-run.
filePath = os.path.join("Output CSVs", "NYcrime.csv")
os.makedirs(os.path.dirname(filePath), exist_ok=True)

# index=False: the row labels are not written to the file.
NYcrimeDF.to_csv(filePath, index=False)
NYcrimeDF

Unnamed: 0,key,id,name,type,variable,value,year,crime_type,place_name
19210,1600000US3651000--2008--Forcible rape--rate,1600000US3651000,"New York, New York",place,rate,10.664973,2008,Forcible rape,"New York, NY"
26840,1600000US3651000--2007--Murder and nonnegligen...,1600000US3651000,"New York, New York",place,count,496.000000,2007,Murder and nonnegligent manslaughter,"New York, NY"
48524,1600000US3651000--2007--Motor vehicle theft--rate,1600000US3651000,"New York, New York",place,rate,161.261362,2007,Motor vehicle theft,"New York, NY"
54679,1600000US3651000--2009--Larceny--rate,1600000US3651000,"New York, New York",place,rate,1339.450609,2009,Larceny,"New York, NY"
55371,1600000US3651000--2010--Motor vehicle theft--c...,1600000US3651000,"New York, New York",place,count,10319.000000,2010,Motor vehicle theft,"New York, NY"
...,...,...,...,...,...,...,...,...,...
2147027,1600000US3651000--2015--Rape (revised definiti...,1600000US3651000,"New York, New York",place,count,2244.000000,2015,Rape (revised definition),"New York, NY"
2147028,1600000US3651000--2015--Rape (revised definiti...,1600000US3651000,"New York, New York",place,count,2244.000000,2015,Rape (revised definition),"New York, NY"
2155198,1600000US3651000--2007--Murder and nonnegligen...,1600000US3651000,"New York, New York",place,rate,6.033919,2007,Murder and nonnegligent manslaughter,"New York, NY"
2155825,1600000US3651000--2013--Murder and nonnegligen...,1600000US3651000,"New York, New York",place,rate,3.989935,2013,Murder and nonnegligent manslaughter,"New York, NY"


In [9]:
# New York rows whose "variable" is "rate".
# The mask is built from NYcrimeDF itself (not from the full crimeDF) so the filter
# does not depend on index alignment with another frame and matches the way
# NYcrimeCounts is derived.
NYcrimeRates = NYcrimeDF.loc[NYcrimeDF["variable"] == "rate"]
NYcrimeRates

Unnamed: 0,key,id,name,type,variable,value,year,crime_type,place_name
19210,1600000US3651000--2008--Forcible rape--rate,1600000US3651000,"New York, New York",place,rate,10.664973,2008,Forcible rape,"New York, NY"
48524,1600000US3651000--2007--Motor vehicle theft--rate,1600000US3651000,"New York, New York",place,rate,161.261362,2007,Motor vehicle theft,"New York, NY"
54679,1600000US3651000--2009--Larceny--rate,1600000US3651000,"New York, New York",place,rate,1339.450609,2009,Larceny,"New York, NY"
71724,1600000US3651000--2009--Violent crime--rate,1600000US3651000,"New York, New York",place,rate,551.809465,2009,Violent crime,"New York, NY"
72984,1600000US3651000--2011--Murder and nonnegligen...,1600000US3651000,"New York, New York",place,rate,6.271406,2011,Murder and nonnegligent manslaughter,"New York, NY"
...,...,...,...,...,...,...,...,...,...
2056107,1600000US3651000--2016--Robbery--rate,1600000US3651000,"New York, New York",place,rate,181.442169,2016,Robbery,"New York, NY"
2103138,1600000US3651000--2016--Murder and nonnegligen...,1600000US3651000,"New York, New York",place,rate,3.910392,2016,Murder and nonnegligent manslaughter,"New York, NY"
2155198,1600000US3651000--2007--Murder and nonnegligen...,1600000US3651000,"New York, New York",place,rate,6.033919,2007,Murder and nonnegligent manslaughter,"New York, NY"
2155825,1600000US3651000--2013--Murder and nonnegligen...,1600000US3651000,"New York, New York",place,rate,3.989935,2013,Murder and nonnegligent manslaughter,"New York, NY"


In [11]:
# Distinct crime_type labels found in the New York rate rows, in order of first appearance
crimeTypes = NYcrimeRates.loc[:, "crime_type"].unique()
crimeTypes

array(['Forcible rape', 'Motor vehicle theft', 'Larceny', 'Violent crime',
       'Murder and nonnegligent manslaughter', 'Aggravated assault',
       'All Crimes', 'Burglary', 'Robbery', 'Property crime',
       'Rape (revised definition)', 'Rape (legacy definition)'],
      dtype=object)

In [24]:
# New York rows whose "variable" is "count".
NYcrimeCounts = NYcrimeDF.loc[NYcrimeDF["variable"] == "count"]

# Counts for a single year (2016), with fully identical rows collapsed, so the
# crime categories can be compared side by side.
NYcounts2016 = NYcrimeCounts.loc[NYcrimeCounts["year"] == 2016].drop_duplicates()

# Backward-compatible alias: this frame was originally called NYcounts2015 even though
# it has always been filtered to year == 2016.  Prefer NYcounts2016 in new code.
NYcounts2015 = NYcounts2016

# Finding: "Violent crime" is already an aggregation of the violent categories, so we
#   can just use it per year, per city going forward.  For 2016:
#   Robbery 15,544 + Aggravated assault 30,873 + Rape (revised definition) 2,372
#   + Murder and nonnegligent manslaughter 335 = 49,124 = "Violent crime".
# NOTE(review): for 2016 "All Crimes" (348,804) is exactly twice
#   "Violent crime" + "Property crime" (174,402), so it looks like it sums the
#   aggregate rows together with their components -- confirm before relying on it.
NYcounts2016

Unnamed: 0,key,id,name,type,variable,value,year,crime_type,place_name
1725980,1600000US3651000--2016--Robbery--count,1600000US3651000,"New York, New York",place,count,15544.0,2016,Robbery,"New York, NY"
1726203,1600000US3651000--2016--Property crime--count,1600000US3651000,"New York, New York",place,count,125278.0,2016,Property crime,"New York, NY"
1743001,1600000US3651000--2016--Aggravated assault--count,1600000US3651000,"New York, New York",place,count,30873.0,2016,Aggravated assault,"New York, NY"
1882104,1600000US3651000--2016--Rape (revised definiti...,1600000US3651000,"New York, New York",place,count,2372.0,2016,Rape (revised definition),"New York, NY"
1905138,1600000US3651000--2016--Motor vehicle theft--c...,1600000US3651000,"New York, New York",place,count,6369.0,2016,Motor vehicle theft,"New York, NY"
1926540,1600000US3651000--2016--Larceny--count,1600000US3651000,"New York, New York",place,count,106868.0,2016,Larceny,"New York, NY"
1934086,1600000US3651000--2016--Murder and nonnegligen...,1600000US3651000,"New York, New York",place,count,335.0,2016,Murder and nonnegligent manslaughter,"New York, NY"
1972786,1600000US3651000--2016--Burglary--count,1600000US3651000,"New York, New York",place,count,12041.0,2016,Burglary,"New York, NY"
1987773,1600000US3651000--2016--All Crimes--count,1600000US3651000,"New York, New York",place,count,348804.0,2016,All Crimes,"New York, NY"
2080694,1600000US3651000--2016--Violent crime--count,1600000US3651000,"New York, New York",place,count,49124.0,2016,Violent crime,"New York, NY"


In [27]:
# Years to keep in the final selection
years = [2013, 2014, 2015, 2016]

# All New York "Violent crime" rate rows, with fully identical rows collapsed
NYviolentCrimeRates = (
    NYcrimeRates[NYcrimeRates["crime_type"].eq("Violent crime")]
    .drop_duplicates()
)

# Restrict those rows to the selected years
NYviolentCrimeRatesYrs = NYviolentCrimeRates[NYviolentCrimeRates["year"].isin(years)]
NYviolentCrimeRatesYrs

Unnamed: 0,key,id,name,type,variable,value,year,crime_type,place_name
756076,1600000US3651000--2014--Violent crime--rate,1600000US3651000,"New York, New York",place,rate,596.700141,2014,Violent crime,"New York, NY"
1451016,1600000US3651000--2013--Violent crime--rate,1600000US3651000,"New York, New York",place,rate,623.906787,2013,Violent crime,"New York, NY"
1690857,1600000US3651000--2015--Violent crime--rate,1600000US3651000,"New York, New York",place,rate,585.765574,2015,Violent crime,"New York, NY"
1979904,1600000US3651000--2016--Violent crime--rate,1600000US3651000,"New York, New York",place,rate,573.41515,2016,Violent crime,"New York, NY"
