In [292]:
# Load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

In [293]:
# Load raw datasets
# Both CSV exports are read with pandas' default parsing; the frames are kept
# unmodified so each cleaning cell can start again from the raw data.
major_felony_raw, nonmajor_felony_raw = (
    pd.read_csv(csv_name)
    for csv_name in (
        'seven-major-felony-offenses-2000-2020.csv',
        'non-seven-major-felony-offenses-2000-2020.csv',
    )
)

In [294]:
# Cleaning Major Felony Dataset

# The column names did not import as a header: they sit in data row 3. That row is
# promoted to the column labels and dropped together with rows 0-2 above it.
new_colnames_major = list(major_felony_raw.iloc[3])
major_felony = major_felony_raw.drop([0, 1, 2, 3])  # drop() returns a new frame; the raw data stays untouched
major_felony.columns = new_colnames_major

# Drop Statistical Notes and NaN values (raw row labels 12-19)
major_felony = major_felony.drop(list(range(12, 20)))

# Drop Columns 2000 - 2008 and 2020 (2009 - 2019 are kept).
# In the promoted header the labels for 2000-2018 are strings, while 2019 and 2020 are floats.
major_felony = major_felony.drop([str(year) for year in range(2000, 2009)] + [2020.0], axis=1)

# Reset Indices
major_felony = major_felony.reset_index(drop=True)

# Rename Columns so that every year label is a plain int
major_felony = major_felony.rename(
    columns={"OFFENSE": "offense", **{str(year): year for year in range(2009, 2019)}, 2019.0: 2019}
)

# Drop Totals Row (label 7 after the reset).
# It is removed BEFORE the numeric conversion: its 2009-2018 entries are comma-formatted
# strings that would otherwise need special handling, and the row is discarded anyway.
major_felony = major_felony.drop([7])

# Make All Data Entries the Same Type (Int)
# astype replaces each year column, so the columns themselves end up with an integer dtype.
# Writing converted values cell-by-cell through .iloc would leave 2009-2018 as object dtype.
major_felony = major_felony.astype({year: int for year in range(2009, 2020)})

# Make Offense Types Lowercase
major_felony["offense"] = major_felony["offense"].str.lower()

print(major_felony)

                           offense   2009   2010   2011   2012   2013   2014  \
0  murder & non-negl. manslaughter    471    536    515    419    335    333   
1                             rape   1205   1373   1420   1445   1378   1352   
2                          robbery  18601  19486  19717  20144  19128  16539   
3                   felony assault  16773  16956  18482  19381  20297  20207   
4                         burglary  19430  18600  18720  19168  17429  16765   
5                    grand larceny  39580  37835  38501  42497  45368  43862   
6   grand larceny of motor vehicle  10670  10329   9314   8093   7400   7664   

    2015   2016   2017   2018   2019  
0    352    335    292    295    319  
1   1438   1438   1449   1794   1755  
2  16931  15500  13956  12913  13371  
3  20270  20847  20052  20208  20698  
4  15125  12990  12083  11687  10783  
5  44005  44279  43150  43558  43250  
6   7332   6327   5676   5428   5430  


In [295]:
# Cleaning NON-Major Felony Dataset

# The column names did not import as a header: they sit in data row 2. That row is
# promoted to the column labels and dropped together with rows 0-1 above it.
new_colnames_nonmajor = list(nonmajor_felony_raw.iloc[2])
nonmajor_felony = nonmajor_felony_raw.drop([0, 1, 2])  # drop() returns a new frame; the raw data stays untouched
nonmajor_felony.columns = new_colnames_nonmajor

# Drop Statistical Notes and NaN values (raw row labels 12-31)
nonmajor_felony = nonmajor_felony.drop(list(range(12, 32)))

# Drop Columns 2000 - 2008 and 2020 (2009 - 2019 are kept).
# Every year label in this header is a float (2000.0, 2001.0, ...).
nonmajor_felony = nonmajor_felony.drop([float(year) for year in range(2000, 2009)] + [2020.0], axis=1)

# Reset Indices
nonmajor_felony = nonmajor_felony.reset_index(drop=True)

# Rename Columns: lower-case the offense label and turn the float year labels into ints
nonmajor_felony = nonmajor_felony.rename(
    columns={"OFFENSE": "offense", **{float(year): year for year in range(2009, 2020)}}
)

# Drop Totals Row (label 8 after the reset); only the per-offense rows are analysed
nonmajor_felony = nonmajor_felony.drop([8])

# Make All Data Entries the Same Type (Int)
# One astype call with a {column: dtype} mapping covers all eleven year columns.
nonmajor_felony = nonmajor_felony.astype({year: int for year in range(2009, 2020)})

# Make Offense Types Lowercase
nonmajor_felony["offense"] = nonmajor_felony["offense"].str.lower()

print(nonmajor_felony)

                                     offense   2009   2010   2011   2012  \
0       felony possession of stolen property    823    864    823    711   
1         forgery/theft_fraud/identity theft  10225  10055   9876  10233   
2                                      arson   1474   1467   1264   1253   
3                      felony sex crimes (3)    914   1053   1028   1380   
4                 felony dangerous drugs (1)  26025  22913  21305  19680   
5               felony dangerous weapons (2)   5952   5413   5037   4979   
6  fel. criminal mischief & related offenses   6397   6006   6374   6737   
7                         other felonies (4)  11950  11616  11533  11929   

    2013   2014   2015   2016   2017   2018   2019  
0    689   1090    904    769   1019    951    733  
1   9493   9520  10400  11079  10063   9413   8684  
2   1187   1205   1026    802    680    741    711  
3   1073   1135   1152   1336   1367   1615   1562  
4  19571  17113  15279  14712  13348  11236   9587

In [300]:
# Combine the two datasets

# Add Column to indicate whether the offense is a Major felony (1) or Nonmajor felony (0).
# A scalar is broadcast to every row, so the flag does not depend on how many
# offense rows each table happens to contain.
major_felony['major_nonmajor'] = 1
nonmajor_felony['major_nonmajor'] = 0

# Stack the major rows on top of the nonmajor rows and renumber the index from 0.
# pd.concat is used instead of DataFrame.append, which was deprecated in pandas 1.4
# and removed in pandas 2.0; concat builds a new frame, so the inputs are not modified.
felony_data = pd.concat([major_felony, nonmajor_felony], ignore_index=True)

print(felony_data)

                                      offense   2009   2010   2011   2012  \
0             murder & non-negl. manslaughter    471    536    515    419   
1                                        rape   1205   1373   1420   1445   
2                                     robbery  18601  19486  19717  20144   
3                              felony assault  16773  16956  18482  19381   
4                                    burglary  19430  18600  18720  19168   
5                               grand larceny  39580  37835  38501  42497   
6              grand larceny of motor vehicle  10670  10329   9314   8093   
7        felony possession of stolen property    823    864    823    711   
8          forgery/theft_fraud/identity theft  10225  10055   9876  10233   
9                                       arson   1474   1467   1264   1253   
10                      felony sex crimes (3)    914   1053   1028   1380   
11                 felony dangerous drugs (1)  26025  22913  21305  19680   