In [131]:
# Importing dependencies required for our analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress
import scipy.stats as st
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

In [132]:
# Read csv file
crime_df = pd.read_csv("Crime Stats Cleaned v2.csv")

In [133]:
# Display data
crime_df.head()

Unnamed: 0,Year,County,Month,County Pop,County Pop ('000s),Violent_sum,Homicide_sum,ForRape_sum,Robbery_sum,AggAssault_sum,Property_sum,Burglary_sum,VehicleTheft_sum,LTtotal_sum
0,2019.0,Alameda County,1.0,1668412,1668,50,0,1,19,30,195,23,48,124
1,2019.0,Alameda County,2.0,1668412,1668,49,1,2,19,27,187,34,47,106
2,2019.0,Alameda County,3.0,1668412,1668,52,1,1,20,30,153,20,23,110
3,2019.0,Alameda County,4.0,1668412,1668,60,0,3,15,42,174,41,34,99
4,2019.0,Alameda County,5.0,1668412,1668,62,0,1,23,38,180,31,38,111


In [134]:
# Check data types
crime_df.dtypes

Year                  float64
County                 object
Month                 float64
County Pop             object
County Pop ('000s)     object
Violent_sum            object
Homicide_sum           object
ForRape_sum            object
Robbery_sum            object
AggAssault_sum         object
Property_sum           object
Burglary_sum           object
VehicleTheft_sum       object
LTtotal_sum            object
dtype: object

In [135]:
# Rename columns 
crime_df.rename(columns={'County Pop':'Country_Pop',"County Pop ('000s)":"County_Pop_('000s)",'LTtotal_sum':'LarcenyTheft_sum'} , inplace = True)
crime_df

Unnamed: 0,Year,County,Month,Country_Pop,County_Pop_('000s),Violent_sum,Homicide_sum,ForRape_sum,Robbery_sum,AggAssault_sum,Property_sum,Burglary_sum,VehicleTheft_sum,LarcenyTheft_sum
0,2019.0,Alameda County,1.0,1668412,1668,50,0,1,19,30,195,23,48,124
1,2019.0,Alameda County,2.0,1668412,1668,49,1,2,19,27,187,34,47,106
2,2019.0,Alameda County,3.0,1668412,1668,52,1,1,20,30,153,20,23,110
3,2019.0,Alameda County,4.0,1668412,1668,60,0,3,15,42,174,41,34,99
4,2019.0,Alameda County,5.0,1668412,1668,62,0,1,23,38,180,31,38,111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34582,2022.0,Yuba County,9.0,84310,84,0,0,0,0,0,0,0,0,0
34583,2022.0,Yuba County,10.0,84310,84,0,0,0,0,0,2,0,0,2
34584,2022.0,Yuba County,11.0,84310,84,0,0,0,0,0,0,0,0,0
34585,2022.0,Yuba County,12.0,84310,84,0,0,0,0,0,1,0,0,1


In [136]:
# Remove leading and trailing white spaces, replace empty values with nan then drop nan values 
# convert data types to int and string

crime_df_cleaned = crime_df.dropna()
for c in crime_df.columns:
    if c!="County":
        if crime_df[c].dtype == "object":
            crime_df_cleaned[c] = crime_df_cleaned[c].str.strip()
            crime_df_cleaned[c].replace('', np.nan, inplace=True)
        #Removed all the nan rows    
        crime_df_cleaned = crime_df_cleaned.dropna()
        #convert to int
        crime_df_cleaned[c] = crime_df_cleaned[c].astype("int64")

crime_df_cleaned["County"] = crime_df_cleaned["County"].astype('string')
crime_df_cleaned

Unnamed: 0,Year,County,Month,Country_Pop,County_Pop_('000s),Violent_sum,Homicide_sum,ForRape_sum,Robbery_sum,AggAssault_sum,Property_sum,Burglary_sum,VehicleTheft_sum,LarcenyTheft_sum
0,2019,Alameda County,1,1668412,1668,50,0,1,19,30,195,23,48,124
1,2019,Alameda County,2,1668412,1668,49,1,2,19,27,187,34,47,106
2,2019,Alameda County,3,1668412,1668,52,1,1,20,30,153,20,23,110
3,2019,Alameda County,4,1668412,1668,60,0,3,15,42,174,41,34,99
4,2019,Alameda County,5,1668412,1668,62,0,1,23,38,180,31,38,111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34581,2022,Yuba County,8,84310,84,0,0,0,0,0,0,0,0,0
34582,2022,Yuba County,9,84310,84,0,0,0,0,0,0,0,0,0
34583,2022,Yuba County,10,84310,84,0,0,0,0,0,2,0,0,2
34584,2022,Yuba County,11,84310,84,0,0,0,0,0,0,0,0,0


In [137]:
#check data types 
crime_df_cleaned.dtypes

Year                   int64
County                string
Month                  int64
Country_Pop            int64
County_Pop_('000s)     int64
Violent_sum            int64
Homicide_sum           int64
ForRape_sum            int64
Robbery_sum            int64
AggAssault_sum         int64
Property_sum           int64
Burglary_sum           int64
VehicleTheft_sum       int64
LarcenyTheft_sum       int64
dtype: object

In [138]:
#Identify unique values on each crime :'Violent_sum','Homicide_sum','ForRape_sum','Robbery_sum','AggAssault_sum','Property_sum','Burglary_sum','VehicleTheft_sum','LarcenyTheft_sum'
unique_v = crime_df_cleaned["Violent_sum"].unique()
print('Violent' ,unique_v)
# unique_h = crime_df_cleaned["Homicide_sum"].unique()
# print('Homicide',unique_h)
# unique_r = crime_df_cleaned["ForRape_sum"].unique()
# print('Rape',unique_r)
# unique_rob = crime_df_cleaned["Robbery_sum"].unique()
# print('Robbery',unique_rob)
# unique_agg = crime_df_cleaned["AggAssault_sum"].unique()
# print('AggAssault',unique_agg)
# unique_pro = crime_df_cleaned["Property_sum"].unique()
# print('Property',unique_pro)
# unique_bur = crime_df_cleaned["Burglary_sum"].unique()
# print('Buglary',unique_bur)
# unique_v = crime_df_cleaned["VehicleTheft_sum"].unique()
# print('Vehicle',unique_v)
# unique_larceny = crime_df_cleaned["LarcenyTheft_sum"].unique()
# print('Larceny',unique_rob)

Violent [  50   49   52   60   62   53   51   54   43   46   70    8   13   12
   19   16   15   17   11   14    5    3    6    4    2    1   37   56
   44   63   48   55   24   10    9   29   21   34   38   47   35   33
   26   41   39   58   45   57   40   23   22    7  472  368  438  465
  457  500  453  477  516  491  428  455    0   36   32   42   28   27
   20   25   18   30   59   31   64   73   66   89   71  100  110   93
   80  107   92   84   79   -4   95   87  120   99  111   76   81  227
  178  212  221  274  282  324  283  218  238  209  170  188  233  222
  265  335  263  242  239  245  253  125  138  132  147  156  167  163
  165  148  157  131  137  408  374  427  476  418  570  538  499  482
  436  483  117   91   94  103   82   75   65   69   68  193  183  198
  197  173  201  205 2352 2055 2503 2440 2485 2607 2784 2559 2478 2474
 2258 2405   61  104   98   67   83   -1  109  126  154  136  135  129
  101  130  105  121  123  102   74  112   86  108  106  146  139  11

In [139]:
#filtering the negative values for all the crimes
crime_cols = ['Violent_sum','Homicide_sum','ForRape_sum','Robbery_sum','AggAssault_sum','Property_sum','Burglary_sum','VehicleTheft_sum','LarcenyTheft_sum']
for c in crime_cols:
    crime_df_cleaned = crime_df_cleaned [ crime_df_cleaned[c]>=0]

crime_df_cleaned

Unnamed: 0,Year,County,Month,Country_Pop,County_Pop_('000s),Violent_sum,Homicide_sum,ForRape_sum,Robbery_sum,AggAssault_sum,Property_sum,Burglary_sum,VehicleTheft_sum,LarcenyTheft_sum
0,2019,Alameda County,1,1668412,1668,50,0,1,19,30,195,23,48,124
1,2019,Alameda County,2,1668412,1668,49,1,2,19,27,187,34,47,106
2,2019,Alameda County,3,1668412,1668,52,1,1,20,30,153,20,23,110
3,2019,Alameda County,4,1668412,1668,60,0,3,15,42,174,41,34,99
4,2019,Alameda County,5,1668412,1668,62,0,1,23,38,180,31,38,111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34581,2022,Yuba County,8,84310,84,0,0,0,0,0,0,0,0,0
34582,2022,Yuba County,9,84310,84,0,0,0,0,0,0,0,0,0
34583,2022,Yuba County,10,84310,84,0,0,0,0,0,2,0,0,2
34584,2022,Yuba County,11,84310,84,0,0,0,0,0,0,0,0,0


In [140]:
#crime_df_cleaned.to_csv("crime_df_cleaned_lastV.csv")
# newdf = pd.read_csv('crime_df_cleaned_lastV.csv')

In [141]:
newdf = crime_df_cleaned

In [142]:
newdf.columns


Index(['Year', 'County', 'Month', 'Country_Pop', 'County_Pop_('000s)',
       'Violent_sum', 'Homicide_sum', 'ForRape_sum', 'Robbery_sum',
       'AggAssault_sum', 'Property_sum', 'Burglary_sum', 'VehicleTheft_sum',
       'LarcenyTheft_sum'],
      dtype='object')

In [143]:
newdf.head(5)

Unnamed: 0,Year,County,Month,Country_Pop,County_Pop_('000s),Violent_sum,Homicide_sum,ForRape_sum,Robbery_sum,AggAssault_sum,Property_sum,Burglary_sum,VehicleTheft_sum,LarcenyTheft_sum
0,2019,Alameda County,1,1668412,1668,50,0,1,19,30,195,23,48,124
1,2019,Alameda County,2,1668412,1668,49,1,2,19,27,187,34,47,106
2,2019,Alameda County,3,1668412,1668,52,1,1,20,30,153,20,23,110
3,2019,Alameda County,4,1668412,1668,60,0,3,15,42,174,41,34,99
4,2019,Alameda County,5,1668412,1668,62,0,1,23,38,180,31,38,111


In [144]:
newdf = newdf.rename(columns = {'Year': 'year',
                                'County': 'county',
                                'Month': 'month',
                                'Country_Pop': 'county_pop',
                                "County_Pop_('000s)": "county_pop_('000s')",
                                'Violent_sum': 'violent',
                                'Homicide_sum': 'homicide',
                                'ForRape_sum': 'rape',
                                'Robbery_sum': 'robbery',
                                'AggAssault_sum': 'agg_assault',
                                'Property_sum': 'property',
                                'Burglary_sum': 'burglary',
                                'VehicleTheft_sum': 'vehicle_theft',
                                'LarcenyTheft_sum': 'larceny_theft'})
newdf.columns

Index(['year', 'county', 'month', 'county_pop', 'county_pop_('000s')',
       'violent', 'homicide', 'rape', 'robbery', 'agg_assault', 'property',
       'burglary', 'vehicle_theft', 'larceny_theft'],
      dtype='object')

In [145]:
newdf.drop("county_pop_('000s')", axis=1, inplace=True)
newdf

Unnamed: 0,year,county,month,county_pop,violent,homicide,rape,robbery,agg_assault,property,burglary,vehicle_theft,larceny_theft
0,2019,Alameda County,1,1668412,50,0,1,19,30,195,23,48,124
1,2019,Alameda County,2,1668412,49,1,2,19,27,187,34,47,106
2,2019,Alameda County,3,1668412,52,1,1,20,30,153,20,23,110
3,2019,Alameda County,4,1668412,60,0,3,15,42,174,41,34,99
4,2019,Alameda County,5,1668412,62,0,1,23,38,180,31,38,111
...,...,...,...,...,...,...,...,...,...,...,...,...,...
34581,2022,Yuba County,8,84310,0,0,0,0,0,0,0,0,0
34582,2022,Yuba County,9,84310,0,0,0,0,0,0,0,0,0
34583,2022,Yuba County,10,84310,0,0,0,0,0,2,0,0,2
34584,2022,Yuba County,11,84310,0,0,0,0,0,0,0,0,0


In [146]:
newdf['month'] = newdf['month'].replace({1: 'January',
                                        2: 'February',
                                        3: 'March',
                                        4: 'April',
                                        5: 'May',
                                        6: 'June',
                                        7: 'July',
                                        8: 'August',
                                        9: 'September',
                                        10: 'October',
                                        11: 'November',
                                        12: 'December'})
newdf.head(5)

Unnamed: 0,year,county,month,county_pop,violent,homicide,rape,robbery,agg_assault,property,burglary,vehicle_theft,larceny_theft
0,2019,Alameda County,January,1668412,50,0,1,19,30,195,23,48,124
1,2019,Alameda County,February,1668412,49,1,2,19,27,187,34,47,106
2,2019,Alameda County,March,1668412,52,1,1,20,30,153,20,23,110
3,2019,Alameda County,April,1668412,60,0,3,15,42,174,41,34,99
4,2019,Alameda County,May,1668412,62,0,1,23,38,180,31,38,111


In [172]:
df = newdf.groupby(['year', 'county']).sum()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,county_pop,violent,homicide,rape,robbery,agg_assault,property,burglary,vehicle_theft,larceny_theft
year,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019,Alameda County,417103000,9706,96,704,4900,4006,66349,6493,10216,49640
2019,Alpine County,26712,13,0,2,0,11,32,13,4,15
2019,Amador County,2818203,108,1,9,8,90,638,165,37,436
2019,Butte County,26247120,1024,7,180,167,670,5322,1111,868,3343
2019,Calaveras County,2767140,145,1,34,9,101,838,297,69,472
...,...,...,...,...,...,...,...,...,...,...,...
2022,Tulare County,74496864,2087,44,184,383,1476,9876,1512,2136,6228
2022,Tuolumne County,1963116,375,0,70,14,291,728,198,61,469
2022,Ventura County,177344865,1645,18,175,368,1084,11124,1619,1306,8199
2022,Yolo County,21323040,596,4,82,99,411,5215,639,602,3974


In [181]:
# Reset index
df.reset_index(inplace = True)

In [149]:
# Convert a crime column into a t list and iderate through in a list of comprehension 
crime_column = list(df.columns)
crime_column_list = [i for i in crime_column if i not in ('year', 'county', 'month', 'county_pop')]
# Filter by year 2019
y_2019 = df.loc[df['year'] == 2019]
y_2019.set_index('county', inplace=True)
# Fileter by year 2020
y_2020 = df.loc[df['year'] == 2020]
y_2020.set_index('county', inplace=True)

# Get all the counties population summary
summary_df = y_2019['county_pop'].to_frame()
summary_df.reset_index(inplace=True)

# Display(summary_df)
for c in crime_column_list:
    df_crimes = (y_2020[c]-y_2019[c])/y_2019[c]
    df_crimes.sort_values(ascending=False)
    df_crimes = df_crimes.to_frame()
    df_crimes.reset_index(inplace=True)
    #print(df_crimes.dropna().sort_values(ascending=False).head(9))
    # Merge Data Frames and comprare between 2019 and 2020
    summary_df =  summary_df.merge(df_crimes)
summary_df
    
    
 

Unnamed: 0,county,county_pop,violent,homicide,rape,robbery,agg_assault,property,burglary,vehicle_theft,larceny_theft
0,Alameda County,417103000,-0.006285,0.489583,-0.028409,-0.153265,0.165502,-0.121419,0.08332,0.415035,-0.258602
1,Alpine County,26712,0.461538,,-1.0,,0.727273,-0.5625,-0.692308,-1.0,-0.333333
2,Amador County,2818203,0.444444,1.0,0.111111,0.75,0.444444,-0.087774,-0.175758,0.216216,-0.080275
3,Butte County,26247120,0.043945,0.714286,-0.288889,-0.143713,0.173134,-0.183578,-0.247525,-0.229263,-0.150464
4,Calaveras County,2767140,0.089655,-1.0,-0.441176,-0.222222,0.306931,-0.085919,-0.178451,0.043478,-0.04661
5,Colusa County,1288440,0.051724,inf,-0.4,0.111111,0.205882,-0.04611,-0.082569,-0.196429,0.021978
6,Contra Costa County,344712017,-0.063457,-0.188679,-0.115265,-0.237748,0.065052,-0.168773,-0.090654,0.126396,-0.248952
7,Del Norte County,1004508,0.825688,0.0,3.153846,0.384615,0.573529,-0.207711,-0.302789,-0.106383,-0.16996
8,El Dorado County,11390363,0.137615,-1.0,0.137931,0.037037,0.190476,0.013856,-0.187702,-0.082759,0.092456
9,Fresno County,261435914,0.253958,0.709091,0.131086,0.270153,0.25193,0.060875,0.053899,0.164513,0.03902


In [150]:
# Filter data by 2019
y_2019 = df.loc[df['year'] == 2019]
y_2019.set_index('county', inplace=True)
y_2019.head(5)

Unnamed: 0_level_0,year,county_pop,violent,homicide,rape,robbery,agg_assault,property,burglary,vehicle_theft,larceny_theft
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Alameda County,2019,417103000,9706,96,704,4900,4006,66349,6493,10216,49640
Alpine County,2019,26712,13,0,2,0,11,32,13,4,15
Amador County,2019,2818203,108,1,9,8,90,638,165,37,436
Butte County,2019,26247120,1024,7,180,167,670,5322,1111,868,3343
Calaveras County,2019,2767140,145,1,34,9,101,838,297,69,472


In [151]:
# Filter data by 2020
y_2020 = df.loc[df['year'] == 2020]
y_2020.set_index('county', inplace=True)
y_2020.head(5)

Unnamed: 0_level_0,year,county_pop,violent,homicide,rape,robbery,agg_assault,property,burglary,vehicle_theft,larceny_theft
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Alameda County,2020,438853272,9645,143,684,4149,4669,58293,7034,14456,36803
Alpine County,2020,26856,19,0,0,0,19,14,4,0,10
Amador County,2020,2885976,156,2,10,14,130,582,136,45,401
Butte County,2020,24252816,1069,12,128,143,786,4345,836,669,2840
Calaveras County,2020,2778480,158,0,19,7,132,766,244,72,450


In [163]:
# Filter data by 2021
y_2021 = df.loc[df['year'] == 2021]
y_2021.set_index('county', inplace=True)
y_2021.head(5)

Unnamed: 0_level_0,year,county_pop,violent,homicide,rape,robbery,agg_assault,property,burglary,vehicle_theft,larceny_theft
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Alameda County,2021,430685294,10510,146,671,4568,5125,61001,5907,15496,39598
Alpine County,2021,27170,19,0,2,0,17,23,8,1,14
Amador County,2021,2921224,135,4,9,13,109,716,133,77,506
Butte County,2021,22268520,1110,13,151,120,826,4785,818,916,3051
Calaveras County,2021,2773140,185,2,35,4,144,677,183,109,385


In [164]:
# Filter data by 2022
y_2022 = df.loc[df['year'] == 2021]
y_2022.set_index('county', inplace=True)
y_2022.head(5)

Unnamed: 0_level_0,year,county_pop,violent,homicide,rape,robbery,agg_assault,property,burglary,vehicle_theft,larceny_theft
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Alameda County,2021,430685294,10510,146,671,4568,5125,61001,5907,15496,39598
Alpine County,2021,27170,19,0,2,0,17,23,8,1,14
Amador County,2021,2921224,135,4,9,13,109,716,133,77,506
Butte County,2021,22268520,1110,13,151,120,826,4785,818,916,3051
Calaveras County,2021,2773140,185,2,35,4,144,677,183,109,385


In [165]:
# Compare property crime between 2019 and 2020
df_property = (y_2020['property']-y_2019['property'])/y_2019['property']
df_property.sort_values(ascending=False)

county
Shasta County             0.421467
San Benito County         0.213983
Santa Barbara County      0.208020
Modoc County              0.181818
Mendocino County          0.125100
Mariposa County           0.098901
Napa County               0.066605
Fresno County             0.060875
Siskiyou County           0.052381
Sutter County             0.044358
Ventura County            0.035539
Orange County             0.032197
Kings County              0.016221
El Dorado County          0.013856
Monterey County          -0.000905
Yuba County              -0.001627
Imperial County          -0.026467
Marin County             -0.031233
Yolo County              -0.032812
Placer County            -0.036139
San Mateo County         -0.041052
Sonoma County            -0.041268
Los Angeles County       -0.045629
Colusa County            -0.046110
Lake County              -0.050489
Kern County              -0.056942
Plumas County            -0.063694
Calaveras County         -0.085919
Santa Clara C

In [166]:
# Compare violenr crim between 2020 and 2019
df_violent = (y_2020['violent']-y_2019['violent'])/y_2019['violent']
df_violent.sort_values(ascending=False)
df_violent

county
Alameda County           -0.006285
Alpine County             0.461538
Amador County             0.444444
Butte County              0.043945
Calaveras County          0.089655
Colusa County             0.051724
Contra Costa County      -0.063457
Del Norte County          0.825688
El Dorado County          0.137615
Fresno County             0.253958
Glenn County              0.670330
Humboldt County           0.006390
Imperial County          -0.011885
Inyo County               0.351064
Kern County               0.117383
Kings County              0.023778
Lake County               0.011527
Lassen County            -0.219331
Los Angeles County       -0.030925
Madera County            -0.303614
Marin County              0.207364
Mariposa County          -0.227848
Mendocino County          0.104167
Merced County             0.076316
Modoc County             -0.139535
Mono County              -0.373134
Monterey County           0.032798
Napa County              -0.262654
Nevada County

In [167]:
# Compare property crime between 2020 and 2021
df_property = (y_2021['property']-y_2020['property'])/y_2020['property']
df_property.sort_values(ascending=False)


county
Sierra County             1.941176
Alpine County             0.642857
Amador County             0.230241
Merced County             0.224490
Mariposa County           0.210000
Madera County             0.172608
San Francisco County      0.148770
Trinity County            0.116162
Butte County              0.101266
San Diego County          0.100069
San Luis Obispo County    0.094577
Santa Barbara County      0.092330
Humboldt County           0.092090
Lassen County             0.070621
Los Angeles County        0.067685
Siskiyou County           0.061840
Sacramento County         0.053166
Inyo County               0.049844
Kern County               0.047257
Alameda County            0.046455
Sonoma County             0.046007
Fresno County             0.020360
Stanislaus County         0.011344
Placer County             0.001883
Solano County             0.000573
Tehama County            -0.003864
Lake County              -0.009434
San Joaquin County       -0.016288
Tuolumne Coun

In [169]:
y_2021

Unnamed: 0_level_0,year,county_pop,violent,homicide,rape,robbery,agg_assault,property,burglary,vehicle_theft,larceny_theft
county,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Alameda County,2021,430685294,10510,146,671,4568,5125,61001,5907,15496,39598
Alpine County,2021,27170,19,0,2,0,17,23,8,1,14
Amador County,2021,2921224,135,4,9,13,109,716,133,77,506
Butte County,2021,22268520,1110,13,151,120,826,4785,818,916,3051
Calaveras County,2021,2773140,185,2,35,4,144,677,183,109,385
Colusa County,2021,1314780,55,0,8,12,35,316,75,52,189
Contra Costa County,2021,347826102,3997,50,253,1003,2691,22480,2728,4315,15437
Del Norte County,2021,991908,230,1,44,33,152,570,165,93,312
El Dorado County,2021,11615400,531,6,98,43,384,2318,512,162,1644
Fresno County,2021,257299968,7405,87,406,1070,5842,25108,5937,4994,14177


In [168]:
# Compare property crime between 2021 and 2022
df_property = (y_2022['property']-y_2022['property'])/y_2022['property']
df_property.sort_values(ascending=False)

county
Alameda County            0.0
Santa Cruz County         0.0
Plumas County             0.0
Riverside County          0.0
Sacramento County         0.0
San Benito County         0.0
San Bernardino County     0.0
San Diego County          0.0
San Francisco County      0.0
San Joaquin County        0.0
San Luis Obispo County    0.0
San Mateo County          0.0
Santa Barbara County      0.0
Santa Clara County        0.0
Shasta County             0.0
Alpine County             0.0
Sierra County             0.0
Siskiyou County           0.0
Solano County             0.0
Sonoma County             0.0
Stanislaus County         0.0
Sutter County             0.0
Tehama County             0.0
Trinity County            0.0
Tulare County             0.0
Tuolumne County           0.0
Ventura County            0.0
Yolo County               0.0
Placer County             0.0
Orange County             0.0
Nevada County             0.0
Napa County               0.0
Amador County             0.0
But

In [158]:
df_violent = (y_2020['violent']-y_2019['violent'])/y_2019['violent']
df_violent.sort_values(ascending=False)
df_violent

county
Alameda County           -0.006285
Alpine County             0.461538
Amador County             0.444444
Butte County              0.043945
Calaveras County          0.089655
Colusa County             0.051724
Contra Costa County      -0.063457
Del Norte County          0.825688
El Dorado County          0.137615
Fresno County             0.253958
Glenn County              0.670330
Humboldt County           0.006390
Imperial County          -0.011885
Inyo County               0.351064
Kern County               0.117383
Kings County              0.023778
Lake County               0.011527
Lassen County            -0.219331
Los Angeles County       -0.030925
Madera County            -0.303614
Marin County              0.207364
Mariposa County          -0.227848
Mendocino County          0.104167
Merced County             0.076316
Modoc County             -0.139535
Mono County              -0.373134
Monterey County           0.032798
Napa County              -0.262654
Nevada County

In [159]:
y_2019.loc['Shasta County']

year                 2019
county_pop       11497280
violent               623
homicide                6
rape                   63
robbery                77
agg_assault           477
property             2413
burglary              630
vehicle_theft         581
larceny_theft        1202
Name: Shasta County, dtype: int64

In [160]:
y_2020.loc['Shasta County']

year                 2020
county_pop       12889944
violent               977
homicide               12
rape                   99
robbery               126
agg_assault           740
property             3430
burglary              936
vehicle_theft         466
larceny_theft        2028
Name: Shasta County, dtype: int64

In [None]:
# Create a list of County names
county_names = y_19["county"].unique()
county_names_list = list(county_names)


In [None]:
crime_columns = list(y_19.columns)
# item_list = [e for e in item_list if e not in ('item', 5)]
crime_columns_list = [i for i in crime_columns if i not in ('year', 'county', 'month', 'county_pop')]
crime_columns_list

In [None]:
crime_in_county = 

In [None]:
# Create A loop to extarct specific data
county_19 = [county_names_list]
#crime_19 = 
for i, row in y_19.iterrows():
    if robbery in county_19[3]
    print()
  

In [None]:
march_count_19 = y_19.loc[(y_19["county"] == 'Alameda County') & (y_19["month"] == 'March')].sum()
march_count_19

In [None]:
newdf_2019 = newdf.groupby(['year', 'month']).sum()
newdf_2019.head(5)                   

In [None]:
newdf1 = newdf_2019["year"].unique()
year_n = pd.to_datetime(newdf1, format='%y').strftime('%B')

#year_n

# year_n1 = list(year_n)

#year_19 = newdf['year'].unique('2019')
# year_20 = newdf['year'].unique()
# year_21 = newdf['year'].unique()
# year_22 = newdf['year'].unique()

print(len(year_n))
# print(len(year_20))
# print(len(year_21))
# print(len(year_22))

In [None]:
county_pop = newdf['county_pop'].unique()
county_count = newdf['county'].unique()
# y_2019 = newdf.groupby(['year', 'month']).sum()
# newdf_2019
print(len(county_pop))
print(len(county_count))
