In [15]:
#main data file and import script for class group 6  project 1
# Author AO ottoaara@gmail.com. 952.693.1378 
# 5.5.23
# data source mpls open data https://opendata.minneapolismn.gov/datasets/cityoflakes::crime-data/explore?location=19.216823%2C-46.664577%2C4.00
# CSV option under Crime Data Header
import pandas as pd
import csv
import json
#crime_data_main  = full data set (75 Megs so seperated out into groups for ease of dev)
#ran_crime_data_df is a light 1000 rows of data randomly choosen from crime_data_man to work with
#crime_data_2019 = data for 2019 
#crime_data_2020 ...
#crime_data_2021 ...
#crime_data_2022...
##crime_data_2023 = data for 2023 
filename = "Crime_Data.csv"
csvdatapath = "Data/" + filename

In [16]:
# download local from repo to desktop (clone to your desktop)
#this reads the csv file 
crime_data_main = pd.read_csv(csvdatapath)

In [17]:
#columns listed for ease of co-development use. 
crime_data_main.columns

Index(['X', 'Y', 'Type', 'Case_Number', 'Case_NumberAlt', 'Reported_Date',
       'Occurred_Date', 'NIBRS_Crime_Against', 'NIBRS_Group', 'NIBRS_Code',
       'Offense_Category', 'Offense', 'Problem_Initial', 'Problem_Final',
       'Address', 'Precinct', 'Neighborhood', 'Ward', 'Latitude', 'Longitude',
       'wgsXAnon', 'wgsYAnon', 'Crime_Count', 'OBJECTID'],
      dtype='object')

In [18]:
#checkign data types 
crime_data_main.dtypes

X                      float64
Y                      float64
Type                    object
Case_Number             object
Case_NumberAlt          object
Reported_Date           object
Occurred_Date           object
NIBRS_Crime_Against     object
NIBRS_Group             object
NIBRS_Code              object
Offense_Category        object
Offense                 object
Problem_Initial         object
Problem_Final           object
Address                 object
Precinct               float64
Neighborhood            object
Ward                   float64
Latitude               float64
Longitude              float64
wgsXAnon               float64
wgsYAnon               float64
Crime_Count              int64
OBJECTID                 int64
dtype: object

In [19]:
#get count of one column to prep for null value check 
crime_data_main["X"].count()

226120

In [20]:
# Remove rows containing all null values- cleaning data set
crime_data_clean = crime_data_main.dropna(how='any')

In [21]:
# removing rows with null values.  
crime_data_clean["X"].count()

151701

In [22]:
# checking values of crime category to reduce our data set down further to violent crimes 
crime_data_clean["Offense_Category"].unique()

array(['Subset of NIBRS Assault Offenses', 'Assault Offenses ',
       'Larceny/Theft Offenses ', 'Sex Offenses ',
       'Drug/Narcotic Offenses ', 'Robbery ', 'Fraud Offenses ',
       'Burglary/Breaking & Entering ', 'Motor Vehicle Theft ',
       'Stolen Property Offenses ', 'Weapon Law Violations ',
       'Counterfeiting/Forgery ', 'Extortion/Blackmail ',
       'Kidnapping/Abduction ', 'Embezzlement ',
       'Subset of NIBRS Robbery', 'Pornography/Obscene Material ',
       'Prostitution Offenses ', 'Arson ', 'Homicide Offenses ',
       'Destruction/Damage/Vandalism of Property ', 'Animal Cruelty ',
       'Human Trafficking ', 'Bribery ', 'Gambling Offenses ',
       'Gunshot Wound Victims', 'Shots Fired Calls'], dtype=object)

In [24]:
# reducing the data sets to only have violent crimes in them.  Note Included motor vehicle theft due to the
# high volumme of car-jackings occuring in mpls. 

crime_categories = [
    'Subset of NIBRS Assault Offenses',
    'Assault Offenses',
    'Larceny/Theft Offenses',
    'Sex Offenses',
    'Robbery',
    'Burglary/Breaking & Entering',
    'Motor Vehicle Theft',
    'Weapon Law Violations',
    'Kidnapping/Abduction',
    'Homicide Offenses',
    'Human Trafficking',
    'Gunshot Wound Victims',
    'Shots Fired Calls'
]

crime_data_clean = crime_data_clean[crime_data_clean['Offense_Category'].isin(crime_categories)]


In [25]:
#changing Occured_Date filed to data/time from object
crime_data_clean['Occurred_Date'] = pd.to_datetime(crime_data_clean['Occurred_Date'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crime_data_clean['Occurred_Date'] = pd.to_datetime(crime_data_clean['Occurred_Date'])


In [26]:
#checking to make sure date/time conv worked 
crime_data_clean.dtypes

X                                  float64
Y                                  float64
Type                                object
Case_Number                         object
Case_NumberAlt                      object
Reported_Date                       object
Occurred_Date          datetime64[ns, UTC]
NIBRS_Crime_Against                 object
NIBRS_Group                         object
NIBRS_Code                          object
Offense_Category                    object
Offense                             object
Problem_Initial                     object
Problem_Final                       object
Address                             object
Precinct                           float64
Neighborhood                        object
Ward                               float64
Latitude                           float64
Longitude                          float64
wgsXAnon                           float64
wgsYAnon                           float64
Crime_Count                          int64
OBJECTID   

In [27]:
#building random data set 
n_samples = min(1000, len(crime_data_clean))
ran_crime_data_df = crime_data_clean.sample(n=n_samples, random_state=42)

In [28]:
#building year data sets
crime_data_2019 = crime_data_clean[crime_data_clean['Occurred_Date'].dt.year == 2019]
crime_data_2020 = crime_data_clean[crime_data_clean['Occurred_Date'].dt.year == 2020]
crime_data_2021 = crime_data_clean[crime_data_clean['Occurred_Date'].dt.year == 2021]
crime_data_2022 = crime_data_clean[crime_data_clean['Occurred_Date'].dt.year == 2022]
crime_data_2023 = crime_data_clean[crime_data_clean['Occurred_Date'].dt.year == 2023]

array(['Subset of NIBRS Assault Offenses', 'Assault Offenses ',
       'Larceny/Theft Offenses ', 'Sex Offenses ',
       'Drug/Narcotic Offenses ', 'Robbery ', 'Fraud Offenses ',
       'Burglary/Breaking & Entering ', 'Motor Vehicle Theft ',
       'Stolen Property Offenses ', 'Weapon Law Violations ',
       'Counterfeiting/Forgery ', 'Extortion/Blackmail ',
       'Kidnapping/Abduction ', 'Embezzlement ',
       'Subset of NIBRS Robbery', 'Pornography/Obscene Material ',
       'Prostitution Offenses ', 'Arson ', 'Homicide Offenses ',
       'Destruction/Damage/Vandalism of Property ', 'Animal Cruelty ',
       'Human Trafficking ', 'Bribery ', 'Gambling Offenses ',
       'Gunshot Wound Victims', 'Shots Fired Calls'], dtype=object)

In [38]:
# grouping with smaller data set to look at where the crimes are happening - counting number of offenses 
#grouped_data = ran_crime_data_df.groupby(['Neighborhood', 'Address']).size().reset_index(name='Offense_Count')
grouped_data = ran_crime_data_df.groupby(['Neighborhood', 'Address', 'Offense_Category']).size().reset_index(name='Offense_Count')


In [39]:
#sorting grouped data to show the worst parts of town ie sorting by places more than one offense occurred
sorted_grouped_data = grouped_data.sort_values(by='Offense_Count', ascending=False)


In [40]:
# viewing sorted data frame. 
sorted_grouped_data

Unnamed: 0,Neighborhood,Address,Offense_Category,Offense_Count
205,Hawthorne,0021XX ALDRICH AVE N,Shots Fired Calls,14
350,Lowry Hill East,0028XX DUPONT AVE S,Subset of NIBRS Assault Offenses,11
204,Hawthorne,0021XX ALDRICH AVE N,Gunshot Wound Victims,9
251,Jordan,0014XX 29TH AVE N,Subset of NIBRS Assault Offenses,8
526,Steven's Square - Loring Heights,0018XX 1ST AVE S,Subset of NIBRS Assault Offenses,8
...,...,...,...,...
239,Holland,0007XX LOWRY AVE NE,Gunshot Wound Victims,1
238,Holland,0006XX 26TH AVE NE,Subset of NIBRS Assault Offenses,1
237,Holland,0006XX 26TH AVE NE,Gunshot Wound Victims,1
236,Holland,0006XX 19TH AVE NE,Shots Fired Calls,1
