# Crime Data Analysis

- Your analysis here
  
---

In [None]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import hvplot.pandas

# Locations of the two crime-data CSV exports (2010-2019 and 2020-present)
data_2010_2019 = Path("Crime_Data_from_2010_to_2019 (1).csv")
data_2020_2023 = Path("Crime_Data_from_2020_to_Present_20231016.csv")

# Load each export into its own DataFrame
pre_covid_data, post_covid_data = (
    pd.read_csv(csv_path) for csv_path in (data_2010_2019, data_2020_2023)
)

# Stack both periods into one dataset with a fresh 0..n-1 index
Data_complete = pd.concat((pre_covid_data, post_covid_data), ignore_index=True)
Data_complete.head()

## Data Cleaning


In [None]:
# Derive the year of each record from its 'Date Rptd' (date reported) value
reported_on = pd.to_datetime(Data_complete['Date Rptd'])
Data_complete['Crime Year'] = reported_on.dt.year

In [None]:
# Keep only the columns the analysis uses, selected by NAME rather than by
# position.  Positional dropping silently removes the wrong columns if the
# CSV layout changes, and removes additional columns every time the cell is
# re-run; selecting by name is idempotent and fails loudly on a missing column.
# NOTE(review): this list mirrors the keys of the rename cell below - confirm
# it matches the columns the positional drop used to leave behind.
columns_to_keep = [
    'DR_NO',
    'AREA NAME',
    'Crm Cd',
    'Crm Cd Desc',
    'Vict Age',
    'Vict Sex',
    'Vict Descent',
    'Premis Desc',
    'LAT',
    'LON',
    'Crime Year',
]
Data_complete = Data_complete[columns_to_keep].copy()

In [None]:
# Row-level cleaning, applied in order:
#   1. discard rows that contain any NaN
#   2. keep a single row per crime record number ('DR_NO')
#   3. keep only rows with a positive victim age (0 or below is treated as unrecorded)
Data_complete = (
    Data_complete
    .dropna()
    .drop_duplicates(subset=['DR_NO'])
    .loc[lambda frame: frame['Vict Age'] > 0]
)

In [None]:
# Friendlier column labels used by the rest of the notebook
column_labels = {
    'DR_NO': 'DR Number',
    'Crime Year': 'Crime Year',
    'AREA NAME': 'Area Name',
    'Crm Cd': 'Crime Code',
    'Crm Cd Desc': 'Type of Crime',
    'Vict Age': 'Victim Age',
    'Vict Sex': 'Victim Gender',
    'Vict Descent': 'Victim Ethnicity',
    'Premis Desc': 'Scene of Crime',
    'LAT': 'Latitude',
    'LON': 'Longitude',
}
Data_complete = Data_complete.rename(columns=column_labels)

# Keep only records whose coordinates fall strictly inside a broad bounding box
# (longitude between -128 and -108, latitude between 24 and 44); everything
# outside that box is discarded.
longitude_ok = Data_complete['Longitude'].gt(-128) & Data_complete['Longitude'].lt(-108)
latitude_ok = Data_complete['Latitude'].gt(24) & Data_complete['Latitude'].lt(44)
Data_complete = Data_complete[longitude_ok & latitude_ok]

Data_complete.head()

## Data Analysis

In [None]:
# Number of crime records left after cleaning
count = Data_complete["DR Number"].shape[0]
print(count)

In [None]:
# Tally how many records carry each 'Victim Ethnicity' value and print the tallies
unique_demo = Data_complete["Victim Ethnicity"].value_counts()
print(unique_demo)

In [None]:
# Tally how many records carry each 'Victim Ethnicity' value and print the tallies
# NOTE(review): this cell is identical to the one directly above it - possibly
# another column was intended here; confirm or remove.
unique_demo = Data_complete["Victim Ethnicity"].value_counts()
print(unique_demo)

In [None]:
# Tally how many records fall in each 'Area Name' and print the tallies
# (reuses the unique_demo name from the cells above)
unique_demo = Data_complete["Area Name"].value_counts()
print(unique_demo)

## Annual Crime Analysis

In [None]:
# Get the total number of crimes by year
yearly_total = (
    Data_complete.groupby("Crime Year")["DR Number"]
    .count()
    .reset_index(name="Total Crimes")
)

# Add a column that calculates the yearly rate of change in crime.
# The first year has no predecessor, so its value is NaN.  It is deliberately
# left as NaN: writing a '-' string into the column (previously done through a
# chained `.iloc[0] = ...` assignment, which pandas warns about and which does
# nothing under Copy-on-Write) turns the whole column into object dtype and
# hides it from numeric summaries and plots.
yearly_total['Percentage Change'] = yearly_total['Total Crimes'].pct_change() * 100
yearly_total.set_index('Crime Year', inplace=True)

# Display only: show the missing first-year change as a dash without touching
# the stored data.
yearly_total.astype({'Percentage Change': object}).fillna({'Percentage Change': '-'})

In [None]:
# Create a Summary Statistics table of the total crimes over the entire dataset.
# describe() is run on yearly_total, so each row of the result ('mean', '25%',
# '75%', ...) is a statistic and each column is a summarised yearly_total column.
yearly_sum_stats = yearly_total.describe()
yearly_sum_stats

In [None]:
# Pull the 25th and 75th percentile rows out of the summary table
quartiles = yearly_sum_stats.loc[['25%', '75%']]
Q1, Q3 = quartiles.loc['25%'], quartiles.loc['75%']

# Inter-quartile range and the 1.5 * IQR fences around it
IQR = Q3 - Q1
fence_width = 1.5 * IQR
lower_bound = Q1 - fence_width
upper_bound = Q3 + fence_width

# Values outside [lower_bound, upper_bound] would count as outliers
print("Lower Bound:", lower_bound)
print("Upper Bound:", upper_bound)

In [None]:
# Bar plot of the total crime count over the length of the dataset
yearly_total_plot = yearly_total
mean_total = yearly_sum_stats.loc['mean', 'Total Crimes']

# Use list comprehension to change the covid years (2020 onwards) to red
covid_color = ["r" if year >= 2020 else "b" for year in yearly_total_plot.index]
yearly_total_plot.plot.bar(y="Total Crimes",
                           color=covid_color,
                           legend=False)

# Dashed reference line at the mean.  The label is anchored to the computed
# mean (just right of the last bar) instead of hard-coded coordinates, so it
# stays on the line when the data changes.
plt.axhline(y=mean_total, color='black', linestyle='--')
plt.text(len(yearly_total_plot) - 0.5, mean_total, " Mean", ha="left", va="center")
plt.xlabel("")
plt.xticks(rotation=45)
plt.ylabel("Total Crimes by Year")
plt.title("Total Crimes in Los Angeles (2010-2023)")

# savefig does not create missing folders, so make sure the target exists
Path("output_data").mkdir(parents=True, exist_ok=True)
plt.savefig("output_data/TotalCrimeYTY.png", bbox_inches="tight")
plt.show()

In [None]:
# Line graph showing the percentage change in total crimes year-to-year
yearly_total_index_reset = yearly_total.reset_index()

# Skip the first year: it has no previous year to compare against
percent_plot = yearly_total_index_reset.iloc[1:]

# The percentage column can be object dtype (e.g. when a '-' placeholder was
# stored in it), so convert explicitly before plotting; anything that is not a
# number becomes NaN and is simply left out of the line.
percent_change = pd.to_numeric(percent_plot["Percentage Change"], errors="coerce")

plt.figure(figsize=(7, 5))
plt.plot(percent_plot["Crime Year"], percent_change, marker='o', linestyle='-')
plt.title("Percentage Change in Total Crimes (2010-2023)")
plt.xlabel("Year")
plt.ylabel("Percentage Change")
plt.xticks(percent_plot["Crime Year"], rotation=45)
plt.grid(True)

# savefig does not create missing folders, so make sure the target exists
Path("output_data").mkdir(parents=True, exist_ok=True)
plt.savefig("output_data/PercentChangeYTY.png", bbox_inches="tight")
plt.show()

In [None]:
# Linear Regression plot of total crimes against year
years = yearly_total_index_reset["Crime Year"]
totals = yearly_total_index_reset["Total Crimes"]

slope, intercept, rvalue, pvalue, stderr = stats.linregress(years, totals)
regress = years * slope + intercept

# '+.2f' prints the intercept with its own sign, avoiding "x +-123.45"
line_eq = f"y = {slope:.2f}x {intercept:+.2f}"

plt.scatter(years, totals)
plt.plot(years, regress, "r-")
plt.xticks(years, rotation=45)
plt.ylabel("Total Crimes")
plt.title("Linear Regression Plot of Total Crime in Los Angeles (2010-2023)")
plt.annotate(line_eq, (years.min(), totals.max() - 1), fontsize=12, color="red")

# savefig does not create missing folders, so make sure the target exists
Path("output_data").mkdir(parents=True, exist_ok=True)
plt.savefig("output_data/LinearRegressionYTY.png", bbox_inches="tight")

# rvalue is the correlation coefficient r; its square is the coefficient of
# determination, so label the printed number accordingly.
print(f"The r-squared is: {rvalue ** 2}")
print(f"The p-value is: {pvalue}")
plt.show()

In [None]:
# Welch's t-test (unequal variances) on yearly crime totals:
# pre-Covid years (2010-2019) versus Covid-era years (2020-2023)
crime_year = yearly_total_index_reset['Crime Year']
pre_covid_slice = yearly_total_index_reset[crime_year.between(2010, 2019)]
covid_slice = yearly_total_index_reset[crime_year.between(2020, 2023)]

pre_covid_totals = pre_covid_slice['Total Crimes']
covid_totals = covid_slice['Total Crimes']

# Average yearly total for each period
pre_covid_mean = pre_covid_totals.mean()
covid_mean = covid_totals.mean()

print(pre_covid_mean)
print(covid_mean)
# Compare the two period means; the result is shown as the cell output
stats.ttest_ind(pre_covid_totals, covid_totals, equal_var=False)

In [None]:
# Helper that pulls the most common value out of a column.
# It has its own name: previously the helper and the resulting DataFrame were
# both called `highest_occurance`, so running the cell a second time passed the
# DataFrame to agg() as if it were the function.
def most_frequent_value(column):
    """Return the value that occurs most often in *column* (a pandas Series)."""
    return column.value_counts().idxmax()


# Columns summarised per year
summary_columns = [
    'Area Name',
    'Crime Code',
    'Type of Crime',
    'Victim Age',
    'Victim Gender',
    'Victim Ethnicity',
    'Scene of Crime',
]

# Group the data by 'Crime Year' and apply most_frequent_value to each column
highest_occurance = (
    Data_complete.groupby('Crime Year')
    .agg({column_name: most_frequent_value for column_name in summary_columns})
    .reset_index()
)

In [None]:
# Yearly summary: yearly totals joined with the per-year most-frequent values
# on 'Crime Year', which then becomes the index of the combined table
yearly_summary_df = yearly_total.merge(highest_occurance, on='Crime Year')
yearly_summary_df.set_index('Crime Year', inplace=True)
yearly_summary_df

## Crimes Summary

In [None]:
# Total number of crime records in the cleaned dataset
Total_crime_count = Data_complete['DR Number'].shape[0]
Total_crime_count

In [None]:
# Calculate the total number of Crimes 
##Total_crime_count = Data_complete.len['Type of Crime']


# unique kind of the crimes:
#Kind_of_crimes = clean_crime_data.groupby(["Crm Cd Desc","crime_year"], as_index = false).count()

# Pick which highest frequency (5)
#highest_frequency_crime = Kind_of_crimes.sort_values(ascending= False)

# Inside the home and outside the home( COVID people were home)



In [None]:
#Plot a line graph showing the overall crimes trend change over the years
#plot.line

In [None]:
# Calculate the kind of crimes (e.g different kind of crimes) per year??
#the distribution of crime types over the years
#clean_crime_data.loc(2017)
#df_2017= 
#df_2018
#df_2019
#df_2020
#df_2021
#df_2022
#crime_type


In [None]:
# Create a dataframe with crime_type and year????
#crime_summary = pd.DataFrame({
    

In [None]:
# Plot a bar plot with multiple columns over the different years for total number of crimes(value count)


In [None]:
# Plot another histogram for average of pre covid and post covid crimes (Total and one for each crime).

## Area Summary

In [None]:
#  select all of the different Areas


In [None]:
# Divide areas into Central, Valley, South, West

In [None]:
# Calculate the total crimes per area per year.
# The assignment previously had no right-hand side (a SyntaxError).  The result
# has one row per area and one column per year; area/year combinations with no
# records are shown as 0.
per_area_crime_counts = (
    Data_complete.groupby(['Area Name', 'Crime Year'])['DR Number']
    .count()
    .unstack('Crime Year', fill_value=0)
)

In [None]:
#calculate crime types per area
# Five highest crimes 

In [None]:
# Geoplot the area 

In [None]:
# Make a data frame with Columns for Average crime per area


# Display Data Frame

In [None]:
# Highest Crime Area(By Total Crimes)

In [None]:
# Lowest Crime Area (By Total Crime)

In [None]:
#Bar chart four areas, four years and total number of crimes

In [None]:
# Identify Hot Spots
# Crime in each area across the years

In [None]:
# Any change in the hot spots over the years


### Female Vs Male Victims

In [None]:
# Pie chart of how the records are split across the 'Victim Gender' values
gender_counts = Data_complete['Victim Gender'].value_counts()
grouped_crimes = dict()

# Slice styling: one label per gender value, percentages with one decimal
pie_options = {
    "labels": gender_counts.index,
    "autopct": '%1.1f%%',
    "startangle": 140,
}

plt.figure(figsize=(6, 6))
plt.pie(gender_counts, **pie_options)
plt.title('Distribution of Male vs. Female Victims')
plt.axis('equal')  # keep the pie circular

# Render the figure
plt.show()

#NEED VICTIM GENDER CLEANED

#Types of Crime: Distribution of Types of Crime, Age vs Type of Crime, Sex vs Type of Crime

In [None]:
# Count how many records carry each distinct 'Type of Crime' value
# and print the tallies
crime_counts = Data_complete['Type of Crime'].value_counts()
print(crime_counts)

In [None]:
# Find all the unique values in the 'Type of Crime' column.
# Only the DISTINCT descriptions are listed (sorted alphabetically); iterating
# the raw column printed one line per crime record.
types_of_crime = pd.Series(
    sorted(Data_complete['Type of Crime'].unique()),
    name='Type of Crime',
)
for crime_type in types_of_crime:
    print(crime_type)

# Add to csv and save: one description per line, no index column.
# Passing the path lets pandas open the file itself, which avoids the blank
# lines a text-mode handle without newline='' produces on Windows.
import csv  # not used by this cell; retained in case later cells rely on it
types_of_crime.to_csv('Types_of_Crimes_list.csv', index=False, header=False)




In [None]:
# Ordered (category, keywords) table used by categorize_crime.
# A description is assigned to the FIRST category that owns a keyword occurring
# anywhere inside it, so the order of the entries is significant (for example
# "ASSAULT WITH DEADLY WEAPON ON POLICE OFFICER" is claimed by ASSAULT before
# WEAPONS CHARGES is consulted, and the two VANDALISM descriptions are claimed
# by VANDALISM before OTHER).  The table is built once at import time instead
# of on every call.
_CRIME_CATEGORY_KEYWORDS = (
    ("THEFT", (
        "ATTEMPTED ROBBERY",
        "BIKE - ATTEMPTED STOLEN",
        "BIKE - STOLEN",
        "BOAT - STOLEN",
        "BUNCO, ATTEMPT",
        "BUNCO, GRAND THEFT",
        "BUNCO, PETTY THEFT",
        "BURGLARY",
        "BURGLARY FROM VEHICLE",
        "BURGLARY FROM VEHICLE, ATTEMPTED",
        "BURGLARY, ATTEMPTED",
        "COUNTERFEIT",
        "CREDIT CARDS, FRAUD USE ($950 & UNDER",
        "CREDIT CARDS, FRAUD USE ($950.01 & OVER)",
        "DEFRAUDING INNKEEPER/THEFT OF SERVICES, $950 & UNDER",
        "DEFRAUDING INNKEEPER/THEFT OF SERVICES, OVER $950.01",
        "DISHONEST EMPLOYEE - GRAND THEFT",
        "DISHONEST EMPLOYEE - PETTY THEFT",
        "DISHONEST EMPLOYEE ATTEMPTED THEFT",
        "DOCUMENT FORGERY / STOLEN FELONY",
        "EMBEZZLEMENT, GRAND THEFT ($950.01 & OVER)",
        "EMBEZZLEMENT, PETTY THEFT ($950 & UNDER)",
        "GRAND THEFT / AUTO REPAIR",
        "GRAND THEFT / INSURANCE FRAUD",
        "PETTY THEFT - AUTO REPAIR",
        "PICKPOCKET",
        "PICKPOCKET, ATTEMPT",
        "PURSE SNATCHING",
        "PURSE SNATCHING - ATTEMPT",
        "ROBBERY",
        "SHOPLIFTING - ATTEMPT",
        "SHOPLIFTING - PETTY THEFT ($950 & UNDER)",
        "SHOPLIFTING-GRAND THEFT ($950.01 & OVER)",
        "THEFT FROM MOTOR VEHICLE - ATTEMPT",
        "THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND OVER)",
        "THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER)",
        "THEFT FROM PERSON - ATTEMPT",
        "THEFT OF IDENTITY",
        "THEFT PLAIN - ATTEMPT",
        "THEFT PLAIN - PETTY ($950 & UNDER)",
        "THEFT, COIN MACHINE - ATTEMPT",
        "THEFT, COIN MACHINE - GRAND ($950.01 & OVER)",
        "THEFT, COIN MACHINE - PETTY ($950 & UNDER)",
        "THEFT, PERSON",
        "THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LIVESTK,PROD",
        "VEHICLE - ATTEMPT STOLEN",
        "VEHICLE - STOLEN",
        "VEHICLE, STOLEN - OTHER (MOTORIZED SCOOTERS, BIKES, ETC)",
        "DOCUMENT WORTHLESS ($200 & UNDER)",
        "DOCUMENT WORTHLESS ($200.01 & OVER)",
        # Previously missing: both "TILL TAP - ..." descriptions were Uncategorized
        "TILL TAP",
    )),
    ("ABORTION", (
        "ABORTION/ILLEGAL",
    )),
    ("ASSAULT", (
        "ASSAULT WITH DEADLY WEAPON ON POLICE OFFICER",
        "ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",
        "BATTERY - SIMPLE ASSAULT",
        "BATTERY ON A FIREFIGHTER",
        "BATTERY POLICE (SIMPLE)",
        "OTHER ASSAULT",
    )),
    ("VANDALISM", (
        "VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS)",
        "VANDALISM - MISDEAMEANOR ($399 OR UNDER)",
    )),
    ("CHILD ABUSE", (
        "CHILD ABANDONMENT",
        "CHILD ABUSE (SEXUAL)",
        "CHILD ABUSE (PHYSICAL)",
        "CHILD ANNOYING (17YRS & UNDER)",
        "CHILD NEGLECT (SEE 300 W.I.C.)",
        "CHILD PORNOGRAPHY",
        "CHILD STEALING",
        "CRM AGNST CHLD (13 OR UNDER) (14-15 & SUSP 10 YRS OLDER)",
    )),
    ("DOMESTIC VIOLENCE", (
        "INTIMATE PARTNER - AGGRAVATED",
        "INTIMATE PARTNER - SIMPLE",
    )),
    ("DRUG & ALCOHOL OFFENSES", (
        "DRUGS, TO A MINOR",
        "DRUNK ROLL",
        "DRUNK ROLL - ATTEMPT",
    )),
    ("HOMICIDE", (
        "CRIMINAL HOMICIDE",
        "MANSLAUGHTER, NEGLIGENT",
        "LYNCHING",
        "LYNCHING - ATTEMPTED",
    )),
    ("KIDNAPPING", (
        "KIDNAPPING",
        "KIDNAPPING - GRAND ATTEMPT",
        "HUMAN TRAFFICKING - INVOLUNTARY SERVITUDE",
    )),
    ("SEXUAL OFFENSES", (
        "SODOMY/SEXUAL CONTACT B/W PENIS OF ONE PERS TO ANUS OTH",
        "SEX OFFENDER REGISTRANT OUT OF COMPLIANCE",
        "SEX,UNLAWFUL(INC MUTUAL CONSENT, PENETRATION W/ FRGN OBJ",
        "SEXUAL PENETRATION W/FOREIGN OBJECT",
        "RAPE, ATTEMPTED",
        "RAPE, FORCIBLE",
        "PIMPING",
        "PROWLER",
        "PEEPING TOM",
        "ORAL COPULATION",
        "LEWD CONDUCT",
        "LEWD/LASCIVIOUS ACTS WITH CHILD",
        "LETTERS, LEWD  -  TELEPHONE CALLS, LEWD",
        "INDECENT EXPOSURE",
        "INCEST (SEXUAL ACTS BETWEEN BLOOD RELATIVES)",
        "HUMAN TRAFFICKING - COMMERCIAL SEX ACTS",
        "BEASTIALITY, CRIME AGAINST NATURE SEXUAL ASSLT WITH ANIM",
        # Misspelled form kept for backward compatibility; the correctly
        # spelled form is added so it no longer falls through.
        "BATTERWITH SEXUAL CONTACT",
        "BATTERY WITH SEXUAL CONTACT",
        # Previously missing
        "PANDERING",
    )),
    ("WEAPONS CHARGES", (
        # Never reached in practice: ASSAULT claims this description first.
        "ASSAULT WITH DEADLY WEAPON ON POLICE OFFICER",
        "BRANDISH WEAPON",
        "REPLICA FIREARMS(SALE,DISPLAY,MANUFACTURE OR DISTRIBUTE)",
        "SHOTS FIRED AT INHABITED DWELLING",
        "SHOTS FIRED AT MOVING VEHICLE, TRAIN OR AIRCRAFT",
        "WEAPONS POSSESSION/BOMBING",
        # Previously missing
        "DISCHARGE FIREARMS/SHOTS FIRED",
    )),
    ("OTHER", (
        "VANDALISM",
        "BIGAMY",
        "BLOCKING DOOR INDUCTION CENTER",
        "BOMB SCARE",
        "BRIBERY",
        "CONSPIRACY",
        "CONTEMPT OF COURT",
        "CONTRIBUTING",
        "CRUELTY TO ANIMALS",
        "CRIMINAL THREATS - NO WEAPON DISPLAYED",
        "DISRUPT SCHOOL",
        "DISTURBING THE PEACE",
        "EXTORTION",
        "FAILURE TO DISPERSE",
        "FAILURE TO YIELD",
        "FALSE IMPRISONMENT",
        "FALSE POLICE REPORT",
        "OTHER MISCELLANEOUS CRIME",
        "ILLEGAL DUMPING",
        "RESISTING ARREST",
        "TELEPHONE PROPERTY - DAMAGE",
        "THREATENING PHONE CALLS/LETTERS",
        "TRAIN WRECKING",
        "TRESPASSING",
        "UNAUTHORIZED COMPUTER ACCESS",
        "VIOLATION OF COURT ORDER",
        "VIOLATION OF RESTRAINING ORDER",
        "VIOLATION OF TEMPORARY RESTRAINING ORDER",
        "FIREARMS EMERGENCY PROTECTIVE ORDER (FIREARMS EPO)",
        "FIREARMS RESTRAINING ORDER (FIREARMS RO)",
        "FIREARMS TEMPORARY RESTRAINING ORDER (TEMP FIREARMS RO)",
        "INCITING A RIOT",
        "DRIVING WITHOUT OWNER CONSENT (DWOC)",
        "RECKLESS DRIVING",
        "STALKING",
        "THROWING OBJECT AT MOVING VEHICLE",
        "ARSON",
    )),
)


def categorize_crime(crime):
    """Map a crime description to a broad crime category.

    The description is compared, case-sensitively, against the keyword table
    above; the first category owning a keyword that occurs anywhere in the
    description wins.

    Args:
        crime: Crime description text (e.g. a 'Type of Crime' value).

    Returns:
        The category name (such as "THEFT" or "ASSAULT"), or "Uncategorized"
        when no keyword matches or when *crime* is not a string (for example
        NaN from a missing value).
    """
    # A non-string (e.g. float NaN) cannot be searched for substrings; treat it
    # as unknown instead of raising TypeError.
    if not isinstance(crime, str):
        return "Uncategorized"

    for category, keywords in _CRIME_CATEGORY_KEYWORDS:
        if any(keyword in crime for keyword in keywords):
            return category
    return "Uncategorized"

# Crime descriptions to run through categorize_crime (alphabetical order)
crime_descriptions = [
    "ABORTION/ILLEGAL",
    "ARSON",
    "ASSAULT WITH DEADLY WEAPON ON POLICE OFFICER",
    "ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",
    "ATTEMPTED ROBBERY",
    "BATTERY - SIMPLE ASSAULT",
    "BATTERY ON A FIREFIGHTER",
    "BATTERY POLICE (SIMPLE)",
    "BATTERWITH SEXUAL CONTACT",
    "BEASTIALITY, CRIME AGAINST NATURE SEXUAL ASSLT WITH ANIM",
    "BIGAMY",
    "BIKE - ATTEMPTED STOLEN",
    "BIKE - STOLEN",
    "BLOCKING DOOR INDUCTION CENTER",
    "BOAT - STOLEN",
    "BOMB SCARE",
    "BRANDISH WEAPON",
    "BRIBERY",
    "BUNCO, ATTEMPT",
    "BUNCO, GRAND THEFT",
    "BUNCO, PETTY THEFT",
    "BURGLARY",
    "BURGLARY FROM VEHICLE",
    "BURGLARY FROM VEHICLE, ATTEMPTED",
    "BURGLARY, ATTEMPTED",
    "CHILD ABANDONMENT",
    "CHILD ABUSE (PHYSICAL)",
    "CHILD ABUSE (PHYSICAL)",
    "CHILD ANNOYING (17YRS & UNDER)",
    "CHILD NEGLECT (SEE 300 W.I.C.)",
    "CHILD PORNOGRAPHY",
    "CHILD STEALING",
    "CONSPIRACY",
    "CONTEMPT OF COURT",
    "CONTRIBUTING",
    "COUNTERFEIT",
    "CREDIT CARDS, FRAUD USE ($950 & UNDER",
    "CREDIT CARDS, FRAUD USE ($950.01 & OVER)",
    "CRIMINAL HOMICIDE",
    "CRIMINAL THREATS - NO WEAPON DISPLAYED",
    "CRM AGNST CHLD (13 OR UNDER) (14-15 & SUSP 10 YRS OLDER)",
    "CRUELTY TO ANIMALS",
    "DEFRAUDING INNKEEPER/THEFT OF SERVICES, $950 & UNDER",
    "DEFRAUDING INNKEEPER/THEFT OF SERVICES, OVER $950.01",
    "DISCHARGE FIREARMS/SHOTS FIRED",
    "DISHONEST EMPLOYEE - GRAND THEFT",
    "DISHONEST EMPLOYEE - PETTY THEFT",
    "DISHONEST EMPLOYEE ATTEMPTED THEFT",
    "DISRUPT SCHOOL",
    "DISTURBING THE PEACE",
    "DOCUMENT FORGERY / STOLEN FELONY",
    "DOCUMENT WORTHLESS ($200 & UNDER)",
    "DOCUMENT WORTHLESS ($200.01 & OVER)",
    "DRIVING WITHOUT OWNER CONSENT (DWOC)",
    "DRUGS, TO A MINOR",
    "DRUNK ROLL",
    "DRUNK ROLL - ATTEMPT",
    "EMBEZZLEMENT, GRAND THEFT ($950.01 & OVER)",
    "EMBEZZLEMENT, PETTY THEFT ($950 & UNDER)",
    "EXTORTION",
    "FAILURE TO DISPERSE",
    "FAILURE TO YIELD",
    "FALSE IMPRISONMENT",
    "FALSE POLICE REPORT",
    "FIREARMS EMERGENCY PROTECTIVE ORDER (FIREARMS EPO)",
    "FIREARMS RESTRAINING ORDER (FIREARMS RO)",
    "FIREARMS TEMPORARY RESTRAINING ORDER (TEMP FIREARMS RO)",
    "GRAND THEFT / AUTO REPAIR",
    "GRAND THEFT / INSURANCE FRAUD",
    "HUMAN TRAFFICKING - COMMERCIAL SEX ACTS",
    "HUMAN TRAFFICKING - INVOLUNTARY SERVITUDE",
    "ILLEGAL DUMPING",
    "INCEST (SEXUAL ACTS BETWEEN BLOOD RELATIVES)",
    "INCITING A RIOT",
    "INDECENT EXPOSURE",
    "INTIMATE PARTNER - AGGRAVATED",
    "INTIMATE PARTNER - SIMPLE",
    "KIDNAPPING",
    "KIDNAPPING - GRAND ATTEMPT",
    "LETTERS, LEWD  -  TELEPHONE CALLS, LEWD",
    "LEWD CONDUCT",
    "LEWD/LASCIVIOUS ACTS WITH CHILD",
    "LYNCHING",
    "LYNCHING - ATTEMPTED",
    "MANSLAUGHTER, NEGLIGENT",
    "ORAL COPULATION",
    "OTHER ASSAULT",
    "OTHER MISCELLANEOUS CRIME",
    "PANDERING",
    "PEEPING TOM",
    "PETTY THEFT - AUTO REPAIR",
    "PICKPOCKET",
    "PICKPOCKET, ATTEMPT",
    "PIMPING",
    "PROWLER",
    "PURSE SNATCHING",
    "PURSE SNATCHING - ATTEMPT",
    "RAPE, ATTEMPTED",
    "RAPE, FORCIBLE",
    "RECKLESS DRIVING",
    "REPLICA FIREARMS(SALE,DISPLAY,MANUFACTURE OR DISTRIBUTE)",
    "RESISTING ARREST",
    "ROBBERY",
    "SEX OFFENDER REGISTRANT OUT OF COMPLIANCE",
    "SEX,UNLAWFUL(INC MUTUAL CONSENT, PENETRATION W/ FRGN OBJ",
    "SEXUAL PENETRATION W/FOREIGN OBJECT",
    "SHOPLIFTING - ATTEMPT",
    "SHOPLIFTING - PETTY THEFT ($950 & UNDER)",
    "SHOPLIFTING-GRAND THEFT ($950.01 & OVER)",
    "SHOTS FIRED AT INHABITED DWELLING",
    "SHOTS FIRED AT MOVING VEHICLE, TRAIN OR AIRCRAFT",
    "SODOMY/SEXUAL CONTACT B/W PENIS OF ONE PERS TO ANUS OTH",
    "STALKING",
    "TELEPHONE PROPERTY - DAMAGE",
    "THEFT FROM MOTOR VEHICLE - ATTEMPT",
    "THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND OVER)",
    "THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER)",
    "THEFT FROM PERSON - ATTEMPT",
    "THEFT OF IDENTITY",
    "THEFT PLAIN - ATTEMPT",
    "THEFT PLAIN - PETTY ($950 & UNDER)",
    "THEFT, COIN MACHINE - ATTEMPT",
    "THEFT, COIN MACHINE - GRAND ($950.01 & OVER)",
    "THEFT, COIN MACHINE - PETTY ($950 & UNDER)",
    "THEFT, PERSON",
    "THEFT-GRAND ($950.01 & OVER)EXCPT,GUNS,FOWL,LIVESTK,PROD",
    "THREATENING PHONE CALLS/LETTERS",
    "THROWING OBJECT AT MOVING VEHICLE",
    "TILL TAP - GRAND THEFT ($950.01 & OVER)",
    "TILL TAP - PETTY ($950 & UNDER)",
    "TRAIN WRECKING",
    "TRESPASSING",
    "UNAUTHORIZED COMPUTER ACCESS",
    "VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS)",
    "VANDALISM - MISDEAMEANOR ($399 OR UNDER)",
    "VEHICLE - ATTEMPT STOLEN",
    "VEHICLE - STOLEN",
    "VEHICLE, STOLEN - OTHER (MOTORIZED SCOOTERS, BIKES, ETC)",
    "VIOLATION OF COURT ORDER",
    "VIOLATION OF RESTRAINING ORDER",
    "VIOLATION OF TEMPORARY RESTRAINING ORDER",
    "WEAPONS POSSESSION/BOMBING",
]

# Category for each description, in the same order as the list above
crime_categories = list(map(categorize_crime, crime_descriptions))

# Show every description next to the category it was given
for description, category in zip(crime_descriptions, crime_categories):
    print(f"Crime: {description} - Category: {category}")


In [None]:
# Categorise the actual crime RECORDS so the chart shows how many crimes fall
# in each category.  Counting the entries of the description list instead
# weights every description equally and only measures how many distinct labels
# a category has.
# Each distinct description is categorised once and the result is mapped back
# onto the rows, which keeps this fast on a large dataset.
crime_types = Data_complete['Type of Crime']
category_by_description = {
    description: categorize_crime(description)
    for description in crime_types.unique()
}
df = crime_types.map(category_by_description).to_frame(name='CRIME CATEGORY')
df.head()

# Number of crime records per category, largest first
category_counts = df['CRIME CATEGORY'].value_counts()

plt.figure(figsize=(10, 6))
plt.pie(category_counts, labels=category_counts.index, autopct='%1.1f%%', startangle=140)
plt.title('Distribution of Crime Categories')
plt.xlabel('Crime Category')
plt.ylabel('Number of Crimes')

plt.show()

### Victim Race 

# Child Abuse compare over the years

## Change in crime spot

In [None]:
#Splitting race by groups: Black, White, Hispanics, Asians

In [None]:
#Splitting premise into 4 categories: Commercial, residential, industrial and outdoors
# using Bins

In [None]:
#Percentage of increase and decrease in crimes over the years
# TODO: group Total_crime_count by year (count)

In [None]:
# How has crime changed over the years?
#
# Line graphs