# Crime Data Analysis

- Your analysis here
  
---

In [None]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import hvplot.pandas
import numpy as np
import scipy.stats as stats

import requests
# Import API key
from api_keys import geoapify_key



# Input files (update these paths to match where the CSV extracts live).
data_2010_2019 = Path("Crime_Data_from_2010_to_2019 (1).csv")
data_2020_2023 = Path("Crime_Data_from_2020_to_Present_20231016.csv")

# Load each extract into its own DataFrame.
pre_covid_data, post_covid_data = (
    pd.read_csv(csv_path) for csv_path in (data_2010_2019, data_2020_2023)
)

# Stack the two extracts into a single frame with a fresh 0..n-1 index,
# then show the resulting column labels.
Data_complete = pd.concat((pre_covid_data, post_covid_data), ignore_index=True)
Data_complete.columns

In [None]:
# Preview the first rows of the combined dataset.
Data_complete.head()

In [None]:
# Print a summary of the combined dataset (columns, dtypes, non-null counts).
Data_complete.info()

Data Analysis


In [None]:
# Number of columns in the combined dataset.
Data_complete.shape[1]

In [None]:

# Derive the calendar year from the 'Date Rptd' column and store it as a new
# 'Crime Year' column, then display that column.
reported_on = pd.to_datetime(Data_complete['Date Rptd'])
Data_complete['Crime Year'] = reported_on.dt.year

Data_complete['Crime Year']

In [None]:
# Drop the columns that are not needed for the analysis.
# The columns are picked by position in the current column order, so this cell
# must be run exactly once: running it again would drop different columns
# (or fail once fewer than 29 columns remain).
unused_column_positions = [1, 2, 3, 4, 6, 7, 10, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28]
Data_complete.drop(columns=Data_complete.columns[unused_column_positions], inplace=True)

In [None]:

# Keep a single row per report number (the first occurrence of each 'DR_NO').
Data_complete = Data_complete.drop_duplicates(subset='DR_NO')

# Keep only rows with a strictly positive victim age; this removes zero and
# negative ages (rows with a missing age are dropped as well).
# NOTE(review): an earlier comment here also mentioned removing entries with
# Victim Sex "X" and Victim Race "X"; no such filter is applied in this cell.
has_positive_age = Data_complete['Vict Age'].gt(0)
Data_complete = Data_complete[has_positive_age]

In [None]:
# Give the remaining columns readable names, then preview the result.
readable_column_names = {
    'DR_NO': 'DR Number',
    'Crime Year': 'Crime Year',  # unchanged; listed for completeness
    'AREA NAME': 'Area Name',
    'Crm Cd': 'Crime Code',
    'Crm Cd Desc': 'Type of Crime',
    'Vict Age': 'Victim Age',
    'Vict Sex': 'Victim Gender',
    'Vict Descent': 'Victim Ethnicity',
    'Premis Desc': 'Scene of Crime',
    'LAT': 'Latitude',
    'LON': 'Longitude',
}
Data_complete.rename(columns=readable_column_names, inplace=True)

Data_complete.head()





In [None]:

# Keep only rows whose longitude lies strictly between -128 and -108.
longitude = Data_complete['Longitude']
Data_complete = Data_complete[longitude.gt(-128) & longitude.lt(-108)]


In [None]:
# Keep only rows whose latitude lies strictly between 24 and 44.
latitude = Data_complete['Latitude']
Data_complete = Data_complete[latitude.gt(24) & latitude.lt(44)]


## Crimes Summary

In [None]:
# Total number of rows (reports) left after cleaning.
Total_crime_count = Data_complete['DR Number'].shape[0]
Total_crime_count

## Area Summary

In [None]:
# Count how many rows fall in each distinct 'Area Name' value.
Data_complete["Area Name"].value_counts()


Reference — pandas `Series.replace`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.replace.html

In [None]:
# Collapse the individual area names into four broader regions
# (Central, South, Valley, West). Names not listed here are left unchanged.
area_to_region = {
    "West LA": "West",
    "77th Street": "South",
    "Southwest": "South",
    "Southeast": "South",
    "Harbor": "South",
    "Pacific": "West",
    "Wilshire": "West",
    "Olympic": "West",
    "Hollywood": "West",
    "Central": "Central",
    "Hollenbeck": "Central",
    "Newton": "Central",
    "Northeast": "Central",
    "Rampart": "Central",
    "Devonshire": "Valley",
    "Foothill": "Valley",
    "Mission": "Valley",
    "North": "Valley",
    "Topanga": "Valley",
    "Van Nuys": "Valley",
    "West Valley": "Valley",
    "N Hollywood": "Valley",
}

# Build a new frame with the replaced column instead of calling
# replace(..., inplace=True) on Data_complete["Area Name"]. That chained
# in-place call operates on a Series selected from the frame; recent pandas
# versions warn about it and, with Copy-on-Write enabled, it leaves
# Data_complete unchanged.
Data_complete = Data_complete.assign(
    **{"Area Name": Data_complete["Area Name"].replace(area_to_region)}
)



In [None]:
# Restrict the data to crime years 2010 through 2019 (both inclusive).
in_pre_covid_years = Data_complete['Crime Year'].between(2010, 2019)
pre_covid_data = Data_complete[in_pre_covid_years]

# Count the reports per area. Despite its name, crime_mean_by_area holds the
# raw totals for the whole period, not a mean.
crime_mean_by_area = pre_covid_data.groupby('Area Name')['DR Number'].count()

# Dividing the ten-year totals by 10 gives the average number of reports per
# year for each area.
print(crime_mean_by_area / 10)

In [None]:
# Restrict the data to crime years 2020 through 2022 (both inclusive).
# The frame keeps its original name, Covid_data_2020_2023, because later cells
# refer to it, even though 2023 is not part of the selection.
first_covid_year, last_covid_year = 2020, 2022
in_covid_years = Data_complete['Crime Year'].between(first_covid_year, last_covid_year)
Covid_data_2020_2023 = Data_complete[in_covid_years]

# Total number of reports per area over the selected years.
Total_crime_2020_2022 = Covid_data_2020_2023.groupby('Area Name')['DR Number'].count()

# Average reports per year for each area. The divisor is derived from the
# filter window (3 years) so it cannot drift from the selection above; the
# previous hard-coded 4 understated the yearly average.
Total_crime_2020_2022 / (last_covid_year - first_covid_year + 1)


In [None]:
# Grouped bar chart comparing the per-area crime figures of the two periods.
# NOTE(review): the bar heights below are hard-coded rather than read from the
# per-area results computed in the earlier cells (presumably copied from their
# output) - confirm they are still current. The second legend label says
# "2020-2023"; verify it matches the years actually selected upstream.
x_axis = ['Central', 'South', 'Valley', 'West']
y_axis_1 = [37285, 40188, 54326, 38842]
y_axis_2 = [28695, 26556, 36296, 31088]

width = 0.2  # bar width; each pair is centred on its tick
x_pos = np.arange(len(x_axis))
tick_locations = [0, 1, 2, 3]

plt.figure(figsize=(15, 8))

# One bar series per period, shifted half a bar width left / right of the tick.
bar_series = (
    (-width / 2, y_axis_1, "r", "Crime Count 2010-2019"),
    (width / 2, y_axis_2, "b", "Crime Count 2020-2023"),
)
for offset, heights, bar_color, series_label in bar_series:
    plt.bar(x_pos + offset, heights, width=width, alpha=0.8, color=bar_color, label=series_label)

plt.title("Crime per Area")
plt.xlabel("Area Name")
plt.ylabel("Crime Count")
plt.xticks(tick_locations, x_axis, rotation=45)

plt.legend()  # distinguishes the two periods
plt.show()

In [None]:
# Rows whose crime year is exactly 2020.
crime_2020 = Data_complete[Data_complete['Crime Year'].eq(2020)]


In [None]:
# Base endpoint of the Geoapify Places API.
# NOTE(review): base_url is not used by any other cell visible here - confirm
# it is still needed.
base_url = "https://api.geoapify.com/v2/places"
base_url

In [None]:


 # Configure the map
# Plot every pre-covid row as a point on an OpenStreetMap tile layer,
# coloured by area.
# NOTE(review): size="Total" refers to a column named "Total", but no such
# column is created in the cells visible here - confirm it exists on
# pre_covid_data before relying on this plot.
pre_covid_map_options = dict(
    geo=True,
    tiles="OSM",
    frame_width=800,
    frame_height=600,
    size="Total",
    scale=2.0,
    color="Area Name",
    title="Crimes Per Area in Pre Covid Years",
)
Crime_map_2010_2019 = pre_covid_data.hvplot.points(
    "Longitude", "Latitude", **pre_covid_map_options
)

# Display the map plot
Crime_map_2010_2019


In [None]:
 # Configure the map
# Plot every covid-era row as a point on an OpenStreetMap tile layer,
# coloured by area.
# NOTE(review): size="Total" refers to a column named "Total", but no such
# column is created in the cells visible here - confirm it exists on
# Covid_data_2020_2023 before relying on this plot.
covid_map_options = dict(
    geo=True,
    tiles="OSM",
    frame_width=800,
    frame_height=600,
    size="Total",
    scale=2.0,
    color="Area Name",
    title="Crimes Per Area During Covid Years",
)
Crime_map_covid_era = Covid_data_2020_2023.hvplot.points(
    "Longitude", "Latitude", **covid_map_options
)

# Display the map plot
Crime_map_covid_era
