## Data Loading

In [None]:
import pandas as pd

In [None]:
import numpy as np

In [None]:
# Load the crime CSV export from the working directory into a DataFrame
df_crime=pd.read_csv(r"Crimes_-_2001_to_Present.csv")

In [None]:
# Preview the first five rows of the raw data
df_crime.head(5)

In [None]:
# Print the DataFrame summary (columns, non-null counts, dtypes)
df_crime.info()

In [None]:
# Count the missing values in each column of the raw data
df_crime.isna().sum()

In [None]:
# (rows, columns) of the raw data
df_crime.shape

## Data Cleaning

In [None]:
# Remove exact duplicate rows. drop_duplicates() returns a new DataFrame, so it
# has to be called and the result assigned back; referencing the bare method
# (as before) only displayed the method object and removed nothing.
df_crime = df_crime.drop_duplicates()

In [None]:
# Columns that must be populated for a record to be kept
required_columns = ['Latitude', 'Longitude', 'Location', 'X Coordinate',
                    'Y Coordinate', 'Location Description', 'District']

# Drop rows with a null in any required column. The explicit .copy() makes
# df_crime_final an independent DataFrame, so the column assignments made in
# later cells modify it directly instead of triggering SettingWithCopyWarning.
df_crime_final = df_crime.dropna(subset=required_columns).copy()


In [None]:
# Per-column missing-value counts after dropping the incomplete rows
df_crime_final.isna().sum()

In [None]:
# Fill the remaining gaps in 'Community Area' and 'Ward': forward-fill from the
# previous row, then back-fill so leading nulls are covered as well.
# The result is assigned back to the column because an in-place fillna on a
# column selection is chained assignment (it does not update the DataFrame
# under pandas Copy-on-Write), and fillna(method=...) is deprecated in favour
# of ffill()/bfill().
# NOTE(review): filling from neighbouring rows assumes adjacent records are
# geographically related - confirm this is acceptable for the analysis.
for column in ('Community Area', 'Ward'):
    df_crime_final[column] = df_crime_final[column].ffill().bfill()

In [None]:
# Per-column missing-value counts after filling 'Community Area' and 'Ward'
df_crime_final.isna().sum()

In [None]:
# (rows, columns) of the cleaned data
df_crime_final.shape

In [None]:
# Number of rows removed by cleaning, computed from the frames themselves so
# the figure stays correct when the source CSV is refreshed (the previous
# hard-coded row counts only matched one particular download).
len(df_crime) - len(df_crime_final)

In [None]:
# Parse the 'Date' strings (12-hour clock with AM/PM) into datetime values
df_crime_final['Date'] = pd.to_datetime(
    df_crime_final['Date'],
    format='%m/%d/%Y %I:%M:%S %p',
)

# Derived calendar columns, keyed by new column name -> datetime accessor field.
# dayofweek is numeric: Monday=0, Sunday=6.
date_parts = {
    'Year': 'year',
    'Hour': 'hour',
    'Day of Week': 'dayofweek',
    'Month': 'month',
    'Day': 'day',
}
for new_column, accessor_field in date_parts.items():
    df_crime_final[new_column] = getattr(df_crime_final['Date'].dt, accessor_field)



In [None]:
# Write the cleaned data to CSV in the working directory, omitting the index
df_crime_final.to_csv("Chicago_Crimes.csv",index=False)

In [None]:
# List the column labels of the cleaned data, including the derived date parts
df_crime_final.columns

In [None]:
# (rows, columns) of the cleaned data after adding the derived columns
df_crime_final.shape

## Exploratory Data Analysis (EDA)

In [None]:
import matplotlib as plt
import matplotlib.pyplot as plt

In [None]:
import seaborn as sns

In [None]:
from datetime import datetime

### 1. Temporal Analysis

#### Crime Trends Over Time: Examining how the number of crimes has changed over the years.

In [None]:
import plotly.graph_objs as go

# Tally incidents per calendar year, ordered chronologically
crimes_per_year = df_crime_final['Year'].value_counts().sort_index()

# Line-with-markers trace: year on the x-axis, incident count on the y-axis
yearly_trace = go.Scatter(
    x=crimes_per_year.index,
    y=crimes_per_year.values,
    mode='lines+markers',
    marker=dict(color='blue'),
    hovertemplate='<b>Year:</b> %{x}<br><b>Number of Crimes:</b> %{y}<extra></extra>',
)

fig = go.Figure(data=[yearly_trace])
fig.update_layout(
    title='Number of Crimes Per Year',
    xaxis_title='Year',
    yaxis_title='Number of Crimes',
)
fig.show()


#### Crime Trends Over Time: Number of Crimes Per Month

In [None]:
import plotly.graph_objs as go

# One row per (Year, Month) pair with the number of incidents recorded in it
crimes_per_month = (
    df_crime_final
    .groupby(['Year', 'Month'])
    .size()
    .reset_index(name='Count')
)

# Draw one line per year; sort=False keeps years in order of first appearance
fig = go.Figure()
for year, data_year in crimes_per_month.groupby('Year', sort=False):
    monthly_trace = go.Scatter(
        x=data_year['Month'],
        y=data_year['Count'],
        mode='lines+markers',
        name=str(year),
        hovertemplate='<b>Month:</b> %{x}<br><b>Number of Crimes:</b> %{y}<extra></extra>',
    )
    fig.add_trace(monthly_trace)

fig.update_layout(
    title='Number of Crimes Per Month',
    xaxis_title='Month',
    yaxis_title='Number of Crimes',
)
fig.show()


#### Peak Crime Hours: Number of Crimes Per Hour

In [None]:
# Tally incidents by hour of the day, ordered by hour
crimes_per_hour = df_crime_final['Hour'].value_counts().sort_index()

# Bar chart: one green bar per hour
hourly_bars = go.Bar(
    x=crimes_per_hour.index,
    y=crimes_per_hour.values,
    marker=dict(color='green'),
)

fig = go.Figure(data=[hourly_bars])
fig.update_layout(
    title='Number of Crimes Per Hour',
    xaxis_title='Hour of the Day',
    yaxis_title='Number of Crimes',
)
fig.show()


#### Crime Trends Over Time: Number of Crimes Per Day of the Week

In [None]:
import plotly.graph_objects as go

# Day names in calendar order; list position matches the pandas dayofweek code
days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# 'Day of Week' stores dayofweek integers (Monday=0 ... Sunday=6), not names.
# Sorting those integers against a categorical of day names matched nothing,
# so the chart was neither in weekday order nor labelled with names.
# Count per integer code, force the 0..6 order (missing days count as 0),
# then attach the matching day names.
day_counts = (
    df_crime_final['Day of Week']
    .value_counts()
    .reindex(range(len(days_order)), fill_value=0)
)
crimes_per_day = pd.DataFrame({
    'Day of Week': days_order,
    'Crime Count': day_counts.to_numpy(),
})

# Create a line chart for crimes per day of the week
fig = go.Figure(data=[
    go.Scatter(x=crimes_per_day['Day of Week'], y=crimes_per_day['Crime Count'], mode='lines+markers')
])

fig.update_layout(
    title='Number of Crimes Per Day of the Week',
    xaxis_title='Day of the Week',
    yaxis_title='Number of Crimes'
)

fig.show()


#### Crime Trends Over Time: Number of Crimes Per Season

In [None]:
# Meteorological seasons keyed by calendar month number
season_by_month = {
    12: 'Winter', 1: 'Winter', 2: 'Winter',
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
    9: 'Fall', 10: 'Fall', 11: 'Fall',
}

# Use an existing 'Season' column when present; otherwise derive the season
# from 'Month' so the cell does not raise KeyError when no such column exists.
if 'Season' in df_crime_final.columns:
    season_labels = df_crime_final['Season']
else:
    season_labels = df_crime_final['Month'].map(season_by_month)

# Group data by season and count the number of crimes
crimes_per_season = season_labels.value_counts().reset_index()
crimes_per_season.columns = ['Season', 'Crime Count']

# Create a bar chart for crimes per season
fig = go.Figure(data=[
    go.Bar(x=crimes_per_season['Season'], y=crimes_per_season['Crime Count'])
])

fig.update_layout(
    title='Number of Crimes Per Season',
    xaxis_title='Season',
    yaxis_title='Number of Crimes'
)

fig.show()


### 2. Geospatial Analysis

#### Crime Hotspots Map

In [None]:
import folium
from folium.plugins import HeatMap

# Base map centred on the given coordinates at zoom level 11
chicago_map = folium.Map(location=[41.8781, -87.6298], zoom_start=11)

# [latitude, longitude] pairs for every cleaned record
# NOTE(review): every row is handed to the heat layer; consider sampling if
# the rendered map turns out to be too heavy for the browser.
coordinate_frame = df_crime_final[['Latitude', 'Longitude']]
crime_locations = coordinate_frame.values.tolist()

# Overlay the heat layer on the base map
heat_layer = HeatMap(crime_locations, radius=10)
heat_layer.add_to(chicago_map)

# Leaving the map as the last expression renders it in the notebook
chicago_map


### District/Ward Analysis

In [None]:
import plotly.graph_objects as go

# Incidents per ward as a two-column frame (columns renamed positionally)
ward_crime_counts = df_crime_final['Ward'].value_counts().reset_index()
ward_crime_counts.columns = ['Ward', 'Crime Count']

# One red bar per ward
ward_bars = go.Bar(
    x=ward_crime_counts['Ward'],
    y=ward_crime_counts['Crime Count'],
    marker=dict(color='red'),
)

fig = go.Figure(ward_bars)
fig.update_layout(
    title='Crimes Per Ward',
    xaxis_title='Ward',
    yaxis_title='Number of Crimes',
)
fig.show()


### 3. Crime Type Analysis


### Distribution of Crime Types

In [None]:
import plotly.graph_objects as go

# Frequency of each value in 'Primary Type', as a two-column frame
crime_type_counts = df_crime_final['Primary Type'].value_counts().reset_index()
crime_type_counts.columns = ['Crime Type', 'Crime Count']

# Order from most to least frequent
crime_type_counts = crime_type_counts.sort_values(by='Crime Count', ascending=False)

# Line-with-markers trace across the crime types
distribution_trace = go.Scatter(
    x=crime_type_counts['Crime Type'],
    y=crime_type_counts['Crime Count'],
    mode='lines+markers',
)

fig = go.Figure()
fig.add_trace(distribution_trace)
fig.update_layout(
    title='Distribution of Crime Types',
    xaxis_title='Crime Type',
    yaxis_title='Crime Count',
    height=800,
)
fig.show()


### Severity Analysis

In [None]:
import plotly.graph_objects as go

# List of severe crimes
severe_crimes = ['HOMICIDE', 'CRIMINAL SEXUAL ASSAULT', 'OFFENSE INVOLVING CHILDREN', 
                 'ROBBERY', 'ASSAULT', 'SEX OFFENSE', 'WEAPONS VIOLATION', 
                 'KIDNAPPING', 'ARSON', 'HUMAN TRAFFICKING', 'CRIM SEXUAL ASSAULT', 
                 'DOMESTIC VIOLENCE']

# List of less severe crimes
less_severe_crimes = ['BURGLARY', 'BATTERY', 'CRIMINAL DAMAGE', 'DECEPTIVE PRACTICE', 
                      'THEFT', 'MOTOR VEHICLE THEFT', 'OTHER OFFENSE', 'STALKING', 
                      'CRIMINAL TRESPASS', 'PROSTITUTION', 'NARCOTICS', 
                      'CONCEALED CARRY LICENSE VIOLATION', 'INTERFERENCE WITH PUBLIC OFFICER', 
                      'PUBLIC PEACE VIOLATION', 'OBSCENITY', 'LIQUOR LAW VIOLATION', 
                      'INTIMIDATION', 'GAMBLING', 'OTHER NARCOTIC VIOLATION', 'NON-CRIMINAL', 
                      'PUBLIC INDECENCY', 'RITUALISM', 'NON-CRIMINAL (SUBJECT SPECIFIED)', 
                      'NON - CRIMINAL']

# Count the records in each severity group. Series.sum() adds up the boolean
# mask in vectorised code; the built-in sum() iterated over every row in Python.
# Records whose 'Primary Type' is in neither list are not counted in the chart.
primary_type = df_crime_final['Primary Type']
severe_count = int(primary_type.isin(severe_crimes).sum())
less_severe_count = int(primary_type.isin(less_severe_crimes).sum())

# Hover text for each slice lists the crime types that belong to it
hover_text_severe = f'Severe Crimes:<br>{"<br>".join(severe_crimes)}'
hover_text_less_severe = f'Less Severe Crimes:<br>{"<br>".join(less_severe_crimes)}'

# Create a donut chart for the distribution of crimes by severity
labels = ['Severe', 'Less Severe']
values = [severe_count, less_severe_count]
hover_text = [hover_text_severe, hover_text_less_severe]

fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.4, hovertext=hover_text, hoverinfo='text')])
fig.update_traces(textinfo='label+percent', insidetextorientation='radial')

# Small hover font because each hover label lists many crime types
fig.update_layout(
    title='Distribution of Crimes by Severity',
    height=500,
    hoverlabel=dict(font=dict(size=8)),
)
fig.show()


### 4. Arrest and Domestic Incident Analysis


#### Arrest Rates Analysis

In [None]:
import plotly.graph_objects as go

# Mean of 'Arrest' per crime type, highest first, as a two-column frame
arrest_by_type = df_crime_final.groupby('Primary Type')['Arrest'].mean()
arrest_rates = arrest_by_type.sort_values(ascending=False).reset_index()

# Bars show the mean scaled by 100 (a percentage); the same value is the label
arrest_bars = go.Bar(
    x=arrest_rates['Primary Type'],
    y=arrest_rates['Arrest'] * 100,
    text=arrest_rates['Arrest'] * 100,
    textposition='auto',
)

fig_arrest_rates = go.Figure(data=[arrest_bars])
fig_arrest_rates.update_layout(
    title='Arrest Rates by Crime Type',
    xaxis_title='Crime Type',
    yaxis_title='Arrest Rate (%)',
    xaxis_tickangle=-45,
    height=600,
)
fig_arrest_rates.show()


#### Domestic vs. Non-Domestic Crimes Analysis

In [None]:
# Number of records for each value of the 'Domestic' flag
domestic_counts = df_crime_final['Domestic'].value_counts()

# Donut chart: slices ordered non-domestic (False) first, then domestic (True)
domestic_pie = go.Pie(
    labels=['Non-Domestic', 'Domestic'],
    values=[domestic_counts.loc[False], domestic_counts.loc[True]],
    hole=.3,
    hoverinfo='label+percent+value',
)

fig_domestic = go.Figure(data=[domestic_pie])
fig_domestic.update_layout(title='Domestic vs. Non-Domestic Crimes')
fig_domestic.show()


#### Detailed Analysis of Domestic vs. Non-Domestic Crimes

In [None]:
import plotly.graph_objects as go

# Row masks for the two values of the 'Domestic' flag
domestic_mask = df_crime_final['Domestic'].eq(True)
non_domestic_mask = df_crime_final['Domestic'].eq(False)

# Crime-type frequencies within each subset (columns renamed positionally)
domestic_crime_types = df_crime_final.loc[domestic_mask, 'Primary Type'].value_counts().reset_index()
domestic_crime_types.columns = ['Primary Type', 'Domestic Count']

non_domestic_crime_types = df_crime_final.loc[non_domestic_mask, 'Primary Type'].value_counts().reset_index()
non_domestic_crime_types.columns = ['Primary Type', 'Non-Domestic Count']

# Outer merge keeps crime types that appear in only one subset; the missing
# side is filled with 0
domestic_comparison = domestic_crime_types.merge(
    non_domestic_crime_types,
    on='Primary Type',
    how='outer',
).fillna(0)

# Grouped bars: one series per subset, in the order domestic, non-domestic
comparison_bars = [
    go.Bar(
        x=domestic_comparison['Primary Type'],
        y=domestic_comparison[count_column],
        name=series_name,
    )
    for count_column, series_name in (
        ('Domestic Count', 'Domestic'),
        ('Non-Domestic Count', 'Non-Domestic'),
    )
]

fig_domestic_comparison = go.Figure(data=comparison_bars)
fig_domestic_comparison.update_layout(
    title='Crime Types: Domestic vs. Non-Domestic',
    xaxis_title='Crime Type',
    yaxis_title='Number of Crimes',
    barmode='group',
    xaxis_tickangle=-45,
    height=800,
)
fig_domestic_comparison.show()


### Location-Specific Analysis
#### Location Description Analysis


In [None]:
import plotly.graph_objects as go

# The 30 most frequent 'Location Description' values with their counts
top_location_series = df_crime_final['Location Description'].value_counts().head(30)
location_counts = top_location_series.reset_index()
location_counts.columns = ['Location Description', 'Crime Count']

# Horizontal bars: count along x, location name along y, count shown as label
location_bars = go.Bar(
    x=location_counts['Crime Count'],
    y=location_counts['Location Description'],
    orientation='h',
    text=location_counts['Crime Count'],
    textposition='auto',
)

fig_location_counts = go.Figure(data=[location_bars])
fig_location_counts.update_layout(
    title='Top 30 Most Common Locations for Crimes',
    xaxis_title='Number of Crimes',
    yaxis_title='Location Description',
    height=600,
)
fig_location_counts.show()


#### Crime Types by Location Analysis

In [None]:
import plotly.express as px

# Limit the number of crime types and locations for readability:
# the 30 most common locations and the 10 most common crime types
top_locations = df_crime_final['Location Description'].value_counts().head(30).index
top_crime_types = df_crime_final['Primary Type'].value_counts().head(10).index

# Filter the data to include only the top locations and crime types
filtered_data = df_crime_final[df_crime_final['Location Description'].isin(top_locations) & df_crime_final['Primary Type'].isin(top_crime_types)]

# Count the records for each (location, crime type) pair
location_crime_types = filtered_data.groupby(['Location Description', 'Primary Type']).size().reset_index(name='Crime Count')

# Create a heatmap for crime types by location.
# The title now says "Top 10" to match the head(10) crime-type selection above
# (it previously claimed 30 crime types).
fig_location_crime_types = px.density_heatmap(
    location_crime_types,
    x='Location Description',
    y='Primary Type',
    z='Crime Count',
    title='Top 10 Crime Types by Top 30 Locations',
    color_continuous_scale='Viridis'
)

fig_location_crime_types.update_layout(
    xaxis_tickangle=-45,
    height=600
)

fig_location_crime_types.show()


#### Beat and Community Area Analysis

In [None]:
# Incidents per community area as a two-column frame (renamed positionally)
community_area_counts = df_crime_final['Community Area'].value_counts().reset_index()
community_area_counts.columns = ['Community Area', 'Crime Count']

# One bar per community area, labelled with its count
community_bars = go.Bar(
    x=community_area_counts['Community Area'],
    y=community_area_counts['Crime Count'],
    text=community_area_counts['Crime Count'],
    textposition='auto',
)

fig_community_area_counts = go.Figure(data=[community_bars])
fig_community_area_counts.update_layout(
    title='Crime Counts by Community Area',
    xaxis_title='Community Area',
    yaxis_title='Number of Crimes',
    xaxis_tickangle=-45,
    height=600,
)
fig_community_area_counts.show()


In [None]:
import plotly.graph_objects as go

# Number of records for every (Beat, Community Area) pair present in the data
crime_heatmap_data = (
    df_crime_final
    .groupby(['Beat', 'Community Area'])
    .size()
    .reset_index(name='Crime Count')
)

# Hover label for each pair, built row by row from the three columns
hover_labels = []
for beat, area, count in zip(crime_heatmap_data['Beat'],
                             crime_heatmap_data['Community Area'],
                             crime_heatmap_data['Crime Count']):
    hover_labels.append(
        'Beat: {}<br>Community Area: {}<br>Crime Count: {}'.format(beat, area, count)
    )

# Heatmap with beat on x, community area on y and the count as colour
beat_area_heatmap = go.Heatmap(
    z=crime_heatmap_data['Crime Count'],
    x=crime_heatmap_data['Beat'],
    y=crime_heatmap_data['Community Area'],
    hovertext=hover_labels,
    colorscale='Reds',
    colorbar=dict(title='Crime Count'),
)

fig_heatmap = go.Figure(data=beat_area_heatmap)

# White plot background with black text
fig_heatmap.update_layout(
    title='Crime Heatmap by Beat and Community Area',
    xaxis_title='Police Beat',
    yaxis_title='Community Area',
    height=800,
    plot_bgcolor='white',
    font=dict(color='black'),
)
fig_heatmap.show()


### Repeat Offenders and Recidivism Analysis
