### **Data Cleaning**


In [None]:
import pandas as pd

# Read the Chicago crime export (point CRIME_CSV_PATH at your local copy)
CRIME_CSV_PATH = '/content/Crimes_-_2001_to_Present.csv'
crime_data = pd.read_csv(CRIME_CSV_PATH)

# Preview the leading rows
print(crime_data.head())

# Per-column count of missing values
print(crime_data.isna().sum())

# Columns that must be present for a row to be kept
required_columns = [
    'Date',
    'Primary Type',
    'Location Description',
    'Latitude',
    'Longitude',
]

# Discard rows lacking any required value (mutates crime_data in place)
crime_data.dropna(subset=required_columns, inplace=True)

# Parse the 'Date' column into pandas datetimes
parsed_dates = pd.to_datetime(crime_data['Date'])
crime_data['Date'] = parsed_dates

# crime_data is now filtered and typed for the analysis cells that follow



In [None]:
# Show the column labels of crime_data so later cells can reference them by exact name
print(crime_data.columns)


### **Top 10 Crimes in Chicago**

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Tally incidents per 'Primary Type' and keep the ten most frequent
crime_type_counts = crime_data['Primary Type'].value_counts()
top_10_crimes = crime_type_counts.nlargest(10)

# Two-column frame (type, count) for seaborn
top_10_crimes_df = top_10_crimes.reset_index()
top_10_crimes_df.columns = ['Crime Type', 'Count']

# Horizontal bar chart of the ten most frequent crime types
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=top_10_crimes_df,
    x='Count',
    y='Crime Type',
    palette='coolwarm',
    ax=ax,
)

# Title and axis labels
ax.set_title('Top 10 Crimes in Chicago', fontsize=16)
ax.set_xlabel('Number of Incidents', fontsize=12)
ax.set_ylabel('Crime Type', fontsize=12)

# Fit the layout and render
fig.tight_layout()
plt.show()


### **Year-wise Crime Analysis**

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Number of incidents per 'Year', ordered chronologically
yearly_crimes = crime_data['Year'].value_counts().sort_index()

# Two-column frame (year, count) for seaborn
yearly_crimes_df = yearly_crimes.reset_index()
yearly_crimes_df.columns = ['Year', 'Crime Count']

# Line chart of yearly totals
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=yearly_crimes_df,
    x='Year',
    y='Crime Count',
    marker='o',
    color='b',
    ax=ax,
)

# Title and axis labels
ax.set_title('Year-wise Total Crimes in Chicago', fontsize=16)
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Number of Crimes', fontsize=12)

# Slant the year labels, add a grid, and fit the layout
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid(True)
fig.tight_layout()

# Render
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Incident count for every (Year, Primary Type) pair
counts_by_year_and_type = crime_data.groupby(['Year', 'Primary Type']).size()
yearly_crime_types = counts_by_year_and_type.reset_index(name='Crime Count')

# Row label of the largest count within each year
top_row_labels = yearly_crime_types.groupby('Year')['Crime Count'].idxmax()

# One row per year (its most frequent crime type), in chronological order
top_crimes_per_year = yearly_crime_types.loc[top_row_labels]
top_crimes_per_year = top_crimes_per_year.sort_values(by='Year')

# One bar per year, coloured by that year's top crime type
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=top_crimes_per_year,
    x='Year',
    y='Crime Count',
    hue='Primary Type',
    dodge=False,
    palette='viridis',
    ax=ax,
)

# Title and axis labels
ax.set_title('Year-wise Top Crimes in Chicago', fontsize=16)
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Number of Crimes', fontsize=12)

# Slant the year labels and park the legend outside the axes
plt.setp(ax.get_xticklabels(), rotation=45)
ax.legend(title='Top Crime Type', bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()

# Render
plt.show()


### **Top 10 Crimes by Region**

In [None]:
import folium

# Upper bound on the number of points drawn. Each marker is written into the
# map's HTML, so plotting one marker per row of a large frame can make the
# notebook output and the saved file too heavy to open. Adjust as needed.
MAX_MAP_POINTS = 5000

# Create a base map centered around Chicago
chicago_map = folium.Map(location=[41.8781, -87.6298], zoom_start=11)

# Plot every row when the frame is small; otherwise draw a reproducible
# random sample (fixed random_state so re-runs give the same map).
if len(crime_data) > MAX_MAP_POINTS:
    map_points = crime_data.sample(n=MAX_MAP_POINTS, random_state=42)
else:
    map_points = crime_data

# Add crime points to the map. Only the three columns that are used get
# iterated, which avoids constructing a Series per row as iterrows() does.
for latitude, longitude, crime_type in zip(
    map_points['Latitude'], map_points['Longitude'], map_points['Primary Type']
):
    folium.CircleMarker(
        location=(latitude, longitude),
        radius=3,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
        popup=crime_type
    ).add_to(chicago_map)

# Save the map as an HTML file
chicago_map.save('chicago_crime_map.html')

# Last expression of the cell: renders the map inline in Jupyter/Colab
chicago_map


In [None]:
import folium
from folium.plugins import MarkerCluster

# Upper bound on the number of markers drawn. Each marker is written into the
# map's HTML, so one marker per row of a large frame can make the saved file
# too heavy to open even with clustering. Adjust as needed.
MAX_CLUSTER_MARKERS = 5000

# Step 1: Filter data for the top 10 crimes over all years
top_10_crimes = crime_data['Primary Type'].value_counts().nlargest(10).index
top_crimes_data = crime_data[crime_data['Primary Type'].isin(top_10_crimes)]

# Plot every filtered row when there are few; otherwise draw a reproducible
# random sample. top_crimes_data itself is left complete.
if len(top_crimes_data) > MAX_CLUSTER_MARKERS:
    marker_rows = top_crimes_data.sample(n=MAX_CLUSTER_MARKERS, random_state=42)
else:
    marker_rows = top_crimes_data

# Step 2: Create a folium map centered around Chicago
chicago_map = folium.Map(location=[41.8781, -87.6298], zoom_start=11)

# Step 3: Add MarkerCluster to group markers for easier visualization
marker_cluster = MarkerCluster().add_to(chicago_map)

# Step 4: Add one marker per selected crime. Only the columns that are used
# get iterated, which avoids constructing a Series per row as iterrows() does.
for crime_type, year, latitude, longitude, location_description in zip(
    marker_rows['Primary Type'],
    marker_rows['Year'],
    marker_rows['Latitude'],
    marker_rows['Longitude'],
    marker_rows['Location Description'],
):
    # THEFT markers are red, every other crime type is blue
    folium.Marker(
        location=(latitude, longitude),
        popup=f"Crime: {crime_type}<br>Year: {year}<br>Location: {location_description}",
        icon=folium.Icon(color='red' if crime_type == 'THEFT' else 'blue', icon='info-sign')
    ).add_to(marker_cluster)

# Step 5: Save the map to an HTML file
chicago_map.save('top_10_crimes_chicago_map.html')

# Display a message that the map has been saved
print("Map has been created and saved as 'top_10_crimes_chicago_map.html'.")




In [None]:
from IPython.display import IFrame

# Embed the saved map HTML inline through an 800x600 IFrame (written for Colab).
# The path is relative, so the file is looked up next to the notebook's working directory.
IFrame('top_10_crimes_chicago_map.html', width=800, height=600)


In [None]:
# google.colab only exists inside Colab. Guard the import so this cell still
# runs on a local Jupyter kernel; `files` is None when the module is missing.
try:
    from google.colab import files
except ImportError:
    files = None

# Download the HTML map file (Colab only; uncomment to use)
# if files is not None:
#     files.download('top_10_crimes_chicago_map.html')

# Last expression of the cell: renders the map inline
chicago_map


### **Total Number of Arrests**


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Count incidents for each value of the 'Arrest' flag
arrest_data = crime_data['Arrest'].value_counts().reset_index()
arrest_data.columns = ['Arrest Status', 'Count']

# Attach readable labels to the data itself instead of relabelling ticks by
# position afterwards: positional relabelling silently mislabels the bars if
# the plotting order ever differs from False-then-True.
# assumes 'Arrest' holds booleans (0/1 also map correctly) - TODO confirm
arrest_labels = {False: 'Not Arrested', True: 'Arrested'}
arrest_plot_data = arrest_data.assign(
    **{'Arrest Status': arrest_data['Arrest Status'].map(arrest_labels)}
)

# Plotting the data; `order` pins the left-to-right bar order explicitly
plt.figure(figsize=(8, 6))
sns.barplot(
    data=arrest_plot_data,
    x='Arrest Status',
    y='Count',
    order=['Not Arrested', 'Arrested'],
    palette='coolwarm',
)
plt.title('Number of Arrests vs Non-Arrests')
plt.xlabel('Arrest Status')
plt.ylabel('Number of Cases')
plt.tight_layout()
plt.show()


### **Year with the Highest Number of Crimes**


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Incidents per 'Year' as a two-column frame
crime_per_year = crime_data['Year'].value_counts().reset_index()
crime_per_year.columns = ['Year', 'Total Crimes']

# Chronological order so the line reads left to right
crime_per_year = crime_per_year.sort_values(by='Year')

# Line chart; the theme is applied after the figure exists and before the axes are drawn
plt.figure(figsize=(14, 8))
sns.set_theme(style="whitegrid")
line_plot = sns.lineplot(
    data=crime_per_year,
    x='Year',
    y='Total Crimes',
    marker='o',
    color='blue',
)

# Title and axis labels
line_plot.set_title('Total Crimes in Chicago (Yearly Overview)', fontsize=16, fontweight='bold')
line_plot.set_xlabel('Year', fontsize=14)
line_plot.set_ylabel('Total Number of Crimes', fontsize=14)

# Write each year's total just above its point
for year_value, year_total in zip(crime_per_year['Year'], crime_per_year['Total Crimes']):
    line_plot.text(
        year_value,
        year_total,
        str(year_total),
        ha='center',
        va='bottom',
        fontsize=10,
    )

# Row holding the largest yearly total
peak_row_label = crime_per_year['Total Crimes'].idxmax()
max_crime_year = crime_per_year.loc[peak_row_label]

# Mark that year with a dashed vertical line and a red annotation
line_plot.axvline(x=max_crime_year['Year'], color='red', linestyle='--', label='Highest Crime Year')
line_plot.text(
    max_crime_year['Year'],
    max_crime_year['Total Crimes'],
    f"Max: {max_crime_year['Total Crimes']}",
    color='red',
    fontsize=12,
    ha='left',
)

# Legend (picks up the dashed-line label) and grid
line_plot.legend()
line_plot.grid(visible=True)

# Fit the layout and render
plt.tight_layout()
plt.show()

# Report the peak year in text as well
print(f'The year with the highest number of crimes is {max_crime_year["Year"]} with {max_crime_year["Total Crimes"]} crimes.')


### **Summary of the Analysis**


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Re-read the crime export from disk (this rebinds crime_data to the freshly loaded frame)
crime_data = pd.read_csv('/content/Crimes_-_2001_to_Present.csv')

# Per-year totals: non-null 'Primary Type' entries and the sum of 'Arrest'
per_year = crime_data.groupby('Year')
crime_summary = per_year.agg(
    Total_Crimes=('Primary Type', 'count'),
    Total_Arrests=('Arrest', 'sum')
).reset_index()

# Force both aggregate columns to numeric; unparseable values become NaN
for summary_column in ('Total_Crimes', 'Total_Arrests'):
    crime_summary[summary_column] = pd.to_numeric(crime_summary[summary_column], errors='coerce')

# Grand totals across all years
total_arrests = crime_summary['Total_Arrests'].sum()
total_crimes = crime_summary['Total_Crimes'].sum()

# Verdict: "safe" only when there is at least one crime and arrests exceed
# 50% of crimes; anything else (including zero crimes) counts as "not safe"
is_safe = total_crimes > 0 and total_arrests / total_crimes > 0.5
if is_safe:
    safe_count, not_safe_count = 1, 0
else:
    safe_count, not_safe_count = 0, 1

# Pie inputs. Exactly one of the two sizes is 1 and the other 0, so the chart
# is always a single full wedge for whichever verdict applies.
labels = ['Safe', 'Not Safe']
sizes = [safe_count, not_safe_count]
colors = ['lightgreen', 'lightcoral']

# Draw the pie
fig, ax = plt.subplots(figsize=(8, 6))
wedges, texts, autotexts = ax.pie(
    sizes,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',
    startangle=90,
    shadow=True,
)

# Legend placed to the right of the pie
ax.legend(wedges, labels, title="Safety Status", loc="upper right", bbox_to_anchor=(1.3, 1))

ax.set_title('Safety Status of Chicago Based on Crime Data', fontsize=14, fontweight='bold')
ax.axis('equal')  # equal aspect ratio keeps the pie circular

# Render
plt.show()
