In [1]:
# Imports Libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the dataset from Nagpur_Crimes.csv (relative path, resolved against the working directory)
df = pd.read_csv("Nagpur_Crimes.csv")


In [None]:
# Quick overview: the frame's (rows, columns) shape, then its first five rows
print("Shape:", df.shape)
df.head(n=5)

In [None]:
# Structure of the dataset: column names, non-null counts, dtypes and memory usage
df.info()

In [None]:
# Number of missing entries in each column
print("\nMissing values:\n", df.isna().sum())

In [None]:
# Share of rows (in percent) with no Crime_Type (603 missing per the original note)
print("Crime_Type missing percentage:", 100 * (df['Crime_Type'].isna().sum() / df.shape[0]))

In [None]:
# Discard rows that have no Crime_Type, then re-check the per-column missing counts
df = df.dropna(axis=0, subset=['Crime_Type'])
print("\nMissing values:\n", df.isna().sum())

In [None]:
# Share of rows (in percent) with a missing Time and a missing Date, respectively
for col in ('Time', 'Date'):
    print(f"{col} missing %:", (df[col].isna().sum() / df.shape[0]) * 100)

In [None]:
# Keep only rows where both Time and Date are present (a row is dropped if either is missing)
df = df.dropna(axis=0, how='any', subset=['Time', 'Date'])
print("\nMissing values:\n", df.isna().sum())

In [None]:
# Convert the Time column to datetime, parsing it as 'HH:MM' (format='%H:%M').
# errors='coerce' turns any value that does not match the format into NaT instead of raising.
# NOTE(review): confirm the raw Time values really are 'HH:MM' — values in another layout
# (e.g. including seconds) would be coerced to NaT. TODO verify against the CSV.
df['Time'] = pd.to_datetime(df['Time'], format='%H:%M', errors='coerce')

In [None]:
# Derive the Hour and Month features used by the analysis cells below.
# Time was already parsed to datetime in the previous cell (format='%H:%M'), so it is
# not re-parsed here; re-running pd.to_datetime on a datetime column changes nothing.
df['Hour'] = df['Time'].dt.hour

# NOTE(review): no explicit format/dayfirst is given, so pandas infers the date layout —
# confirm the raw Date strings are unambiguous (day-first vs month-first). TODO verify.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Month'] = df['Date'].dt.to_period('M')

# Rows with a missing Time/Date were dropped earlier, so any NaT left now comes from
# errors='coerce' silently discarding a value that could not be parsed. Surface the counts
# so a format mismatch is noticed instead of quietly thinning out the Hour/Month analyses.
print("Unparseable Time values (coerced to NaT):", df['Time'].isna().sum())
print("Unparseable Date values (coerced to NaT):", df['Date'].isna().sum())

In [None]:
# Preview the first rows to check the new Hour and Month columns (avoid dumping the whole frame)
df.head()

In [None]:
# Remove fully duplicated rows (the first occurrence is kept) and report the shape
# before and after, followed by the resulting column dtypes
print("Shape before removing duplicates:", df.shape)
df = df.drop_duplicates(keep='first')
print("Shape after removing duplicates:", df.shape)
print("\nColumn data types:\n", df.dtypes)

In [None]:
# Per-column count of values that are still missing after cleaning
df.isna().sum()

In [None]:
# Save the cleaned dataset as a CSV file (relative path); index=False leaves out the row index
df.to_csv('cleaned_nagpur_crime_data.csv', index=False)

In [None]:
# Report how many rows are left after cleaning, then preview the cleaned data.
# The count is printed explicitly rather than read off the footer of a full-frame dump.
print("Rows remaining after cleaning:", len(df))
df.head()

DATA ANALYSIS

In [None]:
# Number of records for each hour of the day, ordered by hour
df['Hour'].value_counts(sort=False).sort_index()

In [None]:
# The three most frequent Crime_Type values and their counts
df['Crime_Type'].value_counts().iloc[:3]

In [None]:
# The ten Area values with the most records
df['Area'].value_counts().iloc[:10]

In [None]:
# Most frequent Crime_Type within each Zone: value_counts orders each zone's crime types
# by descending count, so the first row per zone is that zone's most common one
(
    df.groupby('Zone')['Crime_Type']
    .value_counts()
    .groupby(level='Zone')
    .head(1)
)

In [None]:
# Number of records per case Status value (e.g. Solved/Pending), most frequent first
df['Status'].value_counts()

In [None]:
# Number of records per calendar month, in chronological order (groupby sorts its keys)
df.groupby('Month', sort=True).size()

DATA VISUALISATION

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Top 10 crime-prone areas: horizontal bar chart of the ten Area values with the most records
top_areas = df['Area'].value_counts().head(10)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x=top_areas.values,
    y=top_areas.index,
    # Passing `palette` without `hue` is deprecated in seaborn >= 0.13; colouring by the
    # same variable as the y axis keeps the original one-shade-per-area look.
    hue=top_areas.index,
    palette='Reds_r',
    dodge=False,  # keep full-width bars when hue is set (matters on older seaborn)
    ax=ax,
)

# The hue only duplicates the y-axis labels, so drop the legend if seaborn created one
legend = ax.get_legend()
if legend is not None:
    legend.remove()

ax.set_title("Top 10 Crime-Prone Areas in Nagpur")
ax.set_xlabel("Number of Crimes")
ax.set_ylabel("Area")
fig.tight_layout()
plt.show()

In [None]:
# Is crime increasing over time? Line chart of the number of records per month.
monthly_trend = df.groupby('Month').size()

fig, ax = plt.subplots(figsize=(12, 5))
monthly_trend.plot(ax=ax, marker='o', color='royalblue')
ax.set_title("Crime Trend Over Time (Monthly)")
ax.set_xlabel("Month")
ax.set_ylabel("Number of Crimes")
ax.grid(True)
# Tilt the month labels so neighbouring ticks do not overlap
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()