In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing Data 

In [None]:
# Load the district-wise IPC crime records (2001-2012, per the file name) into a DataFrame.
data = pd.read_csv("../input/district-wise-crimes-in-india/01_District_wise_crimes_committed_IPC_2001_2012.csv")

# Knowing about the data 

In [None]:
# Preview the first 30 rows of the raw data.
data.head(30)

* Note that the 28th row has the district name TOTAL and holds the sum of all the cases reported in that state for that year.
* Likewise, there is a TOTAL row for every state for every year.

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the columns pandas describes by default.
data.describe()

In [None]:
# Print column names, non-null counts and dtypes of the raw data.
data.info()

## For the visualizations, we create a dataframe holding each state's name and the total number of cases in that state.

In [None]:
# Keep only the rows whose DISTRICT is "TOTAL" (the per-state, per-year
# summary rows described in the notebook text), then preview them.
state_total_mask = data["DISTRICT"].eq("TOTAL")
total = data.loc[state_total_mask]
total.head()

#  Creating a dataframe with Crime data of Tamilnadu.

In [None]:
# Narrow the state-level total rows down to Tamil Nadu only.
tamilnadu = total.loc[total["STATE/UT"].eq("TAMIL NADU")]

In [None]:
# Preview the first rows of the Tamil Nadu subset.
tamilnadu.head()

## Importing required modules for visualizations

In [None]:
%matplotlib inline 
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [None]:
# Apply the seaborn theme BEFORE creating the figure. set_theme() only changes
# defaults for axes created after the call; the original cell called it last,
# so a fresh-kernel run drew this figure unthemed while a re-run drew it dark.
sns.set_theme(style="dark")

# Two stacked panels: yearly murder cases (top) and kidnapping & abduction
# cases (bottom) for Tamil Nadu.
fig, (ax_0, ax_1) = plt.subplots(figsize=(15,10), nrows=2)
sns.pointplot(data=tamilnadu, x="YEAR", y="MURDER", ax=ax_0, color="blue");
sns.pointplot(data=tamilnadu, x="YEAR", y="KIDNAPPING & ABDUCTION", ax=ax_1, color="red");
# The title is attached to the top panel and serves as the heading for both.
ax_0.set_title("Murder and Kidnapping cases in Tamil Nadu", size=20);

In [None]:
# Bar chart of the yearly RAPE counts for Tamil Nadu, shown with the dark Plotly template.
fig = px.bar(
    tamilnadu,
    x="YEAR",
    y="RAPE",
    title="Rape cases in Tamil nadu",
    color_discrete_sequence=["blue"],
)
fig.update_layout(
    xaxis_title="Year",
    yaxis_title="Cases Reported",
    template="plotly_dark",
)
fig.show()

* 2000 recorded the minimum number of cases.
* 2012 recorded the maximum number of cases in the previous decade.

# Analysing total number of Rape cases reported all over India. 

In [None]:
# Sum the RAPE column over all years for each state/UT and expose the result
# as a two-column frame: "STATE/UT" and "CASES REPORTED".
r = (
    total.groupby("STATE/UT")["RAPE"]
    .sum()
    .reset_index()
    .rename(columns={"RAPE": "CASES REPORTED"})
)
r.head()

## Using Geopandas reading the shape of Indian map

In [None]:
# Path to the shapefile with the outlines of the Indian states.
f = ("../input/indian-states-geo-pandas-outline/Igismap/Indian_States.shp")
# Read the shapefile with geopandas and preview the first rows.
map_df = gpd.read_file(f)
map_df.head()

* Replace the state names in our dataframe so that they match the state names in map_df.

In [None]:
# Translate the crime dataset's upper-case state/UT names into the spellings
# used for the join with map_df. The unusual spellings ("Arunanchal",
# "Dadara", "Havelli") are kept verbatim from the original cell; presumably
# they mirror the shapefile's names. Verify against map_df if a state fails
# to join.
STATE_NAME_MAP = {
    'A & N ISLANDS': 'Andaman & Nicobar Island',
    'ANDHRA PRADESH': 'Andhra Pradesh',
    'ARUNACHAL PRADESH': 'Arunanchal Pradesh',
    'ASSAM': 'Assam',
    'BIHAR': 'Bihar',
    'CHANDIGARH': 'Chandigarh',
    'CHHATTISGARH': 'Chhattisgarh',
    'D & N HAVELI': 'Dadara & Nagar Havelli',
    # Fix: the original line mapped 'Daman & Diu' -> 'DAMAN & DIU', the
    # reverse of every other entry, so it never matched the upper-case source
    # data. NOTE(review): assumes the shapefile spells it 'Daman & Diu'.
    'DAMAN & DIU': 'Daman & Diu',
    'GOA': 'Goa',
    'GUJARAT': 'Gujarat',
    'HARYANA': 'Haryana',
    'HIMACHAL PRADESH': 'Himachal Pradesh',
    'JAMMU & KASHMIR': 'Jammu & Kashmir',
    'JHARKHAND': 'Jharkhand',
    'KARNATAKA': 'Karnataka',
    'KERALA': 'Kerala',
    'LAKSHADWEEP': 'Lakshadweep',
    'MADHYA PRADESH': 'Madhya Pradesh',
    'MAHARASHTRA': 'Maharashtra',
    'MANIPUR': 'Manipur',
    'MEGHALAYA': 'Meghalaya',
    'MIZORAM': 'Mizoram',
    'NAGALAND': 'Nagaland',
    'ODISHA': 'Odisha',
    'PUDUCHERRY': 'Puducherry',
    'PUNJAB': 'Punjab',
    'RAJASTHAN': 'Rajasthan',
    'SIKKIM': 'Sikkim',
    'TAMIL NADU': 'Tamil Nadu',
    'TRIPURA': 'Tripura',
    'UTTAR PRADESH': 'Uttar Pradesh',
    'UTTARAKHAND': 'Uttarakhand',
    'WEST BENGAL': 'West Bengal',
}

# Rename only within the name column (the original replaced across the whole
# frame). Names without an entry in the mapping are left unchanged, and
# re-running this cell is a no-op.
r["STATE/UT"] = r["STATE/UT"].replace(STATE_NAME_MAP)

* Since the data we have is old, Telangana was not yet a separate state at that time,
* so we create a new row in the dataframe for Telangana and adjust the existing Andhra Pradesh value accordingly.

In [None]:
# Telangana does not exist in the source data, so carve an assumed share out
# of Andhra Pradesh's total so the state can be drawn on the map.
TELANGANA_RAPE_CASES = 5500  # value chosen by the notebook author, not taken from the data

# Guard so that re-running the cell neither appends a second Telangana row
# nor subtracts the share twice.
if not r["STATE/UT"].eq("Telangana").any():
    # Adjust Andhra Pradesh by name. The original replaced the literal 13479
    # anywhere in the frame, which silently did nothing if the total differed
    # and would have changed any other cell holding the same number.
    r.loc[r["STATE/UT"] == "Andhra Pradesh", "CASES REPORTED"] -= TELANGANA_RAPE_CASES
    r.loc[len(r.index)] = ["Telangana", TELANGANA_RAPE_CASES]

In [None]:
# Attach the per-state case counts to the map geometries, matching the map's
# "st_nm" values against the crime frame's "STATE/UT" values.
shapes_by_state = map_df.set_index('st_nm')
rape_by_state = r.set_index("STATE/UT")
together = shapes_by_state.join(rape_by_state)


In [None]:
# Choropleth of the total rape cases reported per state.
fig, ax = plt.subplots(1, figsize=(10, 10))
ax.axis("off")
# Draw onto the axes created above. The original rebound `fig` to the Axes
# returned by .plot(), hiding the Figure behind a misleading name.
together.plot(column="CASES REPORTED", cmap="Oranges", linewidth=0.5, ax=ax, edgecolor='0.2', legend=True)
# Single title call: the original set a title before plotting and then
# overwrote it here, so the first call had no effect. The year range is
# corrected to 2001-2012 to match the loaded file (..._IPC_2001_2012.csv).
ax.set_title("Statewise Rape Cases Reported(2001 - 2012)", size=17);


* The top 3 states with the highest number of cases are Madhya Pradesh, West Bengal and Uttar Pradesh.

In [None]:
# Sum TOTAL IPC CRIMES over all years for each state/UT ...
total_cases = (
    total.groupby("STATE/UT")["TOTAL IPC CRIMES"]
    .sum()
    .reset_index()
)
# ... and keep the ten states with the largest totals, highest first.
total_cases_sorted = total_cases.sort_values(by="TOTAL IPC CRIMES", ascending=False).head(10)

In [None]:
# Horizontal bar chart of the ten states with the most IPC crimes.
fig = px.bar(data_frame=total_cases_sorted, x="TOTAL IPC CRIMES", y="STATE/UT", orientation='h', color_discrete_sequence=["red"])
fig.update_layout(
    # Reverse the y axis so the bars appear in the frame's order (largest on top).
    yaxis=dict(autorange="reversed"),
    # Year range corrected to 2001-2012 to match the loaded file
    # (..._IPC_2001_2012.csv); the original title said 2000-2012.
    title="Top 10 States with highest number of IPC crimes over(2001-2012)",
    template="plotly_dark",
)
fig.show()

In [None]:
# Sum the MURDER column over all years for each state/UT, giving a two-column
# frame: "STATE/UT" and "MURDER".
m = (
    total.groupby("STATE/UT")["MURDER"]
    .sum()
    .reset_index()
)
m.head()

In [None]:
# Translate the crime dataset's upper-case state/UT names into the spellings
# used for the join with map_df. The unusual spellings ("Arunanchal",
# "Dadara", "Havelli") are kept verbatim from the original cell; presumably
# they mirror the shapefile's names. Verify against map_df if a state fails
# to join.
STATE_NAME_MAP = {
    'A & N ISLANDS': 'Andaman & Nicobar Island',
    'ANDHRA PRADESH': 'Andhra Pradesh',
    'ARUNACHAL PRADESH': 'Arunanchal Pradesh',
    'ASSAM': 'Assam',
    'BIHAR': 'Bihar',
    'CHANDIGARH': 'Chandigarh',
    'CHHATTISGARH': 'Chhattisgarh',
    'D & N HAVELI': 'Dadara & Nagar Havelli',
    # Fix: the original line mapped 'Daman & Diu' -> 'DAMAN & DIU', the
    # reverse of every other entry, so it never matched the upper-case source
    # data. NOTE(review): assumes the shapefile spells it 'Daman & Diu'.
    'DAMAN & DIU': 'Daman & Diu',
    'GOA': 'Goa',
    'GUJARAT': 'Gujarat',
    'HARYANA': 'Haryana',
    'HIMACHAL PRADESH': 'Himachal Pradesh',
    'JAMMU & KASHMIR': 'Jammu & Kashmir',
    'JHARKHAND': 'Jharkhand',
    'KARNATAKA': 'Karnataka',
    'KERALA': 'Kerala',
    'LAKSHADWEEP': 'Lakshadweep',
    'MADHYA PRADESH': 'Madhya Pradesh',
    'MAHARASHTRA': 'Maharashtra',
    'MANIPUR': 'Manipur',
    'MEGHALAYA': 'Meghalaya',
    'MIZORAM': 'Mizoram',
    'NAGALAND': 'Nagaland',
    'ODISHA': 'Odisha',
    'PUDUCHERRY': 'Puducherry',
    'PUNJAB': 'Punjab',
    'RAJASTHAN': 'Rajasthan',
    'SIKKIM': 'Sikkim',
    'TAMIL NADU': 'Tamil Nadu',
    'TRIPURA': 'Tripura',
    'UTTAR PRADESH': 'Uttar Pradesh',
    'UTTARAKHAND': 'Uttarakhand',
    'WEST BENGAL': 'West Bengal',
}

# Rename only within the name column (the original replaced across the whole
# frame). Names without an entry in the mapping are left unchanged, and
# re-running this cell is a no-op.
m["STATE/UT"] = m["STATE/UT"].replace(STATE_NAME_MAP)

In [None]:
# Make Telangana a separate row in the dataframe for the geopandas
# visualization by carving an assumed share out of Andhra Pradesh's total.
TELANGANA_MURDER_CASES = 12000  # value chosen by the notebook author, not taken from the data

# Guard so that re-running the cell neither appends a second Telangana row
# nor subtracts the share twice.
if not m["STATE/UT"].eq("Telangana").any():
    # Adjust Andhra Pradesh by name. The original replaced the literal 31756
    # anywhere in the frame, which silently did nothing if the total differed.
    m.loc[m["STATE/UT"] == "Andhra Pradesh", "MURDER"] -= TELANGANA_MURDER_CASES
    # Fix: index the new row by m's own length. The original used
    # len(r.index), a copy-paste from the rape-cases cell that made this cell
    # depend on an unrelated dataframe.
    m.loc[len(m.index)] = ["Telangana", TELANGANA_MURDER_CASES]

In [None]:
# Attach the per-state murder counts to the map geometries, matching the
# map's "st_nm" values against the crime frame's "STATE/UT" values.
shapes_by_state = map_df.set_index('st_nm')
murder_by_state = m.set_index("STATE/UT")
merged = shapes_by_state.join(murder_by_state)

In [None]:
# Choropleth of the total murder cases reported per state.
fig, ax = plt.subplots(1, figsize=(10,10))
ax.axis("off")
# Draw onto the axes created above. The original rebound `fig` to the Axes
# returned by .plot(), hiding the Figure behind a misleading name.
merged.plot(column = "MURDER", cmap="RdPu", linewidth = 0.5, ax=ax, edgecolor="0.2", legend=True)
# Year range corrected to 2001-2012 to match the loaded file
# (..._IPC_2001_2012.csv); the original title said 2000-2012.
ax.set_title("Statewise Murder Cases Reported(2001 - 2012)", size=17);

* The top 3 states with the highest number of reported murder cases are Uttar Pradesh, Bihar and Maharashtra.
* The 3 states/UTs with the lowest number of murder cases are Lakshadweep, Daman & Diu and Dadra & Nagar Haveli.

### Thanks for watching

# **Give an UPVOTE if you liked the Kernel**

## Will upload more visualizations in the upcoming versions.