In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Echo the full path of every file found beneath the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    file_paths = [os.path.join(dirname, filename) for filename in filenames]
    for file_path in file_paths:
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing the dataset

In [None]:
# Load the raw suicides dataset from the Kaggle input directory.
suicide_data = pd.read_csv(
    filepath_or_buffer="/kaggle/input/suicides-in-india/Suicides in India 2001-2012.csv"
)

In [None]:
# Render the loaded frame as this cell's output for a first look at the data.
suicide_data

In [None]:
# List the distinct values that appear in the State column.
suicide_data["State"].unique()

# Checking missing values

In [None]:
import missingno as msno
# Draw missingno's bar chart over the whole frame to check for missing values.
# NOTE(review): presumably one bar per column showing how many values are present -- confirm against the rendered output.
msno.bar(suicide_data)

In [None]:
# Total suicides per state, largest first.
grp = suicide_data.groupby('State')['Total'].sum()
total_suicides = grp.reset_index().sort_values('Total', ascending=False)
# The State column also carries roll-up rows whose names start with "Total"
# (presumably "Total (All India)", "Total (States)" and "Total (Uts)" -- verify
# against the unique State values). Remove them by name: slicing off the first
# two rows only works while exactly two roll-ups sort to the top, and it lets
# any smaller roll-up row through as if it were a state.
total_suicides = total_suicides[~total_suicides['State'].str.startswith('Total')]

# The total number of suicides per state in descending order

In [None]:
# Show the per-state totals table built in the previous cell.
total_suicides

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Bar chart of total suicides per state, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(18, 6))
g = sns.barplot(x='State', y='Total', data=total_suicides, ax=ax)
# Rotate the tick labels in place instead of re-setting them with
# set_xticklabels(get_xticklabels()), which freezes the label text and leaves
# a list-of-Text repr as the cell output.
ax.tick_params(axis='x', labelrotation=45)
ax.set_title('Total suicides per state')
plt.show()

In [None]:
# Number of rows recorded for each gender, ordered by gender label.
# NOTE(review): value_counts() counts rows, not the 'Total' column -- confirm
# that the row count is the measure intended for this chart.
counts = (
    suicide_data['Gender']
    .value_counts()
    .sort_index()
)
print(counts)

# Pie chart of those counts.
counts.plot(figsize=(10, 8), title='Gender Count', kind='pie')
plt.legend()
plt.show()

In [None]:
# List the distinct Type_code values; the next cell splits the data on them.
suicide_data["Type_code"].unique()

In [None]:
# One subset of the data per Type_code category.
type_code = suicide_data['Type_code']
cause = suicide_data.loc[type_code == 'Causes']
edu_status = suicide_data.loc[type_code == 'Education_Status']
means_adpt = suicide_data.loc[type_code == 'Means_adopted']
prof = suicide_data.loc[type_code == 'Professional_Profile']
soc_status = suicide_data.loc[type_code == 'Social_Status']

In [None]:
def plot_type(data, Title, X_lab):
    """Draw a bar chart of the summed ``Total`` for each ``Type`` in ``data``.

    Bars are ordered from the largest total to the smallest.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to aggregate; must provide ``Type`` and ``Total`` columns.
    Title : str
        Chart title (a newline is appended for spacing).
    X_lab : str
        X-axis label (a newline is prepended for spacing).

    Returns
    -------
    None
        The chart is drawn through the pandas plotting API.
    """
    # Set the seaborn style and palette before drawing so they apply to this
    # chart as well; setting them afterwards only affected the next figure.
    sns.set_style('whitegrid')
    sns.set_palette('Set2')

    # Select 'Total' before summing so the aggregation never touches the
    # other (text) columns of the frame.
    totals_by_type = (
        data.groupby('Type')['Total']
        .sum()
        .sort_values(ascending=False)
    )

    ax = totals_by_type.plot(kind='bar', figsize=(10, 6), title=Title + '\n', width=0.75)
    ax.set_xlabel('\n' + X_lab)
    ax.set_ylabel('Count\n')

In [None]:
# Suicides by cause
plot_type(data=cause, Title='Suicide by cause', X_lab='Cause')

In [None]:
# Suicides by education status
plot_type(data=edu_status, Title='Suicide by Education Status', X_lab='Education Status')

In [None]:
# Suicides by means adopted
plot_type(data=means_adpt, Title='Suicide by Means Adopted', X_lab='Means Adopted')

In [None]:
# Suicides by professional profile
plot_type(data=prof, Title='Suicide by Professional Profile', X_lab='Professional Profile')

In [None]:
# Suicides by social status
plot_type(data=soc_status, Title='Suicide by Social Status', X_lab='Social Status')

In [None]:
# Which age group accounts for the most suicides?
age_grp = suicide_data.groupby('Age_group')['Total'].sum()
age = age_grp.reset_index()
# Skip the first age group by position.
# NOTE(review): presumably this drops an all-ages roll-up row that sorts
# first -- confirm against the data.
age = age.iloc[1:]
age


In [None]:
# Bar chart of total suicides per age group on a small square figure.
age_fig, age_ax = plt.subplots(figsize=(5, 5))
g = sns.barplot(data=age, x='Age_group', y='Total', ax=age_ax)

In [None]:
# Yearly suicide totals per state, reshaped so each row is a state and each
# column a year.
x = suicide_data.groupby(['State','Year'])['Total'].sum()
y = pd.DataFrame(x).reset_index()
y = y.pivot(index='State',columns='Year')
# Row-wise grand total per State value; used below for ranking, then dropped.
y['sum'] = y.sum(axis=1)
# Column totals over every row of the pivot. This is taken before the rows
# are trimmed below, so it covers all State values; its last entry is the
# total of the 'sum' column rather than a year.
yearly_total = y.sum(axis=0)
y = y.sort_values('sum',ascending=False)
# Keep the rows ranked 3rd to 14th by overall total.
# NOTE(review): presumably the two skipped rows are nationwide roll-ups rather
# than states; if so they are still counted in yearly_total above -- confirm.
y = y[2:14]
# NOTE(review): the reason for scaling by 1/10 is not evident from the code --
# confirm or document it.
y = y/10
y = y.drop('sum',axis=1)
y

In [None]:
# Line chart of the yearly suicide totals for India.
yearly = (
    yearly_total
    .to_frame()
    .reset_index()
    .iloc[:-1]  # the final row is the total of the 'sum' column, not a year
    .drop(columns='level_0')
)
yearly.columns = ['Year', 'No of suicides']

plt.figure(figsize=(10, 5))
sns.lineplot(data=yearly, x='Year', y='No of suicides')

In [None]:
# Per-state subsets for the three states examined below.
state_col = suicide_data['State']
State1 = suicide_data.loc[state_col == 'Maharashtra']
State2 = suicide_data.loc[state_col == 'West Bengal']
State3 = suicide_data.loc[state_col == 'Tamil Nadu']

In [None]:
# Show the rows selected for the first state (State1).
State1

In [None]:
# Age-group labels present in State1, taken from the index of value_counts().
State1["Age_group"].value_counts().index

In [None]:
def plot_for_State_by_age(data):
    """Plot total suicides per age group, split by gender, for ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to chart (for example one state's subset); must provide
        ``Age_group``, ``Gender`` and ``Total`` columns.

    Returns
    -------
    None
        The grouped bar chart is drawn on a new 12x6 figure.
    """
    plt.figure(figsize=(12, 6))
    # Aggregate the frame that was passed in. The previous version rebound
    # `data` to the global `suicide_data`, so every call drew the same
    # nationwide chart regardless of the state supplied.
    age_gender = data[['Age_group', 'Gender', 'Total']]
    edSort = (
        age_gender
        .groupby(['Age_group', 'Gender'], as_index=False)
        .sum()
        .sort_values('Total', ascending=False)
    )
    sns.barplot(x='Age_group', y='Total', hue='Gender', data=edSort, palette='RdBu')

# **Number of suicides by age group among males and females in Maharashtra**

In [None]:
# Age-group breakdown for State1
plot_for_State_by_age(data=State1)

# **Number of suicides by age group among males and females in West Bengal**

In [None]:
# Age-group breakdown for State2
plot_for_State_by_age(data=State2)

# **Number of suicides by age group among males and females in Tamil Nadu**

In [None]:
# Age-group breakdown for State3
plot_for_State_by_age(data=State3)

# **What was the social status of the people who committed suicide?**

In [None]:
# Total suicides for each social-status type, split by gender.
filter_social_status = (
    suicide_data.loc[suicide_data["Type_code"] == "Social_Status"]
    .groupby(["Type", "Gender"])["Total"]
    .sum()
    .reset_index()
)
sns.catplot(data=filter_social_status, x="Type", y="Total", hue="Gender", kind="bar", height=8.27, aspect=11.7/8.27);

# **What was the education status of the people who committed suicide?**

In [None]:
# Total suicides for each education-status type, split by gender.
# The variable keeps the name `filter_social_status` although it now holds
# the education-status rows.
filter_social_status = (
    suicide_data.loc[suicide_data["Type_code"] == "Education_Status"]
    .groupby(["Type", "Gender"])["Total"]
    .sum()
    .reset_index()
)
g = sns.catplot(data=filter_social_status, x="Type", y="Total", hue="Gender", kind="bar", height=8.27, aspect=11.7/8.27);
# Turn the category labels vertical.
g.set_xticklabels(rotation=90)

# **What was the profession of the people who committed suicide?**

In [None]:
# Total suicides for each professional-profile type, split by gender.
# The variable keeps the name `filter_social_status` although it now holds
# the professional-profile rows.
filter_social_status = (
    suicide_data.loc[suicide_data["Type_code"] == "Professional_Profile"]
    .groupby(["Type", "Gender"])["Total"]
    .sum()
    .reset_index()
)
g = sns.catplot(data=filter_social_status, x="Type", y="Total", hue="Gender", kind="bar", height=8.27, aspect=11.7/8.27);
# Turn the category labels vertical.
g.set_xticklabels(rotation=90)

# Conclusion

* Males commit more suicides than females in India.
* The highest numbers of suicide cases occur in Maharashtra, West Bengal, and Tamil Nadu.
* If this trend continues, males are likely to keep committing more suicides than females in the future.
* People who commit suicide are mostly:
    * Married
    * Farmers and housewives
    * Youngsters (ages 15-29) and middle-aged adults (ages 30-44)