# **Importing libraries**

In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm 
import seaborn as sns


from plotly.offline import init_notebook_mode, plot
import plotly as py
import plotly.express as px
import plotly.graph_objs as go
init_notebook_mode(connected=True)

import geopandas as gpd

# **Reading and storing data**

In [None]:
# Load the raw suicides CSV into a DataFrame and preview its first rows.
csv_path = '../input/suicides-in-india/Suicides in India 2001-2012.csv'
data = pd.read_csv(csv_path)
data.head()

# **Check the shape of the data**

In [None]:
# (number of rows, number of columns) of the raw frame
data.shape

# **Fetch all the unique values under column 'State'**

In [None]:
# Distinct values found in the 'State' column of the raw frame
data['State'].unique()

# **Remove unnecessary rows from the data**

In [None]:
# Discard the pre-aggregated "Total ..." rows so that only the remaining
# 'State' entries are kept, then show the resulting shape.
aggregate_labels = ['Total (All India)', 'Total (States)', 'Total (Uts)']
is_aggregate_row = data['State'].isin(aggregate_labels)
frame = data[~is_aggregate_row]
frame.shape

In [None]:
# Distinct 'State' values left after the "Total ..." rows were filtered out
frame['State'].unique()

# **Analysing only the causes of suicides**

In [None]:
# Restrict the analysis to rows whose Type_code is 'Causes'.
is_cause_row = frame['Type_code'].eq('Causes')
final_frame = frame.loc[is_cause_row]
final_frame.shape

# **Removing uncertain causes for suicides for better EDA**

In [None]:
# Leave out the catch-all "other causes" category.
# The label is kept verbatim (including its spelling) so it matches the filter
# used so far; presumably it mirrors the dataset's own spelling - verify.
catch_all_cause = 'Other Causes (Please Specity)'
keep_row = final_frame['Type'].ne(catch_all_cause)
final_frame_new = final_frame.loc[keep_row]
final_frame_new.shape

In [None]:
# Cause categories that are excluded from the analysis.
# Two spellings of the "Not having Children" label are listed (with and without
# a space before the parenthesis); presumably both occur in the data - verify.
excluded_causes = [
    'Causes Not known',
    'Other Prolonged Illness',
    'Not having Children(Barrenness/Impotency',
    'Not having Children (Barrenness/Impotency',
]
final_frame_new = final_frame_new[~final_frame_new['Type'].isin(excluded_causes)]

# Take an explicit copy: the frame above is the result of chained boolean
# filters, and wrapping it in pd.DataFrame(...) would not detach it from them.
# Later cells modify data_new, which must not touch (or warn about) the
# filtered parents.
data_new = final_frame_new.copy()
data_new.shape

# **A completely new dataframe for EDA**

In [None]:
# Display the cleaned frame that the following cells analyse
data_new

# **Year-wise suicide comparison using bar graph plot**

In [None]:
# Set the figure size BEFORE plotting; applied afterwards it only affects
# figures created by later cells, not this one.
sns.set(rc={'figure.figsize':(30,15)})

# Aggregate once and take both axes from the same Series, so every bar is
# guaranteed to be labelled with its own year. (unique() returns values in
# order of appearance, groupby() returns them sorted - the two only line up
# by coincidence.)
yearly_totals = data_new.groupby('Year')['Total'].sum()

ax = sns.barplot(x = yearly_totals.index, y = yearly_totals.values, ci = None, palette = 'husl')
ax.set_xlabel('Year')
ax.set_ylabel('Suicide Count')
ax.set_title('Suicides per year')
plt.show()

# **Age-group wise suicide comparison using bar graph plot**

In [None]:
# Total suicides per age group, as a two-column frame (Age_group, Total).
# Plotting straight from this aggregated frame keeps each bar tied to its own
# label instead of pairing unique() (appearance order) with groupby() (sorted
# order) and hoping the two orders agree.
age_group_totals = data_new.groupby('Age_group', as_index=False)['Total'].sum()

fig = px.bar(
    age_group_totals,
    x = 'Age_group',
    y = 'Total',
    color_discrete_sequence=["#516882"],
)

fig.update_layout(
    title="Age Group suicide",
    xaxis_title="Age group",
    yaxis_title="Suicide Count",
)

fig.show()

# **Gender-wise suicide comparison using grouped bar graph plot**

In [None]:
import plotly.graph_objects as go

# Year x Gender table of totals: one row per year, one column per gender.
# Each trace reads its own column, so the trace name, the data and the x axis
# can no longer drift apart. (Grouping by a boolean mask such as
# `Gender == 'Male'` yields BOTH the False and the True group - twice as many
# values as there are years - and only looks right when the surplus is dropped.)
yearly_by_gender = (
    data_new.groupby(['Year', 'Gender'])['Total']
    .sum()
    .unstack('Gender')
)

fig = go.Figure()
for gender, colour in (('Female', 'indianred'), ('Male', 'lightsalmon')):
    fig.add_trace(go.Bar(
        x=yearly_by_gender.index,
        y=yearly_by_gender[gender],
        name=gender,
        marker_color=colour,
    ))

fig.update_layout(
    barmode='group',
    xaxis_tickangle=-90,
    title='Suicides per year by gender',
    xaxis_title='Year',
    yaxis_title='Suicide Count',
)
fig.show()

# **Importing the Indian states shapefile and harmonising state names for the join**

In [None]:
# State polygons used for the choropleth maps.
india_map = gpd.read_file('../input/final-shp/Indian_States.shp')

# Rename states in the suicide data so they match the names used on the map.
# The result is assigned back instead of calling replace(inplace=True) on a
# column selection: that chained in-place form warns on recent pandas and is
# not propagated to the parent frame under copy-on-write.
data_new = data_new.assign(
    State=data_new['State'].replace({'A & N Islands':'Andaman & Nicobar Island',
                                     'Delhi (Ut)':'NCT of Delhi',
                                     'D & N Haveli':'Dadra and Nagar Haveli',
                                     })
)

# Rename map entries so they match the suicide data.
# 'Telangana' is relabelled 'Andhra Pradesh', so its polygon is joined to the
# Andhra Pradesh totals (presumably because the data has no separate Telangana
# entry - verify).
india_map['st_nm'] = india_map['st_nm'].replace({'Telangana':'Andhra Pradesh',
                                                 'Dadara & Nagar Havelli': 'Dadra and Nagar Haveli',
                                                 })

india_map.st_nm.unique()

In [None]:
# Distinct state names in the suicide data, for comparison with the map's names
data_new.State.unique()

# **Grouping the data for state-wise suicide count**

In [None]:
# Give the map's state-name column the same name as in the suicide data,
# so both frames share a 'State' key.
india_map = india_map.rename(columns={'st_nm': 'State'})

# Total suicides per state: a one-column ('Total') frame indexed by State.
suicide_data_states = data_new.groupby('State')[['Total']].sum()

# **Geospatial analysis of state-wise suicides**

In [None]:
# Attach the per-state totals to the state polygons (join on the shared 'State' key).
suicide_data_map = india_map.merge(suicide_data_states, on='State')

# One (x, y) point per geometry, taken from its representative point; used to
# anchor the text label of each state.
suicide_data_map['coords'] = [
    geom.representative_point().coords[0] for geom in suicide_data_map['geometry']
]

fig, ax = plt.subplots(figsize=(22, 15))

cmap = 'Reds'

# NOTE(review): the map is drawn with the binned 'equal_interval' scheme while
# the colorbar below is continuous, so the two agree only approximately.
ax = suicide_data_map.plot(ax=ax, cmap=cmap, column='Total', scheme='equal_interval', edgecolor='black')
ax.set_facecolor('white')
ax.set_title('Suicide Cases per State')

# Write each state's total on top of its polygon.
label_box = {'facecolor': 'white', 'alpha':0.8, 'pad': 2, 'edgecolor':'none'}
for _, row in suicide_data_map.iterrows():
    x, y = row['coords']
    ax.text(x, y, s=row['Total'], horizontalalignment='center', bbox=label_box)

# Colorbar spanning the observed range of totals. The ScalarMappable is not
# attached to any Axes, so the Axes to take space from is passed explicitly;
# newer Matplotlib versions refuse to guess it.
norm = matplotlib.colors.Normalize(vmin=suicide_data_map['Total'].min(), vmax=suicide_data_map['Total'].max())
n_cmap = cm.ScalarMappable(cmap=cmap, norm=norm)
n_cmap.set_array([])
fig.colorbar(n_cmap, ax=ax)

# Longitude/latitude ticks carry no information on this map.
ax.set_xticks([])
ax.set_yticks([])
plt.show()

# **Major causes of suicides**

In [None]:
# Total suicides per cause. Only 'Total' is aggregated: summing every column
# would also add up the 'Year' values and, on pandas >= 2.0, concatenate the
# string columns.
q_stats = data_new.groupby('Type')[['Total']].sum()

# Causes ranked from most to least frequent.
q_stats.Total.sort_values(ascending = False)

# **Geospatial analysis of state-wise suicide with *family problems* as a cause**

In [None]:
# Rows whose recorded cause is 'Family Problems'.
family_rows = data_new[data_new['Type'] == 'Family Problems']

# Sum per (Type, State). numeric_only=True keeps the aggregation to the numeric
# columns; without it pandas >= 2.0 also "sums" (concatenates) the string columns.
f_p = family_rows.groupby(['Type','State']).sum(numeric_only=True)

# A sum of years is meaningless, so drop it and keep the totals only.
stats_1 = f_p.drop(columns=['Year'])

f_p_stats = pd.DataFrame(stats_1)

In [None]:
# Attach the per-state family-problem totals to the state polygons
# (join on the shared 'State' key).
suicide_data_map = india_map.merge(f_p_stats, on='State')

# One (x, y) point per geometry, taken from its representative point; used to
# anchor the text label of each state.
suicide_data_map['coords'] = [
    geom.representative_point().coords[0] for geom in suicide_data_map['geometry']
]

fig, ax = plt.subplots(figsize=(22, 15))

cmap = 'RdPu'

# NOTE(review): the map is drawn with the binned 'equal_interval' scheme while
# the colorbar below is continuous, so the two agree only approximately.
ax = suicide_data_map.plot(ax=ax, cmap=cmap, column='Total', scheme='equal_interval', edgecolor='black')
ax.set_facecolor('white')
# Name the cause in the title so this map can be told apart from the other maps.
ax.set_title('Suicide Cases per State: Family Problems')

# Write each state's total on top of its polygon.
label_box = {'facecolor': 'white', 'alpha':0.8, 'pad': 2, 'edgecolor':'none'}
for _, row in suicide_data_map.iterrows():
    x, y = row['coords']
    ax.text(x, y, s=row['Total'], horizontalalignment='center', bbox=label_box)

# Colorbar spanning the observed range of totals. The ScalarMappable is not
# attached to any Axes, so the Axes to take space from is passed explicitly;
# newer Matplotlib versions refuse to guess it.
norm = matplotlib.colors.Normalize(vmin=suicide_data_map['Total'].min(), vmax=suicide_data_map['Total'].max())
n_cmap = cm.ScalarMappable(cmap=cmap, norm=norm)
n_cmap.set_array([])
fig.colorbar(n_cmap, ax=ax)

# Longitude/latitude ticks carry no information on this map.
ax.set_xticks([])
ax.set_yticks([])
plt.show()

# **Geospatial analysis of state-wise suicide with *insanity/mental illness* as a cause**

In [None]:
# Rows whose recorded cause is 'Insanity/Mental Illness'.
mental_illness_rows = data_new[data_new['Type'] == 'Insanity/Mental Illness']

# Sum per (Type, State). numeric_only=True keeps the aggregation to the numeric
# columns; without it pandas >= 2.0 also "sums" (concatenates) the string columns.
mi = mental_illness_rows.groupby(['Type','State']).sum(numeric_only=True)

# A sum of years is meaningless, so drop it and keep the totals only.
stats_2 = mi.drop(columns=['Year'])

mi_stats = pd.DataFrame(stats_2)

In [None]:
# Attach the per-state mental-illness totals to the state polygons
# (join on the shared 'State' key).
suicide_data_map = india_map.merge(mi_stats, on='State')

# One (x, y) point per geometry, taken from its representative point; used to
# anchor the text label of each state.
suicide_data_map['coords'] = [
    geom.representative_point().coords[0] for geom in suicide_data_map['geometry']
]

fig, ax = plt.subplots(figsize=(22, 15))

cmap = 'Blues'

# NOTE(review): the map is drawn with the binned 'equal_interval' scheme while
# the colorbar below is continuous, so the two agree only approximately.
ax = suicide_data_map.plot(ax=ax, cmap=cmap, column='Total', scheme='equal_interval', edgecolor='black')
ax.set_facecolor('white')
# Name the cause in the title so this map can be told apart from the other maps.
ax.set_title('Suicide Cases per State: Insanity/Mental Illness')

# Write each state's total on top of its polygon.
label_box = {'facecolor': 'white', 'alpha':0.8, 'pad': 2, 'edgecolor':'none'}
for _, row in suicide_data_map.iterrows():
    x, y = row['coords']
    ax.text(x, y, s=row['Total'], horizontalalignment='center', bbox=label_box)

# Colorbar spanning the observed range of totals. The ScalarMappable is not
# attached to any Axes, so the Axes to take space from is passed explicitly;
# newer Matplotlib versions refuse to guess it.
norm = matplotlib.colors.Normalize(vmin=suicide_data_map['Total'].min(), vmax=suicide_data_map['Total'].max())
n_cmap = cm.ScalarMappable(cmap=cmap, norm=norm)
n_cmap.set_array([])
fig.colorbar(n_cmap, ax=ax)

# Longitude/latitude ticks carry no information on this map.
ax.set_xticks([])
ax.set_yticks([])
plt.show()

# **Geospatial analysis of state-wise suicide with *love affairs* as a cause**

In [None]:
# Rows whose recorded cause is 'Love Affairs'.
love_rows = data_new[data_new['Type'] == 'Love Affairs']

# Sum per (Type, State). numeric_only=True keeps the aggregation to the numeric
# columns; without it pandas >= 2.0 also "sums" (concatenates) the string columns.
love = love_rows.groupby(['Type','State']).sum(numeric_only=True)

# Build the stats from `love` (not from the mental-illness frame `mi`), so the
# map of this section really shows the love-affairs numbers.
# A sum of years is meaningless, so drop it and keep the totals only.
stats_3 = love.drop(columns=['Year'])

love_stats = pd.DataFrame(stats_3)

In [None]:
# Attach the per-state totals held in love_stats to the state polygons
# (join on the shared 'State' key).
suicide_data_map = india_map.merge(love_stats, on='State')

# One (x, y) point per geometry, taken from its representative point; used to
# anchor the text label of each state.
suicide_data_map['coords'] = [
    geom.representative_point().coords[0] for geom in suicide_data_map['geometry']
]

fig, ax = plt.subplots(figsize=(22, 15))

cmap = 'BuGn'

# NOTE(review): the map is drawn with the binned 'equal_interval' scheme while
# the colorbar below is continuous, so the two agree only approximately.
ax = suicide_data_map.plot(ax=ax, cmap=cmap, column='Total', scheme='equal_interval', edgecolor='black')
ax.set_facecolor('white')
# Name the cause in the title so this map can be told apart from the other maps.
ax.set_title('Suicide Cases per State: Love Affairs')

# Write each state's total on top of its polygon.
label_box = {'facecolor': 'white', 'alpha':0.8, 'pad': 2, 'edgecolor':'none'}
for _, row in suicide_data_map.iterrows():
    x, y = row['coords']
    ax.text(x, y, s=row['Total'], horizontalalignment='center', bbox=label_box)

# Colorbar spanning the observed range of totals. The ScalarMappable is not
# attached to any Axes, so the Axes to take space from is passed explicitly;
# newer Matplotlib versions refuse to guess it.
norm = matplotlib.colors.Normalize(vmin=suicide_data_map['Total'].min(), vmax=suicide_data_map['Total'].max())
n_cmap = cm.ScalarMappable(cmap=cmap, norm=norm)
n_cmap.set_array([])
fig.colorbar(n_cmap, ax=ax)

# Longitude/latitude ticks carry no information on this map.
ax.set_xticks([])
ax.set_yticks([])
plt.show()