In [None]:
#installing all the required libraries
pip install pandas
pip install matplotlib
pip install seaborn
pip install scipy
pip install statsmodels
pip install geopandas

In [None]:
#importing all the libraries that will be required
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import linregress
import statsmodels.api as sm
import geopandas as gpd


In [None]:
# Read the suicides-by-cause CSV into a DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='Suicides_by_causes_state.csv')



In [None]:
# Display the raw dataset to inspect its rows and columns before any filtering is applied
data



In [None]:
# The raw data contains additional aggregate rows (e.g. TOTAL UTs and TOTAL ALL INDIA).
# They would distort the visualizations, so keep only the rows whose STATE/UT value
# does NOT start with the string 'TOTAL'.
# na=False makes a missing STATE/UT count as "not a total row": the mask then stays purely
# boolean, so the `~` negation cannot fail on NaN, and such rows are kept rather than crashing.
is_total_row = data['STATE/UT'].str.startswith('TOTAL', na=False)
df = data[~is_total_row]



In [None]:
# `df` is the dataset without the rows whose STATE/UT starts with 'TOTAL'; it is the frame used for the visualizations below.
# Display it to verify the result of the filtering.

df

In [None]:
# Yearly Trends
# Analyze and visualize the yearly trend of total suicide cases as a line chart.

# The CAUSE column also contains the aggregate rows 'Total' and 'Total Illness' (the same rows
# the cause-wise analysis in this notebook excludes). Summing 'Grand Total' over those rows
# together with the individual causes would count the same cases more than once, so drop them first.
# NOTE(review): assumes 'Total' / 'Total Illness' are sums of the other causes - confirm against the dataset.
yearly_cause_rows = df[~df['CAUSE'].isin(['Total', 'Total Illness'])]

# Sum 'Grand Total' for each year
yearly_trends = yearly_cause_rows.groupby('Year')['Grand Total'].sum()

# Draw the trend on an explicit Axes so the labels are attached to this figure only
fig, ax = plt.subplots()
ax.plot(yearly_trends.index, yearly_trends.values)
ax.set_title('Yearly Trends - Total Suicide Cases')
ax.set_xlabel('Year')
ax.set_ylabel('Total Suicide Cases')

# Display the plot
plt.show()


In [None]:
# Gender Distribution
# Analyze and visualize the male/female split of suicide cases per year as a stacked bar chart.

# The CAUSE column also contains the aggregate rows 'Total' and 'Total Illness' (the same rows
# the cause-wise analysis in this notebook excludes). Summing over those rows together with the
# individual causes would count the same cases more than once, so drop them first.
# NOTE(review): assumes 'Total' / 'Total Illness' are sums of the other causes - confirm against the dataset.
gender_cause_rows = df[~df['CAUSE'].isin(['Total', 'Total Illness'])]

# Sum 'Total Male' and 'Total Female' for each year
gender_distribution = gender_cause_rows.groupby('Year')[['Total Male', 'Total Female']].sum()

# Stacked bars: one bar per year, split by gender
ax = gender_distribution.plot(kind='bar', stacked=True)
ax.set_title('Gender Distribution of Suicide Cases Over Years')
ax.set_xlabel('Year')
ax.set_ylabel('Total Suicide Cases')

# Display the plot
plt.show()

In [None]:
# Age Group Analysis
# Analyze suicide cases per year broken down by age group and gender, as a stacked bar chart.

# The CAUSE column also contains the aggregate rows 'Total' and 'Total Illness' (the same rows
# the cause-wise analysis in this notebook excludes). Summing over those rows together with the
# individual causes would count the same cases more than once, so drop them first.
# NOTE(review): assumes 'Total' / 'Total Illness' are sums of the other causes - confirm against the dataset.
age_cause_rows = df[~df['CAUSE'].isin(['Total', 'Total Illness'])]

# Age-group columns to aggregate, males first and then females
age_group_columns = ['Male upto 14 years', 'Male 15-29 years', 'Male 30-44 years', 'Male 45-59 years', 'Male 60 years and above',
                     'Female upto 14 years', 'Female 15-29 years', 'Female 30-44 years', 'Female 45-59 years', 'Female 60 years and above']

# Sum every age-group column for each year
age_group_analysis = age_cause_rows.groupby('Year')[age_group_columns].sum()

# Stacked bars: one bar per year, one segment per age group / gender
ax = age_group_analysis.plot(kind='bar', stacked=True)
# Anchor the legend at the top-right corner of the axes
ax.legend(bbox_to_anchor=(1, 1))
ax.set_title('Age Group Analysis of Suicide Cases Over Years')
ax.set_xlabel('Year')
ax.set_ylabel('Total Suicide Cases')

# Display the plot
plt.show()


In [None]:

# Cause-wise Analysis
# Stacked bar chart of suicide cases per year, split by cause.

# Drop the rows whose CAUSE is 'Total' or 'Total Illness' so they do not appear as causes in the chart
is_excluded_cause = df['CAUSE'].isin(['Total', 'Total Illness'])
filtered_df = df.loc[~is_excluded_cause]

# Sum 'Grand Total' per (Year, CAUSE) pair, then move CAUSE into the columns:
# the result has one row per year and one column per cause
totals_by_year_and_cause = filtered_df.groupby(['Year', 'CAUSE'])['Grand Total'].sum()
cause_wise_analysis = totals_by_year_and_cause.unstack()

# Larger figure because there are many causes; legend anchored at the top-right corner of the axes
ax = cause_wise_analysis.plot.bar(stacked=True, figsize=(15, 10))
ax.legend(bbox_to_anchor=(1, 1))

# Title and axis labels
ax.set_title('Cause-wise Analysis of Suicide Cases Over Years')
ax.set_xlabel('Year')
ax.set_ylabel('Total Suicide Cases')

# Render the figure
plt.show()

In [None]:
# Top 10 Causes
# Bar chart of the ten causes with the highest total number of suicide cases.
# Uses `filtered_df`, i.e. the data without the 'Total' and 'Total Illness' cause rows.

# Total 'Grand Total' per cause
cause_totals = filtered_df.groupby('CAUSE')['Grand Total'].sum()

# Order from largest to smallest and keep the first ten
top_causes = cause_totals.sort_values(ascending=False).head(10)

# One bar per cause
ax = top_causes.plot.bar()

# Title and axis labels
ax.set_title('Top 10 Causes of Suicide Cases')
ax.set_xlabel('Cause')
ax.set_ylabel('Total Suicide Cases')

# Render the figure
plt.show()

In [None]:
# State/UT-wise Analysis
# Bar chart of total suicide cases per state/UT, ordered from highest to lowest.

# The CAUSE column also contains the aggregate rows 'Total' and 'Total Illness' (the same rows
# the cause-wise analysis in this notebook excludes). Summing 'Grand Total' over those rows
# together with the individual causes would count the same cases more than once, so drop them first.
# NOTE(review): assumes 'Total' / 'Total Illness' are sums of the other causes - confirm against the dataset.
state_cause_rows = df[~df['CAUSE'].isin(['Total', 'Total Illness'])]

# Sum 'Grand Total' for each state/UT and sort in descending order
state_wise_analysis = state_cause_rows.groupby('STATE/UT')['Grand Total'].sum().sort_values(ascending=False)

# Wide figure so every state/UT gets its own bar
ax = state_wise_analysis.plot(kind='bar', figsize=(12, 6))
ax.set_title('State/UT-wise Analysis of Suicide Cases')
ax.set_xlabel('State/UT')
ax.set_ylabel('Total Suicide Cases')

# Display the plot
plt.show()


In [None]:
# Yearly Trend for the 'Failure in Examination' cause
# Line chart of the suicide cases recorded under the cause 'Failure in Examination', per year.

# Keep only the rows for the 'Failure in Examination' cause
failure_data = df[df['CAUSE'] == 'Failure in Examination']

# Sum 'Grand Total' for each year; reset_index turns the result into a
# DataFrame with the columns 'Year' and 'Grand Total'
yearly_failure = failure_data.groupby('Year')['Grand Total'].sum().reset_index()

# Line chart with a marker on every yearly data point
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(yearly_failure['Year'], yearly_failure['Grand Total'], marker='o', linestyle='-')

# The plotted values are sums of 'Grand Total', i.e. suicide cases, not a count of exam failures,
# so the title and y-axis label name that quantity (same y-label as the other charts)
ax.set_title('Yearly Trend of Suicide Cases Due to Failure in Examination')
ax.set_xlabel('Year')
ax.set_ylabel('Total Suicide Cases')

# Grid lines make the yearly values easier to read off
ax.grid(True)

# Display the plot
plt.show()