In [None]:
#installing all the required libraries
pip install pandas
pip install matplotlib
pip install seaborn
pip install scipy
pip install statsmodels
pip install geopandas

In [None]:
#importing all the libraries that will be required
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import linregress
import statsmodels.api as sm
import geopandas as gpd


In [None]:
# Read the suicide records from the CSV file into a DataFrame.
# Known limitation: the dataset is missing data from some states.
data = pd.read_csv(filepath_or_buffer='Suicides_by_causes_state.csv')



In [None]:
# Display the raw dataset for a first visual inspection
data



In [None]:
# Inspecting the dataset shows additional aggregate rows (TOTAL UTs and TOTAL ALL INDIA).
# They would distort the visualizations, so they are excluded here:
# every row whose STATE/UT value starts with the string TOTAL is dropped.
# na=False treats a missing STATE/UT as "not an aggregate row", so the mask is purely
# boolean and the ~ negation cannot fail on NaN; such rows are kept in df.
is_total_row = data['STATE/UT'].str.startswith('TOTAL', na=False)
df = data[~is_total_row]



In [None]:
# From here on, the frame `df` (the data without the aggregate TOTAL rows) is used for the visualizations
# Displaying the data

df

In [None]:
# Yearly trends: line chart of the total number of suicide cases per year.
# NOTE(review): a later cell removes rows whose CAUSE is 'Total' or 'Total Illness' before
# summing; if `df` contains such aggregate rows, the sums below count cases more than once —
# confirm against the dataset.

# Sum 'Grand Total' within each 'Year'
yearly_trends = df.groupby('Year')['Grand Total'].sum()

# Draw the trend line on the current axes
ax = plt.gca()
ax.plot(yearly_trends.index, yearly_trends.values)

# Title and axis labels
ax.set_title('Yearly Trends - Total Suicide Cases')
ax.set_xlabel('Year')
ax.set_ylabel('Total Suicide Cases')

# Render the figure
plt.show()


In [None]:
# Gender distribution: stacked bar chart of male and female suicide cases per year.

# Per-year sums of the two gender total columns
gender_columns = ['Total Male', 'Total Female']
gender_distribution = df.groupby('Year')[gender_columns].sum()

# One bar per year, male and female stacked on top of each other
ax = gender_distribution.plot.bar(stacked=True)

# Title and axis labels
ax.set_title('Gender Distribution of Suicide Cases Over Years')
ax.set_xlabel('Year')
ax.set_ylabel('Total Suicide Cases')

# Render the figure
plt.show()

In [None]:
# Age group analysis: stacked bar chart of suicide cases per year, split by gender and age band.

# Age-band columns, listed per gender (male columns first, then female)
male_age_columns = [
    'Male upto 14 years',
    'Male 15-29 years',
    'Male 30-44 years',
    'Male 45-59 years',
    'Male 60 years and above',
]
female_age_columns = [
    'Female upto 14 years',
    'Female 15-29 years',
    'Female 30-44 years',
    'Female 45-59 years',
    'Female 60 years and above',
]

# Per-year sum of every age-band column
age_group_analysis = df.groupby('Year')[male_age_columns + female_age_columns].sum()

# Stacked bars; the legend is anchored at the upper-right corner of the axes
ax = age_group_analysis.plot(kind='bar', stacked=True)
ax.legend(bbox_to_anchor=(1, 1))

# Title and axis labels
ax.set_title('Age Group Analysis of Suicide Cases Over Years')
ax.set_xlabel('Year')
ax.set_ylabel('Total Suicide Cases')

# Render the figure
plt.show()


In [None]:

# Cause-wise analysis: stacked bar chart of suicide cases per year, broken down by cause.

# Rows whose CAUSE is one of these aggregate labels are left out for a cleaner chart
aggregate_causes = ['Total', 'Total Illness']
filtered_df = df[~df['CAUSE'].isin(aggregate_causes)]

# Sum 'Grand Total' per (Year, CAUSE) pair, then move CAUSE into the columns (years stay as rows)
yearly_cause_totals = filtered_df.groupby(['Year', 'CAUSE'])['Grand Total'].sum()
cause_wise_analysis = yearly_cause_totals.unstack()

# Large stacked bar chart; the legend is anchored at the upper-right corner of the axes
ax = cause_wise_analysis.plot(kind='bar', stacked=True, figsize=(15, 10))
ax.legend(bbox_to_anchor=(1, 1))

# Title and axis labels
ax.set_title('Cause-wise Analysis of Suicide Cases Over Years')
ax.set_xlabel('Year')
ax.set_ylabel('Total Suicide Cases')

# Render the figure
plt.show()

In [None]:
# Top 10 causes: bar chart of the ten causes with the highest summed 'Grand Total'.

# Total per cause, computed on the frame without the aggregate cause rows
totals_by_cause = filtered_df.groupby('CAUSE')['Grand Total'].sum()

# Largest first, keep the first ten entries
top_causes = totals_by_cause.sort_values(ascending=False).iloc[:10]

# One bar per cause
ax = top_causes.plot(kind='bar')

# Title and axis labels
ax.set_title('Top 10 Causes of Suicide Cases')
ax.set_xlabel('Cause')
ax.set_ylabel('Total Suicide Cases')

# Render the figure
plt.show()

In [None]:
# State/UT-wise analysis: bar chart of the summed 'Grand Total' per state/UT, largest first.

# Total per state/UT
state_totals = df.groupby('STATE/UT')['Grand Total'].sum()

# Order from the highest to the lowest total
state_wise_analysis = state_totals.sort_values(ascending=False)

# Wide figure so that every state label fits
ax = state_wise_analysis.plot.bar(figsize=(12, 6))

# Title and axis labels
ax.set_title('State/UT-wise Analysis of Suicide Cases')
ax.set_xlabel('State/UT')
ax.set_ylabel('Total Suicide Cases')

# Render the figure
plt.show()


In [None]:
# Yearly trend for the "Failure in Examination" cause, drawn as a line chart.

# Keep only the rows recorded under the 'Failure in Examination' cause
is_exam_failure = df['CAUSE'] == 'Failure in Examination'
failure_data = df[is_exam_failure]

# Sum 'Grand Total' per year; reset_index turns 'Year' back into a regular column
failure_totals = failure_data.groupby('Year')['Grand Total'].sum()
yearly_failure = failure_totals.reset_index()

# Line with a circle marker at each year
fig = plt.figure(figsize=(10, 6))
ax = fig.gca()
ax.plot(yearly_failure['Year'], yearly_failure['Grand Total'], marker='o', linestyle='-')

# Title and axis labels
ax.set_title('Yearly Trend for Failure in Examination')
ax.set_xlabel('Year')
ax.set_ylabel('Total Failures')

# Grid lines make the yearly values easier to read
ax.grid(True)

# Render the figure
plt.show()

In [None]:
# Yearly trend for the "Failure in Examination" cause, one line per state/UT.

# Keep only the rows recorded under the 'Failure in Examination' cause
failure_data = df.loc[df['CAUSE'] == 'Failure in Examination']

# Sum 'Grand Total' for every (year, state) pair and flatten the result into columns
state_yearly_failure = (
    failure_data
    .groupby(['Year', 'STATE/UT'])['Grand Total']
    .sum()
    .reset_index()
)

# Reshape: years as the index, one column per state, 0 where a pair has no value
pivot_table = state_yearly_failure.pivot_table(
    index='Year',
    columns='STATE/UT',
    values='Grand Total',
    fill_value=0,
)

# Draw every state's line onto the axes of one wide figure
fig = plt.figure(figsize=(15, 8))
ax = fig.gca()
pivot_table.plot(marker='o', linestyle='-', ax=ax)

# Title and axis labels
ax.set_title('Yearly Trend for Failure in Examination by State')
ax.set_xlabel('Year')
ax.set_ylabel('Total Failures')

# Titled legend placed outside the plotting area, to the right
ax.legend(title='State', bbox_to_anchor=(1.05, 1), loc='upper left')

# Grid lines make the yearly values easier to read
ax.grid(True)

# Tighten the layout before rendering
fig.tight_layout()

# Render the figure
plt.show()


In [None]:
# Overall summary: bar chart of the column sums of 'Total Male', 'Total Female' and 'Grand Total'.

# Sum each of the three columns over all rows of df
summary_columns = ['Total Male', 'Total Female', 'Grand Total']
overall_summary = df[summary_columns].sum()

# One bar per summed column
ax = overall_summary.plot.bar()

# Title and axis labels
ax.set_title('Overall Summary of Suicide Cases')
ax.set_xlabel('Gender')
ax.set_ylabel('Total Suicide Cases')

# Render the figure
plt.show()


In [None]:
# Percentage distribution of causes, drawn as a pie chart.
# Caveat: the cause names are long and descriptive, so the labels make this chart cluttered.

# Share of each cause in the overall 'Grand Total' of the filtered frame, in percent
grand_total_by_cause = filtered_df.groupby('CAUSE')['Grand Total'].sum()
cause_percentage_df = grand_total_by_cause / filtered_df['Grand Total'].sum() * 100

# Square figure for the pie
fig = plt.figure(figsize=(10, 10))
ax = fig.gca()

# Wedges labelled with the cause name and its percentage (one decimal place)
pie = ax.pie(
    cause_percentage_df,
    labels=cause_percentage_df.index,
    autopct='%1.1f%%',
    startangle=90,
)

# Title of the chart
ax.set_title('Percentage Distribution of Causes')

# Rotate the cause labels (second element of the pie result); adjust the angle as needed
plt.setp(pie[1], rotation=50)

# Render the figure
plt.show()


In [None]:
# Regression analysis of the yearly totals over the entire dataset.

# Total number of suicide cases per year, with 'Year' as a regular column
regression_df = df.groupby('Year')['Grand Total'].sum().reset_index()
years = regression_df['Year']
yearly_totals = regression_df['Grand Total']

# SciPy linear regression; its results are stored but not used by the plot below
slope, intercept, r_value, p_value, std_err = linregress(years, yearly_totals)

# statsmodels OLS (Ordinary Least Squares) fit of the same relationship;
# add_constant supplies the constant term for the intercept
X = sm.add_constant(years)
model = sm.OLS(yearly_totals, X).fit()

# Fitted values of the OLS model, used as the trend line
trend_line = model.predict(X)

# Actual yearly totals (solid) against the fitted trend (dashed)
ax = plt.gca()
ax.plot(years, yearly_totals, label='Actual')
ax.plot(years, trend_line, label='Trend Line', linestyle='dashed')
ax.legend()
ax.set_title('Trend Analysis of Suicides Over the Years')
ax.set_xlabel('Year')
ax.set_ylabel('Total Suicides')
plt.show()

In [None]:
# Grouped bar chart comparing Andhra Pradesh's yearly suicide totals with the national
# average, i.e. the mean yearly total across all states/UTs present in df.
#
# Changes compared with the earlier version of this cell:
#   * the series labelled 'National Average' used to be the national *sum*; it is now the
#     mean of the per-state yearly totals, so both bars are on a comparable scale
#   * only the 'Grand Total' column is aggregated instead of summing every column of df
#   * both series are aligned on 'Year' in one DataFrame and drawn side by side, instead of
#     being overlaid on the same axes by bar position

# Yearly total for Andhra Pradesh
andhra_yearly = df.loc[df['STATE/UT'] == 'ANDHRA PRADESH'].groupby('Year')['Grand Total'].sum()

# Yearly total per state/UT, then the mean of those totals within each year
state_yearly_totals = df.groupby(['Year', 'STATE/UT'])['Grand Total'].sum()
national_average = state_yearly_totals.groupby(level='Year').mean()

# Align both series on the year index; a year missing from one series simply gets no bar
comparison = pd.DataFrame({
    'Andhra Pradesh': andhra_yearly,
    'National Average': national_average,
})

bar_chart_state_comparison = comparison.plot(kind='bar', color=['blue', 'orange'])
bar_chart_state_comparison.set_title('Comparison of Suicide Rates in Andhra Pradesh and National Average')
bar_chart_state_comparison.set_xlabel('Year')
bar_chart_state_comparison.set_ylabel('Total Suicides')
bar_chart_state_comparison.legend()
plt.show()


In [None]:
# Read the state boundaries from a GeoJSON file and merge them with the suicide data.

# gpd.read_file parses the file and returns a GeoDataFrame directly
geo_data = gpd.read_file('./INDIA_STATES.json')

# `gdf` is kept as a second name for the same GeoDataFrame. It used to be rebuilt with
# GeoDataFrame.from_features(geo_data.get("features", [])), but a frame returned by read_file
# has no "features" column (unless the file's properties define one), so that lookup fell
# back to the empty list and produced an empty frame.
gdf = geo_data

# Left merge: every boundary row is kept and matched with the rows of df whose
# 'STATE/UT' equals the boundary's 'STATE' value.
# NOTE(review): df has many rows per state, so each geometry is repeated once per matching
# row; the merge also needs identical spelling/case in both columns (df uses upper-case
# names such as 'ANDHRA PRADESH') — confirm the 'STATE' values in the GeoJSON file.
merged_df = geo_data.merge(df, left_on="STATE", right_on="STATE/UT", how="left")
