In [None]:
# This notebook analyzes the countries with the highest suicide rates and how suicide rates relate to the facilities in each country

import os
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import sys
sys.path.append(os.path.abspath('..'))
from common_lib.data_reader import SuicideDataReader, SuicideRawData, SuicideProcessedData

In [None]:
# ========= Getting Data =========
# Load the three input tables through the project's SuicideDataReader and print
# each one in full right after it is read.

data_reader = SuicideDataReader()

# Suicide-rate table; later cells read its "country", "sex" and age-range columns.
suicide_rates_dataframe = data_reader.read_data(SuicideProcessedData.SUICIDE_RATES)
print(suicide_rates_dataframe)

# Facility table; later cells join it on "country" and read the facility columns.
country_facilities_dataframe = data_reader.read_data(SuicideProcessedData.FACILITIES)
print(country_facilities_dataframe)

# Socio-economic table; later cells join it on "country" and read "gini_coef",
# "poverty_199" and "poverty_320".
# NOTE(review): this one is read via SuicideRawData while the other two use
# SuicideProcessedData - confirm that the raw table is the intended source.
gini_coef_dataframe = data_reader.read_data(SuicideRawData.SOCIOECONOMIC)
print(gini_coef_dataframe)

In [None]:
# Constants shared by the plotting cells below.
# Age-range column names read from the suicide-rate data.
ALL_AGE_RANGES = ["all_age", "80_above", "70to79", "60to69", "50to59", "40to49", "30to39", "20to29", "10to19"]
# Facility column names read from the facility data; the order drives subplot placement.
ALL_FACILITIES = ["mental_hospitals", "health_units", "outpatient_facilities", "day_treatment", "residential_facilities"]
# Distinct values of the "sex" column, taken from the loaded data.
# NOTE(review): every value here must be a key of COLOR_FOR_SEX, otherwise the
# per-sex loops raise KeyError - confirm against the data.
ALL_SEX = suicide_rates_dataframe["sex"].unique()
# Marker colour used for each "sex" value.
COLOR_FOR_SEX = {
    "Male" : "#0000AA",
    "Female" : "#AA0000",
    "Both sexes" : "#00AA00"
}
# Human-readable titles for the facility columns.
DISPLAY_NAME_FOR_FACILITIES = {
    "mental_hospitals" : "Mental Hospitals",
    "health_units" : "Health Units",
    "outpatient_facilities" : "Outpatient Facilities",
    "day_treatment" : "Day Treatment",
    "residential_facilities" : "Residential Facilities"
}

In [None]:
# ========= Prepare Data =========
# Attach the facility columns and the socio-economic columns to every
# suicide-rate row. Both joins are left joins on "country", so every
# suicide-rate row is kept even when a country has no match.
suicide_rates_facilities_dataframe = (
    suicide_rates_dataframe
    .merge(country_facilities_dataframe, how="left", on="country")
    .merge(gini_coef_dataframe, how="left", on="country")
)

# Outpatient facilities divided by mental hospitals, row by row.
suicide_rates_facilities_dataframe["hos_out_ratio"] = (
    suicide_rates_facilities_dataframe["outpatient_facilities"]
    / suicide_rates_facilities_dataframe["mental_hospitals"]
)

# Bin every facility column into 10 quantile buckets. labels=False stores the
# integer bucket index in a "<facility>_cat" column.
for facility in ALL_FACILITIES:
    decile_column = "{0}_cat".format(facility)
    suicide_rates_facilities_dataframe[decile_column] = pd.qcut(
        suicide_rates_facilities_dataframe[facility],
        q=10,
        labels=False,
        precision=0,
    )

# Subset holding only the rows whose "sex" is "Both sexes".
is_both_sexes = suicide_rates_facilities_dataframe["sex"] == "Both sexes"
suicide_rates_facilities_both_sexes_dataframe = suicide_rates_facilities_dataframe[is_both_sexes]

In [None]:
# ========= plot graph =========
# Keep the rows whose "10to19" rate is above 10, highest rate first.
# NOTE(review): the filter runs on the frame containing every "sex" value, so a
# country can appear more than once and its bars are drawn on top of each
# other - confirm whether only the "Both sexes" rows were intended.
youth_rate_above_ten = suicide_rates_facilities_dataframe["10to19"] > 10
top_suicide_rates_country_dataframe = (
    suicide_rates_facilities_dataframe[youth_rate_above_ten]
    .sort_values(by=["10to19"], ascending=False)
)

# Bar chart of the selected rows, one bar per country label.
fig, ax = plt.subplots(figsize=(20, 10))
ax.bar(
    top_suicide_rates_country_dataframe["country"],
    top_suicide_rates_country_dataframe["10to19"],
)
ax.set_title("Top Youth Suicide Rate Countries \n rate > 10")
ax.set_xlabel("Country")
ax.set_ylabel("Suicide Rate (%)")
plt.xticks(rotation=90)  # country names are long; rotate them so they do not overlap
plt.show()

In [None]:
# Pairwise correlation between the facility columns and the gini / poverty
# columns, computed on the "Both sexes" rows. The bare last expression makes
# the notebook render the resulting table.
correlation_columns = [*ALL_FACILITIES, "gini_coef", "poverty_199", "poverty_320"]
suicide_rates_facilities_both_sexes_dataframe.loc[:, correlation_columns].corr()

In [None]:
# Scatter every facility column against every other facility column, using the
# "Both sexes" rows. One figure is produced per x-axis facility (facility_2),
# with one subplot per y-axis facility.
for facility_2 in ALL_FACILITIES:
    # The size has to be given when the figure is created: a separate
    # plt.figure(...) call only leaves a stray empty figure behind, and
    # assigning fig.figsize afterwards does not resize anything.
    fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(20, 10), constrained_layout=True)

    # axs.flat walks the 2x3 grid row by row, which matches the order of ALL_FACILITIES.
    for subplot, facility in zip(axs.flat, ALL_FACILITIES):
        subplot.scatter(x = suicide_rates_facilities_both_sexes_dataframe[facility_2],
                        y = suicide_rates_facilities_both_sexes_dataframe[facility],
                        color = "#00AA00",
                        s = 5)
        subplot.set_title("{0}".format(facility))

    fig.suptitle("{0} facilities".format(facility_2))
    axs[1, 2].axis('off')  # five facilities on a six-cell grid: hide the unused cell
    plt.show()

In [None]:
# Plot the suicide rate vs the different facilities for analysis.
# One figure per (sex, age range) pair, with one subplot per facility.
for sex in ALL_SEX:
    sex_color = COLOR_FOR_SEX[sex]
    # Select the rows of the sex being plotted. Previously every figure used the
    # "Both sexes" subset, so the "Male" and "Female" figures showed the wrong data.
    sex_rows = suicide_rates_facilities_dataframe[suicide_rates_facilities_dataframe["sex"] == sex]

    for age_range in ALL_AGE_RANGES:
        # The size has to be given when the figure is created: a separate
        # plt.figure(...) call only leaves a stray empty figure behind, and
        # assigning fig.figsize afterwards does not resize anything.
        fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(20, 10), constrained_layout=True)

        # axs.flat walks the 2x3 grid row by row, which matches the order of ALL_FACILITIES.
        for subplot, facility in zip(axs.flat, ALL_FACILITIES):
            subplot.scatter(x = sex_rows[age_range],
                            y = sex_rows[facility],
                            color = sex_color,
                            s = 5)
            subplot.set_title("{0}".format(facility))
            # Axes objects expose set_xlabel / set_ylabel (not xlabel / ylabel).
            subplot.set_xlabel("{0} {1} suicide rate".format(sex, age_range))
            subplot.set_ylabel("{0}".format(facility))

        fig.suptitle("{0} {1} vs different facilities".format(sex, age_range))
        axs[1, 2].axis('off')  # five facilities on a six-cell grid: hide the unused cell
        plt.show()

In [None]:
# Plot the suicide rate vs the quantile bucket ("<facility>_cat") of each facility.
# One figure per (sex, age range) pair, with one subplot per facility.
for sex in ALL_SEX:
    sex_color = COLOR_FOR_SEX[sex]
    # Filter once per sex instead of twice per subplot.
    sex_rows = suicide_rates_facilities_dataframe[suicide_rates_facilities_dataframe["sex"] == sex]

    for age_range in ALL_AGE_RANGES:
        # The size has to be given when the figure is created: a separate
        # plt.figure(...) call only leaves a stray empty figure behind, and
        # assigning fig.figsize afterwards does not resize anything.
        fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(20, 10), constrained_layout=True)

        # axs.flat walks the 2x3 grid row by row, which matches the order of ALL_FACILITIES.
        for subplot, facility in zip(axs.flat, ALL_FACILITIES):
            subplot.scatter(x = sex_rows[age_range],
                            y = sex_rows["{0}_cat".format(facility)],
                            color = sex_color,
                            s = 5)
            subplot.set_title("{0}".format(facility))
            # Axes objects expose set_xlabel / set_ylabel (not xlabel / ylabel).
            subplot.set_xlabel("{0} {1} suicide rate".format(sex, age_range))
            subplot.set_ylabel("{0}_cat".format(facility))

        fig.suptitle("{0} {1} vs different facilities".format(sex, age_range))
        axs[1, 2].axis('off')  # five facilities on a six-cell grid: hide the unused cell
        plt.show()

In [None]:
# Check a socio-economic column against every facility column ("Both sexes" rows).
# Column plotted on the x axis; "gini_coef" is the other column that was tried here.
test_variable = "poverty_199"

# The figure is created once, with its resolution passed at creation time.
# The earlier plt.figure(figsize=(1920,1080)) call asked for a separate, empty
# 1920x1080-inch figure, and assigning fig.figsize afterwards never resized the
# grid, so the grid was always drawn at the default size; that is kept here.
# NOTE(review): 1920x1080 was presumably meant as pixels - pass
# figsize=(6.4, 3.6) with dpi=300 if that output size is wanted.
fig, axs = plt.subplots(nrows=2, ncols=3, dpi=300, constrained_layout=True)

# dropna() without arguments removes a row if ANY of its columns is missing,
# not only the columns plotted below.
plotting_dataframe = suicide_rates_facilities_both_sexes_dataframe.dropna()

# axs.flat walks the 2x3 grid row by row, which matches the order of ALL_FACILITIES.
for subplot, facility in zip(axs.flat, ALL_FACILITIES):
    subplot.scatter(x = plotting_dataframe[test_variable],
                    y = plotting_dataframe[facility],
                    color = "#00AA00",
                    s = 5)
    subplot.set_title("{0}".format(facility))

axs[1, 2].axis('off')  # five facilities on a six-cell grid: hide the unused cell
plt.show()

In [None]:
# Plot the all-age suicide rate of the "Both sexes" rows against each facility
# column, after removing rows with very large facility values.
age_range = "all_age"

# One figure only: the earlier plt.figure(figsize=(3,1080)) call created a
# separate empty 3x1080-inch figure that was never drawn on. The unused
# `sex` / `sex_color` variables are gone as well; the markers below use a
# fixed semi-transparent green.
fig, axs = plt.subplots(nrows=2, ncols=3, dpi=300, constrained_layout=True)

# Remove outliers: keep rows below a fixed threshold on four facility columns.
# Rows with a missing value in any of these columns are dropped as well,
# because a comparison with NaN is False.
# NOTE(review): "residential_facilities" has no threshold - confirm that is intended.
plotting_dataframe = suicide_rates_facilities_both_sexes_dataframe[(suicide_rates_facilities_both_sexes_dataframe["mental_hospitals"] < 2) &
                                                                   (suicide_rates_facilities_both_sexes_dataframe["health_units"] < 4) &
                                                                   (suicide_rates_facilities_both_sexes_dataframe["outpatient_facilities"] < 10) &
                                                                   (suicide_rates_facilities_both_sexes_dataframe["day_treatment"] < 15)
                                                                  ]

# axs.flat walks the 2x3 grid row by row, which matches the order of ALL_FACILITIES.
for subplot, facility in zip(axs.flat, ALL_FACILITIES):
    subplot.scatter(x = plotting_dataframe[facility],
                    y = plotting_dataframe[age_range],
                    color = "#00AA0070",
                    s = 8)
    subplot.set_title("{0}".format(DISPLAY_NAME_FOR_FACILITIES[facility]))

# The figure title is set once rather than on every loop iteration.
fig.suptitle("Suicide Rate vs different facilities")

axs[1, 2].axis("off")  # five facilities on a six-cell grid: hide the unused cell
# Shared labels: x label under the bottom-middle panel, y label on the left column.
axs[1, 1].set_xlabel("Facility per 100,000 population")
axs[0, 0].set_ylabel("Suicide Rate")
axs[1, 0].set_ylabel("Suicide Rate")
plt.show()

In [None]:
# Both sexes vs facilities: one scatter figure per facility column.
# The row filter does not depend on the facility, so it is built once before
# the loop (as in the male / female cells) instead of on every iteration.
plotting_dataframe = suicide_rates_facilities_both_sexes_dataframe[suicide_rates_facilities_both_sexes_dataframe["mental_hospitals"] < 8]

for facility in ALL_FACILITIES:
    fig=plt.figure(figsize=(12,8), dpi= 100, facecolor="w", edgecolor="k")
    plt.scatter(x = plotting_dataframe[facility],
                y = plotting_dataframe["all_age"],
                s = 12,
                c = "#00AA00"
               )
    plt.xlabel("# {0}".format(facility))
    plt.ylabel("Suicide Rate")
    plt.title("Suicide Rate vs # {0}".format(facility))
    plt.show()

In [None]:
# Males vs facilities: one scatter figure per facility column.
# Keep the "Male" rows whose "mental_hospitals" value is below 8.
below_hospital_cutoff = suicide_rates_facilities_dataframe["mental_hospitals"] < 8
is_male = suicide_rates_facilities_dataframe["sex"] == "Male"
plotting_dataframe = suicide_rates_facilities_dataframe[below_hospital_cutoff & is_male]

for facility in ALL_FACILITIES:
    fig, ax = plt.subplots(figsize=(12, 8), dpi=100, facecolor="w", edgecolor="k")
    ax.scatter(
        x=plotting_dataframe[facility],
        y=plotting_dataframe["all_age"],
        s=12,
        c="#0000AA",
    )
    ax.set_xlabel("# {0}".format(facility))
    ax.set_ylabel("Suicide Rate")
    ax.set_title("Suicide Rate vs # {0}".format(facility))
    plt.show()

In [None]:
# Females vs facilities: one scatter figure per facility column.
# Keep the "Female" rows whose "mental_hospitals" value is below 8.
below_hospital_cutoff = suicide_rates_facilities_dataframe["mental_hospitals"] < 8
is_female = suicide_rates_facilities_dataframe["sex"] == "Female"
plotting_dataframe = suicide_rates_facilities_dataframe[below_hospital_cutoff & is_female]

for facility in ALL_FACILITIES:
    fig, ax = plt.subplots(figsize=(12, 8), dpi=100, facecolor="w", edgecolor="k")
    ax.scatter(
        x=plotting_dataframe[facility],
        y=plotting_dataframe["all_age"],
        s=12,
        c="#AA0000",
    )
    ax.set_xlabel("# {0}".format(facility))
    ax.set_ylabel("Suicide Rate")
    ax.set_title("Suicide Rate vs # {0}".format(facility))
    plt.show()

In [None]:
# Facilities distribution: one 25-bin histogram per facility column ("Both sexes" rows).
# The size has to be given when the figure is created: a separate
# plt.figure(...) call only leaves a stray empty figure behind, and assigning
# fig.figsize afterwards does not resize anything.
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(40, 20), constrained_layout=True)

# axs.flat walks the 2x3 grid row by row, which matches the order of ALL_FACILITIES.
for subplot, facility in zip(axs.flat, ALL_FACILITIES):
    subplot.hist(x = suicide_rates_facilities_both_sexes_dataframe[facility],
                 bins = 25,
                 color = "#00AA00")
    subplot.set_title("{0}".format(facility))

axs[1, 2].axis('off')  # five facilities on a six-cell grid: hide the unused cell
plt.show()

In [None]:
# 3D plot of the "Both sexes" rows: all-age suicide rate (x), quantile bucket of
# mental hospitals (y) and the "poverty_199" column (z).
fig = plt.figure()
# Create the 3D axes through the figure so that they are attached to it.
# Axes3D(fig) no longer adds itself to the figure in current matplotlib
# releases, which leaves the plot empty.
ax = fig.add_subplot(111, projection="3d")
ax.scatter(xs = suicide_rates_facilities_both_sexes_dataframe["all_age"],
           ys = suicide_rates_facilities_both_sexes_dataframe["mental_hospitals_cat"],
           zs = suicide_rates_facilities_both_sexes_dataframe["poverty_199"])
# Label the axes with the plotted column names so the figure can be read on its own.
ax.set_xlabel("all_age")
ax.set_ylabel("mental_hospitals_cat")
ax.set_zlabel("poverty_199")

plt.show()

In [None]:
# Plot the average value of each facility column over the "Both sexes" rows.

# Column means. Despite its name this variable holds means (a Series), not sums.
# numeric_only=True skips text columns such as "country" and "sex"; without it
# pandas 2.x raises a TypeError when asked for their mean.
suicide_rates_column_sum_dataframe = suicide_rates_facilities_both_sexes_dataframe.mean(numeric_only=True)

plotting_dataframe = pd.DataFrame({"facility" : ALL_FACILITIES, "average_num" : suicide_rates_column_sum_dataframe[ALL_FACILITIES]})
print(plotting_dataframe)

# Draw the bars at explicit positions and take each tick label from the
# facility in that same row. The hard-coded label tuple used before listed the
# names in the opposite order to the bars, so the bars were mislabelled.
bar_positions = np.arange(len(plotting_dataframe))
bar_labels = [DISPLAY_NAME_FOR_FACILITIES[facility] for facility in plotting_dataframe["facility"]]

plt.barh(bar_positions, plotting_dataframe["average_num"])
plt.title("Average facilities per 100,000 population")
plt.xlabel("Average number per 100,000 population")
plt.ylabel("Facility")
plt.yticks(bar_positions, bar_labels)
# barh puts position 0 at the bottom; flip the axis so the facilities read
# top-down in ALL_FACILITIES order (Mental Hospitals first).
plt.gca().invert_yaxis()
plt.show()