In [132]:
from pathlib import Path
import pandas as pd
from sklearn.linear_model import LinearRegression
import hvplot.pandas
from bokeh.sampledata.unemployment1948 import data as unemployment_df
from bokeh.sampledata.us_marriages_divorces import data as divorces_df
from bokeh.plotting import figure, show
from bokeh.transform import linear_cmap
from bokeh.util.hex import hexbin


In [139]:
# --- Yearly suicide totals per country (cleaned CSVs) ---
us_suicides_total = pd.read_csv(r"../Resources/Clean/us_suicides_1985-2021.csv")
canada_suicides_total = pd.read_csv(
    r"../Resources/Clean/canada_suicides_1985-2016.csv"
)
germany_suicides_total = pd.read_csv(
    r"../Resources/Clean/germany_suicides_1985-2016.csv"
)
japan_suicides_total = pd.read_csv(
    r"../Resources/Clean/japan_suicides_1985-2016.csv"
)
russia_suicides_total = pd.read_csv(
    r"../Resources/Clean/russia_suicides_1985-2016.csv"
)

# --- Suicide rates (cleaned CSVs) ---
us_suicide_rates = pd.read_csv(
    r"../Resources/Clean/us_suicide_rates_1985-2016.csv"
)
us_suicide_rates_99_16 = pd.read_csv(
    r"../Resources/Clean/us_suicide_rates_1999-2016.csv"
)
russia_suicide_rates = pd.read_csv(
    r"../Resources/Clean/russia_suicide_rates_1985-2016.csv"
)

# --- Other cleaned tables ---
us_suicides_master = pd.read_csv(
    r"../Resources/Clean/us_suicides_1985-2016(master).csv"
)
metrics_988 = pd.read_csv(r"../Resources/Clean/988_performance_metrics.csv")
untreated_mental_illness = pd.read_csv(
    r"../Resources/Clean/untreated_mental_illness_total_2018-2019.csv"
)
# Every row gets the same constant "Year" value.
untreated_mental_illness["Year"] = "2018-2019"

# --- Original (uncleaned) sources ---
states_suicides = pd.read_csv(
    r"../Resources/Originals/state_suicides_2005-2021.csv"
)
# NOTE(review): unlike every other name in this cell, `world_suicides` holds the
# file *path* (a str), not a DataFrame — confirm whether a read_csv is missing.
world_suicides = r"../Resources/Originals/world_suicides_1985-2016.csv"

In [67]:
# Make "Year" the index of the untreated-mental-illness table.
# The guard keeps the cell re-runnable: once "Year" is the index it is no longer
# a column, so a second unguarded set_index("Year") would raise KeyError.
if "Year" in untreated_mental_illness.columns:
    untreated_mental_illness = untreated_mental_illness.set_index("Year")

In [68]:
# Bar chart of the untreated-mental-illness table with the y-axis pinned to 0-100
# (presumably the values are percentages — TODO confirm units).
untreated_mental_illness.hvplot.bar(ylim=(0, 100))

In [69]:
# US yearly suicide counts: a line through the series with a black marker on
# each data point. Both layers use the same 800x450 frame and are overlaid
# with `*`.
us_frame_size = {"height": 450, "width": 800}

us_trend_line = us_suicides_total.hvplot(
    x="Year", y="Number of Suicides", rot=45
).opts(**us_frame_size)

us_year_markers = us_suicides_total.hvplot.scatter(
    x="Year", y="Number of Suicides", rot=45, c="k"
).opts(**us_frame_size)

us_trend_line * us_year_markers

In [70]:
# Japan yearly suicide counts: a line through the series with a black marker on
# each data point. Both layers use the same 800x450 frame and are overlaid
# with `*`.
japan_frame_size = {"height": 450, "width": 800}

japan_trend_line = japan_suicides_total.hvplot(
    x="Year", y="Number of Suicides", rot=45
).opts(**japan_frame_size)

japan_year_markers = japan_suicides_total.hvplot.scatter(
    x="Year", y="Number of Suicides", rot=45, c="k"
).opts(**japan_frame_size)

japan_trend_line * japan_year_markers

In [71]:
# Linear trend of US suicide counts over time.
# Feature matrix is the single "Year" column (kept 2-D via the double brackets);
# the target is the yearly count.
X = us_suicides_total[["Year"]]
y = us_suicides_total["Number of Suicides"]

# fit() returns the fitted estimator, so construction and fitting are chained.
model = LinearRegression().fit(X, y)

# Fitted values for the same years the model was trained on.
y_pred = model.predict(X)

# Observed counts as points.
scatter_actual = us_suicides_total.hvplot.scatter(
    x="Year", y="Number of Suicides", rot=37
)

# Fitted values as a red line (the name says "scatter" but this is a line plot).
predicted_frame = X.assign(**{"Predicted Suicides": y_pred})
scatter_predicted = predicted_frame.hvplot.line(
    x="Year", y="Predicted Suicides", color="red"
)

# Overlay the observations and the fitted line.
scatter_actual * scatter_predicted

In [72]:
# Name the unnamed first column "Month" and reverse the row order.
# The guard makes the cell idempotent: without it, every re-run would flip the
# rows again (iloc[::-1] applied twice restores the original order), leaving
# the frame in a state that depends on how many times the cell was executed.
if "Unnamed: 0" in metrics_988.columns:
    metrics_988 = metrics_988.rename(columns={"Unnamed: 0": "Month"}).iloc[::-1]

In [73]:
# Linear trend of the US suicide rate (per 100k) for the 1999-2016 table.
# Feature matrix is the single "Year" column (kept 2-D via the double brackets);
# the target is the yearly rate.
X = us_suicide_rates_99_16[["Year"]]
y = us_suicide_rates_99_16["Suicides per 100k"]

# fit() returns the fitted estimator, so construction and fitting are chained.
model = LinearRegression().fit(X, y)

# Fitted values for the same years the model was trained on.
y_pred = model.predict(X)

# Observed rates as points.
scatter_actual = us_suicide_rates_99_16.hvplot.scatter(
    x="Year", y="Suicides per 100k", rot=37
)

# Fitted values as a red line (the name says "scatter" but this is a line plot).
predicted_frame = X.assign(**{"Predicted Suicides": y_pred})
scatter_predicted = predicted_frame.hvplot.line(
    x="Year", y="Predicted Suicides", color="red"
)

# Overlay the observations and the fitted line.
scatter_actual * scatter_predicted

In [74]:
# Remove the leftover "Unnamed: 0" column from the master table.
# Rebinding (instead of inplace=True) together with errors="ignore" keeps the
# cell re-runnable: a second run finds no such column and is a no-op instead of
# raising KeyError.
us_suicides_master = us_suicides_master.drop(columns=["Unnamed: 0"], errors="ignore")


In [75]:
# Display-only preview of the master table indexed by "Country".
# set_index returns a new frame and the result is not assigned, so
# us_suicides_master itself keeps its current index.
us_suicides_master.set_index("Country")

Unnamed: 0_level_0,Year,Sex,Age,Number of Suicides,Suicides per 100k,GDP for Year ($),GDP per Capita ($),Generation
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
United States,1985,male,75+ years,2177,53.57,4346734000000,19693,G.I. Generation
United States,1985,male,55-74 years,5302,29.50,4346734000000,19693,G.I. Generation
United States,1985,male,25-34 years,5134,24.46,4346734000000,19693,Boomers
United States,1985,male,35-54 years,6053,22.77,4346734000000,19693,Silent
United States,1985,male,15-24 years,4267,21.38,4346734000000,19693,Generation X
...,...,...,...,...,...,...,...,...
United States,2015,female,25-34 years,1444,6.70,18120714000000,60387,Millenials
United States,2015,female,15-24 years,1132,5.23,18120714000000,60387,Millenials
United States,2015,female,75+ years,540,4.58,18120714000000,60387,Silent
United States,2015,male,5-14 years,255,1.20,18120714000000,60387,Generation Z


In [76]:
# Reshape the bokeh unemployment sample data for a heatmap: index by Year, drop
# the 'Annual' column, then transpose so years become the columns and the
# remaining original columns (presumably the months — TODO confirm) become rows.
# The guard keeps the cell re-runnable: the reshape rebinds `unemployment_df`,
# after which 'Year' is no longer a column and an unguarded second run would
# fail inside set_index('Year').
if 'Year' in unemployment_df.columns:
    unemployment_df = unemployment_df.set_index('Year').drop('Annual', axis=1).transpose()

# x='columns' / y='index' tell hvplot to read the wide frame directly: one cell
# per (column, row) pair, coloured by value with the custom palette below.
unemployment_df.hvplot.heatmap(
    x='columns', 
    y='index', 
    title='US Unemployment 1948—2016', 
    cmap=["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1", "#cc7878", "#933b41", "#550b1d"], 
    xaxis='top', 
    rot=70,
    width=1000, height=500).opts(
    fontsize={'title': 10, 'xticks': 5, 'yticks': 5}
)

In [77]:
# Line plot of the US suicide rate ("Suicides per 100k") against "Year".
us_suicide_rates.hvplot(x="Year", y="Suicides per 100k")

In [100]:
# Side-by-side view of the US divorce rate and the US suicide rate over the
# years both tables cover.
#
# The two frames are aligned on their "Year" values rather than by row position
# (the previous tail(27) / head(27) only lines up if both tables happen to
# share the same 27 consecutive years, which nothing here guarantees).
# Assumes "Year" is numeric in both frames — TODO confirm.
overlap_start = max(divorces_df["Year"].min(), us_suicide_rates["Year"].min())
overlap_end = min(divorces_df["Year"].max(), us_suicide_rates["Year"].max())

# Series.between is inclusive on both ends by default.
divorces_overlap = divorces_df[
    divorces_df["Year"].between(overlap_start, overlap_end)
]
suicide_rates_overlap = us_suicide_rates[
    us_suicide_rates["Year"].between(overlap_start, overlap_end)
]

# `+` lays the two plots out next to each other.
divorces_overlap.hvplot(
    x="Year", y="Divorces_per_1000", color="maroon"
) + suicide_rates_overlap.hvplot(x="Year", y="Suicides per 100k")

In [111]:
# Show the first five rows of the master table as a quick sanity check.
us_suicides_master.head()

Unnamed: 0.1,Unnamed: 0,Country,Year,Sex,Age,Number of Suicides,Suicides per 100k,GDP for Year ($),GDP per Capita ($),Generation
0,26848,United States,1985,male,75+ years,2177,53.57,4346734000000,19693,G.I. Generation
1,26849,United States,1985,male,55-74 years,5302,29.5,4346734000000,19693,G.I. Generation
2,26850,United States,1985,male,25-34 years,5134,24.46,4346734000000,19693,Boomers
3,26851,United States,1985,male,35-54 years,6053,22.77,4346734000000,19693,Silent
4,26852,United States,1985,male,15-24 years,4267,21.38,4346734000000,19693,Generation X


In [131]:
# Compare male and female suicide rates per year.
#
# Each sex is selected directly by its own "Sex" value (instead of "everything
# that is not the other sex"), so rows with a missing or unexpected value can
# no longer leak into either series.
#
# NOTE(review): the master table has one row per year/sex/age group, so this
# adds up the per-age-group "Suicides per 100k" values within each year. A sum
# of group rates is not a population-wide rate — confirm this is the intended
# metric.
us_suicides_males = (
    us_suicides_master[us_suicides_master["Sex"] == "male"]
    .groupby("Year")["Suicides per 100k"]
    .sum()
    .reset_index()
)
us_suicides_females = (
    us_suicides_master[us_suicides_master["Sex"] == "female"]
    .groupby("Year")["Suicides per 100k"]
    .sum()
    .reset_index()
)

# Overlay both curves; the labels give each one a legend entry so the two lines
# can be told apart.
us_suicides_males.hvplot(
    x="Year", y="Suicides per 100k", label="Male"
) * us_suicides_females.hvplot(
    x="Year", y="Suicides per 100k", label="Female"
)