# Mental Health matters, always and everywhere

## Introduction

In this Jupyter notebook I explore a dataset about mental health that I found on Kaggle. The data covers several factors, such as country, year, mental and substance use, and the prevalence of depression. I mainly use this dataset to practise new skills, such as plotting data on a world map and creating visualisations that are useful and easy to read and understand.


### Geospatial Data Visualisation
Geospatial analysis is a branch of data science that deals with location-based data. Geodata is increasingly important, since most businesses and applications revolve around a location element. Visualisations help to derive insights from location-based datasets more quickly.

In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#import folium

In [None]:
# Folder that holds the project's CSV files. It defaults to the original
# author's local folder; set the MENTAL_HEALTH_DATA_DIR environment variable to
# run the notebook on another machine without editing this cell.
DATA_DIR = Path(os.environ.get('MENTAL_HEALTH_DATA_DIR', '/Users/Roz/Documents/mental_health_project/data'))

# Load the male-vs-female depression prevalence table and print its columns,
# dtypes and non-null counts.
prevalence_depression = pd.read_csv(DATA_DIR / 'prevalence-of-depression-males-vs-females.csv')
prevalence_depression.info()

In [None]:
# Swap the verbose source headers for short names that are easier to type.
column_renames = {
    "Entity": "Country",
    "Prevalence - Depressive disorders - Sex: Male - Age: Age-standardized (Percent)": "Prevalence_depr_male",
    "Prevalence - Depressive disorders - Sex: Female - Age: Age-standardized (Percent)": "Prevalence_depr_female",
    "Population (historical estimates)": "Population_estimate",
}
prevalence_depression = prevalence_depression.rename(columns=column_renames)

prevalence_depression.head()

In [None]:
# Count the missing values in each column before any cleaning is applied.
prevalence_depression.isnull().sum()

In [None]:
# Remove the 'Continent' column (not needed here) and then drop every row that
# still contains a missing value. The column goes first so that any gaps in it
# cannot cause rows to be discarded.
# errors='ignore' plus a non-inplace dropna keep this cell safe to re-run: the
# previous version raised KeyError on a second run because 'Continent' was
# already gone.
prevalence_depression = (
    prevalence_depression
    .drop(columns=['Continent'], errors='ignore')
    .dropna(how='any')
)

In [None]:
# Re-count missing values per column; every count should now be zero.
prevalence_depression.isnull().sum()

In [None]:
import geopandas as gpd

# Load the low-resolution Natural Earth dataset that ships with GeoPandas.
# NOTE(review): the bundled `gpd.datasets` module was deprecated in GeoPandas
# 0.14 and removed in 1.0 - confirm the installed version still provides it.
naturalearth_path = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(naturalearth_path)
world.head()

In [None]:
# Keep only the geometry, the country name and the ISO code; .copy() detaches
# the result from the frame that was loaded.
keep_columns = ['geometry', 'name', 'iso_a3']
world = world[keep_columns].copy()
world.head()

In [None]:
# Give the map the same column names as the depression table so the two can be
# joined on 'Code'.
# The former `0: "geometry"` entry was dropped: no column is labelled 0 (only
# 'geometry', 'name' and 'iso_a3' are kept above), so it never had any effect.
world = world.rename(columns={'name': 'Country', 'iso_a3': 'Code'})
world.info()

In [None]:
# centroid_list[['x_cor', 'y_cor']] = centroid_list.coordinates.str.split("(", expand = True) ignore

# Attach each country's geometry to the depression records via the ISO code.
# Only 'Code' and 'geometry' are taken from the map: both tables carry a
# 'Country' column, so merging the full frames produced 'Country_x' and
# 'Country_y' instead of a single 'Country'.
# pd.merge defaults to an inner join, so rows whose code is missing from either
# side are dropped.
mergedDF = pd.merge(prevalence_depression, world[['Code', 'geometry']], on = "Code")

mergedDF
#mergedDF.isnull().sum() # check whether there are null values

In [None]:
# Summary statistics of the merged table.
# The standard deviation measures how spread out the values of a column are.
# NOTE(review): the cut-offs hard-coded in prev_categorized presumably come from this output - TODO confirm.
mergedDF.describe()

In [None]:
# Male depression prevalence, side by side: histogram (left) and box plot (right).
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10,4))
fig.suptitle("Prevalence of depression in males (percent)")

sns.histplot(mergedDF['Prevalence_depr_male'], ax=ax[0],bins=15, edgecolor="yellow", color="green")
sns.boxplot(mergedDF['Prevalence_depr_male'], ax=ax[1], color="green")
# `plt.show` without parentheses only named the function and never called it.
plt.show()

# plt.boxplot(mergedDF['Prevalence_depr_male'])
# plt.show

# prev_depression_afghanistan = mergedDF.loc[mergedDF['Country'] == 'Afghanistan']
# prev_depression_afghanistan

# plt.hist(prev_depression_afghanistan['Prevalence_depr_male'], edgecolor = 'black')
# plt.show

In [None]:
# Same two plots as the side-by-side figure, stacked vertically: histogram on
# top, box plot below.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(4,8))
fig.suptitle("Prevalence of depression in males (percent)")

sns.histplot(mergedDF['Prevalence_depr_male'], ax=ax[0],bins=15, edgecolor="yellow", color="green")
sns.boxplot(mergedDF['Prevalence_depr_male'], ax=ax[1], color="green")
# `plt.show` without parentheses only named the function and never called it.
plt.show()


In [None]:
def prev_categorized(row, column='Prevalence_depr_male', high_threshold=3.676174, moderate_threshold=2.603879):
    """Label one record's depression prevalence as 'high', 'moderate' or 'low'.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must support ``row[column]`` and yield a number.
    column : str, default 'Prevalence_depr_male'
        Key of the prevalence value to classify.
    high_threshold : float, default 3.676174
        Values greater than or equal to this are labelled 'high'.
    moderate_threshold : float, default 2.603879
        Values greater than or equal to this, but below ``high_threshold``,
        are labelled 'moderate'.

    Returns
    -------
    str
        'high', 'moderate' or 'low'. Any value that fails both comparisons,
        including NaN, is labelled 'low'.
    """
    value = row[column]
    if value >= high_threshold:
        return 'high'
    if value >= moderate_threshold:
        return 'moderate'
    return 'low'

# Preview the label computed for every row; nothing is stored yet.
mergedDF.apply(prev_categorized, axis=1)

In [None]:
# Store the per-row label for the male prevalence in a new column.
# Run the cells in order: the name prev_categorized is redefined for the female
# column further down, so re-running this cell afterwards would use that version.
male_labels = mergedDF.apply(prev_categorized, axis=1)
mergedDF['Prevalence_male'] = male_labels
mergedDF

In [None]:
def prev_categorized(row, column='Prevalence_depr_female', high_threshold=3.676174, moderate_threshold=2.603879):
    """Label one record's depression prevalence as 'high', 'moderate' or 'low'.

    This definition re-binds the name ``prev_categorized``, which an earlier
    cell defines for the male column; from here on the default column is the
    female one.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must support ``row[column]`` and yield a number.
    column : str, default 'Prevalence_depr_female'
        Key of the prevalence value to classify.
    high_threshold : float, default 3.676174
        Values greater than or equal to this are labelled 'high'.
    moderate_threshold : float, default 2.603879
        Values greater than or equal to this, but below ``high_threshold``,
        are labelled 'moderate'.

    Returns
    -------
    str
        'high', 'moderate' or 'low'. Any value that fails both comparisons,
        including NaN, is labelled 'low'.

    Notes
    -----
    NOTE(review): the default cut-offs are the same numbers used for the male
    column earlier in the notebook - confirm they are meant to apply to the
    female values as well.
    """
    value = row[column]
    if value >= high_threshold:
        return 'high'
    if value >= moderate_threshold:
        return 'moderate'
    return 'low'

# Label every row by its female prevalence and keep the result as a new column.
female_labels = mergedDF.apply(prev_categorized, axis=1)
mergedDF['Prevalence_female'] = female_labels
mergedDF

In [None]:
import geopandas
# Read the Natural Earth dataset again, replacing the trimmed and renamed
# `world` frame with the full one, and draw it as a plain base map.
naturalearth_path = geopandas.datasets.get_path("naturalearth_lowres")
world = geopandas.read_file(naturalearth_path)
# world.head()

world.plot(figsize=(12,8))

In [None]:
# Wrap the merged table in a GeoDataFrame so it can be plotted.
# Note: `gdf` is reassigned from the 2010 subset a couple of cells further down.
gdf = geopandas.GeoDataFrame(data=mergedDF)

gdf.head()

In [None]:
# Keep only the records for the year 2010.
is_2010 = mergedDF['Year'] == 2010
prev_depression_2010 = mergedDF.loc[is_2010]
prev_depression_2010

In [None]:
# Turn the 2010 subset into a GeoDataFrame; this replaces the earlier `gdf`.
gdf = gpd.GeoDataFrame(data=prev_depression_2010)
gdf.head()


In [None]:
from mpl_toolkits.axes_grid1 import make_axes_locatable

# Map of male depression prevalence for 2010, drawn on top of a blank world map.
fig, ax = plt.subplots(figsize=(15, 10))
# Reserve a narrow strip to the right of the map for the colour bar.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

# Base layer: every country with a white fill and a grey outline.
world.plot(ax=ax, color='white', edgecolor='gray')
# Data layer: countries coloured by the male prevalence column.
world_map_2010_male = gdf.plot(
    column='Prevalence_depr_male',
    ax=ax,
    markersize=50,
    legend=True,
    cax=cax,
)
world_map_2010_male.set_title("World Map with Depression Prevalence of Males, 2010")
plt.show()

In [None]:
# Map of female depression prevalence for 2010, drawn on top of a blank world map.
fig, ax = plt.subplots(figsize=(15, 10))
# Reserve a narrow strip to the right of the map for the colour bar.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

# Base layer: every country with a white fill and a grey outline.
world.plot(ax=ax, color='white', edgecolor='gray')
# Data layer: countries coloured by the female prevalence column.
world_map_2010_female = gdf.plot(
    column='Prevalence_depr_female',
    ax=ax,
    markersize=50,
    legend=True,
    cax=cax,
)
world_map_2010_female.set_title("World Map with Depression Prevalence of Females, 2010")
plt.show()

In [None]:
# prev_depression_2010 = mergedDF.loc[mergedDF['Year'] == 2010]
# prev_depression_2010

# def prev_per_year(year):
#     if year in mergedDF['Year'] == year:
#         return mergedDF.loc[mergedDF['Year'] == year]
#     else:
#         return

# mergedDF['Prevalence_female'] = mergedDF.apply(lambda row: prev_categorized(row), axis=1)
# mergedDF
    

In [None]:
# grouped = mergedDF.groupby('Year')

# for year in grouped:
#   print('\nCREATE TABLE {}('.format(year))
  
#   print(year["column"])

In [None]:
# prev_depression_1970 = mergedDF.loc[mergedDF['Year'] == 1970]
# prev_depression_1970

# gdf = gpd.GeoDataFrame(prev_depression_1970)

# fig, ax = plt.subplots(1, 1, figsize=(15, 10))
# divider = make_axes_locatable(ax)
# cax = divider.append_axes("right", size="5%", pad=0.1)

# world.plot(ax=ax, color='white', edgecolor='gray')
# world_map_1970_female = gdf.plot(column='Prevalence_depr_female', ax=ax, markersize=50, legend=True, cax=cax)
# world_map_1970_female.set_title("World Map with Depression Prevalence of Females, 2009")
# plt.show()

    

In [None]:
# Keep only the records for the year 1990 ...
is_1990 = mergedDF['Year'] == 1990
prev_depression_1990 = mergedDF.loc[is_1990]
# prev_depression_1990

# ... and wrap them in a GeoDataFrame for plotting.
gdf1990 = gpd.GeoDataFrame(data=prev_depression_1990)
# gdf1990.head()

from mpl_toolkits.axes_grid1 import make_axes_locatable

# Map of male depression prevalence for 1990, drawn on top of a blank world map.
fig, ax = plt.subplots(figsize=(15, 10))
# Reserve a narrow strip to the right of the map for the colour bar.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

# Styling for rows whose plotted column is NaN.
# NOTE(review): rows with missing values are dropped during cleaning and the
# merge is an inner join, so this presumably never triggers - confirm.
missing_style = {
    "color": "lightgrey",
    "edgecolor": "red",
    "hatch": "///",
    "label": "Missing values",
}

# Base layer: every country with a white fill and a grey outline.
world.plot(ax=ax, color='white', edgecolor='gray')
# Data layer: countries coloured by the male prevalence column (orange-red scale).
world_map_1990_male = gdf1990.plot(
    column='Prevalence_depr_male',
    ax=ax,
    markersize=50,
    legend=True,
    cax=cax,
    cmap='OrRd',
    missing_kwds=missing_style,
)
world_map_1990_male.set_title("World Map with Depression Prevalence of Males, 1990")
plt.show()