In [146]:
import json
import pandas as pd

In [147]:
# Load the student depression dataset from the CSV under ./data into a DataFrame.
file_path = './data/student_depression_dataset.csv'
mental_healt_df = pd.read_csv(file_path)

In [148]:
# Display the (rows, columns) shape of the loaded frame.
mental_healt_df.shape

(27901, 18)

In [149]:
# Drop every row whose 'Dietary Habits' or 'Sleep Duration' value is 'Others'.
# Both conditions are combined into a single boolean mask.
has_known_diet = mental_healt_df['Dietary Habits'].ne('Others')
has_known_sleep = mental_healt_df['Sleep Duration'].ne('Others')
mental_healt_df = mental_healt_df.loc[has_known_diet & has_known_sleep]

In [150]:
# Display how many rows carry each value of the 'Depression' column.
mental_healt_df['Depression'].value_counts()

Depression
1    16319
0    11552
Name: count, dtype: int64

In [151]:
# Overall row counts for the depression and suicidal-thoughts charts.
# The counts are stored as strings in the output dictionary.
depression_count = mental_healt_df['Depression'].value_counts()
suicide_thoughts_count = mental_healt_df['Have you ever had suicidal thoughts ?'].value_counts()

# charts_info_dict collects the data of every chart; it is created here and
# filled in by the following cells.
charts_info_dict = {
    'depression-rates': {
        'Positius': str(depression_count.loc[1]),
        'Negatius': str(depression_count.loc[0]),
    },
    'suicidal-thougths-rates': {
        'Positius': str(suicide_thoughts_count.loc['Yes']),
        'Negatius': str(suicide_thoughts_count.loc['No']),
    },
}

In [152]:
# Keep only the rows whose city appears at least 400 times in the frame.
# city_counts ends up holding the index of the retained city names.
city_counts = (
    mental_healt_df['City']
    .value_counts()
    .loc[lambda counts: counts >= 400]
    .index
)
in_large_city = mental_healt_df['City'].isin(city_counts)
mental_healt_df = mental_healt_df.loc[in_large_city]

In [153]:
# Per-city count of each Depression value for the 'city-depression' chart.
# Selecting 'id' before counting avoids counting every other column only to
# throw the result away. The outcome is a Series indexed by (City, Depression).
city_depression = mental_healt_df.groupby(['City', 'Depression'])['id'].count()

# Cities are emitted from the largest total count to the smallest.
city_totals = city_depression.groupby(level=0).sum()
ordered_cities = city_totals.sort_values(ascending=False).index

# Output label for each Depression value; the dict order (1 before 0) is the
# order in which the keys are written for every city.
depression_labels = {1: 'Positiu', 0: 'Negatiu'}

charts_info_dict['city-depression'] = {}

for city in ordered_cities:
    # Selecting on the first level of the two-level index always yields a
    # Series indexed by Depression value, so no scalar fallback is needed.
    city_data = city_depression.loc[city]

    # A city that lacks one of the two values simply gets no entry for it.
    charts_info_dict['city-depression'][city] = {
        label: str(city_data[depression])
        for depression, label in depression_labels.items()
        if depression in city_data
    }


In [154]:
# Sankey data: Dietary Habits -> suicidal thoughts -> Depression.
# Every distinct value found in the three columns becomes one node; links
# refer to a node by its position in `all_labels`.
suicidal_col = 'Have you ever had suicidal thoughts ?'

all_labels = pd.concat(
    [mental_healt_df[column] for column in ('Dietary Habits', suicidal_col, 'Depression')]
).unique().tolist()

label_to_index = dict(zip(all_labels, range(len(all_labels))))

nodes = [dict(name=label) for label in all_labels]

# First stage: one link per (Dietary Habits, suicidal thoughts) pair, weighted
# by the number of rows in that pair.
group1 = mental_healt_df.groupby(['Dietary Habits', suicidal_col]).size().reset_index(name='count')
links = [
    {
        "source": label_to_index[origin],
        "target": label_to_index[destination],
        "value": int(row_count),
    }
    for origin, destination, row_count in group1.itertuples(index=False, name=None)
]

# Second stage: one link per (suicidal thoughts, Depression) pair.
group2 = mental_healt_df.groupby([suicidal_col, 'Depression']).size().reset_index(name='count')
links.extend(
    {
        "source": label_to_index[origin],
        "target": label_to_index[destination],
        "value": int(row_count),
    }
    for origin, destination, row_count in group2.itertuples(index=False, name=None)
)

charts_info_dict['sankey-depression1'] = dict(nodes=nodes, links=links)

In [155]:
# Sankey data: Sleep Duration -> suicidal thoughts -> Depression.
# Every distinct value found in the three columns becomes one node; links
# refer to a node by its position in `all_labels`.
suicidal_col = 'Have you ever had suicidal thoughts ?'

all_labels = pd.concat(
    [mental_healt_df[column] for column in ('Sleep Duration', suicidal_col, 'Depression')]
).unique().tolist()

label_to_index = dict(zip(all_labels, range(len(all_labels))))

nodes = [dict(name=label) for label in all_labels]

# First stage: one link per (Sleep Duration, suicidal thoughts) pair, weighted
# by the number of rows in that pair.
group1 = mental_healt_df.groupby(['Sleep Duration', suicidal_col]).size().reset_index(name='count')
links = [
    {
        "source": label_to_index[origin],
        "target": label_to_index[destination],
        "value": int(row_count),
    }
    for origin, destination, row_count in group1.itertuples(index=False, name=None)
]

# Second stage: one link per (suicidal thoughts, Depression) pair.
group2 = mental_healt_df.groupby([suicidal_col, 'Depression']).size().reset_index(name='count')
links.extend(
    {
        "source": label_to_index[origin],
        "target": label_to_index[destination],
        "value": int(row_count),
    }
    for origin, destination, row_count in group2.itertuples(index=False, name=None)
)

charts_info_dict['sankey-depression2'] = dict(nodes=nodes, links=links)

In [None]:
# Mosaic-chart data: number of rows for every (Dietary Habits, Depression)
# pair, as a list of {'category', 'subcategory', 'value'} records.
grouped = (
    mental_healt_df.groupby(['Dietary Habits', 'Depression'])
    .size()
    .reset_index(name='value')
    .rename(columns={
        'Dietary Habits': 'category',
        'Depression': 'subcategory'
    })
)

# Stored under its own key: a later cell assigns the Sleep Duration mosaic to
# 'mossaic', which would otherwise replace this result before it is exported.
charts_info_dict['mossaic-dietary-habits'] = grouped.to_dict(orient='records')

In [None]:
# Mosaic-chart data: number of rows for every (Sleep Duration, Depression)
# pair, as a list of {'category', 'subcategory', 'value'} records.
pair_counts = mental_healt_df.groupby(['Sleep Duration', 'Depression']).size()

grouped = pair_counts.rename('value').reset_index().rename(
    columns={'Sleep Duration': 'category', 'Depression': 'subcategory'}
)

# Assigning to 'mossaic' replaces any value already stored under that key.
charts_info_dict['mossaic'] = grouped.to_dict('records')

In [157]:
# Write the data of every chart to a single JSON file under ../public/data.
output_json_path = '../public/data/mental-health-data.json'
with open(output_json_path, mode='w', encoding='utf-8') as json_file:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
    json.dump(charts_info_dict, fp=json_file, indent=4, ensure_ascii=False)