# Analyzing Data with Pandas

## Import Libraries

In [None]:
import pandas as pd

## Load Dataset
#### Student Mental Health

In [None]:
# Load the student mental health dataset from CSV into a DataFrame.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv("../datasets/updated/student_mental_health.csv")

## Overview of the Dataset

In [None]:
# Preview the first few rows to see what the columns and values look like
df.head()

## Get Metadata

### Size

In [None]:
# Table shape as a (number of rows, number of columns) tuple
df.shape

## Understanding the Shape

In [None]:
# Unpack the shape tuple: rows are counted as students, columns as metrics
num_students, num_metrics = df.shape
print(f'There are {num_students} students measured on {num_metrics} metrics.')

### Column Names

In [None]:
# Column labels of the DataFrame
df.columns

### Data Types

In [None]:
# Data type pandas assigned to each column
df.dtypes

## Counting Analysis

### Number of Men and Women Sampled

In [None]:
# Number of men and women sampled, counted via boolean masks on the 'gender' column
is_male = df['gender'] == 'Male'
is_female = df['gender'] == 'Female'

num_men_sampled = len(df[is_male])
num_women_sampled = len(df[is_female])

print(f'There were {num_men_sampled} men sampled')
print(f'There were {num_women_sampled} women sampled')

### Count of Students by Seniority

In [None]:
# Tally students per seniority value, then order the tally by the
# seniority label instead of by descending count
students_by_seniority = df['seniority'].value_counts().sort_index()
students_by_seniority

### Students Experiencing Symptoms

In [None]:
# Students reporting at least one symptom: depression, anxiety, or panic attacks
symptom_columns = ['depressed', 'anxiety', 'panic_attacks']
has_any_symptom = df[symptom_columns].eq(True).any(axis=1)
students_experiencing_symptoms = df[has_any_symptom]

num_students_experiencing_symptoms = len(students_experiencing_symptoms)
print(f'There were {num_students_experiencing_symptoms} students experiencing symptoms out of {num_students} students.')


### Average Age of Students

In [None]:
# Mean of the 'age' column across every student in the dataset
ages = df['age']
average_age = ages.mean()
print(f'The average age of students is {average_age} years old.')

### Average Age of Students Experiencing Symptoms

In [None]:
# Mean of the 'age' column restricted to students reporting at least one symptom
symptomatic_ages = students_experiencing_symptoms['age']
average_age_symptoms = symptomatic_ages.mean()
print(f'The average age of students experiencing symptoms is {average_age_symptoms} years old.')

## Male vs Female Symptoms

### Depression

In [None]:
# Depression prevalence per gender: depressed students of a gender divided by
# all sampled students of that gender, expressed as a percentage
female_rows = students_experiencing_symptoms['gender'] == 'Female'
male_rows = students_experiencing_symptoms['gender'] == 'Male'

# Summing the 'depressed' flags counts the rows where the flag is set
num_females_with_depression = students_experiencing_symptoms.loc[female_rows, 'depressed'].sum()
num_males_with_depression = students_experiencing_symptoms.loc[male_rows, 'depressed'].sum()

female_depression_percent = num_females_with_depression / num_women_sampled * 100
male_depression_percent = num_males_with_depression / num_men_sampled * 100

print(f'The prevalence of depression in women was {female_depression_percent:.2f}% and the prevalence of depression in men was {male_depression_percent:.2f}%.')

### Anxiety

In [None]:
# Anxiety prevalence per gender: anxious students of a gender divided by
# all sampled students of that gender, expressed as a percentage
female_rows = students_experiencing_symptoms['gender'] == 'Female'
male_rows = students_experiencing_symptoms['gender'] == 'Male'

# Summing the 'anxiety' flags counts the rows where the flag is set
num_females_with_anxiety = students_experiencing_symptoms.loc[female_rows, 'anxiety'].sum()
num_males_with_anxiety = students_experiencing_symptoms.loc[male_rows, 'anxiety'].sum()

female_anxiety_percent = num_females_with_anxiety / num_women_sampled * 100
male_anxiety_percent = num_males_with_anxiety / num_men_sampled * 100

print(f'The prevalence of anxiety in women was {female_anxiety_percent:.2f}% and the prevalence of anxiety in men was {male_anxiety_percent:.2f}%.')

## Correlation between Anxiety and Panic Attacks

In [None]:
# Correlation between the 'anxiety' and 'panic_attacks' columns
# (Series.corr computes the Pearson coefficient unless told otherwise)
correlation = df['anxiety'].corr(df['panic_attacks'])

# Build each symptom mask once and reuse it for both the AND and the OR count
has_anxiety = df['anxiety'] == True
has_panic_attacks = df['panic_attacks'] == True

num_with_anxiety_and_panic_attacks = df[has_anxiety & has_panic_attacks].shape[0]
num_with_either_anxiety_or_panic_attacks = df[has_anxiety | has_panic_attacks].shape[0]

# Overlap of the two symptoms: students with both, relative to students with
# at least one. The ratio is undefined when nobody reports either symptom, so
# report NaN in that case instead of raising ZeroDivisionError.
if num_with_either_anxiety_or_panic_attacks:
    percent_with_both = num_with_anxiety_and_panic_attacks / num_with_either_anxiety_or_panic_attacks * 100
else:
    percent_with_both = float('nan')

# The denominator is students with either symptom, not all students, so the
# message states that explicitly.
print(f'Of the students with anxiety or panic attacks, {percent_with_both:.2f}% experience both.')

print(f'The correlation between anxiety and panic attacks is {correlation:.2f}.')