# Analyzing Data with Pandas

### Import Libraries

In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Load Dataset
#### Student Mental Health

In [41]:
df = pd.read_csv("../datasets/updated/student_mental_health.csv")

## Overview of the Dataset

In [42]:
df.head()

Unnamed: 0,timestamp,gender,age,course,seniority,gpa,married,depressed,anxiety,panic_attacks,treatment
0,2020-08-07 12:02:00,Female,18,Engineering,1,3.00 - 3.49,False,True,False,True,False
1,2020-08-07 12:04:00,Male,21,Islamic education,2,3.00 - 3.49,False,False,True,False,False
2,2020-08-07 12:05:00,Male,19,BIT,1,3.00 - 3.49,False,True,True,True,False
3,2020-08-07 12:06:00,Female,22,Laws,3,3.00 - 3.49,True,True,False,False,False
4,2020-08-07 12:13:00,Male,23,Mathemathics,4,3.00 - 3.49,False,False,False,False,False


## Get Metadata

### Size

In [43]:
# Table shape
df.shape

(100, 11)

## Understanding the Shape

In [44]:
num_students = df.shape[0]
num_metrics = df.shape[1]
print(f'There are {num_students} students measured on {num_metrics} metrics.')

There are 100 students measured on 11 metrics.


### Column Names

In [45]:
# Column names
df.columns

Index(['timestamp', 'gender', 'age', 'course', 'seniority', 'gpa', 'married',
       'depressed', 'anxiety', 'panic_attacks', 'treatment'],
      dtype='object')

### Data Types

In [46]:
df.dtypes

timestamp        object
gender           object
age               int64
course           object
seniority         int64
gpa              object
married            bool
depressed          bool
anxiety            bool
panic_attacks      bool
treatment          bool
dtype: object

## Counting Analysis

### Number of Men, Women Sampled

In [47]:
# Number of men sampled
num_men_sampled = df[df['gender'] == 'Male'].shape[0]
num_women_sampled = df[df['gender'] == 'Female'].shape[0]

print(f'There were {num_men_sampled} men sampled')
print(f'There were {num_women_sampled} women sampled')

There were 25 men sampled
There were 75 women sampled


### Count of Students by Seniority

In [48]:
# Count of students by seniority
students_by_seniority = df['seniority'].value_counts()
students_by_seniority = students_by_seniority.sort_index()
students_by_seniority

seniority
1    42
2    26
3    24
4     8
Name: count, dtype: int64

### Students Experiencing Symptoms

In [54]:
# Count of students who experience depression, anxiety, or panic attacks
students_experiencing_symptoms = df[(df['depressed'] == True) | (df['anxiety'] == True) | (df['panic_attacks'] == True)]

num_students_experiencing_symptoms = students_experiencing_symptoms.shape[0]
print(f'There were {num_students_experiencing_symptoms} students experiencing symptoms out of {num_students} students.')


There were 64 students experiencing symptoms out of 100 students.


### Average Age of Students

In [50]:
# Average age of students
average_age = df['age'].mean()
print(f'The average age of students is {average_age} years old.')

The average age of students is 20.53 years old


### Average Age of Students Experiencing Symptoms

In [56]:
# Average age of students experiencing symptoms
average_age_symptoms = students_experiencing_symptoms['age'].mean()
print(f'The average age of students experiencing symptoms is {average_age_symptoms} years old.')

The average age of students experiencing symptoms is 20.4375 years old.


## Male vs Female Symptoms

### Depression

In [63]:
num_females_with_depression = students_experiencing_symptoms[students_experiencing_symptoms['gender'] == 'Female']['depressed'].sum()
num_males_with_depression = students_experiencing_symptoms[students_experiencing_symptoms['gender'] == 'Male']['depressed'].sum()

female_depression_percent = num_females_with_depression / num_women_sampled * 100
male_depression_percent = num_males_with_depression / num_men_sampled * 100

print(f'The prevalence of depression in women was {female_depression_percent:.2f}% and the prevalence of depression in men was {male_depression_percent:.2f}%.')

The prevalence of depression in women was 38.67% and the prevalence of depression in men was 24.00%.


### Anxiety

In [64]:
num_females_with_anxiety = students_experiencing_symptoms[students_experiencing_symptoms['gender'] == 'Female']['anxiety'].sum()
num_males_with_anxiety = students_experiencing_symptoms[students_experiencing_symptoms['gender'] == 'Male']['anxiety'].sum()

female_anxiety_percent = num_females_with_anxiety / num_women_sampled * 100
male_anxiety_percent = num_males_with_anxiety / num_men_sampled * 100

print(f'The prevalence of anxiety in women was {female_anxiety_percent:.2f}% and the prevalence of anxiety in men was {male_anxiety_percent:.2f}%.')

The prevalence of anxiety in women was 32.00% and the prevalence of anxiety in men was 40.00%.


## Correlation between Anxiety and Panic Attacks

In [66]:
# Correlation between anxiety and panic attacks
correlation = df['anxiety'].corr(df['panic_attacks'])

num_with_anxiety_and_panic_attacks = df[(df['anxiety'] == True) & (df['panic_attacks'] == True)].shape[0]
num_with_either_anxiety_or_panic_attacks = df[(df['anxiety'] == True) | (df['panic_attacks'] == True)].shape[0]

percent_with_both = num_with_anxiety_and_panic_attacks / num_with_either_anxiety_or_panic_attacks * 100
print(f'The percentage of students with both anxiety and panic attacks is {percent_with_both:.2f}%.')

print(f'The correlation between anxiety and panic attacks is {correlation:.2f}.')

The percentage of students with both anxiety and panic attacks is 24.07%.
The correlation between anxiety and panic attacks is 0.08.
