In [1]:
# Greeting printed by the notebook's first cell
print("Hello Team!")

Hello Team!


In [1]:
# Import Dependencies
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np
from scipy.stats import pearsonr
from scipy.stats import linregress
# import plotly.express as px #function for hover functions

In [3]:
# Relative location of the anxiety-attack dataset
anxiety_csv = Path("Resources/anxiety_attack_dataset.csv")

# Load the CSV contents into a DataFrame
anxiety_df = pd.read_csv(filepath_or_buffer=anxiety_csv)

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
anxiety_df.describe()

Unnamed: 0,ID,Age,Sleep Hours,Physical Activity (hrs/week),Caffeine Intake (mg/day),Alcohol Consumption (drinks/week),Stress Level (1-10),Heart Rate (bpm during attack),Breathing Rate (breaths/min),Sweating Level (1-5),Therapy Sessions (per month),Diet Quality (1-10),Severity of Anxiety Attack (1-10)
count,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0,12000.0
mean,6000.5,40.96675,6.48265,5.030892,246.696083,9.492833,5.46225,119.3985,25.462333,2.987417,4.518417,5.497333,5.507583
std,3464.24595,13.47328,2.014885,2.889,144.487071,5.769364,2.897201,34.806711,8.090686,1.414482,2.86601,2.867579,2.858663
min,1.0,18.0,3.0,0.0,0.0,0.0,1.0,60.0,12.0,1.0,0.0,1.0,1.0
25%,3000.75,29.0,4.8,2.5,122.0,5.0,3.0,89.0,18.0,2.0,2.0,3.0,3.0
50%,6000.5,41.0,6.5,5.0,244.0,9.0,5.0,119.0,25.0,3.0,5.0,5.0,6.0
75%,9000.25,53.0,8.2,7.525,371.0,15.0,8.0,149.0,32.0,4.0,7.0,8.0,8.0
max,12000.0,64.0,10.0,10.0,499.0,19.0,10.0,179.0,39.0,5.0,9.0,10.0,10.0


In [None]:
# Show the column labels of the loaded dataset
anxiety_df.columns

In [None]:
# Shorten the severity column's label to 'Severity'; every other column keeps
# the label it already has.
# A label-based rename is used instead of assigning a full positional list of
# names, so a change in the CSV's column order or count cannot silently
# mislabel the data. Re-running this cell is a no-op.
anxiety_df = anxiety_df.rename(
    columns={'Severity of Anxiety Attack (1-10)': 'Severity'}
)

# Fail loudly here (rather than with a KeyError in a later cell) if the source
# label was not what we expected.
assert 'Severity' in anxiety_df.columns, "expected a 'Severity' column after renaming"

anxiety_df.head(5)

In [None]:
# Keep only gender, age and severity, and use the severity level as the row index
demographic_columns = ['Gender', 'Age', 'Severity']
organized_anxiety_df = anxiety_df.loc[:, demographic_columns].set_index(keys='Severity')
organized_anxiety_df.head()

In [None]:
# Sort rows by age in ascending order (lowest age first).
# The sorted frame is rebound to the same name instead of using inplace=True,
# so the cell contains no in-place mutation.
organized_anxiety_df = organized_anxiety_df.sort_values(by='Age', ascending=True)
organized_anxiety_df.head()

In [None]:
# Sort rows by severity level in ascending order (lowest severity first).
# 'Severity' is the index level of this frame; sort_values accepts index level
# names in `by`. The sorted frame is rebound instead of using inplace=True.
organized_anxiety_df = organized_anxiety_df.sort_values(by='Severity', ascending=True)
organized_anxiety_df.head()

In [None]:
# Count how many rows share each age value
organized_anxiety_df["Age"].value_counts()

In [None]:
# Only the final expression of a cell is rendered automatically, so each slice
# is passed to display() explicitly; a bare head(500) followed by tail(500)
# would show the bottom rows only. display() is provided by the IPython kernel.

# Top 500
display(organized_anxiety_df.head(500))

# Bottom 500
display(organized_anxiety_df.tail(500))

## What is the relationship between the severity of anxiety attacks and demographics such as age, gender, and occupation?

### Age and Severity

In [None]:
# Descriptive statistics of age within each severity level, computed from the
# full dataset. The grouped selection is built once and reused per statistic.
age_by_severity = anxiety_df.groupby('Severity')['Age']

mean = age_by_severity.mean()
median = age_by_severity.median()
variance = age_by_severity.var()
std_dev = age_by_severity.std()
sem = age_by_severity.sem()

# Assemble the statistics into one table indexed by severity level
age_df = pd.DataFrame(
    {
        'Age Mean': mean,
        'Age Median': median,
        'Age Var': variance,
        'Age Std Dev': std_dev,
        'Age Std Err': sem,
    }
)
age_df

In [None]:
# Keep only the age and severity columns of the full dataset (a column
# selection; no sorting happens here).
# NOTE(review): this rebinds `age_df`, which the previous cell assigned to the
# per-severity summary table, so that summary is no longer reachable by name.
age_df = anxiety_df[['Age', 'Severity']]
age_df.head()

In [None]:
# Row counts for every (severity level, age) pair, pivoted so that each row is
# a severity level and each column an age; pairs with no rows become 0
severity_age_sizes = organized_anxiety_df.groupby(['Severity', 'Age']).size()
age_count_df = severity_age_sizes.unstack(fill_value=0)
print(age_count_df)

In [None]:
# Severity vs gender: count the rows for every (severity level, gender) pair,
# then pivot the genders into columns so each row is one severity level.
# Pairs with no rows are filled with 0.
severity_gender_sizes = anxiety_df.groupby(["Severity", "Gender"]).size()
gender_df = severity_gender_sizes.unstack(fill_value=0)

# Display result (TODO: maybe get rid of the other category)
gender_df

In [None]:
# Restrict the counts table to the Female and Male columns; Severity stays as the index
df_selected = gender_df.loc[:, ["Female", "Male"]]
df_selected

In [None]:
# Rows whose Gender is 'Female'
female_df = organized_anxiety_df[organized_anxiety_df['Gender'] == 'Female']

# A bare expression in the middle of a cell is never rendered, so the preview
# is shown explicitly. display() is provided by the IPython kernel.
display(female_df.head())

# Number of female rows at each severity level
female_severity_df = female_df.groupby(["Severity", "Gender"]).size().unstack(fill_value=0)
print(female_severity_df)

In [None]:
#male count with respect to the serverity levels
male_df = organized_anxiety_df[organized_anxiety_df['Gender'] =='Male']
female_df.head()

male_severity_df = male_df.groupby(["Severity", "Gender"]).size().unstack(fill_value=0)
print(male_severity_df)

In [None]:
#scatter plot of female count vs severity
plt.scatter(df_selected.index, df_selected["Female"], color='red', alpha=0.6)

# Labels and title
plt.xlabel("Severity Level")
plt.ylabel("Female Count")
plt.title("Female Count vs Severity Level")

# Show the plot
plt.show()

In [None]:
#scatter plot of female count vs severity
plt.scatter(df_selected.index, df_selected["Male"], color='blue', alpha=0.6)
# plt.plot(df_selected['Severity'], regression_line, color='red', label='Regression Line')

# Labels and title
plt.xlabel("Severity Level")
plt.ylabel("Male Count")
plt.title("Male Count vs Severity Level")

# Show the plot
plt.show()

In [None]:
#correlation calculation for female/male vs severity
correlation, p_value = pearsonr(df_selected["Male"], df_selected["Female"])

# Display results
print(f"Pearson correlation coefficient: {correlation:.4f}")
print(f"P-value: {p_value:.4f}")

In [None]:
#scatter of both genders combined

# Scatter plot for Female
plt.scatter(df_selected.index, df_selected["Female"], color='red', alpha=0.6, label="Female")

# Scatter plot for Male
plt.scatter(df_selected.index, df_selected["Male"], color='blue', alpha=0.6, label="Male")

# Labels and title
plt.xlabel("Severity Level")
plt.ylabel("Count")
plt.title("Male & Female Count vs Severity Level")

# Add legend
plt.legend()

# Show the plot
plt.show()

In [None]:
#severity and gender bar graph (removed other gender category)
df_selected.plot(kind='bar', figsize=(5.8, 5)) #creating bar plot

# Add titles and labels
plt.title('Severity vs Gender Count')
plt.xlabel('Severity Level')
# plt.xticks(rotation=90)
plt.ylabel('Persons')

#plotting
plt.tight_layout()
plt.show()

In [None]:
#line plot of severity and gender count
df_selected.plot(kind="line", figsize=(8, 5))

# Customize the plot
plt.xlabel("Severity Level")
plt.ylabel("Persons")
plt.title("Severity Levels vs. Gender Count")
plt.legend(title="Gender")
plt.grid(True)

# Show the plot
plt.show()

### Occupation and Severity

In [None]:
# Severity vs occupation: count the rows for every (severity level, occupation)
# pair, then pivot occupations into columns; pairs with no rows become 0
severity_occupation_sizes = anxiety_df.groupby(["Severity", "Occupation"]).size()
occupation_df = severity_occupation_sizes.unstack(fill_value=0)

# Display result
occupation_df

In [None]:
#scatter plot
occupation_df.plot(kind="line", figsize=(8, 5))

# Customize the plot
plt.xlabel("Severity Level")
plt.ylabel("Persons Count")
plt.title("Severity Levels vs. Occcupation")
plt.legend(title="Occupation")
plt.grid(True)

# Show the plot
plt.show()

## Correlations

### Gender and Age

In [None]:
# Join the Female/Male counts onto age_df by severity level, then use the
# severity level as the row index.
# NOTE(review): if age_df holds one row per record at this point, the same pair
# of counts is repeated on every row of a severity level - confirm this is
# intended before running statistics on merged_df.
merged_df = age_df.merge(df_selected, on='Severity').set_index('Severity')

# Order rows by ascending severity level
merged_df = merged_df.sort_values(by='Severity', ascending=True)

merged_df.head()

In [None]:
organized_anxiety_df.head()

In [None]:
#correlation calculation for female/male vs severity
correlation, p_value = pearsonr(merged_df["Male"], merged_df["Female"])

# Display results
print(f"Pearson correlation coefficient: {correlation:.4f}")
print(f"P-value: {p_value:.4f}")

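Conclusion for the above p-value of 0.0010:
This data suggests there is no true correlation between gender and the severity of the anxiety attacks. Note, however, that a p-value of 0.0010 is below the conventional 0.05 significance threshold, and that the test compares male counts with female counts across severity levels rather than testing gender against severity directly, so this conclusion should be checked against the correlation coefficient itself.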