# Interactive Altair and Plotly Plots

In [72]:
import pandas as pd
import altair as alt
import plotly.express as px

# Preprocessing

In [76]:
# Load the raw dataset and lift the row limit pandas applies when displaying frames.
df = pd.read_csv('panic_attack_dataset.csv')
pd.set_option('display.max_rows', None)

# Tally nulls per column (kept around for inspection), then discard incomplete rows.
missing_vals = df.isna().sum()
df = df.dropna()

# Encode the Yes/No columns as integers: "Yes" becomes 1, every other value becomes 0.
yes_no = [
    "Sweating", "Shortness_of_Breath", "Dizziness", "Trembling",
    "Smoking", "Therapy", "Medication", "Chest_Pain",
]
df[yes_no] = df[yes_no].map(lambda answer: 1 if answer == "Yes" else 0)

# Exclude rows whose Gender is "Non-binary", then encode Female as 0 and Male as 1.
gender_codes = {"Female": 0, "Male": 1}
is_binary_gender = df["Gender"].ne("Non-binary")
df = df[is_binary_gender].copy()
df.loc[:, "Gender"] = df["Gender"].map(gender_codes)
df = df.reset_index(drop=True)

numerical = [
    "Heart_Rate",
    "Caffeine_Intake",
    "Exercise_Frequency",
    "Sleep_Hours",
    "Alcohol_Consumption",
]

# function for classifying panic severity
def panic_severity(score):
    """Classify a panic score as "Low", "Medium", or "High".

    Scores up to and including 3 are "Low", scores above 3 and up to and
    including 6 are "Medium", and anything larger is "High". The bands are
    contiguous, so a fractional score such as 3.5 is "Medium" instead of
    slipping through a gap between the bands and being reported as "High".

    Note: NaN compares False against both thresholds, so it is still
    reported as "High"; drop or fill missing scores before calling this.
    """
    if score <= 3:
        return "Low"
    if score <= 6:
        return "Medium"
    return "High"

# Tag every row with the severity band computed from its Panic_Score.
df["Panic_Severity"] = df["Panic_Score"].map(panic_severity)


# Drop the ID column.
df = df.drop("ID", axis=1)

# symptom cols
symptom_cols = ["Sweating", "Shortness_of_Breath", "Dizziness", "Chest_Pain", "Trembling"]

# Convert 1/0 back to Yes/No for the symptom plot.
# NOTE: this rewrites the columns on df itself, so every later cell sees
# "Yes"/"No" strings in these five columns. The dtype check leaves columns
# that are already object dtype untouched.
for col in symptom_cols:
    if df[col].dtype != 'object':
        df[col] = df[col].map({1: "Yes", 0: "No"})

# melt symptom columns to long format: one row per (record, symptom) pair
symptom_df = df.melt(
    id_vars=["Panic_Severity"],
    value_vars=symptom_cols,
    var_name="Category",
    value_name="Value")


# keep only 'Yes' symptoms; .copy() makes the filtered frame independent so the
# column assignments below do not trigger SettingWithCopyWarning
symptom_df = symptom_df[symptom_df["Value"] == "Yes"].copy()
symptom_df["Type"] = "Symptom"
symptom_df["Label"] = symptom_df["Category"]  # just show symptom name


# lifestyle processing
# Bin the lifestyle columns. Caffeine and alcohol share one set of cut points
# and level names so both read identically in the treemap labels.
intake_bins = [-1, 2, 4, 7, 10]
intake_labels = ["None", "Light", "High", "Very High"]
df["Caffeine"] = pd.cut(df["Caffeine_Intake"], bins=intake_bins, labels=intake_labels)
df["Alcohol"] = pd.cut(df["Alcohol_Consumption"], bins=intake_bins, labels=intake_labels)
df["Sleep"] = pd.cut(df["Sleep_Hours"], bins=[-1, 6, 8, 24], labels=["Low", "Normal", "High"])

# pd.cut yields NaN for values outside the outermost bin edges; drop those rows
df = df.dropna(subset=["Caffeine", "Alcohol", "Sleep"])

# melt lifestyle columns to long format: one row per (record, lifestyle factor) pair
lifestyle_df = df.melt(
    id_vars=["Panic_Severity"],
    value_vars=["Caffeine", "Alcohol", "Sleep"],
    var_name="Category",
    value_name="Value")

lifestyle_df["Type"] = "Lifestyle"
# Label combines factor and level, e.g. "Sleep: Low"
lifestyle_df["Label"] = lifestyle_df["Category"] + ": " + lifestyle_df["Value"].astype(str)


# Stack the symptom rows and the lifestyle rows into one frame with a fresh index.
combined_df = pd.concat([symptom_df, lifestyle_df], ignore_index=True)

# Radio buttons for switching between the symptom and lifestyle views.
type_radio = alt.binding_radio(name="View:", options=["Symptom", "Lifestyle"])
type_selector = alt.param(name="TypeSelector", value="Symptom", bind=type_radio)


# Dropdown for picking a Panic_Severity level.
severity_levels = ["Low", "Medium", "High"]
severity_dropdown = alt.binding_select(name="Panic Severity:", options=severity_levels)
severity_selector = alt.param(name="SeveritySelector", value="Low", bind=severity_dropdown)

In [77]:
# Preview the first five rows of the processed frame.
df.head(n=5)

Unnamed: 0,Age,Gender,Panic_Attack_Frequency,Duration_Minutes,Trigger,Heart_Rate,Sweating,Shortness_of_Breath,Dizziness,Chest_Pain,...,Exercise_Frequency,Sleep_Hours,Alcohol_Consumption,Smoking,Therapy,Panic_Score,Panic_Severity,Caffeine,Alcohol,Sleep
0,56,0,9,5,Caffeine,134,Yes,No,Yes,Yes,...,3,6.4,5,1,1,5,Medium,,High,Normal
1,46,1,8,9,Stress,139,Yes,Yes,No,No,...,5,5.0,3,0,1,7,High,,Light,Low
2,32,0,6,31,PTSD,141,No,Yes,Yes,No,...,0,8.3,8,0,1,7,High,Light,Very_High,High
3,60,1,5,20,Caffeine,109,Yes,Yes,No,No,...,3,5.3,8,0,0,1,Low,Light,Very_High,Low
4,38,1,0,44,Social Anxiety,154,Yes,Yes,Yes,No,...,4,4.8,6,0,0,8,High,,High,Low


# Visualizations

In [78]:
import plotly.express as px

# Step 1: Count rows for every (Type, Category, Label, Panic_Severity) leaf,
# largest groups first
tree_df = (
    combined_df
    .groupby(["Type", "Category", "Label", "Panic_Severity"])
    .size()
    .reset_index(name="Count")
    .sort_values("Count", ascending=False)
)

# Step 2: Calculate total count per label path (all severities of one label)
tree_df['Label_Total'] = tree_df.groupby(["Type", "Category", "Label"])['Count'].transform('sum')

# Step 3: Compute each severity's share of its label group, in percent
tree_df['Percent'] = (tree_df['Count'] / tree_df['Label_Total'] * 100).round(1)

# Step 4: Color mapping (light to dark blue as severity increases)
severity_color_map = {
    "Low": "#89CFF0",
    "Medium": "#1E81B0",
    "High": "#0d3b66"
}

# Step 5: Create treemap with raw severity labels (no % in box).
# Percent is handed over through custom_data so Plotly Express carries it along
# with its own node list. The figure contains parent boxes as well as leaves and
# orders them itself, so a leaf-only array assigned afterwards would attach
# percentages to the wrong boxes. Parent boxes that merge leaves with different
# percentages are expected to show Plotly's "(?)" placeholder instead of a number.
fig = px.treemap(
    tree_df,
    path=["Type", "Category", "Label", "Panic_Severity"],
    values="Count",
    color="Panic_Severity",
    color_discrete_map=severity_color_map,
    custom_data=["Percent"],
    title="Treemap of Symptoms and Lifestyle Factors by Panic Severity"
)

# Step 6: Layout tweaks
fig.update_layout(
    font=dict(family="Roboto Mono"),
    title=dict(
        text="Treemap of Symptoms and Lifestyle Factors by Panic Severity",
        x=0.5,
        font=dict(family="Roboto Mono", color="#0d3b66")
    ),
    paper_bgcolor="white",
    plot_bgcolor="white",
    margin=dict(t=60, l=20, r=20, b=20),
    uniformtext=dict(minsize=10, mode='show'),
    coloraxis_showscale=False
)

# Step 7: Add custom hovertemplate to show percentage; customdata[0] is the
# Percent column supplied through custom_data in Step 5
fig.update_traces(
    root_color="white",
    tiling=dict(pad=3),
    marker=dict(line=dict(color="white", width=1)),
    hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percent: %{customdata[0]}%<extra></extra>'
)

fig.show()
# fig.write_html("treemap_panic_severity.html")

# Panic Duration by Trigger

In [80]:
# Dropdown choices: "All" first, followed by every trigger in sorted order
trigger_options = ["All", *sorted(df["Trigger"].unique())]

# Dropdown-bound parameter that starts on "All"
trigger_dropdown = alt.binding_select(name="Trigger:", options=trigger_options)
trigger_param = alt.param(name="TriggerSelect", value="All", bind=trigger_dropdown)

# Fixed severity-to-color assignment for the stacked bars
duration_color_scale = alt.Scale(
    domain=["Low", "Medium", "High"],
    range=["#89CFF0", "#1E81B0", "#0d3b66"],
)

# A row passes when its trigger equals the selection, or when "All" is selected
matches_trigger = (alt.datum.Trigger == trigger_param) | (trigger_param == "All")

# Histogram of attack duration, stacked by severity and filtered by the dropdown
chart_all_severities = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X("Duration_Minutes:Q", title="Duration (minutes)", bin=alt.Bin(maxbins=30)),
        y=alt.Y("count():Q", title="Number of Cases"),
        color=alt.Color("Panic_Severity:N", title="Panic Severity", scale=duration_color_scale),
        tooltip=["count()", "Panic_Severity"],
    )
    .transform_filter(matches_trigger)
    .add_params(trigger_param)
    .properties(
        title="Panic Duration Distribution by Trigger",
        width=600,
        height=400,
    )
)

chart_all_severities
#chart_all_severities.save("panic_duration_by_trigger.html")

# How sleep affects panic score

In [21]:
# Slider that sets the minimum number of sleep hours a point needs to be shown
slider = alt.binding_range(min=4, max=10, step=0.5, name='Min Sleep Hours:')
sleep_param = alt.param(bind=slider, value=5)

# General blue palette (left unchanged; the severity encoding below uses an
# explicit level-to-color mapping instead)
blue_shades = ['#89cff0', '#145DA0', '#1E81B0', '#279AF1', '#89CFF0']

# Pin each severity level to its color. Without an explicit domain the levels
# are assigned colors in alphabetical order (High, Low, Medium), which gives
# "High" the lightest blue and disagrees with the notebook's other charts.
sleep_color_scale = alt.Scale(
    domain=["Low", "Medium", "High"],
    range=["#89CFF0", "#1E81B0", "#0d3b66"]
)

chart_sleep = alt.Chart(df).mark_circle(size=60, opacity=0.6).encode(
    x=alt.X("Sleep_Hours:Q", title="Sleep Hours", scale=alt.Scale(domain=[4, 9.5])),
    y="Panic_Score:Q",
    color=alt.Color("Panic_Severity:N", title="Panic Severity", scale=sleep_color_scale),
    tooltip=["Sleep_Hours", "Panic_Score", "Panic_Severity"]
).add_params(sleep_param).transform_filter(
    # keep only points at or above the slider value
    alt.datum.Sleep_Hours >= sleep_param
).properties(
    title="How Does Sleep Affect Panic Score?"
)

chart_sleep

In [81]:
import pandas as pd
import altair as alt

# Bin alcohol into 3 levels.
# NOTE: this overwrites the "Alcohol" column on df that the preprocessing cell
# created with four levels, so the result depends on cell execution order.
df["Alcohol"] = pd.cut(
    df["Alcohol_Consumption"],
    bins=[-1, 2, 5, 10],  # Bins: None (≤2), Light (2–5], High (>5)
    labels=["None", "Light", "High"]
)

# Drop rows with missing key values
df = df.dropna(subset=["Alcohol", "Sleep_Hours", "Panic_Severity"])

# Severity levels in their natural order, with the blue palette used by the
# notebook's other severity-colored charts
severity_order = ["Low", "Medium", "High"]
severity_colors = ["#89CFF0", "#1E81B0", "#0d3b66"]

# Boxplot (no tooltip needed, so we just omit it)
box = alt.Chart(df).mark_boxplot(extent='min-max').encode(
    x=alt.X("Sleep_Hours:Q", title="Sleep Hours", scale=alt.Scale(domain=[4, 9.5])),
    y=alt.Y("Alcohol:N", sort=["None", "Light", "High"],
            axis=alt.Axis(title=None)),  # Hide y-axis title from repeating
    color=alt.Color(
        "Panic_Severity:N",
        legend=None,
        scale=alt.Scale(domain=severity_order, range=severity_colors)
    )
)

# Facet by Panic Severity; the explicit sort stacks the rows Low -> Medium -> High
# instead of the default alphabetical order (High, Low, Medium)
chart = box.facet(
    row=alt.Row("Panic_Severity:N", title=None, sort=severity_order)
).resolve_axis(
    y='shared'
).properties(
    title="Sleep Hours by Alcohol Consumption and Panic Severity (3-Level Binning)",
    bounds="flush",
    spacing=10
)

chart