<div style="text-align: center; background: linear-gradient(135deg, #0b3d2e, #1e8449, #27ae60); color: white; padding: 100px 50px; border-radius: 15px; font-family: 'Segoe UI', 'Helvetica Neue', 'Roboto', sans-serif;">
<h1 style="font-size: 3.5em; margin-bottom: 20px; text-shadow: 2px 2px 4px rgba(0,0,0,0.5); font-family: 'Montserrat', 'Segoe UI', sans-serif; font-weight: 700; letter-spacing: -1px;">Autism Screening Analysis</h1>
<h2 style="font-size: 1.8em; margin-top: 30px; font-weight: 300; opacity: 0.9; font-family: 'Open Sans', 'Segoe UI', sans-serif;">Seaborn and Matplotlib</h2>
<div style="margin-top: 50px; font-size: 1.2em; opacity: 0.8; font-family: 'Source Sans Pro', 'Segoe UI', sans-serif; font-weight: 400;">Dataset: autism_screening.csv</div>
</div>


In [6]:
# Setup: import plotting libraries, set the theme, then load and clean the data
import sys, subprocess, os

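# NOTE: nothing in this cell installs seaborn; it must already be available in this kernel's environment (e.g. run `%pip install seaborn` in its own cell first)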

import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

# Green/white theme on a dark background.
# set_theme() writes its own style rcParams, so the overrides below are applied after it.
sns.set_theme(style='darkgrid')
green = '#27AE60'   # accent color; the plotting cells refer to it as `green`
purple = green      # legacy alias so any code still using `purple` keeps working
white = '#FFFFFF'

plt.rcParams.update({
    'figure.facecolor': '#111111',
    'axes.facecolor': '#111111',
    'axes.edgecolor': white,
    'axes.labelcolor': white,
    'xtick.color': white,
    'ytick.color': white,
    'text.color': white,
    'grid.color': '#FFFFFF',   # color only; transparency is set separately below
    'grid.alpha': 0.15
})

# Locate the CSV without depending on a single machine-specific location.
# Candidates are tried in order: an explicit AUTISM_CSV_PATH override, the
# repo-relative path, then the original absolute path.
_csv_candidates = [
    os.environ.get("AUTISM_CSV_PATH"),
    "../autism_screening.csv",
    "/Users/rohanmalhotra/Desktop/VS Code/Old Projects/BAC/autism_screening.csv",
]
# If none of the candidates exists, keep the original absolute path so that
# read_csv raises the same FileNotFoundError as before.
csv_path = next(
    (p for p in _csv_candidates if p and os.path.exists(p)),
    _csv_candidates[-1],
)
raw = pd.read_csv(csv_path)

# Cleaning helpers
def to_num(x):
    """Convert ``x`` with ``float()``; return ``np.nan`` if the conversion raises."""
    try:
        converted = float(x)
    except Exception:
        return np.nan
    return converted


def norm_yes_no(x):
    """Normalize a yes/no-like value to ``"YES"`` or ``"NO"``.

    Missing values (per ``pd.isna``) come back as ``np.nan``. Matching is done
    on the stripped, lower-cased string form of ``x``. Anything that is not a
    recognised token is returned as ``str(x)`` (not stripped or re-cased).
    """
    if pd.isna(x):
        return np.nan
    labels = {
        "y": "YES", "yes": "YES", "true": "YES", "1": "YES",
        "n": "NO", "no": "NO", "false": "NO", "0": "NO",
    }
    token = str(x).strip().lower()
    return labels.get(token, str(x))


def map_gender(g):
    """Map a raw gender code to ``'Male'``, ``'Female'`` or ``'Other'``.

    Only the codes ``'m'`` and ``'f'`` (after strip + lower-case) are
    recognised. Missing values and every other value map to ``'Other'``.
    """
    if pd.isna(g):
        return 'Other'
    code = str(g).strip().lower()
    return {'m': 'Male', 'f': 'Female'}.get(code, 'Other')

# Apply cleaning on a copy so `raw` stays untouched:
#   result / age  -> floats (NaN when not convertible)
#   ASD           -> normalized label derived from 'Class/ASD'
#   gender_clean  -> display label derived from 'gender'
df = raw.copy().assign(
    result=lambda frame: frame['result'].apply(to_num),
    age=lambda frame: frame['age'].apply(to_num),
    ASD=lambda frame: frame['Class/ASD'].apply(norm_yes_no),
    gender_clean=lambda frame: frame['gender'].apply(map_gender),
)

# Rows missing the score, the age or the label cannot be plotted
plot_df = df.dropna(subset=['result','age','ASD'])

print(f"Loaded rows: {len(plot_df)} (clean)")


ModuleNotFoundError: No module named 'seaborn'

# Our Analysis Overview

<div style="display: flex; justify-content: space-around; margin: 50px 0; text-align: center;">
<div style="background: linear-gradient(135deg, #0B5345, #117A65); padding: 40px 30px; border-radius: 15px; color: white; flex: 1; margin: 0 10px;">
<h2 style="font-size: 2.0em; margin-bottom: 20px; font-family: 'Poppins', 'Segoe UI', sans-serif; font-weight: 600; letter-spacing: 0.5px;">AQ Scores</h2>
<p style="font-size: 1.1em; line-height: 1.4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 400;">Overall AQ score distribution and separation between ASD classes.</p>
</div>

<div style="background: linear-gradient(135deg, #1E8449, #27AE60); padding: 40px 30px; border-radius: 15px; color: white; flex: 1; margin: 0 10px;">
<h2 style="font-size: 2.0em; margin-bottom: 20px; font-family: 'Poppins', 'Segoe UI', sans-serif; font-weight: 600; letter-spacing: 0.5px;">Demographic Profile</h2>
<p style="font-size: 1.1em; line-height: 1.4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 400;">Age and gender distributions and how they relate to ASD labels.</p>
</div>

<div style="background: linear-gradient(135deg, #145A32, #1E8449); padding: 40px 30px; border-radius: 15px; color: white; flex: 1; margin: 0 10px;">
<h2 style="font-size: 2.0em; margin-bottom: 20px; font-family: 'Poppins', 'Segoe UI', sans-serif; font-weight: 600; letter-spacing: 0.5px;">Key Findings</h2>
<p style="font-size: 1.1em; line-height: 1.4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 400;">Practical takeaways to inform screening analysis and interpretation.</p>
</div>
</div>


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import warnings

# Suppress all warnings from here on (this applies to every later cell too).
warnings.filterwarnings('ignore')

# Green/White theme
# set_theme() writes its own style rcParams, so the overrides below are applied after it.
sns.set_theme(style='darkgrid')
green = '#27AE60'  # was '#327ae6', which renders blue; this is the green used in the title banner
white = '#FFFFFF'

plt.rcParams.update({
    'figure.facecolor': '#111111',
    'axes.facecolor': '#111111',
    'axes.edgecolor': '#FFFFFF',
    'axes.labelcolor': '#FFFFFF',
    'xtick.color': '#FFFFFF',
    'ytick.color': '#FFFFFF',
    'text.color': '#FFFFFF',
    'grid.color': '#FFFFFF',   # color only
    'grid.alpha': 0.15         # transparency here
})

# Read the raw screening data; the path is relative to the kernel's working directory
raw = pd.read_csv(filepath_or_buffer='../autism_screening.csv')

def to_num(x):
    """Return ``float(x)``, or ``np.nan`` when ``float()`` raises for ``x``."""
    value = np.nan  # fallback used when the conversion fails
    try:
        value = float(x)
    except Exception:
        pass
    return value


def norm_yes_no(x):
    """Collapse yes/no spellings to ``"YES"`` / ``"NO"``.

    Returns ``np.nan`` for missing input (``pd.isna``). The comparison uses
    the stripped, lower-cased text of ``x``. Values that match neither group
    are returned as ``str(x)`` unchanged.
    """
    if pd.isna(x):
        return np.nan

    text = str(x).strip().lower()
    affirmative = ("y", "yes", "true", "1")
    negative = ("n", "no", "false", "0")

    if text in affirmative:
        return "YES"
    elif text in negative:
        return "NO"
    else:
        return str(x)


def map_gender(g):
    """Translate a gender code into a display label.

    ``'m'`` -> ``'Male'`` and ``'f'`` -> ``'Female'``, compared after
    stripping whitespace and lower-casing. Missing or unrecognised values
    give ``'Other'``.
    """
    code = None if pd.isna(g) else str(g).strip().lower()
    if code == 'm':
        return 'Male'
    elif code == 'f':
        return 'Female'
    return 'Other'


# Build the cleaned frame from `raw` without modifying it: numeric score/age,
# a normalized ASD label and a readable gender column.
df = raw.assign(
    result=raw['result'].apply(to_num),
    age=raw['age'].apply(to_num),
    ASD=raw['Class/ASD'].apply(norm_yes_no),
    gender_clean=raw['gender'].apply(map_gender),
)

# Only rows with a score, an age and a label are kept for plotting
plot_df = df.dropna(subset=['result','age','ASD'])

# Last expression: show the first rows as the cell output
plot_df.head()


In [None]:
# 1) Score distribution by ASD class (overlaid histograms + count-scaled KDE)
fig, ax = plt.subplots(figsize=(12,6), dpi=120)

# One set of 20 bin edges shared by both classes. With a bare `bins=20`, each
# call would bin its own subset over that subset's min/max, so the overlaid
# bars would not line up or be comparable.
bins = np.histogram_bin_edges(plot_df['result'], bins=20)

# kde=True draws the density curve rescaled to the histogram's count axis.
# A standalone kdeplot is on a density scale and would not match the 'Count'
# y-axis used here.
sns.histplot(data=plot_df[plot_df['ASD']=='NO'], x='result', bins=bins, kde=True,
             line_kws={'linewidth': 2}, color=white, alpha=0.6, label='ASD=NO', ax=ax)
sns.histplot(data=plot_df[plot_df['ASD']=='YES'], x='result', bins=bins, kde=True,
             line_kws={'linewidth': 2}, color=green, alpha=0.6, label='ASD=YES', ax=ax)

ax.set_title('AQ Score Distribution by ASD Class', fontsize=18, color=white)
ax.set_xlabel('Score')
ax.set_ylabel('Count')
ax.legend(facecolor='#111111', edgecolor=white)
plt.show()


In [None]:
# 2) Age vs Score scatter (colored by ASD)
fig, ax = plt.subplots(figsize=(12,6), dpi=120)

# Split the rows by class once; NO is drawn first, then YES, as before
no_rows = plot_df[plot_df['ASD'] == 'NO']
yes_rows = plot_df[plot_df['ASD'] == 'YES']

# The two series swap face and edge colors so they stay distinguishable
ax.scatter(no_rows['age'], no_rows['result'],
           s=30, c=white, edgecolors=green, linewidths=1, alpha=0.85, label='ASD=NO')
ax.scatter(yes_rows['age'], yes_rows['result'],
           s=30, c=green, edgecolors=white, linewidths=1, alpha=0.85, label='ASD=YES')

ax.set_title('Age vs Score by ASD Class', fontsize=18, color=white)
ax.set(xlabel='Age', ylabel='Score')
ax.legend(facecolor='#111111', edgecolor=white)
plt.show()


In [None]:
# 3) Gender distribution by ASD class (grouped bar)
fig, ax = plt.subplots(figsize=(12,6), dpi=120)

cats = ['Male','Female','Other']
bar_w = 0.35
x = np.arange(len(cats))

# Per-class counts in the fixed `cats` order; categories absent from a class count as 0
gender_by_class = {
    label: plot_df.loc[plot_df['ASD'] == label, 'gender_clean']
                  .value_counts()
                  .reindex(cats, fill_value=0)
    for label in ('NO', 'YES')
}
no_counts = gender_by_class['NO']
yes_counts = gender_by_class['YES']

# Shift each class half a bar width off the tick so the pair sits side by side
ax.bar(x - bar_w/2, no_counts.values, width=bar_w, color=white, edgecolor=green, label='ASD=NO', alpha=0.9)
ax.bar(x + bar_w/2, yes_counts.values, width=bar_w, color=green, edgecolor=white, label='ASD=YES', alpha=0.9)

ax.set_xticks(x)
ax.set_xticklabels(cats)
ax.set_title('Gender Distribution by ASD Class', fontsize=18, color=white)
ax.set(xlabel='Gender', ylabel='Count')
ax.legend(facecolor='#111111', edgecolor=white)
plt.show()
