<div style="text-align: center; background: linear-gradient(135deg, #2b1055, #6C3483, #8E44AD); color: white; padding: 100px 50px; border-radius: 15px; font-family: 'Segoe UI', 'Helvetica Neue', 'Roboto', sans-serif;">
<h1 style="font-size: 3.5em; margin-bottom: 20px; text-shadow: 2px 2px 4px rgba(0,0,0,0.5); font-family: 'Montserrat', 'Segoe UI', sans-serif; font-weight: 700; letter-spacing: -1px;">Autism Screening Analysis</h1>
<h2 style="font-size: 1.8em; margin-top: 30px; font-weight: 300; opacity: 0.9; font-family: 'Open Sans', 'Segoe UI', sans-serif;">Exploring AQ Scores, Demographics, and ASD Labels</h2>
<div style="margin-top: 50px; font-size: 1.2em; opacity: 0.8; font-family: 'Source Sans Pro', 'Segoe UI', sans-serif; font-weight: 400;">Dataset: autism_screening.csv</div>
</div>


# Our Analysis Overview

<div style="display: flex; justify-content: space-around; margin: 50px 0; text-align: center;">
<div style="background: linear-gradient(135deg, #6C3483, #8E44AD); padding: 40px 30px; border-radius: 15px; color: white; flex: 1; margin: 0 10px;">
<h2 style="font-size: 2.0em; margin-bottom: 20px; font-family: 'Poppins', 'Segoe UI', sans-serif; font-weight: 600; letter-spacing: 0.5px;">AQ Scores</h2>
<p style="font-size: 1.1em; line-height: 1.4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 400;">Overall AQ score distribution and separation between ASD classes.</p>
</div>

<div style="background: linear-gradient(135deg, #8E44AD, #AF7AC5); padding: 40px 30px; border-radius: 15px; color: white; flex: 1; margin: 0 10px;">
<h2 style="font-size: 2.0em; margin-bottom: 20px; font-family: 'Poppins', 'Segoe UI', sans-serif; font-weight: 600; letter-spacing: 0.5px;">Demographic Profile</h2>
<p style="font-size: 1.1em; line-height: 1.4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 400;">Age and gender distributions and how they relate to ASD labels.</p>
</div>

<div style="background: linear-gradient(135deg, #512E5F, #6C3483); padding: 40px 30px; border-radius: 15px; color: white; flex: 1; margin: 0 10px;">
<h2 style="font-size: 2.0em; margin-bottom: 20px; font-family: 'Poppins', 'Segoe UI', sans-serif; font-weight: 600; letter-spacing: 0.5px;">Key Findings</h2>
<p style="font-size: 1.1em; line-height: 1.4; font-family: 'Inter', 'Segoe UI', sans-serif; font-weight: 400;">Practical takeaways to inform screening analysis and interpretation.</p>
</div>
</div>


In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly, sys
print(plotly.__version__, plotly.__file__)
from plotly import graph_objects as go
import warnings
warnings.filterwarnings('ignore')

# Configure Plotly dark theme for slides.
# Assigning the default template once here means the figures built in later
# cells pick up "plotly_dark" without passing a template argument themselves.
import plotly.io as pio
pio.templates.default = "plotly_dark"

# Load CSV.
# The path is relative: the file is expected one directory above the kernel's
# working directory. `raw` is kept untouched; cleaning happens on a copy below.
raw = pd.read_csv('../autism_screening.csv')

# Basic cleaning and normalization

def to_num(x):
    """Convert ``x`` to a float, yielding NaN when the conversion fails.

    Any exception raised by ``float(x)`` (unparseable text, ``None``, ...)
    is absorbed and reported as ``np.nan``, so the function can be applied
    element-wise to a messy column without aborting.
    """
    try:
        converted = float(x)
    except Exception:
        converted = np.nan
    return converted


def norm_yes_no(x):
    """Normalise a yes/no style value to the strings ``"YES"`` / ``"NO"``.

    Matching is done on the stripped, lower-cased string form of ``x``.
    Missing values come back as ``np.nan``; anything that is not a known
    yes/no spelling is returned as ``str(x)`` (not stripped, not lower-cased).
    """
    if pd.isna(x):
        return np.nan

    token = str(x).strip().lower()
    spellings_by_label = (
        ("YES", ("y", "yes", "true", "1")),
        ("NO", ("n", "no", "false", "0")),
    )
    for label, spellings in spellings_by_label:
        if token in spellings:
            return label

    # Unknown value: hand back its plain string form
    return str(x)


# Working copy of the raw data with cleaned columns:
#   result, age -> floats (unparseable entries become NaN)
#   ASD         -> 'Class/ASD' normalised by norm_yes_no
# `assign` returns a new frame, so `raw` itself is left as loaded.
df = raw.assign(
    result=raw['result'].apply(to_num),
    age=raw['age'].apply(to_num),
    ASD=raw['Class/ASD'].apply(norm_yes_no),
)

def map_gender(g):
    """Translate a raw gender code into a display label.

    ``'m'`` and ``'f'`` (any case, surrounding whitespace ignored) become
    ``'Male'`` and ``'Female'``. Missing values and every other code are
    reported as ``'Other'``.
    """
    if pd.isna(g):
        return 'Other'

    labels = {'m': 'Male', 'f': 'Female'}
    code = str(g).strip().lower()
    return labels.get(code, 'Other')


# Display-friendly gender labels ('Male' / 'Female' / 'Other')
df['gender_clean'] = df['gender'].apply(map_gender)

# Keep only rows where score, age and class label are all present.
# Only missing values are removed here; no value-range filtering is applied.
required_cols = ['result', 'age', 'ASD']
plot_df = df.dropna(subset=required_cols)

# Quick dataset summary (numeric and categorical columns together)
summary_cols = required_cols + ['gender_clean']
summary = plot_df.loc[:, summary_cols].describe(include='all')
summary


In [None]:
# Score distribution by ASD class (overlaid histograms)
fig_hist = go.Figure()

# One trace per class: (value in the ASD column, bar colour).
# Both traces are placed in the same bingroup. Under barmode='overlay' Plotly
# otherwise chooses bins for each histogram trace independently, so the two
# classes could end up with different bin widths/edges and their bar heights
# would not be directly comparable.
for asd_label, bar_color in (('NO', '#FFFFFF'), ('YES', '#8E44AD')):
    fig_hist.add_trace(go.Histogram(
        x=plot_df.loc[plot_df['ASD'] == asd_label, 'result'],
        name=f'ASD={asd_label}',
        opacity=0.8,  # keep the trace underneath visible where bars overlap
        marker_color=bar_color,
        bingroup='aq_score',
    ))

fig_hist.update_layout(
    barmode='overlay',
    title=dict(text='AQ Score Distribution by ASD Class', font=dict(size=24), x=0.5),
    xaxis_title='Score',
    yaxis_title='Count',
    width=1000,
    height=600
)

# Faint grid lines on both axes
fig_hist.update_xaxes(showgrid=True, gridwidth=1, gridcolor='rgba(255,255,255,0.1)')
fig_hist.update_yaxes(showgrid=True, gridwidth=1, gridcolor='rgba(255,255,255,0.1)')

fig_hist.show()


In [None]:
# Age vs Score scatter, colored by ASD
mask_no = plot_df['ASD'] == 'NO'
mask_yes = plot_df['ASD'] == 'YES'

# (legend name, row mask, marker fill, marker outline) for each class;
# fill and outline colours are swapped between the two classes.
class_styles = [
    ('ASD=NO', mask_no, '#FFFFFF', '#8E44AD'),
    ('ASD=YES', mask_yes, '#8E44AD', 'white'),
]

fig_scatter = go.Figure()
for trace_name, class_mask, fill, outline in class_styles:
    subset = plot_df.loc[class_mask, ['age', 'result']]
    fig_scatter.add_trace(go.Scatter(
        x=subset['age'],
        y=subset['result'],
        mode='markers',
        name=trace_name,
        marker=dict(size=8, color=fill, line=dict(width=1, color=outline), opacity=0.85),
    ))

scatter_layout = dict(
    title=dict(text='Age vs Score by ASD Class', font=dict(size=24), x=0.5),
    xaxis_title='Age',
    yaxis_title='Score',
    width=1000,
    height=600,
)
fig_scatter.update_layout(**scatter_layout)

# Same faint grid on both axes
scatter_grid = dict(showgrid=True, gridwidth=1, gridcolor='rgba(255,255,255,0.1)')
fig_scatter.update_xaxes(**scatter_grid)
fig_scatter.update_yaxes(**scatter_grid)

fig_scatter.show()


In [None]:
# Gender distribution by ASD class (grouped bar)
# Fixed category order used for the x axis
genders = ['Male', 'Female', 'Other']

# Per-class tallies of each cleaned gender label
is_no = plot_df['ASD'] == 'NO'
is_yes = plot_df['ASD'] == 'YES'
counts_no = plot_df.loc[is_no, 'gender_clean'].value_counts()
counts_yes = plot_df.loc[is_yes, 'gender_clean'].value_counts()

def get_counts(series, order):
    """Return the counts from ``series`` for each label in ``order``.

    Labels absent from ``series`` contribute 0. Every value is converted
    to a plain ``int`` and the result follows the ordering of ``order``.
    """
    tallies = []
    for label in order:
        tallies.append(int(series.get(label, 0)))
    return tallies

fig_gender = go.Figure()

# One bar trace per class: (legend name, tallies for that class, bar colour)
for trace_name, tallies, bar_color in (
    ('ASD=NO', counts_no, '#FFFFFF'),
    ('ASD=YES', counts_yes, '#8E44AD'),
):
    fig_gender.add_trace(go.Bar(
        x=genders,
        y=get_counts(tallies, genders),
        name=trace_name,
        marker_color=bar_color,
    ))

gender_layout = dict(
    barmode='group',
    title=dict(text='Gender Distribution by ASD Class', font=dict(size=24), x=0.5),
    xaxis_title='Gender',
    yaxis_title='Count',
    width=1000,
    height=600,
)
fig_gender.update_layout(**gender_layout)

# Same faint grid on both axes
gender_grid = dict(showgrid=True, gridwidth=1, gridcolor='rgba(255,255,255,0.1)')
fig_gender.update_xaxes(**gender_grid)
fig_gender.update_yaxes(**gender_grid)

fig_gender.show()


**Key Findings:**
- AQ scores are generally higher among ASD=YES respondents.
- Ages span the adult range, with no single dominant cluster.
- Gender proportions differ by class; interpret this with caution, since the dataset itself may be skewed.
