## VI LAB 2 

## Data collection and preparation

For this project, the data was obtained directly from the NSF Award Search portal, which is the official source used by the National Science Foundation to publish information about funded grants (referred to administratively as “awards”). This source was chosen because it provides all the attributes required by the project specification, including state, directorate, award dates, and awarded amounts, and because it allows filtering by both time period and award status.

Two datasets were collected to fully satisfy the project requirements. The first dataset contains all NSF grants awarded during the last five years (2020–2024) and serves as the baseline for analyzing current funding distribution and evolution. The second dataset contains NSF grants that were explicitly terminated during the Trump administration (2017–2021), filtered using the “terminated” award status. This separation is intentional and necessary, as the project explicitly requires analyzing both recent grants and historical cancellations from a different political period.

Due to export limitations of the NSF portal, the 2020–2024 dataset was downloaded in multiple smaller time ranges and later merged. This approach ensured complete coverage while preserving data integrity and consistency.

## Data cleaning

All major data cleaning was performed in OpenRefine to keep the Python notebook focused on visualization rather than preprocessing. In OpenRefine, column names were standardized across datasets, unnecessary administrative fields were removed, and monetary values were converted to numeric format. A derived year attribute was created from the award start date to support temporal analysis. Additionally, a categorical flag (cancelled_trump) was introduced to clearly distinguish between baseline grants and Trump-era terminated grants.

After cleaning, the datasets were exported as clean CSV files and loaded into the Python notebook. Only minimal preprocessing was performed in Python, consisting of type checks, column name normalization, and the creation of aggregated DataFrames for each visualization task.

In [4]:
import pandas as pd
import altair as alt

# Performance: required by the project (datasets > 5000 rows)
alt.data_transformers.enable("vegafusion")


DataTransformerRegistry.enable('vegafusion')

In [5]:
# load datasets

base_path = "."

df_grants = pd.read_csv(
    f"{base_path}/NSF_Grants_Last5Years_Clean.csv"
)

df_trump = pd.read_csv(
    f"{base_path}/trump17-21-csv.csv"
)


In [6]:
# Ensure correct dtypes
df_grants["year"] = df_grants["year"].astype(int)
df_grants["award_amount"] = pd.to_numeric(df_grants["award_amount"], errors="coerce")

df_trump["year"] = df_trump["year"].astype(int)
df_trump["award_amount"] = pd.to_numeric(df_trump["award_amount"], errors="coerce")


In [7]:
# Drop rows with critical missing values
df_grants = df_grants.dropna(subset=["state", "directorate", "year"])
df_trump = df_trump.dropna(subset=["directorate"])


In [8]:
year_selection = alt.selection_point(
    fields=["year"],
    bind=alt.binding_select(
        options=sorted(df_grants["year"].unique()),
        name="Year: "
    ),
    value=sorted(df_grants["year"].unique())[0]
)

state_selection = alt.selection_point(
    fields=["state"],
    bind=alt.binding_select(
        options=sorted(df_grants["state"].unique()),
        name="State: "
    )
)


## Q1: How are the grants distributed by states every year?

In [9]:
import altair as alt
import pandas as pd

# 0. ENSURE RENDERER IS CORRECT (Fixes the error)
alt.data_transformers.enable("default")

# 1. DATA PREPARATION
q1_df = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

# 2. CREATE SELECTIONS
# Define the missing year_selection
year_selection = alt.selection_point(
    fields=["year"],
    bind=alt.binding_select(
        options=sorted(q1_df["year"].unique()), name="Select Year: "
    ),
    value=[{"year": 2021}],  
)

state_click = alt.selection_point(fields=["state"], empty="all")

# 3. CREATE CHARTS
q1_bars = (
    alt.Chart(q1_df)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title="State"),
        y=alt.Y("grants_count:Q", title="Number of grants"),
        color=alt.condition(
            state_click,
            alt.Color(
                "grants_count:Q", scale=alt.Scale(scheme="blues"), title="Grants count"
            ),
            alt.value("lightgray"),
        ),
        tooltip=[
            alt.Tooltip("state:N", title="State"),
            alt.Tooltip("grants_count:Q", title="Grants"),
            alt.Tooltip("total_amount:Q", title="Total amount ($)", format=",.0f"),
        ],
    )
    .add_params(year_selection, state_click)
    .transform_filter(year_selection)
    .properties(width=750, height=380, title="Q1 — Grants by State")
)

q1_state_trend = (
    alt.Chart(q1_df)
    .mark_line(point=True)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("grants_count:Q", title="Grants"),
        tooltip=[
            alt.Tooltip("state:N"),
            alt.Tooltip("year:O"),
            alt.Tooltip("grants_count:Q", title="Grants"),
            alt.Tooltip("total_amount:Q", title="Total amount ($)", format=",.0f"),
        ],
    )
    .transform_filter(state_click)
    .properties(width=750, height=180, title="Selected State — Grants over Time")
)

# 4. DISPLAY
(q1_bars & q1_state_trend)

In [10]:
import altair as alt
import pandas as pd
from vega_datasets import data

# 0. FIX THE RENDERER (Crucial for avoiding errors)
alt.data_transformers.enable("default")

# 1. PREPARE THE DATA
# We ensure the aggregation is correct
q1_df = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

# 2. DEFINE THE MAP DATA
# We need to map State Abbreviations (AK, AL) to FIPS Codes (02, 01) for the map to work.
state_to_fips = {
    "WA": "53",
    "DE": "10",
    "DC": "11",
    "WI": "55",
    "WV": "54",
    "HI": "15",
    "FL": "12",
    "WY": "56",
    "PR": "72",
    "NJ": "34",
    "NM": "35",
    "TX": "48",
    "LA": "22",
    "NC": "37",
    "ND": "38",
    "NE": "31",
    "TN": "47",
    "NY": "36",
    "PA": "42",
    "AK": "02",
    "NV": "32",
    "NH": "33",
    "VA": "51",
    "CO": "08",
    "CA": "06",
    "AL": "01",
    "AR": "05",
    "VT": "50",
    "IL": "17",
    "GA": "13",
    "IN": "18",
    "IA": "19",
    "MA": "25",
    "AZ": "04",
    "ID": "16",
    "CT": "09",
    "ME": "23",
    "MD": "24",
    "OK": "40",
    "OH": "39",
    "UT": "49",
    "MO": "29",
    "MN": "27",
    "MI": "26",
    "RI": "44",
    "KS": "20",
    "MT": "30",
    "MS": "28",
    "SC": "45",
    "KY": "21",
    "OR": "41",
    "SD": "46",
}

# Create a lookup dataframe
fips_df = pd.DataFrame(list(state_to_fips.items()), columns=["state", "id"])
fips_df["id"] = fips_df["id"].astype(int)  # Ensure ID is integer to match topojson

# 3. AGGREGATE DATA BY STATE FIRST (for the map - sum across all years)
# This gives us one row per state with total funding
q1_map_agg = (
    q1_df.groupby("state")
    .agg(total_amount=("total_amount", "sum"))
    .reset_index()
)

# Merge FIPS IDs into aggregated data
q1_map_data_agg = q1_map_agg.merge(fips_df, on="state", how="inner")

# Keep the full data with FIPS for the trend chart
q1_map_data_full = q1_df.merge(fips_df, on="state", how="inner")

# 4. CREATE THE INTERACTION (Compatible with Altair 4 & 5)
# "Clicking a state selects it"
try:
    # Try new syntax first
    state_select = alt.selection_point(fields=["id"], empty="all")
except AttributeError:
    # Fallback for older Altair versions
    state_select = alt.selection_single(fields=["id"], empty="all")

# 5. DRAW THE MAP (Overview)
# We use a standard US map topology
us_states = alt.topo_feature(data.us_10m.url, "states")

map_chart = (
    alt.Chart(us_states)
    .mark_geoshape(stroke="white", strokeWidth=0.5)
    .transform_lookup(
        lookup="id",
        from_=alt.LookupData(q1_map_data_agg, "id", ["total_amount", "state"]),
        default=0  # Default value for states without data
    )
    .encode(
        color=alt.Color(
            "total_amount:Q", 
            scale=alt.Scale(scheme="blues"), 
            title="Total Funding ($)",
            legend=alt.Legend(format="$,.0f")
        ),
        tooltip=[
            alt.Tooltip("state:N", title="State"),
            alt.Tooltip("total_amount:Q", title="Total Funding", format="$,.0f")
        ],
    )
    .add_params(state_select)
    .project(type="albersUsa")
    .properties(
        width=700, height=400, title="Q1: Funding by State (Click to Filter Trend)"
    )
)

# 6. DRAW THE TREND LINE (Detail)
# Shows the trend for the SELECTED state
# Filter the trend data based on the selected state's id
trend_chart = (
    alt.Chart(q1_map_data_full)
    .mark_line(point=True, strokeWidth=2)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("total_amount:Q", title="Total Amount ($)", axis=alt.Axis(format="$,.0f")),
        color=alt.value("lightblue"),
        tooltip=[
            alt.Tooltip("state:N", title="State"),
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("total_amount:Q", title="Total Amount", format="$,.0f")
        ],
    )
    .transform_filter(state_select)  # This will filter by the id field in the selection
    .add_params(state_select)
    .properties(width=700, height=200, title="Funding Trend for Selected State")
)

# 7. COMBINE
final_viz = map_chart & trend_chart
final_viz

In [11]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA PREPARATION
q1_df = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

# 2. CREATE SELECTIONS
min_year = int(q1_df["year"].min())
max_year = int(q1_df["year"].max())

slider = alt.binding_range(min=min_year, max=max_year, step=1, name="Select Year: ")

try:
    # Modern Altair
    year_select = alt.selection_point(
        name="year_select", fields=["year"], bind=slider, value=[{"year": max_year}]
    )
    state_select = alt.selection_point(
        name="state_select", fields=["state"], empty="all"
    )
except AttributeError:
    # Older Altair
    year_select = alt.selection_single(
        name="year_select", fields=["year"], bind=slider, init={"year": max_year}
    )
    state_select = alt.selection_single(
        name="state_select", fields=["state"], empty="all"
    )

# 3. CHART A: MAIN BAR CHART
bars = (
    alt.Chart(q1_df)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title="State"),
        y=alt.Y("grants_count:Q", title="Number of Grants"),
        color=alt.condition(
            state_select,
            alt.Color("grants_count:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#f0f0f0"),  # Very light gray for unselected
        ),
        tooltip=[
            alt.Tooltip("state:N"),
            alt.Tooltip("grants_count:Q"),
            alt.Tooltip("total_amount:Q", format="$,.0f"),
        ],
    )
    .transform_filter(year_select)
    .properties(width=550, height=400, title="Grants Distribution")
)

# 4. CHART B: KPI TEXT (Subtle & Professional)
# We layer two text marks: one for the label, one for the number
base_text = (
    alt.Chart(q1_df).transform_filter(year_select).transform_filter(state_select)
)

# Layer 1: The Label "Total Funding"
label = base_text.mark_text(
    align="center", color="#888", fontSize=14, dy=-15  # Light gray  # Move up slightly
).encode(text=alt.value("Total Funding"), y=alt.value(200), x=alt.value(100))

# Layer 2: The Value (The Number)
value = base_text.mark_text(
    align="center",
    color="#444",  # Darker gray (but not black)
    fontSize=24,  # Smaller than 40
    fontWeight="bold",
    dy=15,  # Move down slightly
).encode(
    text=alt.Text("sum(total_amount):Q", format="$,.0f"),
    y=alt.value(200),
    x=alt.value(100),
)

kpi_section = (label + value).properties(width=200, height=400)

# 5. DISPLAY (Combine and Attach Slider)
# Attaching add_params to the final object puts the slider at the bottom
final_q1 = (
    (bars | kpi_section)
    .add_params(year_select, state_select)
    .resolve_scale(color="independent")
)

final_q1

In [57]:
import altair as alt
import pandas as pd
from vega_datasets import data

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA PREPARATION
q1_df = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

# 2. MAP DATA PREPARATION
state_to_fips = {
    "WA": "53",
    "DE": "10",
    "DC": "11",
    "WI": "55",
    "WV": "54",
    "HI": "15",
    "FL": "12",
    "WY": "56",
    "PR": "72",
    "NJ": "34",
    "NM": "35",
    "TX": "48",
    "LA": "22",
    "NC": "37",
    "ND": "38",
    "NE": "31",
    "TN": "47",
    "NY": "36",
    "PA": "42",
    "AK": "02",
    "NV": "32",
    "NH": "33",
    "VA": "51",
    "CO": "08",
    "CA": "06",
    "AL": "01",
    "AR": "05",
    "VT": "50",
    "IL": "17",
    "GA": "13",
    "IN": "18",
    "IA": "19",
    "MA": "25",
    "AZ": "04",
    "ID": "16",
    "CT": "09",
    "ME": "23",
    "MD": "24",
    "OK": "40",
    "OH": "39",
    "UT": "49",
    "MO": "29",
    "MN": "27",
    "MI": "26",
    "RI": "44",
    "KS": "20",
    "MT": "30",
    "MS": "28",
    "SC": "45",
    "KY": "21",
    "OR": "41",
    "SD": "46",
}
fips_df = pd.DataFrame(list(state_to_fips.items()), columns=["state", "id"])
fips_df["id"] = fips_df["id"].astype(int)
q1_full = q1_df.merge(fips_df, on="state", how="inner")

# 3. INTERACTION SETUP
min_year = int(q1_full["year"].min())
max_year = int(q1_full["year"].max())
slider = alt.binding_range(min=min_year, max=max_year, step=1, name="Select Year: ")

try:
    year_select = alt.selection_point(
        name="year_select", fields=["year"], bind=slider, value=[{"year": max_year}]
    )
    state_select = alt.selection_point(name="state_select", fields=["id"], empty="all")
except AttributeError:
    year_select = alt.selection_single(
        name="year_select", fields=["year"], bind=slider, init={"year": max_year}
    )
    state_select = alt.selection_single(name="state_select", fields=["id"], empty="all")

# 4. CHART A: THE MAP (Smaller & Left)
us_states = alt.topo_feature(data.us_10m.url, "states")

map_base = (
    alt.Chart(us_states)
    .mark_geoshape(fill="lightgray", stroke="white")
    .project(type="albersUsa")
)

map_layer = (
    alt.Chart(us_states)
    .mark_geoshape(stroke="white")
    .transform_lookup(
        lookup="id",
        from_=alt.LookupData(q1_full, "id", ["total_amount", "state", "year"]),
    )
    .transform_filter(year_select)
    .encode(
        color=alt.Color(
            "total_amount:Q", scale=alt.Scale(scheme="blues"), title="Funding ($)"
        ),
        tooltip=["state:N", alt.Tooltip("total_amount:Q", format="$,.0f")],
    )
    .project(type="albersUsa")
)

the_map = (map_base + map_layer).properties(
    width=300, height=350, title="Geographic Overview"  # REDUCED WIDTH (was 500)
)

# 5. CHART B: THE BAR CHART (Larger & Right)
the_bars = (
    alt.Chart(q1_full)
    .mark_bar()
    .encode(
        x=alt.X("total_amount:Q", title="Total Funding ($)"),
        y=alt.Y("state:N", sort="-x", title="State"),
        color=alt.condition(
            state_select,
            alt.Color("total_amount:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("lightgray"),
        ),
        tooltip=["state", "total_amount"],
    )
    .transform_filter(year_select)
    .properties(
        width=400,  # INCREASED WIDTH (was 200)
        height=350,
        title="Ranked Funding by State",
    )
)

# 6. CHART C: THE EVOLUTION (Uniform Color)
the_trend = (
    alt.Chart(q1_full)
    .mark_line(point=True)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("total_amount:Q", title="Total Funding ($)"),
        color=alt.value("steelblue"),  # UNIFORM COLOR (Fixed Blue)
        tooltip=["state", "year", alt.Tooltip("total_amount", format="$,.0f")],
    )
    .transform_filter(state_select)
    .properties(
        width=750,  # Matches the sum of top charts approx (300+400 + padding)
        height=200,
        title="Evolution of Funding (Selected State)",
    )
)

# 7. FINAL DASHBOARD
final_dashboard = (
    ((the_bars | the_map) & the_trend)
    .add_params(year_select, state_select)
    .resolve_scale(color="independent")
)

final_dashboard

In [120]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA PREPARATION
# We need detailed data for the trend (state + year), which you already have in q1_df
q1_df = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

# 2. CREATE SELECTIONS
min_year = int(q1_df["year"].min())
max_year = int(q1_df["year"].max())

slider = alt.binding_range(min=min_year, max=max_year, step=1, name="Select Year: ")

try:
    # Modern Altair
    year_select = alt.selection_point(
        name="year_select", fields=["year"], bind=slider, value=[{"year": max_year}]
    )
    state_select = alt.selection_point(
        name="state_select", fields=["state"], empty="all"
    )
except AttributeError:
    # Older Altair
    year_select = alt.selection_single(
        name="year_select", fields=["year"], bind=slider, init={"year": max_year}
    )
    state_select = alt.selection_single(
        name="state_select", fields=["state"], empty="all"
    )

# 3. LEFT COLUMN: MAIN BAR CHART
bars = (
    alt.Chart(q1_df)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title="State"),
        y=alt.Y("grants_count:Q", title="Number of Grants"),
        color=alt.condition(
            state_select,
            alt.Color("grants_count:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#f0f0f0"),  # Light gray for unselected
        ),
        tooltip=[
            alt.Tooltip("state:N"),
            alt.Tooltip("grants_count:Q"),
            alt.Tooltip("total_amount:Q", format="$,.0f"),
        ],
    )
    .transform_filter(year_select)
    .properties(width=500, height=400, title="Grants Distribution (Click a bar)")
)

# 4. RIGHT COLUMN: EVOLUTION + KPI

# A. Evolution Chart (Top Right)
# Shows the trend for the selected state over ALL years
trend_chart = (
    alt.Chart(q1_df)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X("year:O", title="Year"),  # Ordinal to show specific years
        y=alt.Y("total_amount:Q", title="Total Amount ($)", axis=alt.Axis(format="~s")),
        color=alt.value("#4c78a8"),  # Blue to match the bar chart color scheme
        tooltip=[alt.Tooltip("year:O"), alt.Tooltip("total_amount:Q", format="$,.0f")],
    )
    .transform_filter(state_select)  # This is the magic: Filter by the click!
    .properties(
        width=250,
        height=180,  # Half the height of the main chart roughly
        title="History (Selected State)",
    )
)

# B. KPI Text (Bottom Right)
base_text = (
    alt.Chart(q1_df).transform_filter(year_select).transform_filter(state_select)
)

# Layer 1: Label
label = base_text.mark_text(align="center", color="#888", fontSize=14, dy=-15).encode(
    text=alt.value("Total Funding (Selected Year)"), y=alt.value(75), x=alt.value(110)
)

# Layer 2: Value
value = base_text.mark_text(
    align="center",
    color="#444",
    fontSize=24,
    fontWeight="bold",
    dy=15,
).encode(
    text=alt.Text("sum(total_amount):Q", format="$,.0f"),
    y=alt.value(75),  # Centered vertically in its box
    x=alt.value(110),  # Centered horizontally
)

kpi_section = (label + value).properties(width=225, height=155)

# 5. ASSEMBLE
# Right column is Trend on top of KPI
right_col = trend_chart & kpi_section

# Final is Bars on left of Right Column
final_q1 = (
    (bars | right_col)
    .add_params(year_select, state_select)
    .resolve_scale(color="independent")
)

final_q1

In [121]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA PREPARATION

# A. Yearly Data (Specific Years)
q1_yearly = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

# B. Global Data (Year 0 = "All Time")
q1_total = (
    df_grants.groupby(["state"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q1_total["year"] = 0  # Assign 0 for the aggregate

# C. Combine
q1_full = pd.concat([q1_yearly, q1_total], ignore_index=True)


# 2. INTERACTION SETUP
# Get list of years from data + Add 0
years = sorted(q1_yearly["year"].unique())
year_options = [0] + years
year_labels = ["All Years (Total)"] + [str(y) for y in years]

input_element = alt.binding_select(
    options=year_options, labels=year_labels, name="Select Year: "
)

try:
    # Modern Altair
    year_select = alt.selection_point(
        name="year_select", fields=["year"], bind=input_element, value=[{"year": 0}]
    )
    state_select = alt.selection_point(
        name="state_select", fields=["state"], empty="all"
    )
except AttributeError:
    # Older Altair
    year_select = alt.selection_single(
        name="year_select", fields=["year"], bind=input_element, init={"year": 0}
    )
    state_select = alt.selection_single(
        name="state_select", fields=["state"], empty="all"
    )


# 3. LEFT COLUMN: MAIN BAR CHART
# Uses q1_full to show either specific year stats or All-Time totals
bars = (
    alt.Chart(q1_full)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title="State"),
        y=alt.Y("grants_count:Q", title="Number of Grants"),
        color=alt.condition(
            state_select,
            alt.Color("grants_count:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#f0f0f0"),  # Light gray for unselected
        ),
        tooltip=[
            alt.Tooltip("state:N"),
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("grants_count:Q", title="Grants"),
            alt.Tooltip("total_amount:Q", format="$,.0f", title="Total Amount"),
        ],
    )
    .add_params(year_select, state_select)
    .transform_filter(year_select)  # <--- Listens to Dropdown
    .properties(width=500, height=400, title="Grants Distribution (Click a bar)")
)


# 4. RIGHT COLUMN: EVOLUTION + KPI

# A. Evolution Chart (Top Right)
# Uses q1_yearly ONLY (excludes Year 0 so it doesn't plot a weird point)
trend_chart = (
    alt.Chart(q1_yearly)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("total_amount:Q", title="Total Amount ($)", axis=alt.Axis(format="~s")),
        color=alt.value("#4c78a8"),
        tooltip=[alt.Tooltip("year:O"), alt.Tooltip("total_amount:Q", format="$,.0f")],
    )
    .transform_filter(state_select)  # Listens to Click
    .properties(
        width=250,
        height=180,
        title="History (Selected State)",
    )
)

# B. KPI Text (Bottom Right)
# Uses q1_full to show correct totals based on dropdown
base_text = (
    alt.Chart(q1_full).transform_filter(year_select).transform_filter(state_select)
)

# Layer 1: Label
label = base_text.mark_text(align="center", color="#888", fontSize=14, dy=-15).encode(
    text=alt.value("Total Funding (Selected Year)"), y=alt.value(75), x=alt.value(110)
)

# Layer 2: Value
value = base_text.mark_text(
    align="center",
    color="#444",
    fontSize=24,
    fontWeight="bold",
    dy=15,
).encode(
    text=alt.Text("sum(total_amount):Q", format="$,.0f"),
    y=alt.value(75),
    x=alt.value(110),
)

kpi_section = (label + value).properties(width=225, height=155)


# 5. ASSEMBLE
right_col = trend_chart & kpi_section

final_q1 = (
    (bars | right_col).resolve_scale(color="independent").configure_view(stroke=None)
)

final_q1

To analyze grant distribution by state, I implemented a **composite dashboard** centered on a sorted bar chart. A bar chart was chosen over a choropleth map for the primary view because it allows for precise ranking and direct comparison of grant magnitudes, which are often obscured by geography in map views.

The design follows Shneiderman’s mantra: the bars provide the **overview** for the selected year. The **filtering** mechanism (year slider) enables temporal exploration, allowing users to observe shifts in distribution over time. **Details-on-demand** are achieved through linking: clicking a specific state isolates it visually (using a "focus+context" gray/blue color scheme) and triggers the side panels.

The right-hand column adds critical context: the **trend line** reveals the selected state's 5-year funding trajectory (evolution), while the **KPI text** provides the precise financial figure ($), bridging the gap between abstract patterns and exact data. This structure answers the question by showing both the relative standing of states and their individual historical performance.

## Q2: How are the grants distributed per directorates? And for a certain year?

In [40]:
# Q2 aggregation: grants per directorate per year
q2_df = (
    df_grants
    .groupby(["directorate", "year"])
    .agg(
        grants_count=("award_id", "count"),
        total_amount=("award_amount", "sum")
    )
    .reset_index()
)

#q2_df.head()


In [16]:
dir_click = alt.selection_point(fields=["directorate"], empty="all")

In [17]:
q2_overview = (
    alt.Chart(q2_df)
    .mark_bar()
    .encode(
        y=alt.Y("directorate:N", sort="-x", title="Directorate"),
        x=alt.X("grants_count:Q", title="Number of grants"),
        color=alt.condition(
            dir_click,
            alt.Color("grants_count:Q", scale=alt.Scale(scheme="blues"), title="Grants count"),
            alt.value("lightgray")
        ),
        tooltip=[
            alt.Tooltip("directorate:N", title="Directorate"),
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("grants_count:Q", title="Grants"),
            alt.Tooltip("total_amount:Q", title="Total amount ($)", format=",.0f"),
        ],
    )
    .add_params(year_selection, dir_click)
    .transform_filter(year_selection)
    .properties(
        title="Q2 — Grants by Directorate (select a year + click a directorate)",
        width=750,
        height=420,
    )
)


In [18]:
q2_trend = (
    alt.Chart(q2_df)
    .mark_line(point=True)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("grants_count:Q", title="Number of grants"),
        tooltip=[
            alt.Tooltip("directorate:N", title="Directorate"),
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("grants_count:Q", title="Grants"),
            alt.Tooltip("total_amount:Q", title="Total amount ($)", format=",.0f"),
        ],
    )
    .transform_filter(dir_click)
    .properties(
        title="Selected directorate — grants over time",
        width=750,
        height=180,
    )
)


In [19]:
(q2_overview & q2_trend)


In [20]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA PREPARATION
q2_df = (
    df_grants.groupby(["directorate", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

# 2. CREATE SELECTIONS
min_year = int(q2_df["year"].min())
max_year = int(q2_df["year"].max())

slider = alt.binding_range(min=min_year, max=max_year, step=1, name="Select Year: ")

try:
    # Modern Altair
    year_select = alt.selection_point(
        name="year_select", fields=["year"], bind=slider, value=[{"year": max_year}]
    )
    dir_select = alt.selection_point(
        name="dir_select", fields=["directorate"], empty="all"
    )
except AttributeError:
    # Older Altair
    year_select = alt.selection_single(
        name="year_select", fields=["year"], bind=slider, init={"year": max_year}
    )
    dir_select = alt.selection_single(
        name="dir_select", fields=["directorate"], empty="all"
    )

# 3. LEFT COLUMN: HORIZONTAL BARS (The Overview)
bars = (
    alt.Chart(q2_df)
    .mark_bar()
    .encode(
        x=alt.X("grants_count:Q", title="Number of Grants"),
        y=alt.Y("directorate:N", sort="-x", title="Directorate"),
        color=alt.condition(
            dir_select,
            alt.Color("grants_count:Q", scale=alt.Scale(scheme="teals"), legend=None),
            alt.value("#f0f0f0"),
        ),
        tooltip=[
            alt.Tooltip("directorate:N", title="Directorate"),
            alt.Tooltip("grants_count:Q", title="Grants"),
            alt.Tooltip("total_amount:Q", title="Total Amount ($)", format=",.0f"),
        ],
    )
    .transform_filter(year_select)
    .properties(width=450, height=550, title="Grants by Directorate (Click to Filter)")
)

# 4. RIGHT COLUMN: TREND + KPI

# A. Trend Line (The History) - REMOVED 'PADDING' TO FIX ERROR
trend_chart = (
    alt.Chart(q2_df)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y(
            "total_amount:Q", title="Total Funding ($)", axis=alt.Axis(format="~s")
        ),
        color=alt.value("teal"),
        tooltip=[
            alt.Tooltip("directorate:N"),
            alt.Tooltip("year:O"),
            alt.Tooltip("total_amount:Q", format="$,.0f"),
        ],
    )
    .transform_filter(dir_select)
    .properties(
        width=300,
        height=200,
        title="Funding History (Selected Directorate)",
        # padding removed here
    )
)

# B. KPI Text (The Detail)
base_text = alt.Chart(q2_df).transform_filter(year_select).transform_filter(dir_select)

label = base_text.mark_text(align="center", color="#888", fontSize=14, dy=-15).encode(
    text=alt.value("Total Funding (Selected Year)"), y=alt.value(75), x=alt.value(150)
)

value = base_text.mark_text(
    align="center", color="#444", fontSize=24, fontWeight="bold", dy=15
).encode(
    text=alt.Text("sum(total_amount):Q", format="$,.0f"),
    y=alt.value(75),
    x=alt.value(150),
)

kpi_section = (label + value).properties(width=300, height=155)

# 5. ASSEMBLE
# We vertically stack Trend + KPI
right_col = trend_chart & kpi_section

# We horizontally stack Bars | Right Column
final_q2 = (
    (bars | right_col)
    .add_params(year_select, dir_select)
    .resolve_scale(color="independent")
)

final_q2

In [21]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA PREPARATION
q2_df = (
    df_grants.groupby(["directorate", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

# 2. CREATE SELECTIONS
min_year = int(q2_df["year"].min())
max_year = int(q2_df["year"].max())

slider = alt.binding_range(min=min_year, max=max_year, step=1, name="Select Year: ")

try:
    year_select = alt.selection_point(
        name="year_select", fields=["year"], bind=slider, value=[{"year": max_year}]
    )
    dir_select = alt.selection_point(
        name="dir_select", fields=["directorate"], empty="all"
    )
except AttributeError:
    year_select = alt.selection_single(
        name="year_select", fields=["year"], bind=slider, init={"year": max_year}
    )
    dir_select = alt.selection_single(
        name="dir_select", fields=["directorate"], empty="all"
    )

# 3. LEFT COLUMN: HORIZONTAL BARS
bars = (
    alt.Chart(q2_df)
    .mark_bar()
    .encode(
        x=alt.X("grants_count:Q", title="Number of Grants"),
        y=alt.Y("directorate:N", sort="-x", title="Directorate"),
        color=alt.condition(
            dir_select,
            alt.Color(
                "grants_count:Q",
                scale=alt.Scale(scheme="blues"),
                legend=None,
            ),
            alt.value("#f0f0f0"),
        ),
        tooltip=[
            alt.Tooltip("directorate:N", title="Directorate"),
            alt.Tooltip("grants_count:Q", title="Grants"),
            alt.Tooltip("total_amount:Q", title="Total Amount ($)", format=",.0f"),
        ],
    )
    .transform_filter(year_select)
    .properties(width=450, height=550, title="Grants by Directorate (Click to Filter)")
)

# 4. RIGHT COLUMN COMPONENTS

# A. Spacer para crear gap superior
top_spacer = (
    alt.Chart(q2_df)
    .mark_rect()
    .encode(opacity=alt.value(0))
    .properties(width=300, height=30)
)

# B. Trend Line
trend_chart = (
    alt.Chart(q2_df)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y(
            "total_amount:Q", title="Total Funding ($)", axis=alt.Axis(format="~s")
        ),
        color=alt.value("#4c78a8"),
        tooltip=[
            alt.Tooltip("directorate:N"),
            alt.Tooltip("year:O"),
            alt.Tooltip("total_amount:Q", format="$,.0f"),
        ],
    )
    .transform_filter(dir_select)
    .properties(
        width=300,
        height=220,
        title="Funding History (Selected Directorate)",
    )
)

# C. KPI Text
base_text = alt.Chart(q2_df).transform_filter(year_select).transform_filter(dir_select)

label = base_text.mark_text(align="center", color="#888", fontSize=14, dy=-15).encode(
    text=alt.value("Total Funding (Selected Year)"), y=alt.value(75), x=alt.value(100)
)

value = base_text.mark_text(
    align="center", color="#444", fontSize=24, fontWeight="bold", dy=15
).encode(
    text=alt.Text("sum(total_amount):Q", format="$,.0f"),
    y=alt.value(75),
    x=alt.value(100),
)

kpi_section = (label + value).properties(width=300, height=100)

# D. Legend
legend_chart = (
    alt.Chart(q2_df)
    .mark_circle(opacity=0)
    .encode(
        color=alt.Color(
            "grants_count:Q",
            scale=alt.Scale(scheme="blues"),
            legend=alt.Legend(
                title="Grant Count Intensity",
                orient="bottom",
                direction="horizontal",
                titleAnchor="middle",
                gradientLength=200,
            ),
        )
    )
    .transform_filter(year_select)
    .properties(width=300, height=40)
)

# 5. ASSEMBLE
# Ahora incluimos el spacer al inicio
right_col = alt.vconcat(
    top_spacer,
    trend_chart,
    kpi_section,
    legend_chart,
    spacing=5,
)

# Final Assembly
final_q2 = (
    (bars | right_col)
    .add_params(year_select, dir_select)
    .resolve_scale(color="independent")
    .configure_view(stroke=None)
    .configure_concat(spacing=10)
)

final_q2

In [118]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA PREPARATION

# A. Yearly Data (Specific Years)
q2_yearly = (
    df_grants.groupby(["directorate", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

# B. Global Data (Year 0 = "All Time")
q2_total = (
    df_grants.groupby(["directorate"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q2_total["year"] = 0  # Assign 0 for the aggregate

# C. Combine
q2_full = pd.concat([q2_yearly, q2_total], ignore_index=True)


# 2. INTERACTION SETUP
# Get list of years from data + Add 0
years = sorted(q2_yearly["year"].unique())
year_options = [0] + years
year_labels = ["All Years (Total)"] + [str(y) for y in years]

input_element = alt.binding_select(
    options=year_options, labels=year_labels, name="Select Year: "
)

try:
    year_select = alt.selection_point(
        name="year_select", fields=["year"], bind=input_element, value=[{"year": 0}]
    )
    dir_select = alt.selection_point(
        name="dir_select", fields=["directorate"], empty="all"
    )
except AttributeError:
    # Older Altair
    year_select = alt.selection_single(
        name="year_select", fields=["year"], bind=input_element, init={"year": 0}
    )
    dir_select = alt.selection_single(
        name="dir_select", fields=["directorate"], empty="all"
    )

# 3. LEFT COLUMN: HORIZONTAL BARS
# Uses q2_full so it can show Total or Yearly counts based on dropdown
bars = (
    alt.Chart(q2_full)
    .mark_bar()
    .encode(
        x=alt.X("grants_count:Q", title="Number of Grants"),
        y=alt.Y("directorate:N", sort="-x", title="Directorate"),
        color=alt.condition(
            dir_select,
            alt.Color(
                "grants_count:Q",
                scale=alt.Scale(scheme="blues"),
                legend=None,
            ),
            alt.value("#f0f0f0"),
        ),
        tooltip=[
            alt.Tooltip("directorate:N", title="Directorate"),
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("grants_count:Q", title="Grants"),
            alt.Tooltip("total_amount:Q", title="Total Amount ($)", format=",.0f"),
        ],
    )
    .add_params(dir_select, year_select)
    .transform_filter(year_select)  # <--- Filter by Dropdown
    .properties(width=450, height=550, title="Grants by Directorate (Click to Filter)")
)

# 4. RIGHT COLUMN COMPONENTS

# A. Spacer
top_spacer = (
    alt.Chart(q2_full)
    .mark_rect()
    .encode(opacity=alt.value(0))
    .properties(width=300, height=30)
)

# B. Trend Line
# Uses q2_yearly (Excludes Year 0 so the line chart looks correct)
trend_chart = (
    alt.Chart(q2_yearly)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y(
            "total_amount:Q", title="Total Funding ($)", axis=alt.Axis(format="~s")
        ),
        color=alt.value("#4c78a8"),
        tooltip=[
            alt.Tooltip("directorate:N"),
            alt.Tooltip("year:O"),
            alt.Tooltip("total_amount:Q", format="$,.0f"),
        ],
    )
    .transform_filter(dir_select)
    .properties(
        width=300,
        height=220,
        title="Funding History (Selected Directorate)",
    )
)

# C. KPI Text
# Uses q2_full to show correct totals based on dropdown
base_text = (
    alt.Chart(q2_full).transform_filter(year_select).transform_filter(dir_select)
)

label = base_text.mark_text(align="center", color="#888", fontSize=14, dy=-15).encode(
    text=alt.value("Total Funding (Selected Year)"), y=alt.value(75), x=alt.value(100)
)

value = base_text.mark_text(
    align="center", color="#444", fontSize=24, fontWeight="bold", dy=15
).encode(
    text=alt.Text("sum(total_amount):Q", format="$,.0f"),
    y=alt.value(75),
    x=alt.value(100),
)

kpi_section = (label + value).properties(width=300, height=100)

# D. Legend
legend_chart = (
    alt.Chart(q2_full)
    .mark_circle(opacity=0)
    .encode(
        color=alt.Color(
            "grants_count:Q",
            scale=alt.Scale(scheme="blues"),
            legend=alt.Legend(
                title="Grant Count Intensity",
                orient="bottom",
                direction="horizontal",
                titleAnchor="middle",
                gradientLength=200,
            ),
        )
    )
    .transform_filter(year_select)
    .properties(width=300, height=40)
)

# 5. ASSEMBLE
right_col = alt.vconcat(
    top_spacer,
    trend_chart,
    kpi_section,
    legend_chart,
    spacing=5,
)

final_q2 = (
    (bars | right_col)
    .resolve_scale(color="independent")
    .configure_view(stroke=None)
    .configure_concat(spacing=10)
)

final_q2

o analyze grant distribution across the 47+ NSF directorates, I designed a **composite dashboard** centered on a sorted horizontal bar chart. This provides a clear 'Leaderboard' of funding volume, which is essential for comparing such a large number of categories.

Addressing the need to see data 'for a certain year,' I implemented a **Dropdown Selector** that allows users to instantly toggle between a global 'All-Time' summary and specific fiscal years. This satisfies the multi-level granularity requirement without cluttering the interface.

Adhering to the **Details-on-Demand** principle, clicking a directorate reveals its specific historical context in the side panel: a **Trend Line** showing funding evolution over the last 5 years and a **KPI Text** displaying the exact dollar amount for the selected timeframe. This separation ensures the main view remains an uncluttered overview while providing deep-dive data when needed.

## Q3: Are the cancelled grants especially hitting a certain directorate?

In [22]:
import pandas as pd
import altair as alt

# --- Aggregations ---
q3_cancel_df = (
    df_trump.groupby(["directorate"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)

q3_base_df = (
    df_grants.groupby(["directorate"])
    .agg(base_count=("award_id", "count"), base_amount=("award_amount", "sum"))
    .reset_index()
)

q3_df = q3_base_df.merge(q3_cancel_df, on="directorate", how="outer").fillna(0)

q3_df["cancel_rate"] = (
    q3_df["cancelled_count"] / q3_df["base_count"].replace(0, pd.NA)
).fillna(0)

# ✅ IMPORTANT: for the scatter (especially if you use log), remove base_count == 0
q3_scatter_df = q3_df[(q3_df["base_count"] > 0) & (q3_df["cancelled_count"] > 0)].copy()

# --- Selection ---
dir_sel = alt.selection_point(fields=["directorate"], empty="all")

# --- Scatter (overview) ---
# Option A (recommended): keep it LINEAR to match your expected "spread"
q3_scatter = (
    alt.Chart(q3_scatter_df)
    .mark_circle(opacity=0.8, stroke="black", strokeWidth=0.4)
    .encode(
        x=alt.X("base_count:Q", title="Baseline grants (last 5 years)"),
        y=alt.Y("cancelled_count:Q", title="Cancelled grants (Trump era)"),
        size=alt.Size("cancelled_amount:Q", title="Cancelled amount ($)", legend=None),
        color=alt.Color(
            "cancel_rate:Q",
            title="Cancellation rate",
            scale=alt.Scale(scheme="oranges"),
        ),
        tooltip=[
            alt.Tooltip("directorate:N", title="Directorate"),
            alt.Tooltip("base_count:Q", title="Baseline grants"),
            alt.Tooltip("cancelled_count:Q", title="Cancelled grants"),
            alt.Tooltip("cancel_rate:Q", title="Cancel rate", format=".2%"),
            alt.Tooltip(
                "cancelled_amount:Q", title="Cancelled amount ($)", format=",.0f"
            ),
        ],
    )
    .add_params(dir_sel)
    .properties(
        width=750,
        height=380,
        title="Q3 — Cancelled grants vs baseline distribution (by directorate)",
    )
)

# --- Bars ---
n_dirs = q3_df[q3_df["cancelled_count"] > 0]["directorate"].nunique()
rank_height = max(300, n_dirs * 18)

q3_bars = (
    alt.Chart(q3_df)
    .mark_bar()
    .encode(
        y=alt.Y(
            "directorate:N",
            sort="-x",
            title="Directorate",
            axis=alt.Axis(labelLimit=200),
        ),
        x=alt.X("cancelled_count:Q", title="Cancelled grants"),
        color=alt.condition(dir_sel, alt.value("#d95f02"), alt.value("lightgray")),
        tooltip=[
            alt.Tooltip("directorate:N"),
            alt.Tooltip("cancelled_count:Q", title="Cancelled grants"),
            alt.Tooltip("cancel_rate:Q", title="Cancel rate", format=".2%"),
        ],
    )
    .transform_filter(alt.datum.cancelled_count > 0)
    .add_params(dir_sel)
    .properties(
        width=750, height=rank_height, title="Cancelled grants ranking (click to focus)"
    )
)

# --- Trend (details on demand) ---
q3_cancel_by_year = (
    df_trump.groupby(["directorate", "year"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)

q3_trend = (
    alt.Chart(q3_cancel_by_year)
    .mark_line(point=True)
    .encode(
        x=alt.X("year:O", title="Year (Trump era)"),
        y=alt.Y("cancelled_count:Q", title="Cancelled grants"),
        tooltip=[
            alt.Tooltip("directorate:N"),
            alt.Tooltip("year:O"),
            alt.Tooltip("cancelled_count:Q", title="Cancelled grants"),
            alt.Tooltip(
                "cancelled_amount:Q", title="Cancelled amount ($)", format=",.0f"
            ),
        ],
    )
    .transform_filter(dir_sel)
    .properties(
        width=750, height=180, title="Selected directorate — cancellations over time"
    )
)

(q3_scatter & q3_bars & q3_trend)

  ).fillna(0)


In [23]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA PREPARATION
# A. Cancelled Stats (Trump Era)
q3_cancel_df = (
    df_trump.groupby(["directorate"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)

# B. Base Stats (Total Grants)
q3_base_df = (
    df_grants.groupby(["directorate"])
    .agg(base_count=("award_id", "count"), base_amount=("award_amount", "sum"))
    .reset_index()
)

# C. Merge & Calculate Rate
q3_df = q3_base_df.merge(q3_cancel_df, on="directorate", how="outer").fillna(0)
q3_df["cancel_rate"] = (
    q3_df["cancelled_count"] / q3_df["base_count"].replace(0, 1)
).fillna(0)

# Filter noise (must have at least 1 grant to be relevant)
q3_scatter_df = q3_df[(q3_df["base_count"] > 0)].copy()

# D. Trend Data (for the line chart)
q3_trend_data = (
    df_trump.groupby(["directorate", "year"])
    .agg(cancelled_count=("award_id", "count"))
    .reset_index()
)

# 2. CREATE SELECTION (Robust for older Altair)
try:
    dir_select = alt.selection_point(fields=["directorate"], empty="all")
except AttributeError:
    dir_select = alt.selection_single(fields=["directorate"], empty="all")

# 3. LEFT CHART: THE RADAR (Scatter)
# X = Volume (Log scale handles the huge range)
# Y = Intensity (Rate)
scatter = (
    alt.Chart(q3_scatter_df)
    .mark_circle(size=100, stroke="black", strokeWidth=0.5, opacity=0.8)
    .encode(
        x=alt.X(
            "base_count:Q",
            scale=alt.Scale(type="log"),
            title="Total Grants (Log Scale)",
        ),
        y=alt.Y("cancel_rate:Q", title="Cancellation Rate", axis=alt.Axis(format="%")),
        color=alt.condition(
            dir_select,
            alt.Color("cancel_rate:Q", scale=alt.Scale(scheme="reds"), legend=None),
            alt.value("#f0f0f0"),  # Turn gray if not clicked
        ),
        size=alt.Size("cancelled_amount:Q", title="Lost Funding ($)", legend=None),
        tooltip=[
            alt.Tooltip("directorate:N", title="Directorate"),
            alt.Tooltip("base_count:Q", title="Total Grants"),
            alt.Tooltip("cancel_rate:Q", title="Cancel Rate", format=".1%"),
            alt.Tooltip("cancelled_amount:Q", title="Lost Funding", format="$,.0f"),
        ],
    )
    .add_selection(dir_select)  # <--- Interaction Driver
    .properties(
        width=450, height=400, title="Q3: Cancellation Intensity (Rate vs Volume)"
    )
)

# Add a mean line for context
mean_rate = q3_scatter_df["cancel_rate"].mean()
rule = (
    alt.Chart(pd.DataFrame({"mean_rate": [mean_rate]}))
    .mark_rule(color="gray", strokeDash=[4, 4])
    .encode(y="mean_rate:Q")
)

left_chart = scatter + rule

# 4. RIGHT COLUMN COMPONENTS

# A. Spacer (Top margin)
top_spacer = (
    alt.Chart(q3_df)
    .mark_rect()
    .encode(opacity=alt.value(0))
    .properties(width=300, height=50)
)

# B. Trend Chart (Red)
trend_chart = (
    alt.Chart(q3_trend_data)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("cancelled_count:Q", title="Cancellations"),
        color=alt.value("#d62728"),  # Standard Red
        tooltip=["directorate", "year", "cancelled_count"],
    )
    .transform_filter(dir_select)
    .properties(width=300, height=200, title="Cancellation History (Selected)")
)

# C. KPI Text
base_text = alt.Chart(q3_df).transform_filter(dir_select)

label = base_text.mark_text(align="center", color="#888", fontSize=14, dy=-15).encode(
    text=alt.value("Total Lost Funding"), y=alt.value(60), x=alt.value(150)
)

value = base_text.mark_text(
    align="center", color="#333", fontSize=24, fontWeight="bold", dy=15
).encode(
    text=alt.Text("sum(cancelled_amount):Q", format="$,.0f"),
    y=alt.value(60),
    x=alt.value(150),
)

kpi_section = (label + value).properties(width=300, height=120)

# D. Legend (Dummy Chart)
legend_chart = (
    alt.Chart(q3_df)
    .mark_circle(opacity=0)
    .encode(
        color=alt.Color(
            "cancel_rate:Q",
            scale=alt.Scale(scheme="reds"),
            legend=alt.Legend(
                title="Cancellation Intensity (Rate)",
                orient="bottom",
                direction="horizontal",
                titleAnchor="middle",
                gradientLength=200,
            ),
        )
    )
    .properties(width=300, height=50)
)

# 5. ASSEMBLE
right_col = alt.vconcat(top_spacer, trend_chart, kpi_section, legend_chart, spacing=10)

final_q3 = (
    (left_chart | right_col)
    .resolve_scale(color="independent")
    .configure_view(stroke=None)
    .configure_concat(spacing=30)
)

final_q3

Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(dir_select)  # <--- Interaction Driver


In [111]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA PREPARATION
# A. Cancelled Stats
q3_cancel_df = (
    df_trump.groupby(["directorate"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)

# B. Base Stats
q3_base_df = (
    df_grants.groupby(["directorate"])
    .agg(base_count=("award_id", "count"), base_amount=("award_amount", "sum"))
    .reset_index()
)

# C. Merge & Rate
q3_df = q3_base_df.merge(q3_cancel_df, on="directorate", how="outer").fillna(0)
q3_df["cancel_rate"] = (
    q3_df["cancelled_count"] / q3_df["base_count"].replace(0, 1)
).fillna(0)

# Filter for plotting (must have base grants)
q3_plot_df = q3_df[q3_df["base_count"] > 0].copy()

# D. Trend Data
q3_trend_data = (
    df_trump.groupby(["directorate", "year"])
    .agg(cancelled_count=("award_id", "count"))
    .reset_index()
)

# 2. SELECTION
try:
    dir_select = alt.selection_point(fields=["directorate"], empty="all")
except AttributeError:
    dir_select = alt.selection_single(fields=["directorate"], empty="all")

# 3. LEFT CHART: BAR CHART (Ranking)
bars = (
    alt.Chart(q3_plot_df)
    .mark_bar()
    .encode(
        x=alt.X("cancelled_count:Q", title="Number of Cancellations"),
        y=alt.Y("directorate:N", sort="-x", title="Directorate"),
        color=alt.condition(
            dir_select,
            alt.Color("cancelled_count:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#f0f0f0"),
        ),
        tooltip=[
            "directorate",
            "cancelled_count",
            alt.Tooltip("cancelled_amount", format="$,.0f"),
        ],
    )
    .add_selection(dir_select)
    .properties(width=250, height=450, title="Ranking: Total Cancellations")
)

# 4. RIGHT CHART: BUBBLE RADAR (Analysis)
bubble_radar = (
    alt.Chart(q3_plot_df)
    .mark_circle(stroke="black", strokeWidth=0.5, opacity=0.8)
    .encode(
        x=alt.X(
            "base_count:Q", scale=alt.Scale(type="log"), title="Total Grants Size (Log)"
        ),
        y=alt.Y("cancel_rate:Q", title="Cancellation Rate", axis=alt.Axis(format="%")),
        size=alt.Size(
            "cancelled_amount:Q",
            title="Lost Funding ($)",
            legend=None,
            scale=alt.Scale(range=[50, 500]),
        ),
        color=alt.condition(
            dir_select,
            alt.value("#4c78a8"),  # Fixed capitalization here
            alt.value("#f0f0f0"),
        ),
        tooltip=[
            alt.Tooltip("directorate:N"),
            alt.Tooltip("cancel_rate:Q", format=".1%"),
            alt.Tooltip("cancelled_amount:Q", format="$,.0f"),
        ],
    )
    .add_selection(dir_select)
    .properties(width=400, height=250, title="Analysis: Intensity vs. Size")
)

# 5. BOTTOM RIGHT: TREND LINE (Context)
trend_line = (
    alt.Chart(q3_trend_data)
    .mark_line(point=True, color="#4c78a8")
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("cancelled_count:Q", title="Cancellations"),
        tooltip=["directorate", "year", "cancelled_count"],
    )
    .transform_filter(dir_select)
    .properties(width=400, height=150, title="Timeline: When did it happen?")
)

# 6. ASSEMBLE
right_col = alt.vconcat(bubble_radar, trend_line, spacing=10)

final_q3 = (
    (bars | right_col)
    .resolve_scale(color="independent")
    .configure_view(stroke=None)
    .configure_concat(spacing=30)
)

final_q3

Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(dir_select)
Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(dir_select)


In [112]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA PREPARATION
# A. Cancelled Stats
q3_cancel_df = (
    df_trump.groupby(["directorate"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)

# B. Base Stats
q3_base_df = (
    df_grants.groupby(["directorate"])
    .agg(base_count=("award_id", "count"), base_amount=("award_amount", "sum"))
    .reset_index()
)

# C. Merge
q3_df = q3_base_df.merge(q3_cancel_df, on="directorate", how="outer").fillna(0)
q3_df["cancel_rate"] = (
    q3_df["cancelled_count"] / q3_df["base_count"].replace(0, 1)
).fillna(0)

# Filter
q3_plot_df = q3_df[q3_df["base_count"] > 0].copy()

# D. Trend Data
q3_trend_data = (
    df_trump.groupby(["directorate", "year"])
    .agg(cancelled_count=("award_id", "count"))
    .reset_index()
)

# 2. SELECTION
try:
    dir_select = alt.selection_point(fields=["directorate"], empty="all")
except AttributeError:
    dir_select = alt.selection_single(fields=["directorate"], empty="all")

# 3. LEFT CHART: BAR CHART (Leaderboard)
bars = (
    alt.Chart(q3_plot_df)
    .mark_bar()
    .encode(
        x=alt.X("cancelled_count:Q", title="Number of Cancellations"),
        y=alt.Y("directorate:N", sort="-x", title="Directorate"),
        color=alt.condition(
            dir_select,
            alt.Color("cancelled_count:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#f0f0f0"),
        ),
        tooltip=[
            "directorate",
            "cancelled_count",
            alt.Tooltip("cancelled_amount", format="$,.0f"),
        ],
    )
    .add_selection(dir_select)
    .properties(width=250, height=450, title="Ranking: Total Cancellations")
)

# 4. RIGHT CHART: LINEAR SCATTER (Context)
# CHANGED: Now using simple Counts on both axes
# Insight: Points high up but to the left are "Disproportionately Hit"
linear_scatter = (
    alt.Chart(q3_plot_df)
    .mark_circle(stroke="black", strokeWidth=0.5, opacity=0.8)
    .encode(
        x=alt.X("base_count:Q", title="Total Grants Issued (Size)"),
        y=alt.Y("cancelled_count:Q", title="Total Cancellations (Hits)"),
        size=alt.Size(
            "cancelled_amount:Q",
            title="Lost Funding ($)",
            legend=None,
            scale=alt.Scale(range=[50, 500]),
        ),
        color=alt.condition(dir_select, alt.value("#4c78a8"), alt.value("#f0f0f0")),
        tooltip=[
            alt.Tooltip("directorate:N"),
            alt.Tooltip("base_count:Q", title="Total Grants"),
            alt.Tooltip("cancelled_count:Q", title="Cancelled Grants"),
            alt.Tooltip("cancelled_amount:Q", format="$,.0f"),
        ],
    )
    .add_selection(dir_select)
    .properties(width=400, height=250, title="Context: Volume vs. Cancellations")
)

# 5. BOTTOM RIGHT: TREND LINE
trend_line = (
    alt.Chart(q3_trend_data)
    .mark_line(point=True, color="#4c78a8")
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("cancelled_count:Q", title="Cancellations"),
        tooltip=["directorate", "year", "cancelled_count"],
    )
    .transform_filter(dir_select)
    .properties(width=400, height=150, title="Timeline: When did it happen?")
)

# 6. ASSEMBLE
right_col = alt.vconcat(linear_scatter, trend_line, spacing=10)

final_q3 = (
    (bars | right_col)
    .resolve_scale(color="independent")
    .configure_view(stroke=None)
    .configure_concat(spacing=30)
)

final_q3

Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(dir_select)
Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(dir_select)


In [117]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# Load Data (Assuming files are in the same directory)
df_grants = pd.read_csv("NSF_Grants_Last5Years_Clean.csv")
df_trump = pd.read_csv("trump17-21-csv.csv")

# Clean columns just in case
df_grants.columns = df_grants.columns.str.strip()
df_trump.columns = df_trump.columns.str.strip()


# 1. DATA PREPARATION

# --- A. Yearly Data (Specific Years) ---
# Grants per Year
base_yearly = (
    df_grants.groupby(["directorate", "year"])
    .agg(base_count=("award_id", "count"), base_amount=("award_amount", "sum"))
    .reset_index()
)

# Cancellations per Year
cancel_yearly = (
    df_trump.groupby(["directorate", "year"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)

# Merge Yearly
yearly_df = base_yearly.merge(
    cancel_yearly, on=["directorate", "year"], how="outer"
).fillna(0)


# --- B. Global Data (Year 0 = "All Time") ---
# Total Grants (All Time)
base_total = (
    df_grants.groupby(["directorate"])
    .agg(base_count=("award_id", "count"), base_amount=("award_amount", "sum"))
    .reset_index()
)
base_total["year"] = 0  # Assign "0" to represent the global aggregate

# Total Cancellations (All Time)
cancel_total = (
    df_trump.groupby(["directorate"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)
cancel_total["year"] = 0

# Merge Global
total_df = base_total.merge(
    cancel_total, on=["directorate", "year"], how="outer"
).fillna(0)


# --- C. Prepare the "Total" Rows ---
# We calculate the "Static Size" (Total Grants) for the X-axis
# This ensures the dots stay in the same X-position regardless of the year selected
base_total_fixed = base_total[["directorate", "base_count"]].rename(
    columns={"base_count": "static_base_count"}
)

# Merge Static Size into Yearly Data
yearly_df = yearly_df.merge(base_total_fixed, on="directorate", how="left").fillna(0)

# For Global rows, static_base_count is just the base_count
total_rows = total_df.copy()
total_rows["static_base_count"] = total_rows["base_count"]


# --- D. Combine & Metrics ---
q3_full = pd.concat([yearly_df, total_rows], ignore_index=True)

# Filter: Keep only Year 0 and 2018-2021
target_years = [0, 2018, 2019, 2020, 2021]
q3_full = q3_full[q3_full["year"].isin(target_years)]

# Filter for Plotting: Keep rows with either Base Grants OR Cancellations
q3_plot_full = q3_full[
    (q3_full["static_base_count"] > 0) | (q3_full["cancelled_count"] > 0)
].copy()

# Trend Data (Context Line Chart)
q3_trend_data = (
    df_trump.groupby(["directorate", "year"])
    .agg(cancelled_count=("award_id", "count"))
    .reset_index()
)


# 2. INTERACTION SETUP
year_options = [0, 2018, 2019, 2020, 2021]
year_labels = ["All Years (Total)", "2018", "2019", "2020", "2021"]

input_element = alt.binding_select(
    options=year_options, labels=year_labels, name="Select Year: "
)

try:
    year_select = alt.selection_point(
        name="year_select", fields=["year"], bind=input_element, value=[{"year": 0}]
    )
    dir_select = alt.selection_point(
        name="dir_select", fields=["directorate"], empty="all"
    )
except AttributeError:
    # Fallback for older Altair
    year_select = alt.selection_single(
        name="year_select", fields=["year"], bind=input_element, init={"year": 0}
    )
    dir_select = alt.selection_single(
        name="dir_select", fields=["directorate"], empty="all"
    )


# 3. LEFT CHART: BAR CHART (Ranking)
bars = (
    alt.Chart(q3_plot_full)
    .mark_bar()
    .encode(
        x=alt.X("cancelled_count:Q", title="Number of Cancellations"),
        y=alt.Y("directorate:N", sort="-x", title="Directorate"),
        color=alt.condition(
            dir_select,
            alt.Color(
                "cancelled_count:Q", scale=alt.Scale(scheme="blues"), legend=None
            ),
            alt.value("#f0f0f0"),
        ),
        tooltip=[
            "directorate",
            "year",
            "cancelled_count",
            alt.Tooltip("cancelled_amount", format="$,.0f"),
        ],
    )
    .add_params(dir_select, year_select)
    .transform_filter(year_select)
    .properties(width=250, height=450, title="Ranking: Total Cancellations")
)


# 4. RIGHT CHART: LINEAR SCATTER (Dynamic Zoom Added!)
linear_scatter = (
    alt.Chart(q3_plot_full)
    .mark_circle(stroke="black", strokeWidth=0.5, opacity=0.8)
    .encode(
        x=alt.X("static_base_count:Q", title="General Directorate Size (Total Grants)"),
        y=alt.Y("cancelled_count:Q", title="Cancellations (Selected Year)"),
        size=alt.Size(
            "cancelled_amount:Q",
            title="Lost Funding ($)",
            legend=None,
            scale=alt.Scale(range=[50, 500]),
        ),
        color=alt.condition(dir_select, alt.value("#4c78a8"), alt.value("#f0f0f0")),
        tooltip=[
            alt.Tooltip("directorate:N"),
            alt.Tooltip("year:O", title="Data Year"),
            alt.Tooltip("static_base_count:Q", title="Directorate Size (Total)"),
            alt.Tooltip("cancelled_count:Q", title="Cancelled Grants"),
            alt.Tooltip("cancelled_amount:Q", format="$,.0f"),
        ],
    )
    .add_params(dir_select, year_select)
    .transform_filter(year_select)
    .properties(width=400, height=250, title="Context: Volume vs. Cancellations")
    .interactive()  # <--- THIS ENABLES DYNAMIC ZOOM & PAN
)


# 5. BOTTOM RIGHT: TREND LINE
trend_line = (
    alt.Chart(q3_trend_data)
    .mark_line(point=True, color="#4c78a8")
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("cancelled_count:Q", title="Cancellations"),
        tooltip=["directorate", "year", "cancelled_count"],
    )
    .transform_filter(dir_select)
    .properties(width=400, height=150, title="Timeline: When did it happen?")
)


# 6. ASSEMBLE
right_col = alt.vconcat(linear_scatter, trend_line, spacing=10)

final_q3 = (
    (bars | right_col)
    .resolve_scale(color="independent")
    .configure_view(stroke=None)
    .configure_concat(spacing=30)
)

final_q3

To determine if cancellations disproportionately targeted specific directorates, I designed a **composite dashboard** that distinguishes **volume** from **intensity**. A raw count is biased by directorate size, so I paired a **Ranked Bar Chart** (Left) for absolute impact with an **Interactive Scatter Plot** (Right) for relative context.

The Scatter Plot plots Static Directorate Size (X) vs. Cancellations (Y). Using a stable X-axis ensures that dots shift vertically rather than erratically when filtering, making yearly comparisons intuitive. To facilitate **temporal analysis**, I implemented a **Dropdown Selector** that toggles between a global 'All-Time' summary and specific fiscal years.

Adhering to Shneiderman's mantra, the dashboard supports **Zoom & Pan** on the scatter plot to resolve occlusion in dense clusters. This design effectively separates natural scaling (diagonal trend) from anomalies (outliers high on Y but low on X), allowing users to pinpoint specific targets across different timeframes.

## Q4: How have the total grants amount evolved over the years?

In [24]:
import pandas as pd
import altair as alt

# --------------------------------------------------
# DATA AGGREGATION
# --------------------------------------------------

q4_df = (
    df_grants.groupby("year")
    .agg(total_amount=("award_amount", "sum"), grants_count=("award_id", "count"))
    .reset_index()
    .sort_values("year")
)

# --------------------------------------------------
# INTERACTION: YEAR SELECTION
# --------------------------------------------------

year_sel = alt.selection_point(fields=["year"], empty="all")

# --------------------------------------------------
# MAIN TREND: TOTAL FUNDING (PRIMARY STORY)
# --------------------------------------------------

q4_funding = (
    alt.Chart(q4_df)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y(
            "total_amount:Q", title="Total NSF funding ($)", axis=alt.Axis(format="~s")
        ),
        color=alt.condition(year_sel, alt.value("#1f77b4"), alt.value("#b0c4de")),
        tooltip=[
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("total_amount:Q", title="Total funding ($)", format=",.0f"),
            alt.Tooltip("grants_count:Q", title="Number of grants"),
        ],
    )
    .add_params(year_sel)
    .properties(
        width=750,
        height=300,
        title="Q4 — Evolution of total NSF funding over the last 5 years",
    )
)

# --------------------------------------------------
# CONTEXT: NUMBER OF GRANTS (SECONDARY STORY)
# --------------------------------------------------

q4_grants = (
    alt.Chart(q4_df)
    .mark_bar()
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("grants_count:Q", title="Number of grants"),
        color=alt.condition(year_sel, alt.value("#ff7f0e"), alt.value("#ffd8b1")),
        tooltip=[
            alt.Tooltip("year:O"),
            alt.Tooltip("grants_count:Q", title="Number of grants"),
            alt.Tooltip("total_amount:Q", title="Total funding ($)", format=",.0f"),
        ],
    )
    .add_params(year_sel)
    .properties(width=750, height=200, title="Number of grants per year (context)")
)

# --------------------------------------------------
# FINAL COMPOSITION
# --------------------------------------------------

(q4_funding & q4_grants)

In [42]:
# ---- sizes (tweakable) ----
W_BIG = 560
H_BIG = 520
W_SMALL = 330

# LEFT (BIG): breakdown by directorate
breakdown = (
    alt.Chart(q4_dir)
    .mark_bar()
    .encode(
        x=alt.X("total_amount:Q", title="Funding ($)", axis=alt.Axis(format="~s")),
        y=alt.Y(
            "directorate:N",
            sort="-x",
            title="Directorate",
            axis=alt.Axis(labelLimit=180)  # helps a bit
        ),
        tooltip=[
            alt.Tooltip("directorate:N", title="Directorate"),
            alt.Tooltip("total_amount:Q", title="Funding", format="$,.0f"),
            alt.Tooltip("grants_count:Q", title="Grants"),
        ],
        color=alt.Color("total_amount:Q", scale=alt.Scale(scheme="blues"), legend=None),
    )
    .transform_filter(year_select)
    .properties(width=W_BIG, height=H_BIG, title="Breakdown by directorate (selected year)")
)

# RIGHT (SMALL): line over time
line = (
    alt.Chart(q4_year)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("total_amount:Q", title="Total funding ($)", axis=alt.Axis(format="~s")),
        tooltip=[
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("total_amount:Q", title="Total funding", format="$,.0f"),
            alt.Tooltip("grants_count:Q", title="Grants count"),
            alt.Tooltip("yoy_pct:Q", title="YoY %", format="+.1f"),
        ],
    )
    .properties(width=W_SMALL, height=220, title="Total funding over time")
)

highlight = (
    alt.Chart(q4_year)
    .mark_point(size=180, filled=True)
    .encode(x="year:O", y="total_amount:Q", color=alt.value("#1f77b4"))
    .transform_filter(year_select)
)

line_block = (line + highlight).add_params(year_select)

# KPIs (same as before, just keep width aligned)
kpi_totals = kpi_totals.properties(width=W_SMALL, height=140)
kpi_yoy = kpi_yoy.properties(width=W_SMALL, height=110)

right_col = alt.vconcat(line_block, kpi_totals, kpi_yoy, spacing=12)

final_q4 = (
    (breakdown | right_col)
    .configure_view(stroke=None)
    .configure_concat(spacing=24)
)

final_q4


In [122]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA PREPARATION
# We need the finest granularity to allow filtering by both State and Directorate
q4_df = (
    df_grants.groupby(["year", "state", "directorate"])
    .agg(total_amount=("award_amount", "sum"), grants_count=("award_id", "count"))
    .reset_index()
)

# 2. CREATE DROPDOWN SELECTIONS

# A. State Selector
states = sorted(q4_df["state"].unique())
state_input = alt.binding_select(
    options=[None] + states,  # None adds the "All" option
    labels=["All States"] + states,
    name="Select State: ",
)
state_select = alt.selection_point(fields=["state"], bind=state_input)

# B. Directorate Selector
dirs = sorted(q4_df["directorate"].unique())
dir_input = alt.binding_select(
    options=[None] + dirs,
    labels=["All Directorates"] + dirs,
    name="Select Directorate: ",
)
dir_select = alt.selection_point(fields=["directorate"], bind=dir_input)

# 3. MAIN CHART: EVOLUTION AREA CHART
# We use an Area chart to emphasize the "Volume" of funding over time
evolution_chart = (
    alt.Chart(q4_df)
    .mark_area(
        line={"color": "#4c78a8"},  # Darker blue line on top
        color=alt.Gradient(
            gradient="linear",
            stops=[
                alt.GradientStop(color="#4c78a8", offset=0),
                alt.GradientStop(color="white", offset=1),
            ],
            x1=1,
            x2=1,
            y1=1,
            y2=0,
        ),
        opacity=0.6,
    )
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y(
            "sum(total_amount):Q", title="Total Funding ($)", axis=alt.Axis(format="~s")
        ),
        tooltip=[
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("sum(total_amount):Q", title="Total Funding", format="$,.0f"),
            alt.Tooltip("sum(grants_count):Q", title="Grants Count"),
        ],
    )
    .add_params(state_select, dir_select)
    .transform_filter(state_select)
    .transform_filter(dir_select)
    .properties(width=600, height=300, title="Evolution of Funding (Filtered View)")
)

# Add points on top for better hover interaction
points = (
    alt.Chart(q4_df)
    .mark_circle(size=60, color="#4c78a8")
    .encode(
        x="year:O",
        y="sum(total_amount):Q",
        tooltip=[
            alt.Tooltip("year:O"),
            alt.Tooltip("sum(total_amount):Q", format="$,.0f"),
            alt.Tooltip("sum(grants_count):Q"),
        ],
    )
    .transform_filter(state_select)
    .transform_filter(dir_select)
)

# 4. KPI CARDS (Dynamic)
# Base chart for KPIs
base_kpi = alt.Chart(q4_df).transform_filter(state_select).transform_filter(dir_select)

# KPI 1: Total Funding (Sum of the filtered view)
kpi_fund_text = base_kpi.mark_text(
    align="center", fontSize=24, fontWeight="bold", color="#4c78a8"
).encode(text=alt.Text("sum(total_amount):Q", format="$,.2s"))
kpi_fund_label = base_kpi.mark_text(
    align="center", fontSize=12, color="gray", dy=-20
).encode(text=alt.value("Total Funding (Selected)"))
kpi_fund = (kpi_fund_label + kpi_fund_text).properties(width=150, height=80)

# KPI 2: Total Grants
kpi_count_text = base_kpi.mark_text(
    align="center", fontSize=24, fontWeight="bold", color="#4c78a8"
).encode(text=alt.Text("sum(grants_count):Q", format=","))
kpi_count_label = base_kpi.mark_text(
    align="center", fontSize=12, color="gray", dy=-20
).encode(text=alt.value("Total Grants (Selected)"))
kpi_count = (kpi_count_label + kpi_count_text).properties(width=150, height=80)


# 5. ASSEMBLE
# Chart on Left | KPIs on Right (Stacked Vertically)
chart_layer = evolution_chart + points
kpi_col = alt.vconcat(kpi_fund, kpi_count, spacing=20)

final_q4 = (
    (chart_layer | kpi_col)
    .resolve_scale(color="independent")
    .configure_view(stroke=None)
    .configure_concat(spacing=20)
)

final_q4

not bad anymore

## Q5: For a selected state, how have the grants evolved? Are there cancelled grants?

In [25]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# 1. LOAD DATA
# Ensure you have these files in your folder
df_grants = pd.read_csv("NSF_Grants_Last5Years_Clean.csv")
df_trump = pd.read_csv("trump17-21-csv.csv")

# Clean columns just in case
df_grants.columns = df_grants.columns.str.strip()
df_trump.columns = df_trump.columns.str.strip()

# 2. AGGREGATIONS
q5_grants = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

q5_trump = (
    df_trump.groupby(["state", "year"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)

# 3. CREATE SELECTION (Dropdown)
# Get list of states for the dropdown
states = sorted(q5_grants["state"].unique())
state_dropdown = alt.binding_select(options=states, name="Select State: ")

try:
    # Modern Altair
    state_selection = alt.selection_point(
        fields=["state"], bind=state_dropdown, value=[{"state": "CA"}]
    )
except AttributeError:
    # Older Altair
    state_selection = alt.selection_single(
        fields=["state"], bind=state_dropdown, init={"state": "CA"}
    )

# 4. CHART DEFINITIONS

# Chart A: Total Funding (Line)
q5_amount_line = (
    alt.Chart(q5_grants)
    .mark_line(point=True, color="#4c78a8")  # Standard Blue
    .encode(
        x=alt.X("year:O", title="Year (last 5 years)"),
        y=alt.Y(
            "total_amount:Q", title="Total Funding ($)", axis=alt.Axis(format="~s")
        ),
        tooltip=[
            alt.Tooltip("state:N"),
            alt.Tooltip("year:O"),
            alt.Tooltip("total_amount:Q", title="Total funding ($)", format=",.0f"),
            alt.Tooltip("grants_count:Q", title="Number of grants"),
        ],
    )
    .add_selection(state_selection)  # <--- Selection added here
    .transform_filter(state_selection)
    .properties(
        width=750, height=200, title="Q5 — Selected State: Total Funding Evolution"
    )
)

# Chart B: Number of Grants (Bar)
q5_count_bar = (
    alt.Chart(q5_grants)
    .mark_bar(color="#72b7b2")  # Teal
    .encode(
        x=alt.X("year:O", title="Year (last 5 years)"),
        y=alt.Y("grants_count:Q", title="Number of grants"),
        tooltip=[
            alt.Tooltip("state:N"),
            alt.Tooltip("year:O"),
            alt.Tooltip("grants_count:Q", title="Number of grants"),
            alt.Tooltip("total_amount:Q", title="Total funding ($)", format=",.0f"),
        ],
    )
    .transform_filter(state_selection)  # Listens to the same selection
    .properties(width=750, height=150, title="Grant Count Evolution")
)

# Chart C: Cancelled Grants (Bar - Red)
q5_cancelled = (
    alt.Chart(q5_trump)
    .mark_bar(color="#e45756")  # Red
    .encode(
        x=alt.X("year:O", title="Year (Trump era)"),
        y=alt.Y("cancelled_count:Q", title="Cancelled grants"),
        tooltip=[
            alt.Tooltip("state:N"),
            alt.Tooltip("year:O"),
            alt.Tooltip("cancelled_count:Q", title="Cancelled grants"),
            alt.Tooltip(
                "cancelled_amount:Q", title="Cancelled amount ($)", format=",.0f"
            ),
        ],
    )
    .transform_filter(state_selection)  # Listens to the same selection
    .properties(width=750, height=150, title="Trump Era (2017–2021): Cancelled Grants")
)

# 5. ASSEMBLE
final_q5 = q5_amount_line & q5_count_bar & q5_cancelled

final_q5

Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(state_selection)  # <--- Selection added here


In [None]:
import altair as alt
import pandas as pd

alt.data_transformers.enable("default")

q5_grants = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

q5_trump = (
    df_trump.groupby(["state", "year"])
    .agg(cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum"))
    .reset_index()
)

states = sorted(q5_grants["state"].dropna().unique())
state_dropdown = alt.binding_select(options=states, name="Select State: ")

min_year = int(q5_grants["year"].min())
max_year = int(q5_grants["year"].max())
year_slider = alt.binding_range(min=min_year, max=max_year, step=1, name="Select Year: ")

try:
    state_sel = alt.selection_point(fields=["state"], bind=state_dropdown, value=[{"state": "CA"}])
    year_sel = alt.selection_point(fields=["year"], bind=year_slider, value=[{"year": max_year}])
except AttributeError:
    state_sel = alt.selection_single(fields=["state"], bind=state_dropdown, init={"state": "CA"})
    year_sel = alt.selection_single(fields=["year"], bind=year_slider, init={"year": max_year})

# LEFT BIG

W_BIG, H_BIG = 560, 420
W_SMALL = 340

funding_line = (
    alt.Chart(q5_grants)
    .mark_line(point=True, strokeWidth=3, color="#4c78a8")
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("total_amount:Q", title="Total Funding ($)", axis=alt.Axis(format="~s")),
        tooltip=[
            alt.Tooltip("state:N"),
            alt.Tooltip("year:O"),
            alt.Tooltip("total_amount:Q", title="Total funding", format="$,.0f"),
            alt.Tooltip("grants_count:Q", title="Grants"),
        ],
    )
    .transform_filter(state_sel)
    .properties(width=W_BIG, height=H_BIG, title="Q5 — Funding evolution (selected state)")
)

# highlight selected year on the line
funding_highlight = (
    alt.Chart(q5_grants)
    .mark_point(size=180, filled=True, color="#1f77b4")
    .encode(x="year:O", y="total_amount:Q")
    .transform_filter(state_sel)
    .transform_filter(year_sel)
)

left = (funding_line + funding_highlight).add_params(state_sel, year_sel)

# 4) RIGHT TOP: KPI

kpi_base = alt.Chart(q5_grants).transform_filter(state_sel).transform_filter(year_sel)

kpi_label_1 = kpi_base.mark_text(align="center", color="#888", fontSize=13, dy=-18).encode(
    x=alt.value(170), y=alt.value(45), text=alt.value("Total Funding (Selected Year)")
)
kpi_value_1 = kpi_base.mark_text(align="center", color="#333", fontSize=22, fontWeight="bold", dy=8).encode(
    x=alt.value(170), y=alt.value(45), text=alt.Text("total_amount:Q", format="$,.0f")
)

kpi_label_2 = kpi_base.mark_text(align="center", color="#888", fontSize=13, dy=-18).encode(
    x=alt.value(170), y=alt.value(95), text=alt.value("Grants Count (Selected Year)")
)
kpi_value_2 = kpi_base.mark_text(align="center", color="#333", fontSize=20, fontWeight="bold", dy=8).encode(
    x=alt.value(170), y=alt.value(95), text=alt.Text("grants_count:Q", format=",.0f")
)

kpi_year = (kpi_label_1 + kpi_value_1 + kpi_label_2 + kpi_value_2).properties(width=W_SMALL, height=135)

# RIGHT MID:

count_chart = (
    alt.Chart(q5_grants)
    .mark_bar(color="#72b7b2")
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("grants_count:Q", title="Grants"),
        tooltip=[alt.Tooltip("year:O"), alt.Tooltip("grants_count:Q", title="Grants")],
        opacity=alt.condition(year_sel, alt.value(1.0), alt.value(0.35))
    )
    .transform_filter(state_sel)
    .properties(width=W_SMALL, height=150, title="Grant count (selected state)")
)

# RIGHT BOTTOM: Trump-era cancellations 

cancel_chart = (
    alt.Chart(q5_trump)
    .mark_bar(color="#e45756")
    .encode(
        x=alt.X("year:O", title="Year (Trump era)"),
        y=alt.Y("cancelled_count:Q", title="Cancelled grants"),
        tooltip=[
            alt.Tooltip("year:O"),
            alt.Tooltip("cancelled_count:Q", title="Cancelled grants"),
            alt.Tooltip("cancelled_amount:Q", title="Lost funding", format="$,.0f"),
        ],
    )
    .transform_filter(state_sel)
    .properties(width=W_SMALL, height=160, title="Trump era cancellations (2017–2021)")
)

cancel_kpi_base = alt.Chart(q5_trump).transform_filter(state_sel)

cancel_label = cancel_kpi_base.mark_text(align="center", color="#888", fontSize=13, dy=-18).encode(
    x=alt.value(170), y=alt.value(55), text=alt.value("Total Lost Funding (Trump era)")
)
cancel_value = cancel_kpi_base.mark_text(align="center", color="#333", fontSize=22, fontWeight="bold", dy=8).encode(
    x=alt.value(170), y=alt.value(55), text=alt.Text("sum(cancelled_amount):Q", format="$,.0f")
)

cancel_kpi = (cancel_label + cancel_value).properties(width=W_SMALL, height=95)

# Assemble (dashboard layout)

right = alt.vconcat(kpi_year, count_chart, cancel_chart, cancel_kpi, spacing=12)

final_q5 = (
    (left | right)
    .configure_view(stroke=None)
    .configure_concat(spacing=24)
)

final_q5


In [124]:
import altair as alt
import pandas as pd

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA PREPARATION
q5_grants = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

q5_trump = (
    df_trump.groupby(["state", "year"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)

# 2. SELECTION
states = sorted(q5_grants["state"].dropna().unique())
state_input = alt.binding_select(options=states, name="Select State: ")

try:
    state_select = alt.selection_point(
        fields=["state"], bind=state_input, value=[{"state": "CA"}]
    )
except AttributeError:
    # Fallback for older Altair versions
    state_select = alt.selection_single(
        fields=["state"], bind=state_input, init={"state": "CA"}
    )

# 3. TOP CHART: EVOLUTION (Dual Axis)
# Note: We use .transform_filter(state_select) here, but we DO NOT add .add_params() yet.
base_evolution = (
    alt.Chart(q5_grants)
    .transform_filter(state_select)
    .encode(x=alt.X("year:O", title=None))
)

# Layer A: Grant Count (Bars)
bar_vol = base_evolution.mark_bar(color="#9ecae1", opacity=0.6).encode(
    y=alt.Y(
        "grants_count:Q", title="Number of Grants", axis=alt.Axis(titleColor="#6baed6")
    ),
    tooltip=["year", "grants_count"],
)

# Layer B: Total Funding (Line)
line_val = base_evolution.mark_line(color="#08519c", strokeWidth=3, point=True).encode(
    y=alt.Y(
        "total_amount:Q",
        title="Total Funding ($)",
        axis=alt.Axis(format="~s", titleColor="#08519c"),
    ),
    tooltip=["year", alt.Tooltip("total_amount", format="$,.0f")],
)

# Combine layers
evolution_chart = (
    alt.layer(bar_vol, line_val)
    .resolve_scale(y="independent")
    .properties(width=600, height=250, title="Q5: State Evolution (Volume vs. Value)")
)

# 4. BOTTOM CHART: CANCELLATIONS
# Again, just filtering, no add_params
cancel_chart = (
    alt.Chart(q5_trump)
    .mark_bar(color="#de2d26")
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("cancelled_count:Q", title="Cancelled Grants"),
        tooltip=[
            alt.Tooltip("year:O"),
            alt.Tooltip("cancelled_count:Q", title="Cancelled Count"),
            alt.Tooltip("cancelled_amount:Q", title="Lost Funding", format="$,.0f"),
        ],
    )
    .transform_filter(state_select)
    .properties(width=600, height=120, title="Impact: Cancelled Grants (Trump Era)")
)

# 5. ASSEMBLE
# ✅ CRITICAL FIX: Add the parameter ONCE to the final concatenated object
final_q5 = (
    alt.vconcat(evolution_chart, cancel_chart)
    .add_params(state_select)
    .configure_concat(spacing=5)
    .configure_view(stroke=None)
)

final_q5

In [126]:
import altair as alt
import pandas as pd
import itertools

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA AGGREGATION
# A. Grants (Base)
q5_grants = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

# B. Trump Cancellations
q5_trump_agg = (
    df_trump.groupby(["state", "year"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)

# 2. CREATE MASTER TIMELINE (The Fix)
# We need every combination of State + Year from BOTH datasets to ensure no year is dropped.

# Get all unique states
all_states = pd.concat([q5_grants["state"], q5_trump_agg["state"]]).unique()

# Get all unique years (Union of Grants years + Trump years)
all_years = pd.concat([q5_grants["year"], q5_trump_agg["year"]]).unique()

# Create a scaffold (All States x All Years)
# This ensures that if 2019 exists in Trump but not Grants, it's still in the chart.
master_rows = list(itertools.product(all_states, all_years))
q5_master = pd.DataFrame(master_rows, columns=["state", "year"])

# 3. MERGE DATA ONTO MASTER
# Merge Grants Data
q5_master = q5_master.merge(q5_grants, on=["state", "year"], how="left")

# Merge Trump Data
q5_master = q5_master.merge(
    q5_trump_agg, on=["state", "year"], how="left", suffixes=("_grant", "_cancel")
)

# Fill NaNs with 0 so the lines/bars draw continuously
q5_master = q5_master.fillna(0)


# 4. SELECTION
# Use the master list of states
states_list = sorted(all_states)
state_input = alt.binding_select(options=states_list, name="Select State: ")

try:
    state_select = alt.selection_point(
        fields=["state"], bind=state_input, value=[{"state": "CA"}]
    )
except AttributeError:
    state_select = alt.selection_single(
        fields=["state"], bind=state_input, init={"state": "CA"}
    )


# 5. TOP CHART: EVOLUTION (Dual Axis)
base_evolution = (
    alt.Chart(q5_master)
    .transform_filter(state_select)
    .encode(x=alt.X("year:O", title=None))
)

# Layer A: Grant Count (Bars)
bar_vol = base_evolution.mark_bar(color="#9ecae1", opacity=0.6).encode(
    y=alt.Y(
        "grants_count:Q", title="Number of Grants", axis=alt.Axis(titleColor="#6baed6")
    ),
    tooltip=["year", "grants_count"],
)

# Layer B: Total Funding (Line)
line_val = base_evolution.mark_line(color="#08519c", strokeWidth=3, point=True).encode(
    y=alt.Y(
        "total_amount:Q",
        title="Total Funding ($)",
        axis=alt.Axis(format="~s", titleColor="#08519c"),
    ),
    tooltip=["year", alt.Tooltip("total_amount", format="$,.0f")],
)

evolution_chart = (
    alt.layer(bar_vol, line_val)
    .resolve_scale(y="independent")
    .properties(width=600, height=250, title="Q5: State Evolution (Volume vs. Value)")
)


# 6. BOTTOM CHART: CANCELLATIONS (Symmetrical Timeline)
# Now uses the SAME q5_master dataset, so the X-axis is identical
cancel_chart = (
    alt.Chart(q5_master)
    .mark_bar(color="#de2d26")
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("cancelled_count:Q", title="Cancelled Grants"),
        tooltip=[
            alt.Tooltip("year:O"),
            alt.Tooltip("cancelled_count:Q", title="Cancelled Count"),
            alt.Tooltip("cancelled_amount:Q", title="Lost Funding", format="$,.0f"),
        ],
    )
    .transform_filter(state_select)
    .properties(width=600, height=120, title="Impact: Cancelled Grants (Trump Era)")
)


# 7. ASSEMBLE
final_q5 = (
    alt.vconcat(evolution_chart, cancel_chart)
    .add_params(state_select)
    .configure_concat(spacing=5)
    .configure_view(stroke=None)
)

final_q5

In [127]:
import altair as alt
import pandas as pd
import itertools

# 0. SETUP
alt.data_transformers.enable("default")

# 1. DATA AGGREGATION
# A. Grants (Base)
q5_grants = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)

# B. Trump Cancellations
q5_trump_agg = (
    df_trump.groupby(["state", "year"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)

# 2. CREATE MASTER TIMELINE
# Ensure we have rows for every year (2017-2024), even if data is missing in one file
all_states = pd.concat([q5_grants["state"], q5_trump_agg["state"]]).unique()
all_years = pd.concat([q5_grants["year"], q5_trump_agg["year"]]).unique()

master_rows = list(itertools.product(all_states, all_years))
q5_master = pd.DataFrame(master_rows, columns=["state", "year"])

# Merge Grants Data
q5_master = q5_master.merge(q5_grants, on=["state", "year"], how="left")

# Merge Trump Data
q5_master = q5_master.merge(q5_trump_agg, on=["state", "year"], how="left")

# Fill NaNs with 0
q5_master = q5_master.fillna(0)


# 3. SELECTION
states_list = sorted(all_states)
state_input = alt.binding_select(options=states_list, name="Select State: ")

try:
    state_select = alt.selection_point(
        fields=["state"], bind=state_input, value=[{"state": "CA"}]
    )
except AttributeError:
    state_select = alt.selection_single(
        fields=["state"], bind=state_input, init={"state": "CA"}
    )


# 4. TOP CHART: EVOLUTION (Blue)
# Bars = Volume (Count), Line = Value ($)
base_evolution = (
    alt.Chart(q5_master)
    .transform_filter(state_select)
    .encode(x=alt.X("year:O", title=None))
)

bar_vol = base_evolution.mark_bar(color="#9ecae1", opacity=0.6).encode(
    y=alt.Y(
        "grants_count:Q", title="Number of Grants", axis=alt.Axis(titleColor="#6baed6")
    ),
    tooltip=["year", "grants_count"],
)

line_val = base_evolution.mark_line(color="#08519c", strokeWidth=3, point=True).encode(
    y=alt.Y(
        "total_amount:Q",
        title="Total Funding ($)",
        axis=alt.Axis(format="~s", titleColor="#08519c"),
    ),
    tooltip=["year", alt.Tooltip("total_amount", format="$,.0f")],
)

evolution_chart = (
    alt.layer(bar_vol, line_val)
    .resolve_scale(y="independent")
    .properties(width=600, height=250, title="Q5: State Evolution (Volume vs. Value)")
)


# 5. BOTTOM CHART: IMPACT (Red)
# Bars = Volume (Cancelled Count), Line = Value (Lost Funding)
base_cancel = (
    alt.Chart(q5_master)
    .transform_filter(state_select)
    .encode(x=alt.X("year:O", title="Year"))
)

# Layer A: Cancelled Count (Bars - Light Red)
cancel_bar = base_cancel.mark_bar(color="#fc9272", opacity=0.6).encode(
    y=alt.Y(
        "cancelled_count:Q",
        title="Cancelled Grants",
        axis=alt.Axis(titleColor="#fc9272"),
    ),
    tooltip=["year", "cancelled_count"],
)

# Layer B: Lost Funding (Line - Dark Red)
cancel_line = base_cancel.mark_line(color="#de2d26", strokeWidth=3, point=True).encode(
    y=alt.Y(
        "cancelled_amount:Q",
        title="Lost Funding ($)",
        axis=alt.Axis(format="~s", titleColor="#de2d26"),
    ),
    tooltip=["year", alt.Tooltip("cancelled_amount", format="$,.0f")],
)

cancel_chart = (
    alt.layer(cancel_bar, cancel_line)
    .resolve_scale(y="independent")
    .properties(width=600, height=150, title="Impact: Cancellations & Lost Funding")
)


# 6. ASSEMBLE
final_q5 = (
    alt.vconcat(evolution_chart, cancel_chart)
    .add_params(state_select)
    .configure_concat(spacing=5)
    .configure_view(stroke=None)
)

final_q5

To provide a comprehensive view of state-level funding evolution, I designed a **vertically stacked, dual-axis dashboard**. This layout enables a direct 'cause-and-effect' comparison between the funding ecosystem (Top) and the cancellation impact (Bottom) on a synchronized timeline.

Both charts utilize a **Dual-Axis approach** to combine **Volume** (Bars: Grant Counts) and **Value** (Lines: Funding Amount). This is critical because a drop in grant volume doesn't always equal a drop in funding; separating these metrics reveals the true texture of the data.

A key technical decision was constructing a '**Master Timeline**' (2017–2024) that merges data from both the active grants and cancellation datasets. This ensures visual continuity, allowing users to see exactly where data is missing or where activity dips to zero, rather than having misleading gaps. The result is a rigorous, symmetrical profile that instantly contextualizes the 'Trump Era' cancellations against the broader backdrop of state funding.

## Q6: Select some attribute that has not been mentioned previously (e.g., party governing, population of the state, number of funded institutions in the state…), and let the user interactively explore the information around the attribute to get insights.

For Question 6, state population was selected as an additional attribute not previously used in the analysis. Population is a meaningful contextual variable that enables deeper exploration beyond absolute grant counts or total funding amounts. By relating funding to population size, users can investigate whether certain states receive disproportionately high or low levels of funding relative to their population, revealing patterns that are not visible through raw totals alone.

This attribute supports an analysis by enabling per capita comparisons, outlier detection, and interactive investigation of funding efficiency across states and years. It integrates naturally with the existing state based aggregations used in earlier questions.

In [48]:
import pandas as pd
import altair as alt

df_pop_raw = pd.read_csv("estimated_population.csv")
df_abbr_raw = pd.read_csv("state_abbreviations.csv")

# Clean column names
df_pop_raw.columns = df_pop_raw.columns.str.strip()
df_abbr_raw.columns = df_abbr_raw.columns.str.strip()

# Ensure we have a 'state' column (full names like Alabama, Alaska, ...)
if "state" not in df_pop_raw.columns:
    raise ValueError(f"estimated_population.csv must have a 'state' column. Found: {list(df_pop_raw.columns)}")

pop_cols = [c for c in df_pop_raw.columns if c.lower().startswith("pop_")]
if not pop_cols:
    raise ValueError(f"Could not find pop_YYYY columns. Found: {list(df_pop_raw.columns)}")

df_pop_long = df_pop_raw.melt(
    id_vars=["state"],
    value_vars=pop_cols,
    var_name="year",
    value_name="population"
)

# Convert year from 'pop_2020' -> 2020
df_pop_long["year"] = df_pop_long["year"].str.replace("pop_", "", regex=False).astype(int)

# Convert population to numeric
df_pop_long["population"] = pd.to_numeric(df_pop_long["population"], errors="coerce")

# Keep only 2020-2024 (safety)
df_pop_long = df_pop_long[df_pop_long["year"].between(2020, 2024)]

# Standardize state name
df_pop_long = df_pop_long.rename(columns={"state": "state_name"})
df_pop_long["state_name"] = df_pop_long["state_name"].astype(str).str.strip()

df_abbr = df_abbr_raw.copy()

# Detect likely columns for state name and abbreviation
name_candidates = [c for c in df_abbr.columns if "name" in c.lower() or ("state" in c.lower() and "abbr" not in c.lower())]
abbr_candidates = [c for c in df_abbr.columns if "abbr" in c.lower() or "code" in c.lower()]

if not name_candidates or not abbr_candidates:
    raise ValueError(
        "state_abbreviations.csv must contain columns for full state name and abbreviation.\n"
        f"Columns found: {list(df_abbr.columns)}"
    )

name_col = name_candidates[0]
abbr_col = abbr_candidates[0]

df_abbr = df_abbr.rename(columns={name_col: "state_name", abbr_col: "state"})
df_abbr["state_name"] = df_abbr["state_name"].astype(str).str.strip()
df_abbr["state"] = df_abbr["state"].astype(str).str.strip()

# Normalize case (helps joins)
df_abbr["state_name_key"] = df_abbr["state_name"].str.lower()
df_pop_long["state_name_key"] = df_pop_long["state_name"].str.lower()

# Join to add 2-letter codes
df_pop_long = df_pop_long.merge(
    df_abbr[["state_name_key", "state"]],
    on="state_name_key",
    how="left"
)

# Debug unmapped names
unmapped = df_pop_long[df_pop_long["state"].isna()]["state_name"].dropna().unique()
print("Unmapped population state names (should be empty):", unmapped[:20], " ... total:", len(unmapped))

# Keep only mapped rows + required cols
df_pop_long = df_pop_long.dropna(subset=["state", "population"])
df_pop_long = df_pop_long[["state", "year", "population"]].copy()

print("Population long shape:", df_pop_long.shape)
print("Population states:", df_pop_long["state"].nunique(), "Years:", sorted(df_pop_long["year"].unique()))

# sanity: ensure expected columns exist
required_cols = {"state", "year", "award_amount", "award_id"}
missing = required_cols - set(df_grants.columns)
if missing:
    raise ValueError(f"df_grants missing required columns: {missing}. Found: {list(df_grants.columns)}")

# Ensure year numeric
df_grants["year"] = pd.to_numeric(df_grants["year"], errors="coerce").astype("Int64")

q6_grants = (
    df_grants
    .dropna(subset=["state", "year", "award_amount"])
    .groupby(["state", "year"])
    .agg(
        total_amount=("award_amount", "sum"),
        grants_count=("award_id", "count")
    )
    .reset_index()
)

print("NSF aggregated shape:", q6_grants.shape)
print("NSF states:", q6_grants["state"].nunique(), "Years:", sorted(q6_grants["year"].unique()))

q6_df = q6_grants.merge(df_pop_long, on=["state", "year"], how="inner")
q6_df["funding_per_capita"] = q6_df["total_amount"] / q6_df["population"]

print("Merged q6_df shape:", q6_df.shape)
print("Merged states:", q6_df["state"].nunique(), "Years:", sorted(q6_df["year"].unique()))

# If still empty, show mismatch hints
if q6_df.empty:
    print("\nq6_df is EMPTY. Debug hints:")
    print("Sample NSF states:", sorted(q6_grants["state"].unique())[:15])
    print("Sample POP states:", sorted(df_pop_long["state"].unique())[:15])
    print("Sample NSF years:", sorted(q6_grants["year"].unique()))
    print("Sample POP years:", sorted(df_pop_long["year"].unique()))
    raise ValueError("Merge produced empty q6_df. See debug hints above.")

year_selection = alt.selection_point(
    fields=["year"],
    bind=alt.binding_select(options=sorted(q6_df["year"].unique()), name="Year: "),
    value=max(q6_df["year"].unique())
)

state_click = alt.selection_point(fields=["state"], empty="all")

q6_overview = (
    alt.Chart(q6_df)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title="State"),
        y=alt.Y("funding_per_capita:Q", title="Funding per capita ($)", axis=alt.Axis(format="~s")),
        color=alt.condition(
            state_click,
            alt.Color("funding_per_capita:Q", scale=alt.Scale(scheme="purples"), title="Funding per capita"),
            alt.value("lightgray")
        ),
        tooltip=[
            alt.Tooltip("state:N", title="State"),
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("population:Q", title="Population", format=",.0f"),
            alt.Tooltip("total_amount:Q", title="Total funding ($)", format=",.0f"),
            alt.Tooltip("funding_per_capita:Q", title="Funding per capita ($)", format=",.2f"),
            alt.Tooltip("grants_count:Q", title="Grants count"),
        ]
    )
    .add_params(year_selection, state_click)
    .transform_filter(year_selection)
    .properties(width=750, height=380, title="Q6 — NSF funding per capita by state (select year + click a state)")
)



q6_trend = (
    alt.Chart(q6_df)
    .mark_line(point=True)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("funding_per_capita:Q", title="Funding per capita ($)", axis=alt.Axis(format="~s")),
        tooltip=[
            alt.Tooltip("state:N", title="State"),
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("funding_per_capita:Q", title="Funding per capita ($)", format=",.2f"),
            alt.Tooltip("total_amount:Q", title="Total funding ($)", format=",.0f"),
            alt.Tooltip("population:Q", title="Population", format=",.0f"),
            alt.Tooltip("grants_count:Q", title="Grants count"),
        ]
    )
    .transform_filter(state_click)
    .properties(width=750, height=200, title="Selected state — funding per capita over time (2020–2024)")
)

(q6_overview & q6_trend)



Unmapped population state names (should be empty): []  ... total: 0
Population long shape: (255, 3)
Population states: 51 Years: [2020, 2021, 2022, 2023, 2024]
NSF aggregated shape: (231, 4)
NSF states: 52 Years: [2020, 2021, 2022, 2023, 2024]
Merged q6_df shape: (222, 6)
Merged states: 50 Years: [2020, 2021, 2022, 2023, 2024]


In [None]:
# Q6
import pandas as pd
import altair as alt

alt.data_transformers.enable("default")


df_pop_raw = pd.read_csv("estimated_population.csv")
df_abbr_raw = pd.read_csv("state_abbreviations.csv")

df_pop_raw.columns = df_pop_raw.columns.str.strip()
df_abbr_raw.columns = df_abbr_raw.columns.str.strip()

pop_cols = [c for c in df_pop_raw.columns if c.lower().startswith("pop_")]
if "state" not in df_pop_raw.columns or not pop_cols:
    raise ValueError(
        "estimated_population.csv must have 'state' + columns like pop_2020..pop_2024. "
        f"Found: {list(df_pop_raw.columns)}"
    )

df_pop_long = df_pop_raw.melt(
    id_vars=["state"],
    value_vars=pop_cols,
    var_name="year",
    value_name="population"
)
df_pop_long["year"] = df_pop_long["year"].str.replace("pop_", "", regex=False).astype(int)
df_pop_long["population"] = pd.to_numeric(df_pop_long["population"], errors="coerce")
df_pop_long = df_pop_long[df_pop_long["year"].between(2020, 2024)].copy()

df_pop_long = df_pop_long.rename(columns={"state": "state_name"})
df_pop_long["state_name"] = df_pop_long["state_name"].astype(str).str.strip()

# detect columns in abbreviations
df_abbr = df_abbr_raw.copy()
name_candidates = [c for c in df_abbr.columns if "name" in c.lower() or (c.lower() == "state")]
abbr_candidates = [c for c in df_abbr.columns if "abbr" in c.lower() or "code" in c.lower()]

if not name_candidates or not abbr_candidates:
    raise ValueError(
        "state_abbreviations.csv must contain full state-name + abbreviation columns.\n"
        f"Columns found: {list(df_abbr.columns)}"
    )

name_col = name_candidates[0]
abbr_col = abbr_candidates[0]

df_abbr = df_abbr.rename(columns={name_col: "state_name", abbr_col: "state"})
df_abbr["state_name"] = df_abbr["state_name"].astype(str).str.strip()
df_abbr["state"] = df_abbr["state"].astype(str).str.strip()

df_abbr["state_name_key"] = df_abbr["state_name"].str.lower()
df_pop_long["state_name_key"] = df_pop_long["state_name"].str.lower()

df_pop_long = df_pop_long.merge(
    df_abbr[["state_name_key", "state"]],
    on="state_name_key",
    how="left"
)

df_pop_long = df_pop_long.dropna(subset=["state", "population"])
df_pop_long = df_pop_long[["state", "year", "population"]].copy()

# NSF AGGREGATION 

required_cols = {"state", "year", "award_amount", "award_id"}
missing = required_cols - set(df_grants.columns)
if missing:
    raise ValueError(f"df_grants missing required columns: {missing}. Found: {list(df_grants.columns)}")

df_grants["year"] = pd.to_numeric(df_grants["year"], errors="coerce").astype(int)

q6_grants = (
    df_grants.dropna(subset=["state", "year", "award_amount"])
    .groupby(["state", "year"])
    .agg(
        total_amount=("award_amount", "sum"),
        grants_count=("award_id", "count"),
    )
    .reset_index()
)

q6_df = q6_grants.merge(df_pop_long, on=["state", "year"], how="inner")
q6_df["funding_per_capita"] = q6_df["total_amount"] / q6_df["population"]

if q6_df.empty:
    raise ValueError("q6_df is empty after merge. Check state mapping + years.")

min_year = int(q6_df["year"].min())
max_year = int(q6_df["year"].max())

slider = alt.binding_range(min=min_year, max=max_year, step=1, name="Select Year: ")

try:
    year_select = alt.selection_point(
        name="year_select", fields=["year"], bind=slider, value=[{"year": max_year}]
    )
    # allow empty selection so right panel can be blank
    state_select = alt.selection_point(
        name="state_select", fields=["state"], empty="all", on="click", clear="dblclick"
    )
except AttributeError:
    year_select = alt.selection_single(
        name="year_select", fields=["year"], bind=slider, init={"year": max_year}
    )
    state_select = alt.selection_single(fields=["state"], empty="all")

# LEFT bars
bars = (
    alt.Chart(q6_df)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title="State"),
        y=alt.Y("funding_per_capita:Q", title="Funding per capita ($/person)", axis=alt.Axis(format=",.2f")),
        color=alt.Color("funding_per_capita:Q", scale=alt.Scale(scheme="purples"), legend=None),
        stroke=alt.condition(state_select, alt.value("black"), alt.value(None)),
        strokeWidth=alt.condition(state_select, alt.value(1.5), alt.value(0)),
        tooltip=[
            alt.Tooltip("state:N", title="State"),
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("population:Q", title="Population", format=",.0f"),
            alt.Tooltip("total_amount:Q", title="Total NSF funding", format="$,.0f"),
            alt.Tooltip("funding_per_capita:Q", title="Funding per capita", format="$,.2f"),
            alt.Tooltip("grants_count:Q", title="Grants count"),
        ],
    )
    .transform_filter(year_select)
    .add_params(year_select, state_select)
    .properties(width=560, height=420, title="Q6 — Funding per capita by state (click a bar)")
)

# RIGHT panel

history = (
    alt.Chart(q6_df)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("funding_per_capita:Q", title="$/person", axis=alt.Axis(format=",.2f")),
        color=alt.value("#6a3d9a"),
        tooltip=[
            alt.Tooltip("state:N", title="State"),
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("funding_per_capita:Q", title="Funding per capita", format="$,.2f"),
            alt.Tooltip("total_amount:Q", title="Total NSF funding", format="$,.0f"),
            alt.Tooltip("population:Q", title="Population", format=",.0f"),
            alt.Tooltip("grants_count:Q", title="Grants count"),
        ],
    )
    .transform_filter(state_select)
    .properties(width=340, height=200, title="History (selected state)")
)

# KPI block 
kpi_base = alt.Chart(q6_df).transform_filter(year_select).transform_filter(state_select)

def kpi(label_text, expr, fmt, y):
    lbl = kpi_base.mark_text(align="center", color="#888", fontSize=13, dy=-10).encode(
        text=alt.value(label_text), x=alt.value(170), y=alt.value(y)
    )
    val = kpi_base.mark_text(align="center", color="#333", fontSize=22, fontWeight="bold", dy=12).encode(
        text=alt.Text(expr, format=fmt), x=alt.value(170), y=alt.value(y)
    )
    return lbl + val

kpi_panel = (
    alt.Chart(pd.DataFrame({"x":[0]}))
    .mark_rect(opacity=0)
    .encode()
    .properties(width=340, height=220)
    + kpi("Funding per capita (selected year)", "mean(funding_per_capita):Q", "$,.2f", y=60)
    + kpi("Total NSF funding (selected year)", "sum(total_amount):Q", "$,.0f", y=135)
    + kpi("Population (selected year)", "mean(population):Q", ",.0f", y=210)
)

# Placeholder text when no state is selected
placeholder = (
    alt.Chart(pd.DataFrame())
    .mark_text(align="center", color="#999", fontSize=14)
    .encode(text="msg:N", x=alt.value(170), y=alt.value(120))
    .properties(width=340, height=200, title="History (selected state)")
)

# Show placeholder when empty selection; otherwise show history
right_top = alt.layer(
    placeholder.transform_filter(~state_select),
    history.transform_filter(state_select),
)

# For KPIs: show a light placeholder block when empty
kpi_placeholder = (
    alt.Chart(pd.DataFrame())
    .mark_text(align="center", color="#bbb", fontSize=13)
    .encode(text="msg:N", x=alt.value(170), y=alt.value(110))
    .properties(width=340, height=220)
)

right_bottom = alt.layer(
    kpi_placeholder.transform_filter(~state_select),
    kpi_panel.transform_filter(state_select),
)

right_col = right_top & right_bottom


final_q6 = (
    (bars | right_col)
    .resolve_scale(color="independent")
    .configure_view(stroke=None)
    .configure_concat(spacing=18)
)

final_q6


In [129]:
import pandas as pd
import altair as alt

alt.data_transformers.enable("default")

# 1. LOAD & CLEAN
df_pop_raw = pd.read_csv("estimated_population.csv")
df_abbr_raw = pd.read_csv("state_abbreviations.csv")

# Clean cols
df_pop_raw.columns = df_pop_raw.columns.str.strip()
df_abbr_raw.columns = df_abbr_raw.columns.str.strip()

# Melt Population
pop_cols = [c for c in df_pop_raw.columns if c.lower().startswith("pop_")]
df_pop_long = df_pop_raw.melt(
    id_vars=["state"], value_vars=pop_cols, var_name="year", value_name="population"
)
df_pop_long["year"] = (
    df_pop_long["year"].str.replace("pop_", "", regex=False).astype(int)
)
df_pop_long["population"] = pd.to_numeric(df_pop_long["population"], errors="coerce")
df_pop_long = df_pop_long[df_pop_long["year"].between(2020, 2024)].copy()
df_pop_long = df_pop_long.rename(columns={"state": "state_name"})
df_pop_long["state_name"] = df_pop_long["state_name"].astype(str).str.strip()

# Clean Abbreviations
df_abbr = df_abbr_raw.copy()
name_col = [
    c for c in df_abbr.columns if "name" in c.lower() or (c.lower() == "state")
][0]
abbr_col = [c for c in df_abbr.columns if "abbr" in c.lower() or "code" in c.lower()][0]
df_abbr = df_abbr.rename(columns={name_col: "state_name", abbr_col: "state"})
df_abbr["state_name"] = df_abbr["state_name"].astype(str).str.strip()
df_abbr["state"] = df_abbr["state"].astype(str).str.strip()

# Merge Pop + Abbr
df_abbr["state_name_key"] = df_abbr["state_name"].str.lower()
df_pop_long["state_name_key"] = df_pop_long["state_name"].str.lower()
df_pop_long = df_pop_long.merge(
    df_abbr[["state_name_key", "state"]], on="state_name_key", how="left"
)
df_pop_long = df_pop_long.dropna(subset=["state", "population"])
df_pop_long = df_pop_long[["state", "year", "population"]].copy()

# NSF Data Prep
df_grants["year"] = pd.to_numeric(df_grants["year"], errors="coerce").astype(int)
q6_grants = (
    df_grants.dropna(subset=["state", "year", "award_amount"])
    .groupby(["state", "year"])
    .agg(total_amount=("award_amount", "sum"), grants_count=("award_id", "count"))
    .reset_index()
)

# Merge All
q6_df = q6_grants.merge(df_pop_long, on=["state", "year"], how="inner")
q6_df["funding_per_capita"] = q6_df["total_amount"] / q6_df["population"]

# CALCULATE NATIONAL AVERAGES
us_avg = q6_df.groupby("year")["funding_per_capita"].mean().reset_index()
us_avg = us_avg.rename(columns={"funding_per_capita": "us_avg_per_capita"})
q6_df = q6_df.merge(us_avg, on="year", how="left")


# 2. INTERACTION SETUP
min_year = int(q6_df["year"].min())
max_year = int(q6_df["year"].max())

slider = alt.binding_range(min=min_year, max=max_year, step=1, name="Select Year: ")

try:
    year_select = alt.selection_point(
        name="year_select", fields=["year"], bind=slider, value=[{"year": max_year}]
    )
    state_select = alt.selection_point(
        name="state_select", fields=["state"], empty="all", on="click", clear="dblclick"
    )
except AttributeError:
    year_select = alt.selection_single(
        name="year_select", fields=["year"], bind=slider, init={"year": max_year}
    )
    state_select = alt.selection_single(fields=["state"], empty="all")


# 3. LEFT CHART: SCATTER PLOT
base_scatter = alt.Chart(q6_df).transform_filter(year_select)

points = (
    base_scatter.mark_circle(size=120, opacity=0.8, stroke="white", strokeWidth=1)
    .encode(
        x=alt.X("population:Q", title="State Population", axis=alt.Axis(format="~s")),
        y=alt.Y(
            "funding_per_capita:Q",
            title="Funding Per Capita ($)",
            axis=alt.Axis(format="$,.0f"),
        ),
        color=alt.condition(
            state_select,
            alt.Color(
                "funding_per_capita:Q", scale=alt.Scale(scheme="viridis"), legend=None
            ),
            alt.value("lightgray"),
        ),
        size=alt.condition(state_select, alt.value(150), alt.value(80)),
        tooltip=[
            alt.Tooltip("state:N", title="State"),
            alt.Tooltip("population:Q", format=",.0f"),
            alt.Tooltip("total_amount:Q", format="$,.0f", title="Total Funding"),
            alt.Tooltip("funding_per_capita:Q", format="$,.2f", title="Per Capita"),
        ],
    )
    .add_params(state_select, year_select)
)

rule = base_scatter.mark_rule(color="red", strokeDash=[5, 5], size=2).encode(
    y="mean(us_avg_per_capita):Q",
    tooltip=[
        alt.Tooltip(
            "mean(us_avg_per_capita):Q", format="$,.2f", title="National Average"
        )
    ],
)

rule_text = base_scatter.mark_text(
    align="left", dx=5, dy=-5, color="red", fontWeight="bold"
).encode(
    y=alt.Y("mean(us_avg_per_capita):Q"), x=alt.value(0), text=alt.value("National Avg")
)

left_chart = (points + rule + rule_text).properties(
    width=500, height=400, title="Efficiency Matrix: Population vs. Funding Intensity"
)


# 4. RIGHT PANEL: DETAILS & HISTORY

# A. Trend Comparison
history_base = alt.Chart(q6_df).transform_filter(state_select)

state_line = history_base.mark_line(point=True, strokeWidth=4, color="#440154").encode(
    x=alt.X("year:O", title="Year"),
    y=alt.Y("funding_per_capita:Q", title="$/Person"),
    tooltip=["year", alt.Tooltip("funding_per_capita", format="$,.2f")],
)

avg_line = (
    alt.Chart(us_avg)
    .mark_line(strokeDash=[5, 5], color="red", opacity=0.5)
    .encode(x=alt.X("year:O"), y=alt.Y("us_avg_per_capita:Q"))
)

# We wrap this in a filter so it disappears if nothing is selected
history_chart = (
    (avg_line + state_line)
    .transform_filter(state_select)
    .properties(
        width=350, height=200, title="History: Selected State vs. National Avg (Red)"
    )
)

# B. KPI Block
kpi_base = alt.Chart(q6_df).transform_filter(year_select).transform_filter(state_select)


def make_kpi(label, value_col, fmt, y_pos):
    lbl = kpi_base.mark_text(align="center", color="#666", fontSize=12).encode(
        text=alt.value(label), x=alt.value(175), y=alt.value(y_pos)
    )
    val = kpi_base.mark_text(
        align="center", color="#333", fontSize=20, fontWeight="bold"
    ).encode(
        text=alt.Text(value_col, format=fmt), x=alt.value(175), y=alt.value(y_pos + 20)
    )
    return lbl + val


kpis = (
    alt.Chart(pd.DataFrame({"x": [0]}))
    .mark_rect(opacity=0)
    .properties(width=350, height=180)
    + make_kpi("State Population", "mean(population):Q", ",.0f", 20)
    + make_kpi("Total Funding Received", "sum(total_amount):Q", "$,.2s", 80)
    + make_kpi("Per Capita Funding", "mean(funding_per_capita):Q", "$,.2f", 140)
)


# 5. ASSEMBLE
# We just stack them vertically. If no state is selected, they will simply be blank/empty charts, which is fine.
right_panel = alt.vconcat(history_chart, kpis, spacing=20)

final_q6 = (
    (left_chart | right_panel).configure_view(stroke=None).configure_concat(spacing=20)
)

final_q6

comment

# Final Visualization

In [131]:
import altair as alt
import pandas as pd
import itertools

# 0. GLOBAL SETUP
alt.data_transformers.enable("default")

# --- LOAD DATA ONCE ---
df_grants = pd.read_csv("NSF_Grants_Last5Years_Clean.csv")
df_trump = pd.read_csv("trump17-21-csv.csv")
df_pop_raw = pd.read_csv("estimated_population.csv")
df_abbr_raw = pd.read_csv("state_abbreviations.csv")

# Clean columns globally
df_grants.columns = df_grants.columns.str.strip()
df_trump.columns = df_trump.columns.str.strip()
df_grants["year"] = pd.to_numeric(df_grants["year"], errors="coerce").astype(int)

# ==============================================================================
# Q1: GRANTS BY STATE (Bar + History + KPI)
# ==============================================================================

# Data
q1_yearly = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q1_total = (
    df_grants.groupby(["state"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q1_total["year"] = 0
q1_full = pd.concat([q1_yearly, q1_total], ignore_index=True)

# Interaction
q1_year_options = [0] + sorted(q1_yearly["year"].unique())
q1_input = alt.binding_select(
    options=q1_year_options,
    labels=["All Years (Total)"] + [str(y) for y in q1_year_options[1:]],
    name="Q1 Year: ",
)
q1_year_select = alt.selection_point(
    fields=["year"], bind=q1_input, value=[{"year": 0}], name="q1_year_sel"
)
q1_state_select = alt.selection_point(
    fields=["state"], empty="all", name="q1_state_sel"
)

# Charts
q1_bars = (
    alt.Chart(q1_full)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title="State"),
        y=alt.Y("grants_count:Q", title="Number of Grants"),
        color=alt.condition(
            q1_state_select,
            alt.Color("grants_count:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#f0f0f0"),
        ),
        tooltip=[
            "state",
            "year",
            "grants_count",
            alt.Tooltip("total_amount", format="$,.0f"),
        ],
    )
    .add_params(q1_year_select, q1_state_select)
    .transform_filter(q1_year_select)
    .properties(width=500, height=400, title="Q1: Grants Distribution by State")
)

q1_trend = (
    alt.Chart(q1_yearly)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("total_amount:Q", title="Total Amount ($)", axis=alt.Axis(format="~s")),
        color=alt.value("#4c78a8"),
        tooltip=["year", alt.Tooltip("total_amount", format="$,.0f")],
    )
    .transform_filter(q1_state_select)
    .properties(width=250, height=180, title="History (Selected State)")
)

q1_kpi_base = (
    alt.Chart(q1_full)
    .transform_filter(q1_year_select)
    .transform_filter(q1_state_select)
)
q1_kpi = (
    q1_kpi_base.mark_text(color="#888", dy=-15).encode(
        text=alt.value("Total Funding"), x=alt.value(110), y=alt.value(75)
    )
    + q1_kpi_base.mark_text(color="#444", fontSize=24, fontWeight="bold", dy=15).encode(
        text=alt.Text("sum(total_amount):Q", format="$,.0f"),
        x=alt.value(110),
        y=alt.value(75),
    )
).properties(width=225, height=155)

# Structure only (No config yet)
final_q1 = q1_bars | (q1_trend & q1_kpi)


# ==============================================================================
# Q2: GRANTS BY DIRECTORATE (Bar + Trend + KPI)
# ==============================================================================

# Data
q2_yearly = (
    df_grants.groupby(["directorate", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q2_total = (
    df_grants.groupby(["directorate"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q2_total["year"] = 0
q2_full = pd.concat([q2_yearly, q2_total], ignore_index=True)

# Interaction
q2_year_options = [0] + sorted(q2_yearly["year"].unique())
q2_input = alt.binding_select(
    options=q2_year_options,
    labels=["All Years (Total)"] + [str(y) for y in q2_year_options[1:]],
    name="Q2 Year: ",
)
q2_year_select = alt.selection_point(
    fields=["year"], bind=q2_input, value=[{"year": 0}], name="q2_year_sel"
)
q2_dir_select = alt.selection_point(
    fields=["directorate"], empty="all", name="q2_dir_sel"
)

# Charts
q2_bars = (
    alt.Chart(q2_full)
    .mark_bar()
    .encode(
        x=alt.X("grants_count:Q", title="Number of Grants"),
        y=alt.Y("directorate:N", sort="-x", title="Directorate"),
        color=alt.condition(
            q2_dir_select,
            alt.Color("grants_count:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#f0f0f0"),
        ),
        tooltip=[
            "directorate",
            "year",
            "grants_count",
            alt.Tooltip("total_amount", format="$,.0f"),
        ],
    )
    .add_params(q2_dir_select, q2_year_select)
    .transform_filter(q2_year_select)
    .properties(width=450, height=550, title="Q2: Grants by Directorate")
)

q2_trend = (
    alt.Chart(q2_yearly)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y(
            "total_amount:Q", title="Total Funding ($)", axis=alt.Axis(format="~s")
        ),
        color=alt.value("#4c78a8"),
        tooltip=["directorate", "year", alt.Tooltip("total_amount", format="$,.0f")],
    )
    .transform_filter(q2_dir_select)
    .properties(width=300, height=220, title="Funding History")
)

q2_kpi_base = (
    alt.Chart(q2_full).transform_filter(q2_year_select).transform_filter(q2_dir_select)
)
q2_kpi = (
    q2_kpi_base.mark_text(color="#888", dy=-15).encode(
        text=alt.value("Total Funding"), x=alt.value(100), y=alt.value(75)
    )
    + q2_kpi_base.mark_text(color="#444", fontSize=24, fontWeight="bold", dy=15).encode(
        text=alt.Text("sum(total_amount):Q", format="$,.0f"),
        x=alt.value(100),
        y=alt.value(75),
    )
).properties(width=300, height=100)

q2_spacer = alt.Chart(q2_full).mark_rect(opacity=0).encode().properties(height=30)
q2_legend = (
    alt.Chart(q2_full)
    .mark_circle(opacity=0)
    .encode(color=alt.Color("grants_count:Q", scale=alt.Scale(scheme="blues")))
    .transform_filter(q2_year_select)
    .properties(width=300, height=40)
)

# Structure only
final_q2 = q2_bars | alt.vconcat(q2_spacer, q2_trend, q2_kpi, q2_legend, spacing=5)


# ==============================================================================
# Q3: CANCELLATIONS (Ranking + Scatter)
# ==============================================================================

# Data
base_yearly = (
    df_grants.groupby(["directorate", "year"])
    .agg(base_count=("award_id", "count"))
    .reset_index()
)
cancel_yearly = (
    df_trump.groupby(["directorate", "year"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)
yearly_df = base_yearly.merge(
    cancel_yearly, on=["directorate", "year"], how="outer"
).fillna(0)

base_total = (
    df_grants.groupby(["directorate"])
    .agg(base_count=("award_id", "count"))
    .reset_index()
)
base_total["year"] = 0
cancel_total = (
    df_trump.groupby(["directorate"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)
cancel_total["year"] = 0
total_df = base_total.merge(
    cancel_total, on=["directorate", "year"], how="outer"
).fillna(0)

base_total_fixed = base_total[["directorate", "base_count"]].rename(
    columns={"base_count": "static_base_count"}
)
yearly_df = yearly_df.merge(base_total_fixed, on="directorate", how="left").fillna(0)
total_rows = total_df.copy()
total_rows["static_base_count"] = total_rows["base_count"]

q3_full = pd.concat([yearly_df, total_rows], ignore_index=True)
q3_full = q3_full[q3_full["year"].isin([0, 2018, 2019, 2020, 2021])]
q3_plot = q3_full[
    (q3_full["static_base_count"] > 0) | (q3_full["cancelled_count"] > 0)
].copy()
q3_trend_data = (
    df_trump.groupby(["directorate", "year"])
    .agg(cancelled_count=("award_id", "count"))
    .reset_index()
)

# Interaction
q3_input = alt.binding_select(
    options=[0, 2018, 2019, 2020, 2021],
    labels=["All Years", "2018", "2019", "2020", "2021"],
    name="Q3 Year: ",
)
q3_year_select = alt.selection_point(
    fields=["year"], bind=q3_input, value=[{"year": 0}], name="q3_year_sel"
)
q3_dir_select = alt.selection_point(
    fields=["directorate"], empty="all", name="q3_dir_sel"
)

# Charts
q3_bars = (
    alt.Chart(q3_plot)
    .mark_bar()
    .encode(
        x=alt.X("cancelled_count:Q", title="Number of Cancellations"),
        y=alt.Y("directorate:N", sort="-x", title="Directorate"),
        color=alt.condition(
            q3_dir_select,
            alt.Color(
                "cancelled_count:Q", scale=alt.Scale(scheme="blues"), legend=None
            ),
            alt.value("#f0f0f0"),
        ),
        tooltip=["directorate", "year", "cancelled_count"],
    )
    .add_params(q3_dir_select, q3_year_select)
    .transform_filter(q3_year_select)
    .properties(width=250, height=450, title="Q3: Cancellation Ranking")
)

q3_scatter = (
    alt.Chart(q3_plot)
    .mark_circle(stroke="black", strokeWidth=0.5, opacity=0.8)
    .encode(
        x=alt.X("static_base_count:Q", title="Directorate Size (Total Grants)"),
        y=alt.Y("cancelled_count:Q", title="Cancellations (Selected Year)"),
        size=alt.Size(
            "cancelled_amount:Q", scale=alt.Scale(range=[50, 500]), legend=None
        ),
        color=alt.condition(q3_dir_select, alt.value("#4c78a8"), alt.value("#f0f0f0")),
        tooltip=["directorate", "year", "static_base_count", "cancelled_count"],
    )
    .add_params(q3_dir_select, q3_year_select)
    .transform_filter(q3_year_select)
    .interactive()
    .properties(width=400, height=250, title="Context: Volume vs. Cancellations")
)

q3_trend = (
    alt.Chart(q3_trend_data)
    .mark_line(point=True, color="#4c78a8")
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("cancelled_count:Q", title="Cancellations"),
        tooltip=["directorate", "year", "cancelled_count"],
    )
    .transform_filter(q3_dir_select)
    .properties(width=400, height=150, title="Timeline: When did it happen?")
)

# Structure only
final_q3 = q3_bars | alt.vconcat(q3_scatter, q3_trend, spacing=10)


# ==============================================================================
# Q4: EVOLUTION OF FUNDING
# ==============================================================================

q4_df = (
    df_grants.groupby(["year", "state", "directorate"])
    .agg(total_amount=("award_amount", "sum"), grants_count=("award_id", "count"))
    .reset_index()
)
states = sorted(q4_df["state"].unique())
q4_state_sel = alt.selection_point(
    fields=["state"],
    bind=alt.binding_select(
        options=[None] + states, labels=["All States"] + states, name="Q4 State: "
    ),
    name="q4_state_sel",
)
dirs = sorted(q4_df["directorate"].unique())
q4_dir_sel = alt.selection_point(
    fields=["directorate"],
    bind=alt.binding_select(
        options=[None] + dirs,
        labels=["All Directorates"] + dirs,
        name="Q4 Directorate: ",
    ),
    name="q4_dir_sel",
)

q4_area = (
    alt.Chart(q4_df)
    .mark_area(
        line={"color": "#4c78a8"},
        color=alt.Gradient(
            gradient="linear",
            stops=[
                alt.GradientStop(color="#4c78a8", offset=0),
                alt.GradientStop(color="white", offset=1),
            ],
            x1=1,
            x2=1,
            y1=1,
            y2=0,
        ),
        opacity=0.6,
    )
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y(
            "sum(total_amount):Q", title="Total Funding ($)", axis=alt.Axis(format="~s")
        ),
        tooltip=["year", alt.Tooltip("sum(total_amount):Q", format="$,.0f")],
    )
    .add_params(q4_state_sel, q4_dir_sel)
    .transform_filter(q4_state_sel)
    .transform_filter(q4_dir_sel)
    .properties(width=600, height=300, title="Q4: Evolution of Funding")
)

q4_kpi_base = (
    alt.Chart(q4_df).transform_filter(q4_state_sel).transform_filter(q4_dir_sel)
)
q4_kpi1 = (
    q4_kpi_base.mark_text(color="gray", dy=-20).encode(text=alt.value("Total Funding"))
    + q4_kpi_base.mark_text(color="#4c78a8", fontSize=24, fontWeight="bold").encode(
        text=alt.Text("sum(total_amount):Q", format="$,.2s")
    )
).properties(width=150, height=80)
q4_kpi2 = (
    q4_kpi_base.mark_text(color="gray", dy=-20).encode(text=alt.value("Total Grants"))
    + q4_kpi_base.mark_text(color="#4c78a8", fontSize=24, fontWeight="bold").encode(
        text=alt.Text("sum(grants_count):Q", format=",")
    )
).properties(width=150, height=80)

# Structure only
final_q4 = q4_area | alt.vconcat(q4_kpi1, q4_kpi2, spacing=20)


# ==============================================================================
# Q5: STATE EVOLUTION & CANCELLATIONS
# ==============================================================================

q5_grants_agg = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q5_trump_agg = (
    df_trump.groupby(["state", "year"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)

all_states = pd.concat([q5_grants_agg["state"], q5_trump_agg["state"]]).unique()
all_years = pd.concat([q5_grants_agg["year"], q5_trump_agg["year"]]).unique()
q5_master = pd.DataFrame(
    list(itertools.product(all_states, all_years)), columns=["state", "year"]
)
q5_master = (
    q5_master.merge(q5_grants_agg, on=["state", "year"], how="left")
    .merge(q5_trump_agg, on=["state", "year"], how="left")
    .fillna(0)
)

q5_state_input = alt.binding_select(options=sorted(all_states), name="Q5 State: ")
q5_state_sel = alt.selection_point(
    fields=["state"], bind=q5_state_input, value=[{"state": "CA"}], name="q5_state_sel"
)

base_q5 = (
    alt.Chart(q5_master)
    .transform_filter(q5_state_sel)
    .encode(x=alt.X("year:O", title=None))
)
q5_top = (
    alt.layer(
        base_q5.mark_bar(color="#9ecae1", opacity=0.6).encode(
            y=alt.Y(
                "grants_count:Q",
                title="Grants Count",
                axis=alt.Axis(titleColor="#6baed6"),
            ),
            tooltip=["year", "grants_count"],
        ),
        base_q5.mark_line(color="#08519c", strokeWidth=3, point=True).encode(
            y=alt.Y(
                "total_amount:Q",
                title="Total Funding ($)",
                axis=alt.Axis(format="~s", titleColor="#08519c"),
            ),
            tooltip=["year", alt.Tooltip("total_amount", format="$,.0f")],
        ),
    )
    .resolve_scale(y="independent")
    .properties(width=600, height=250, title="Q5: Evolution (Volume vs Value)")
)

q5_bot = (
    alt.layer(
        base_q5.mark_bar(color="#fc9272", opacity=0.6).encode(
            x=alt.X("year:O", title="Year"),
            y=alt.Y(
                "cancelled_count:Q",
                title="Cancelled Grants",
                axis=alt.Axis(titleColor="#fc9272"),
            ),
            tooltip=["year", "cancelled_count"],
        ),
        base_q5.mark_line(color="#de2d26", strokeWidth=3, point=True).encode(
            x=alt.X("year:O", title="Year"),
            y=alt.Y(
                "cancelled_amount:Q",
                title="Lost Funding ($)",
                axis=alt.Axis(format="~s", titleColor="#de2d26"),
            ),
            tooltip=["year", alt.Tooltip("cancelled_amount", format="$,.0f")],
        ),
    )
    .resolve_scale(y="independent")
    .properties(width=600, height=150, title="Impact: Cancellations")
)

# Structure only
final_q5 = (
    alt.vconcat(q5_top, q5_bot).add_params(q5_state_sel).configure_concat(spacing=5)
)  # Note: add_params here is fine as it's a vconcat unit, but configure_concat usually acts as global config.
# To be safe, removing configure_concat from individual chart
final_q5 = alt.vconcat(q5_top, q5_bot, spacing=5).add_params(q5_state_sel)


# ==============================================================================
# Q6: POPULATION
# ==============================================================================

df_pop_raw.columns = df_pop_raw.columns.str.strip()
df_abbr_raw.columns = df_abbr_raw.columns.str.strip()
pop_cols = [c for c in df_pop_raw.columns if c.lower().startswith("pop_")]
df_pop_long = df_pop_raw.melt(
    id_vars=["state"], value_vars=pop_cols, var_name="year", value_name="population"
)
df_pop_long["year"] = (
    df_pop_long["year"].str.replace("pop_", "", regex=False).astype(int)
)
df_pop_long["population"] = pd.to_numeric(df_pop_long["population"], errors="coerce")
df_pop_long = df_pop_long[df_pop_long["year"].between(2020, 2024)].copy()
df_pop_long = df_pop_long.rename(columns={"state": "state_name"})
df_abbr = df_abbr_raw.rename(
    columns={
        [c for c in df_abbr_raw.columns if "name" in c.lower()][0]: "state_name",
        [c for c in df_abbr_raw.columns if "abbr" in c.lower()][0]: "state",
    }
)
df_abbr["state_name_key"] = df_abbr["state_name"].str.strip().str.lower()
df_pop_long["state_name_key"] = df_pop_long["state_name"].str.strip().str.lower()
df_pop_long = df_pop_long.merge(
    df_abbr[["state_name_key", "state"]], on="state_name_key", how="left"
).dropna(subset=["state", "population"])[["state", "year", "population"]]

q6_grants = (
    df_grants.dropna(subset=["state", "year", "award_amount"])
    .groupby(["state", "year"])
    .agg(total_amount=("award_amount", "sum"), grants_count=("award_id", "count"))
    .reset_index()
)
q6_df = q6_grants.merge(df_pop_long, on=["state", "year"], how="inner")
q6_df["funding_per_capita"] = q6_df["total_amount"] / q6_df["population"]

q6_min, q6_max = int(q6_df["year"].min()), int(q6_df["year"].max())
q6_slider = alt.binding_range(min=q6_min, max=q6_max, step=1, name="Q6 Year: ")
q6_year_sel = alt.selection_point(
    fields=["year"], bind=q6_slider, value=[{"year": q6_max}], name="q6_year_sel"
)
q6_state_sel = alt.selection_point(
    fields=["state"], empty="all", name="q6_state_sel", on="click", clear="dblclick"
)

q6_bars = (
    alt.Chart(q6_df)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title="State"),
        y=alt.Y("funding_per_capita:Q", title="$/Person"),
        color=alt.Color(
            "funding_per_capita:Q", scale=alt.Scale(scheme="purples"), legend=None
        ),
        stroke=alt.condition(q6_state_sel, alt.value("black"), alt.value(None)),
        strokeWidth=alt.condition(q6_state_sel, alt.value(1.5), alt.value(0)),
        tooltip=["state", "year", alt.Tooltip("funding_per_capita", format="$,.2f")],
    )
    .add_params(q6_year_sel, q6_state_sel)
    .transform_filter(q6_year_sel)
    .properties(width=560, height=420, title="Q6: Funding Per Capita")
)

q6_hist = (
    alt.Chart(q6_df)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x="year:O",
        y=alt.Y("funding_per_capita:Q", title="$/Person"),
        color=alt.value("#6a3d9a"),
        tooltip=["year", alt.Tooltip("funding_per_capita", format="$,.2f")],
    )
    .transform_filter(q6_state_sel)
    .properties(width=340, height=200, title="History (Selected State)")
)

q6_kpi_base = (
    alt.Chart(q6_df).transform_filter(q6_year_sel).transform_filter(q6_state_sel)
)
q6_kpi1 = q6_kpi_base.mark_text(color="#888", dy=-10).encode(
    text=alt.value("Per Capita"), x=alt.value(170), y=alt.value(60)
) + q6_kpi_base.mark_text(color="#333", fontSize=22, fontWeight="bold", dy=12).encode(
    text=alt.Text("mean(funding_per_capita):Q", format="$,.2f"),
    x=alt.value(170),
    y=alt.value(60),
)
q6_kpi2 = q6_kpi_base.mark_text(color="#888", dy=-10).encode(
    text=alt.value("Total Funding"), x=alt.value(170), y=alt.value(135)
) + q6_kpi_base.mark_text(color="#333", fontSize=22, fontWeight="bold", dy=12).encode(
    text=alt.Text("sum(total_amount):Q", format="$,.0f"),
    x=alt.value(170),
    y=alt.value(135),
)
q6_kpi_panel = (
    alt.Chart(pd.DataFrame({"x": [0]}))
    .mark_rect(opacity=0)
    .encode()
    .properties(width=340, height=220)
    + q6_kpi1
    + q6_kpi2
)

q6_right = alt.vconcat(
    alt.layer(
        alt.Chart(pd.DataFrame({"t": ["Select a State"]}))
        .mark_text(color="#ccc")
        .encode(text="t", x=alt.value(170), y=alt.value(100))
        .properties(width=340, height=200)
        .transform_filter(~q6_state_sel),
        q6_hist.transform_filter(q6_state_sel),
    ),
    alt.layer(
        alt.Chart(pd.DataFrame({"t": [""]}))
        .mark_text()
        .encode(text="t")
        .properties(width=340, height=220)
        .transform_filter(~q6_state_sel),
        q6_kpi_panel.transform_filter(q6_state_sel),
    ),
)

# Structure only
final_q6 = q6_bars | q6_right


# ==============================================================================
# FINAL ASSEMBLY (Global Config)
# ==============================================================================

dashboard = (
    alt.vconcat(final_q1, final_q2, final_q3, final_q4, final_q5, final_q6)
    .resolve_scale(color="independent")
    .configure_view(stroke=None)
    .configure_concat(spacing=60)
)

dashboard

In [140]:
import altair as alt
import pandas as pd
import itertools

# 0. GLOBAL SETUP
alt.data_transformers.enable("default")

# --- LOAD DATA ONCE ---
df_grants = pd.read_csv("NSF_Grants_Last5Years_Clean.csv")
df_trump = pd.read_csv("trump17-21-csv.csv")
df_pop_raw = pd.read_csv("estimated_population.csv")
df_abbr_raw = pd.read_csv("state_abbreviations.csv")

# Clean columns globally
df_grants.columns = df_grants.columns.str.strip()
df_trump.columns = df_trump.columns.str.strip()
df_grants["year"] = pd.to_numeric(df_grants["year"], errors="coerce").astype(int)

# ==============================================================================
# Q1: GRANTS BY STATE
# ==============================================================================
q1_yearly = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q1_total = (
    df_grants.groupby(["state"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q1_total["year"] = 0
q1_full = pd.concat([q1_yearly, q1_total], ignore_index=True)

q1_year_options = [0] + sorted(q1_yearly["year"].unique())
q1_input = alt.binding_select(
    options=q1_year_options,
    labels=["All Years"] + [str(y) for y in q1_year_options[1:]],
    name="Q1 Year: ",
)
q1_year_select = alt.selection_point(
    fields=["year"], bind=q1_input, value=[{"year": 0}], name="q1_year_sel"
)
q1_state_select = alt.selection_point(
    fields=["state"], empty="all", name="q1_state_sel"
)

q1_bars = (
    alt.Chart(q1_full)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title=None),  # Compact axis
        y=alt.Y("grants_count:Q", title="Grants"),
        color=alt.condition(
            q1_state_select,
            alt.Color("grants_count:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#f0f0f0"),
        ),
        tooltip=["state", "year", "grants_count"],
    )
    .add_params(q1_year_select, q1_state_select)
    .transform_filter(q1_year_select)
    .properties(width=350, height=300, title="Q1: Grants by State")
)

q1_trend = (
    alt.Chart(q1_yearly)
    .mark_line(point=True)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("total_amount:Q", axis=alt.Axis(format="~s"), title="Funding ($)"),
        color=alt.value("#4c78a8"),
        tooltip=["year", "total_amount"],
    )
    .transform_filter(q1_state_select)
    .properties(width=200, height=140, title="History")
)

q1_kpi_text = (
    alt.Chart(q1_full)
    .transform_filter(q1_year_select)
    .transform_filter(q1_state_select)
    .mark_text(color="#444", fontSize=18, fontWeight="bold")
    .encode(
        text=alt.Text("sum(total_amount):Q", format="$,.2s"),
        y=alt.value(20),
        x=alt.value(100),
    )
    .properties(width=200, height=40)
)

final_q1 = (q1_bars | (q1_kpi_text & q1_trend)).resolve_scale(color="independent")


# ==============================================================================
# Q2: GRANTS BY DIRECTORATE
# ==============================================================================
q2_yearly = (
    df_grants.groupby(["directorate", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q2_total = (
    df_grants.groupby(["directorate"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q2_total["year"] = 0
q2_full = pd.concat([q2_yearly, q2_total], ignore_index=True)

q2_input = alt.binding_select(
    options=q1_year_options,
    labels=["All Years"] + [str(y) for y in q1_year_options[1:]],
    name="Q2 Year: ",
)
q2_year_select = alt.selection_point(
    fields=["year"], bind=q2_input, value=[{"year": 0}], name="q2_year_sel"
)
q2_dir_select = alt.selection_point(
    fields=["directorate"], empty="all", name="q2_dir_sel"
)

q2_bars = (
    alt.Chart(q2_full)
    .mark_bar()
    .encode(
        x=alt.X("grants_count:Q", title="Grants"),
        y=alt.Y("directorate:N", sort="-x", title=None),
        color=alt.condition(
            q2_dir_select,
            alt.Color("grants_count:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#f0f0f0"),
        ),
        tooltip=["directorate", "grants_count"],
    )
    .add_params(q2_dir_select, q2_year_select)
    .transform_filter(q2_year_select)
    .properties(width=280, height=350, title="Q2: By Directorate")
)

q2_trend = (
    alt.Chart(q2_yearly)
    .mark_line(point=True)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("total_amount:Q", axis=alt.Axis(format="~s"), title="Funding ($)"),
        color=alt.value("#4c78a8"),
    )
    .transform_filter(q2_dir_select)
    .properties(width=220, height=150, title="Funding Trend")
)

final_q2 = (q2_bars | q2_trend).resolve_scale(color="independent")


# ==============================================================================
# Q3: CANCELLATIONS
# ==============================================================================
base_yearly = (
    df_grants.groupby(["directorate", "year"])
    .agg(base_count=("award_id", "count"))
    .reset_index()
)
cancel_yearly = (
    df_trump.groupby(["directorate", "year"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)
yearly_df = base_yearly.merge(
    cancel_yearly, on=["directorate", "year"], how="outer"
).fillna(0)
base_total = (
    df_grants.groupby(["directorate"])
    .agg(base_count=("award_id", "count"))
    .reset_index()
)
base_total["year"] = 0
cancel_total = (
    df_trump.groupby(["directorate"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)
cancel_total["year"] = 0
total_df = base_total.merge(
    cancel_total, on=["directorate", "year"], how="outer"
).fillna(0)
base_total_fixed = base_total[["directorate", "base_count"]].rename(
    columns={"base_count": "static_base_count"}
)
yearly_df = yearly_df.merge(base_total_fixed, on="directorate", how="left").fillna(0)
total_rows = total_df.copy()
total_rows["static_base_count"] = total_rows["base_count"]
q3_full = pd.concat([yearly_df, total_rows], ignore_index=True)
q3_full = q3_full[q3_full["year"].isin([0, 2018, 2019, 2020, 2021])]
q3_plot = q3_full[
    (q3_full["static_base_count"] > 0) | (q3_full["cancelled_count"] > 0)
].copy()

q3_input = alt.binding_select(
    options=[0, 2018, 2019, 2020, 2021],
    labels=["All", "2018", "2019", "2020", "2021"],
    name="Q3 Year: ",
)
q3_year_select = alt.selection_point(
    fields=["year"], bind=q3_input, value=[{"year": 0}], name="q3_year_sel"
)
q3_dir_select = alt.selection_point(
    fields=["directorate"], empty="all", name="q3_dir_sel"
)

q3_bars = (
    alt.Chart(q3_plot)
    .mark_bar()
    .encode(
        x=alt.X("cancelled_count:Q", title="Cancellations"),
        y=alt.Y("directorate:N", sort="-x", title=None),
        color=alt.condition(
            q3_dir_select,
            alt.Color(
                "cancelled_count:Q", scale=alt.Scale(scheme="blues"), legend=None
            ),
            alt.value("#f0f0f0"),
        ),
    )
    .add_params(q3_dir_select, q3_year_select)
    .transform_filter(q3_year_select)
    .properties(width=200, height=350, title="Q3: Cancelled")
)

q3_scatter = (
    alt.Chart(q3_plot)
    .mark_circle(stroke="black")
    .encode(
        x=alt.X("static_base_count:Q", title="Size (Total Grants)"),
        y=alt.Y("cancelled_count:Q", title="Cancellations"),
        size=alt.Size("cancelled_amount:Q", legend=None),
        color=alt.condition(q3_dir_select, alt.value("#4c78a8"), alt.value("#f0f0f0")),
        tooltip=["directorate", "cancelled_count"],
    )
    .add_params(q3_dir_select, q3_year_select)
    .transform_filter(q3_year_select)
    .interactive()
    .properties(width=300, height=200, title="Scale vs. Hits")
)

final_q3 = (q3_bars | q3_scatter).resolve_scale(color="independent")


# ==============================================================================
# Q4: EVOLUTION OF FUNDING
# ==============================================================================
q4_df = (
    df_grants.groupby(["year", "state", "directorate"])
    .agg(total_amount=("award_amount", "sum"), grants_count=("award_id", "count"))
    .reset_index()
)
states = sorted(q4_df["state"].unique())
q4_state_sel = alt.selection_point(
    fields=["state"],
    bind=alt.binding_select(
        options=[None] + states, labels=["All"] + states, name="Q4 State: "
    ),
    name="q4_state_sel",
)
dirs = sorted(q4_df["directorate"].unique())
q4_dir_sel = alt.selection_point(
    fields=["directorate"],
    bind=alt.binding_select(
        options=[None] + dirs, labels=["All"] + dirs, name="Q4 Directorate: "
    ),
    name="q4_dir_sel",
)

q4_area = (
    alt.Chart(q4_df)
    .mark_area(
        line={"color": "#4c78a8"},
        color=alt.Gradient(
            gradient="linear",
            stops=[
                alt.GradientStop(color="#4c78a8", offset=0),
                alt.GradientStop(color="white", offset=1),
            ],
            x1=1,
            x2=1,
            y1=1,
            y2=0,
        ),
        opacity=0.6,
    )
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("sum(total_amount):Q", title="Funding ($)", axis=alt.Axis(format="~s")),
        tooltip=["year", alt.Tooltip("sum(total_amount):Q", format="$,.0f")],
    )
    .add_params(q4_state_sel, q4_dir_sel)
    .transform_filter(q4_state_sel)
    .transform_filter(q4_dir_sel)
    .properties(width=500, height=250, title="Q4: Funding Evolution")
)

q4_kpi = (
    alt.Chart(q4_df)
    .transform_filter(q4_state_sel)
    .transform_filter(q4_dir_sel)
    .mark_text(color="#4c78a8", fontSize=24, fontWeight="bold")
    .encode(text=alt.Text("sum(total_amount):Q", format="$,.2s"))
    .properties(width=100, height=50)
)

final_q4 = (q4_area | q4_kpi).resolve_scale(color="independent")


# ==============================================================================
# Q5: STATE EVOLUTION & CANCELLATIONS
# ==============================================================================
q5_grants_agg = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q5_trump_agg = (
    df_trump.groupby(["state", "year"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)
all_states = pd.concat([q5_grants_agg["state"], q5_trump_agg["state"]]).unique()
all_years = pd.concat([q5_grants_agg["year"], q5_trump_agg["year"]]).unique()
q5_master = pd.DataFrame(
    list(itertools.product(all_states, all_years)), columns=["state", "year"]
)
q5_master = (
    q5_master.merge(q5_grants_agg, on=["state", "year"], how="left")
    .merge(q5_trump_agg, on=["state", "year"], how="left")
    .fillna(0)
)
q5_state_input = alt.binding_select(options=sorted(all_states), name="Q5 State: ")
q5_state_sel = alt.selection_point(
    fields=["state"], bind=q5_state_input, value=[{"state": "CA"}], name="q5_state_sel"
)

base_q5 = (
    alt.Chart(q5_master)
    .transform_filter(q5_state_sel)
    .encode(x=alt.X("year:O", title=None))
)
q5_top = (
    alt.layer(
        base_q5.mark_bar(color="#9ecae1").encode(
            y=alt.Y("grants_count:Q", title="Grants")
        ),
        base_q5.mark_line(color="#08519c").encode(
            y=alt.Y("total_amount:Q", title="Funding ($)", axis=alt.Axis(format="~s"))
        ),
    )
    .resolve_scale(y="independent")
    .properties(width=550, height=180, title="Q5: Evolution")
)

q5_bot = (
    alt.layer(
        base_q5.mark_bar(color="#fc9272").encode(
            x=alt.X("year:O"), y=alt.Y("cancelled_count:Q", title="Cancelled")
        ),
        base_q5.mark_line(color="#de2d26").encode(
            x=alt.X("year:O"),
            y=alt.Y("cancelled_amount:Q", title="Lost ($)", axis=alt.Axis(format="~s")),
        ),
    )
    .resolve_scale(y="independent")
    .properties(width=550, height=120, title="Impact")
)

final_q5 = alt.vconcat(q5_top, q5_bot, spacing=5).add_params(q5_state_sel)


# ==============================================================================
# Q6: POPULATION
# ==============================================================================
df_pop_raw.columns = df_pop_raw.columns.str.strip()
df_abbr_raw.columns = df_abbr_raw.columns.str.strip()
pop_cols = [c for c in df_pop_raw.columns if c.lower().startswith("pop_")]
df_pop_long = df_pop_raw.melt(
    id_vars=["state"], value_vars=pop_cols, var_name="year", value_name="population"
)
df_pop_long["year"] = (
    df_pop_long["year"].str.replace("pop_", "", regex=False).astype(int)
)
df_pop_long["population"] = pd.to_numeric(df_pop_long["population"], errors="coerce")
df_pop_long = df_pop_long[df_pop_long["year"].between(2020, 2024)].copy()
df_pop_long = df_pop_long.rename(columns={"state": "state_name"})
df_abbr = df_abbr_raw.rename(
    columns={
        [c for c in df_abbr_raw.columns if "name" in c.lower()][0]: "state_name",
        [c for c in df_abbr_raw.columns if "abbr" in c.lower()][0]: "state",
    }
)
df_abbr["state_name_key"] = df_abbr["state_name"].str.strip().str.lower()
df_pop_long["state_name_key"] = df_pop_long["state_name"].str.strip().str.lower()
df_pop_long = df_pop_long.merge(
    df_abbr[["state_name_key", "state"]], on="state_name_key", how="left"
).dropna(subset=["state", "population"])[["state", "year", "population"]]

q6_grants = (
    df_grants.dropna(subset=["state", "year", "award_amount"])
    .groupby(["state", "year"])
    .agg(total_amount=("award_amount", "sum"), grants_count=("award_id", "count"))
    .reset_index()
)
q6_df = q6_grants.merge(df_pop_long, on=["state", "year"], how="inner")
q6_df["funding_per_capita"] = q6_df["total_amount"] / q6_df["population"]

q6_min, q6_max = int(q6_df["year"].min()), int(q6_df["year"].max())
q6_slider = alt.binding_range(min=q6_min, max=q6_max, step=1, name="Q6 Year: ")
q6_year_sel = alt.selection_point(
    fields=["year"], bind=q6_slider, value=[{"year": q6_max}], name="q6_year_sel"
)
q6_state_sel = alt.selection_point(
    fields=["state"], empty="all", name="q6_state_sel", on="click", clear="dblclick"
)

q6_bars = (
    alt.Chart(q6_df)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title=None),
        y=alt.Y("funding_per_capita:Q", title="$/Person"),
        color=alt.Color(
            "funding_per_capita:Q", scale=alt.Scale(scheme="purples"), legend=None
        ),
        stroke=alt.condition(q6_state_sel, alt.value("black"), alt.value(None)),
        strokeWidth=alt.condition(q6_state_sel, alt.value(1.5), alt.value(0)),
    )
    .add_params(q6_year_sel, q6_state_sel)
    .transform_filter(q6_year_sel)
    .properties(width=350, height=300, title="Q6: Per Capita")
)

q6_hist = (
    alt.Chart(q6_df)
    .mark_line(point=True)
    .encode(
        x="year:O",
        y=alt.Y("funding_per_capita:Q", title="$/Person"),
        color=alt.value("#6a3d9a"),
    )
    .transform_filter(q6_state_sel)
    .properties(width=200, height=150, title="History")
)

final_q6 = (q6_bars | q6_hist).resolve_scale(color="independent")


# ==============================================================================
# MASTER TABS LAYOUT (The Compact Fix)
# ==============================================================================

# We create a tabbed interface by using .tab() or creating a master selection.
# Since Altair standard VConcat is huge, we use the "Tabs" layout.

dashboard = (
    alt.vconcat(
        alt.hconcat(final_q1, final_q2).resolve_scale(color="independent"),
        alt.hconcat(final_q3, final_q4).resolve_scale(color="independent"),
        alt.hconcat(final_q5, final_q6).resolve_scale(color="independent"),
    )
    .configure_view(stroke=None)
    .configure_concat(spacing=30)
)

dashboard

In [139]:
import altair as alt
import pandas as pd
import itertools

# 0. SETUP
alt.data_transformers.enable("default")

# Load all necessary data
df_grants = pd.read_csv("NSF_Grants_Last5Years_Clean.csv")
df_trump = pd.read_csv("trump17-21-csv.csv")
df_pop_raw = pd.read_csv("estimated_population.csv")
df_abbr_raw = pd.read_csv("state_abbreviations.csv")

# Clean columns
df_grants.columns = df_grants.columns.str.strip()
df_trump.columns = df_trump.columns.str.strip()
df_pop_raw.columns = df_pop_raw.columns.str.strip()
df_abbr_raw.columns = df_abbr_raw.columns.str.strip()

# ============================================================================
# Q1: GRANTS BY STATE
# ============================================================================
q1_yearly = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q1_total = (
    df_grants.groupby(["state"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q1_total["year"] = 0
q1_full = pd.concat([q1_yearly, q1_total], ignore_index=True)

years = sorted(q1_yearly["year"].unique())
year_options_q1 = [0] + years
year_labels_q1 = ["All Years (Total)"] + [str(y) for y in years]
input_element_q1 = alt.binding_select(
    options=year_options_q1, labels=year_labels_q1, name="Year: "
)

year_select_q1 = alt.selection_point(
    name="year_select_q1", fields=["year"], bind=input_element_q1, value=[{"year": 0}]
)
state_select_q1 = alt.selection_point(
    name="state_select_q1", fields=["state"], empty="all"
)

bars_q1 = (
    alt.Chart(q1_full)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title="State"),
        y=alt.Y("grants_count:Q", title="Grants"),
        color=alt.condition(
            state_select_q1,
            alt.Color("grants_count:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#f0f0f0"),
        ),
        tooltip=[
            "state:N",
            "year:O",
            "grants_count:Q",
            alt.Tooltip("total_amount:Q", format="$,.0f"),
        ],
    )
    .add_params(year_select_q1, state_select_q1)
    .transform_filter(year_select_q1)
    .properties(width=300, height=250, title="Q1: Grants by State")
)

trend_q1 = (
    alt.Chart(q1_yearly)
    .mark_line(point=True, strokeWidth=2)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("total_amount:Q", title="Amount ($)", axis=alt.Axis(format="~s")),
        color=alt.value("#4c78a8"),
        tooltip=["year:O", alt.Tooltip("total_amount:Q", format="$,.0f")],
    )
    .transform_filter(state_select_q1)
    .properties(width=150, height=120, title="History")
)

base_text_q1 = (
    alt.Chart(q1_full)
    .transform_filter(year_select_q1)
    .transform_filter(state_select_q1)
)
label_q1 = base_text_q1.mark_text(
    align="center", color="#888", fontSize=11, dy=-10
).encode(text=alt.value("Total Funding"), y=alt.value(50), x=alt.value(75))
value_q1 = base_text_q1.mark_text(
    align="center", color="#444", fontSize=18, fontWeight="bold", dy=10
).encode(
    text=alt.Text("sum(total_amount):Q", format="$,.0f"),
    y=alt.value(50),
    x=alt.value(75),
)
kpi_q1 = (label_q1 + value_q1).properties(width=150, height=100)

right_col_q1 = trend_q1 & kpi_q1
final_q1 = (bars_q1 | right_col_q1).resolve_scale(color="independent")

# ============================================================================
# Q2: GRANTS BY DIRECTORATE
# ============================================================================
q2_yearly = (
    df_grants.groupby(["directorate", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q2_total = (
    df_grants.groupby(["directorate"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q2_total["year"] = 0
q2_full = pd.concat([q2_yearly, q2_total], ignore_index=True)

year_options_q2 = [0] + years
year_labels_q2 = ["All Years (Total)"] + [str(y) for y in years]
input_element_q2 = alt.binding_select(
    options=year_options_q2, labels=year_labels_q2, name="Year: "
)

year_select_q2 = alt.selection_point(
    name="year_select_q2", fields=["year"], bind=input_element_q2, value=[{"year": 0}]
)
dir_select_q2 = alt.selection_point(
    name="dir_select_q2", fields=["directorate"], empty="all"
)

bars_q2 = (
    alt.Chart(q2_full)
    .mark_bar()
    .encode(
        x=alt.X("grants_count:Q", title="Grants"),
        y=alt.Y("directorate:N", sort="-x", title="Directorate"),
        color=alt.condition(
            dir_select_q2,
            alt.Color("grants_count:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#f0f0f0"),
        ),
        tooltip=[
            "directorate:N",
            "year:O",
            "grants_count:Q",
            alt.Tooltip("total_amount:Q", format=",.0f"),
        ],
    )
    .add_params(dir_select_q2, year_select_q2)
    .transform_filter(year_select_q2)
    .properties(width=280, height=350, title="Q2: Grants by Directorate")
)

trend_q2 = (
    alt.Chart(q2_yearly)
    .mark_line(point=True, strokeWidth=2)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("total_amount:Q", title="Funding ($)", axis=alt.Axis(format="~s")),
        color=alt.value("#4c78a8"),
        tooltip=[
            "directorate:N",
            "year:O",
            alt.Tooltip("total_amount:Q", format="$,.0f"),
        ],
    )
    .transform_filter(dir_select_q2)
    .properties(width=180, height=140, title="History")
)

base_text_q2 = (
    alt.Chart(q2_full).transform_filter(year_select_q2).transform_filter(dir_select_q2)
)
label_q2 = base_text_q2.mark_text(
    align="center", color="#888", fontSize=11, dy=-10
).encode(text=alt.value("Total Funding"), y=alt.value(40), x=alt.value(90))
value_q2 = base_text_q2.mark_text(
    align="center", color="#444", fontSize=18, fontWeight="bold", dy=10
).encode(
    text=alt.Text("sum(total_amount):Q", format="$,.0f"),
    y=alt.value(40),
    x=alt.value(90),
)
kpi_q2 = (label_q2 + value_q2).properties(width=180, height=80)

right_col_q2 = alt.vconcat(trend_q2, kpi_q2, spacing=5)
final_q2 = (bars_q2 | right_col_q2).resolve_scale(color="independent")

# ============================================================================
# Q3: TRUMP CANCELLATIONS
# ============================================================================
base_yearly_q3 = (
    df_grants.groupby(["directorate", "year"])
    .agg(base_count=("award_id", "count"), base_amount=("award_amount", "sum"))
    .reset_index()
)
cancel_yearly_q3 = (
    df_trump.groupby(["directorate", "year"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)
yearly_df_q3 = base_yearly_q3.merge(
    cancel_yearly_q3, on=["directorate", "year"], how="outer"
).fillna(0)

base_total_q3 = (
    df_grants.groupby(["directorate"])
    .agg(base_count=("award_id", "count"), base_amount=("award_amount", "sum"))
    .reset_index()
)
base_total_q3["year"] = 0
cancel_total_q3 = (
    df_trump.groupby(["directorate"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)
cancel_total_q3["year"] = 0
total_df_q3 = base_total_q3.merge(
    cancel_total_q3, on=["directorate", "year"], how="outer"
).fillna(0)

base_total_fixed_q3 = base_total_q3[["directorate", "base_count"]].rename(
    columns={"base_count": "static_base_count"}
)
yearly_df_q3 = yearly_df_q3.merge(
    base_total_fixed_q3, on="directorate", how="left"
).fillna(0)
total_rows_q3 = total_df_q3.copy()
total_rows_q3["static_base_count"] = total_rows_q3["base_count"]

q3_full = pd.concat([yearly_df_q3, total_rows_q3], ignore_index=True)
target_years_q3 = [0, 2018, 2019, 2020, 2021]
q3_full = q3_full[q3_full["year"].isin(target_years_q3)]
q3_plot_full = q3_full[
    (q3_full["static_base_count"] > 0) | (q3_full["cancelled_count"] > 0)
].copy()

q3_trend_data = (
    df_trump.groupby(["directorate", "year"])
    .agg(cancelled_count=("award_id", "count"))
    .reset_index()
)

year_options_q3 = [0, 2018, 2019, 2020, 2021]
year_labels_q3 = ["All Years", "2018", "2019", "2020", "2021"]
input_element_q3 = alt.binding_select(
    options=year_options_q3, labels=year_labels_q3, name="Year: "
)

year_select_q3 = alt.selection_point(
    name="year_select_q3", fields=["year"], bind=input_element_q3, value=[{"year": 0}]
)
dir_select_q3 = alt.selection_point(
    name="dir_select_q3", fields=["directorate"], empty="all"
)

bars_q3 = (
    alt.Chart(q3_plot_full)
    .mark_bar()
    .encode(
        x=alt.X("cancelled_count:Q", title="Cancellations"),
        y=alt.Y("directorate:N", sort="-x", title="Directorate"),
        color=alt.condition(
            dir_select_q3,
            alt.Color(
                "cancelled_count:Q", scale=alt.Scale(scheme="blues"), legend=None
            ),
            alt.value("#f0f0f0"),
        ),
        tooltip=[
            "directorate",
            "year",
            "cancelled_count",
            alt.Tooltip("cancelled_amount", format="$,.0f"),
        ],
    )
    .add_params(dir_select_q3, year_select_q3)
    .transform_filter(year_select_q3)
    .properties(width=180, height=280, title="Q3: Trump Cancellations")
)

scatter_q3 = (
    alt.Chart(q3_plot_full)
    .mark_circle(stroke="black", strokeWidth=0.5, opacity=0.8)
    .encode(
        x=alt.X("static_base_count:Q", title="Directorate Size"),
        y=alt.Y("cancelled_count:Q", title="Cancellations"),
        size=alt.Size(
            "cancelled_amount:Q", legend=None, scale=alt.Scale(range=[30, 300])
        ),
        color=alt.condition(dir_select_q3, alt.value("#4c78a8"), alt.value("#f0f0f0")),
        tooltip=[
            "directorate:N",
            "year:O",
            "static_base_count:Q",
            "cancelled_count:Q",
            alt.Tooltip("cancelled_amount:Q", format="$,.0f"),
        ],
    )
    .add_params(dir_select_q3, year_select_q3)
    .transform_filter(year_select_q3)
    .properties(width=250, height=160, title="Context")
    .interactive()
)

trend_q3 = (
    alt.Chart(q3_trend_data)
    .mark_line(point=True, color="#4c78a8")
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("cancelled_count:Q", title="Cancellations"),
        tooltip=["directorate", "year", "cancelled_count"],
    )
    .transform_filter(dir_select_q3)
    .properties(width=250, height=100, title="Timeline")
)

right_col_q3 = alt.vconcat(scatter_q3, trend_q3, spacing=5)
final_q3 = (bars_q3 | right_col_q3).resolve_scale(color="independent")

# ============================================================================
# Q4: EVOLUTION WITH FILTERS
# ============================================================================
q4_df = (
    df_grants.groupby(["year", "state", "directorate"])
    .agg(total_amount=("award_amount", "sum"), grants_count=("award_id", "count"))
    .reset_index()
)

states = sorted(q4_df["state"].unique())
state_input_q4 = alt.binding_select(
    options=[None] + states, labels=["All States"] + states, name="State: "
)
state_select_q4 = alt.selection_point(fields=["state"], bind=state_input_q4)

dirs = sorted(q4_df["directorate"].unique())
dir_input_q4 = alt.binding_select(
    options=[None] + dirs, labels=["All Directorates"] + dirs, name="Directorate: "
)
dir_select_q4 = alt.selection_point(fields=["directorate"], bind=dir_input_q4)

evolution_q4 = (
    alt.Chart(q4_df)
    .mark_area(
        line={"color": "#4c78a8"},
        color=alt.Gradient(
            gradient="linear",
            stops=[
                alt.GradientStop(color="#4c78a8", offset=0),
                alt.GradientStop(color="white", offset=1),
            ],
            x1=1,
            x2=1,
            y1=1,
            y2=0,
        ),
        opacity=0.6,
    )
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("sum(total_amount):Q", title="Funding ($)", axis=alt.Axis(format="~s")),
        tooltip=[
            alt.Tooltip("year:O"),
            alt.Tooltip("sum(total_amount):Q", format="$,.0f"),
            alt.Tooltip("sum(grants_count):Q"),
        ],
    )
    .add_params(state_select_q4, dir_select_q4)
    .transform_filter(state_select_q4)
    .transform_filter(dir_select_q4)
    .properties(width=350, height=180, title="Q4: Evolution (Filtered)")
)

points_q4 = (
    alt.Chart(q4_df)
    .mark_circle(size=40, color="#4c78a8")
    .encode(
        x="year:O",
        y="sum(total_amount):Q",
        tooltip=[
            alt.Tooltip("year:O"),
            alt.Tooltip("sum(total_amount):Q", format="$,.0f"),
            alt.Tooltip("sum(grants_count):Q"),
        ],
    )
    .transform_filter(state_select_q4)
    .transform_filter(dir_select_q4)
)

base_kpi_q4 = (
    alt.Chart(q4_df).transform_filter(state_select_q4).transform_filter(dir_select_q4)
)
kpi_fund_text_q4 = base_kpi_q4.mark_text(
    align="center", fontSize=18, fontWeight="bold", color="#4c78a8"
).encode(text=alt.Text("sum(total_amount):Q", format="$,.2s"))
kpi_fund_label_q4 = base_kpi_q4.mark_text(
    align="center", fontSize=10, color="gray", dy=-15
).encode(text=alt.value("Total Funding"))
kpi_fund_q4 = (kpi_fund_label_q4 + kpi_fund_text_q4).properties(width=100, height=50)

kpi_count_text_q4 = base_kpi_q4.mark_text(
    align="center", fontSize=18, fontWeight="bold", color="#4c78a8"
).encode(text=alt.Text("sum(grants_count):Q", format=","))
kpi_count_label_q4 = base_kpi_q4.mark_text(
    align="center", fontSize=10, color="gray", dy=-15
).encode(text=alt.value("Total Grants"))
kpi_count_q4 = (kpi_count_label_q4 + kpi_count_text_q4).properties(width=100, height=50)

chart_layer_q4 = evolution_q4 + points_q4
kpi_col_q4 = alt.vconcat(kpi_fund_q4, kpi_count_q4, spacing=15)
final_q4 = (chart_layer_q4 | kpi_col_q4).resolve_scale(color="independent")

# ============================================================================
# Q5: STATE EVOLUTION vs CANCELLATIONS
# ============================================================================
q5_grants = (
    df_grants.groupby(["state", "year"])
    .agg(grants_count=("award_id", "count"), total_amount=("award_amount", "sum"))
    .reset_index()
)
q5_trump_agg = (
    df_trump.groupby(["state", "year"])
    .agg(
        cancelled_count=("award_id", "count"), cancelled_amount=("award_amount", "sum")
    )
    .reset_index()
)

all_states_q5 = pd.concat([q5_grants["state"], q5_trump_agg["state"]]).unique()
all_years_q5 = pd.concat([q5_grants["year"], q5_trump_agg["year"]]).unique()
master_rows_q5 = list(itertools.product(all_states_q5, all_years_q5))
q5_master = pd.DataFrame(master_rows_q5, columns=["state", "year"])

q5_master = q5_master.merge(q5_grants, on=["state", "year"], how="left")
q5_master = q5_master.merge(q5_trump_agg, on=["state", "year"], how="left")
q5_master = q5_master.fillna(0)

states_list_q5 = sorted(all_states_q5)
state_input_q5 = alt.binding_select(options=states_list_q5, name="State: ")
state_select_q5 = alt.selection_point(
    fields=["state"], bind=state_input_q5, value=[{"state": "CA"}]
)

base_evolution_q5 = (
    alt.Chart(q5_master)
    .transform_filter(state_select_q5)
    .encode(x=alt.X("year:O", title=None))
)

bar_vol_q5 = base_evolution_q5.mark_bar(color="#9ecae1", opacity=0.6).encode(
    y=alt.Y("grants_count:Q", title="Grants", axis=alt.Axis(titleColor="#6baed6")),
    tooltip=["year", "grants_count"],
)
line_val_q5 = base_evolution_q5.mark_line(
    color="#08519c", strokeWidth=2, point=True
).encode(
    y=alt.Y(
        "total_amount:Q",
        title="Funding ($)",
        axis=alt.Axis(format="~s", titleColor="#08519c"),
    ),
    tooltip=["year", alt.Tooltip("total_amount", format="$,.0f")],
)

evolution_chart_q5 = (
    alt.layer(bar_vol_q5, line_val_q5)
    .resolve_scale(y="independent")
    .properties(width=450, height=160, title="Q5: Evolution vs Impact")
)

base_cancel_q5 = (
    alt.Chart(q5_master)
    .transform_filter(state_select_q5)
    .encode(x=alt.X("year:O", title="Year"))
)

cancel_bar_q5 = base_cancel_q5.mark_bar(color="#fc9272", opacity=0.6).encode(
    y=alt.Y(
        "cancelled_count:Q", title="Cancelled", axis=alt.Axis(titleColor="#fc9272")
    ),
    tooltip=["year", "cancelled_count"],
)
cancel_line_q5 = base_cancel_q5.mark_line(
    color="#de2d26", strokeWidth=2, point=True
).encode(
    y=alt.Y(
        "cancelled_amount:Q",
        title="Lost ($)",
        axis=alt.Axis(format="~s", titleColor="#de2d26"),
    ),
    tooltip=["year", alt.Tooltip("cancelled_amount", format="$,.0f")],
)

cancel_chart_q5 = (
    alt.layer(cancel_bar_q5, cancel_line_q5)
    .resolve_scale(y="independent")
    .properties(width=450, height=100, title="Cancellations")
)

final_q5 = (
    alt.vconcat(evolution_chart_q5, cancel_chart_q5, spacing=3)
    .add_params(state_select_q5)
)

# ============================================================================
# Q6: FUNDING PER CAPITA
# ============================================================================
pop_cols_q6 = [c for c in df_pop_raw.columns if c.lower().startswith("pop_")]
df_pop_long_q6 = df_pop_raw.melt(
    id_vars=["state"], value_vars=pop_cols_q6, var_name="year", value_name="population"
)
df_pop_long_q6["year"] = (
    df_pop_long_q6["year"].str.replace("pop_", "", regex=False).astype(int)
)
df_pop_long_q6["population"] = pd.to_numeric(
    df_pop_long_q6["population"], errors="coerce"
)
df_pop_long_q6 = df_pop_long_q6[df_pop_long_q6["year"].between(2020, 2024)].copy()
df_pop_long_q6 = df_pop_long_q6.rename(columns={"state": "state_name"})
df_pop_long_q6["state_name"] = df_pop_long_q6["state_name"].astype(str).str.strip()

df_abbr = df_abbr_raw.copy()
name_candidates = [
    c for c in df_abbr.columns if "name" in c.lower() or (c.lower() == "state")
]
abbr_candidates = [
    c for c in df_abbr.columns if "abbr" in c.lower() or "code" in c.lower()
]
name_col = name_candidates[0]
abbr_col = abbr_candidates[0]
df_abbr = df_abbr.rename(columns={name_col: "state_name", abbr_col: "state"})
df_abbr["state_name"] = df_abbr["state_name"].astype(str).str.strip()
df_abbr["state"] = df_abbr["state"].astype(str).str.strip()
df_abbr["state_name_key"] = df_abbr["state_name"].str.lower()
df_pop_long_q6["state_name_key"] = df_pop_long_q6["state_name"].str.lower()
df_pop_long_q6 = df_pop_long_q6.merge(
    df_abbr[["state_name_key", "state"]], on="state_name_key", how="left"
)
df_pop_long_q6 = df_pop_long_q6.dropna(subset=["state", "population"])
df_pop_long_q6 = df_pop_long_q6[["state", "year", "population"]].copy()

df_grants["year"] = pd.to_numeric(df_grants["year"], errors="coerce").astype(int)
q6_grants = (
    df_grants.dropna(subset=["state", "year", "award_amount"])
    .groupby(["state", "year"])
    .agg(total_amount=("award_amount", "sum"), grants_count=("award_id", "count"))
    .reset_index()
)
q6_df = q6_grants.merge(df_pop_long_q6, on=["state", "year"], how="inner")
q6_df["funding_per_capita"] = q6_df["total_amount"] / q6_df["population"]

min_year_q6 = int(q6_df["year"].min())
max_year_q6 = int(q6_df["year"].max())
slider_q6 = alt.binding_range(min=min_year_q6, max=max_year_q6, step=1, name="Year: ")
year_select_q6 = alt.selection_point(
    name="year_select_q6",
    fields=["year"],
    bind=slider_q6,
    value=[{"year": max_year_q6}],
)
state_select_q6 = alt.selection_point(
    name="state_select_q6", fields=["state"], empty="all", on="click", clear="dblclick"
)

bars_q6 = (
    alt.Chart(q6_df)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title="State"),
        y=alt.Y("funding_per_capita:Q", title="$/person", axis=alt.Axis(format=",.2f")),
        color=alt.Color(
            "funding_per_capita:Q", scale=alt.Scale(scheme="purples"), legend=None
        ),
        stroke=alt.condition(state_select_q6, alt.value("black"), alt.value(None)),
        strokeWidth=alt.condition(state_select_q6, alt.value(1.5), alt.value(0)),
        tooltip=[
            alt.Tooltip("state:N"),
            alt.Tooltip("year:O"),
            alt.Tooltip("population:Q", format=",.0f"),
            alt.Tooltip("total_amount:Q", format="$,.0f"),
            alt.Tooltip("funding_per_capita:Q", format="$,.2f"),
            alt.Tooltip("grants_count:Q"),
        ],
    )
    .transform_filter(year_select_q6)
    .add_params(year_select_q6, state_select_q6)
    .properties(width=350, height=260, title="Q6: Funding per Capita")
)

history_q6 = (
    alt.Chart(q6_df)
    .mark_line(point=True, strokeWidth=2)
    .encode(
        x=alt.X("year:O", title="Year"),
        y=alt.Y("funding_per_capita:Q", title="$/person", axis=alt.Axis(format=",.2f")),
        color=alt.value("#6a3d9a"),
        tooltip=[
            alt.Tooltip("state:N"),
            alt.Tooltip("year:O"),
            alt.Tooltip("funding_per_capita:Q", format="$,.2f"),
            alt.Tooltip("total_amount:Q", format="$,.0f"),
            alt.Tooltip("population:Q", format=",.0f"),
            alt.Tooltip("grants_count:Q"),
        ],
    )
    .transform_filter(state_select_q6)
    .properties(width=210, height=130, title="History")
)

kpi_base_q6 = (
    alt.Chart(q6_df).transform_filter(year_select_q6).transform_filter(state_select_q6)
)


def kpi_q6(label_text, expr, fmt, y):
    lbl = kpi_base_q6.mark_text(
        align="center", color="#888", fontSize=10, dy=-8
    ).encode(text=alt.value(label_text), x=alt.value(105), y=alt.value(y))
    val = kpi_base_q6.mark_text(
        align="center", color="#333", fontSize=16, fontWeight="bold", dy=10
    ).encode(text=alt.Text(expr, format=fmt), x=alt.value(105), y=alt.value(y))
    return lbl + val


kpi_panel_q6 = (
    alt.Chart(pd.DataFrame({"x": [0]}))
    .mark_rect(opacity=0)
    .encode()
    .properties(width=210, height=130)
    + kpi_q6("$/capita", "mean(funding_per_capita):Q", "$,.2f", y=35)
    + kpi_q6("Total Funding", "sum(total_amount):Q", "$,.0f", y=75)
    + kpi_q6("Population", "mean(population):Q", ",.0f", y=115)
)

right_col_q6 = history_q6 & kpi_panel_q6
final_q6 = (bars_q6 | right_col_q6).resolve_scale(color="independent")

# ============================================================================
# FINAL ASSEMBLY: ALL 6 CHARTS IN ONE DASHBOARD
# ============================================================================
row1 = alt.hconcat(final_q1, final_q2, spacing=20)
row2 = alt.hconcat(final_q3, final_q4, spacing=20)
row3 = alt.hconcat(final_q5, final_q6, spacing=20)


"""
final_dashboard = (
    alt.vconcat(row1, row2, row3, spacing=30)
    .configure_view(stroke=None)
    .configure_concat(spacing=20)
    .properties(
        title=alt.TitleParams(
            text="NSF Grants Analysis Dashboard",
            fontSize=24,
            fontWeight="bold",
            anchor="middle",
        )
    )
)
final_dashboard
"""

dashboard = (
    alt.vconcat(row1, row2, row3)
    .resolve_scale(color="independent")
    .configure_view(stroke=None)
    .configure_concat(spacing=60)
)

dashboard

In [142]:
import altair as alt
import pandas as pd
import itertools

# 0. GLOBAL SETUP
alt.data_transformers.enable("default")

# --- LOAD DATA ---
# (Assuming files are in the local directory)
df_grants = pd.read_csv("NSF_Grants_Last5Years_Clean.csv")
df_trump = pd.read_csv("trump17-21-csv.csv")
df_pop_raw = pd.read_csv("estimated_population.csv")
df_abbr_raw = pd.read_csv("state_abbreviations.csv")

# Clean
df_grants.columns = df_grants.columns.str.strip()
df_trump.columns = df_trump.columns.str.strip()
df_grants["year"] = pd.to_numeric(df_grants["year"], errors="coerce").astype(int)

# ==============================================================================
# Q1 (Top Left): GRANTS BY STATE
# ==============================================================================
q1_yearly = (
    df_grants.groupby(["state", "year"])
    .agg(cnt=("award_id", "count"), amt=("award_amount", "sum"))
    .reset_index()
)
q1_total = (
    df_grants.groupby(["state"])
    .agg(cnt=("award_id", "count"), amt=("award_amount", "sum"))
    .reset_index()
)
q1_total["year"] = 0
q1_full = pd.concat([q1_yearly, q1_total], ignore_index=True)

q1_yr_sel = alt.selection_point(
    fields=["year"],
    bind=alt.binding_select(
        options=[0] + sorted(q1_yearly["year"].unique()), name="Q1 Year:"
    ),
    value=[{"year": 0}],
    name="q1_yr",
)
q1_st_sel = alt.selection_point(fields=["state"], empty="all", name="q1_st")

q1_main = (
    alt.Chart(q1_full)
    .mark_bar()
    .encode(
        x=alt.X("state:N", sort="-y", title=None),
        y=alt.Y("cnt:Q", title="Grants"),
        color=alt.condition(
            q1_st_sel,
            alt.Color("cnt:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#eee"),
        ),
        tooltip=["state", "cnt"],
    )
    .add_params(q1_yr_sel, q1_st_sel)
    .transform_filter(q1_yr_sel)
    .properties(width=280, height=220, title="Q1: Grants by State")
)

q1_sub = (
    alt.Chart(q1_yearly)
    .mark_line(point=True)
    .encode(
        x=alt.X("year:O", title=None),
        y=alt.Y("amt:Q", axis=alt.Axis(format="~s"), title=None),
        color=alt.value("#4c78a8"),
        tooltip=["year", "amt"],
    )
    .transform_filter(q1_st_sel)
    .properties(width=120, height=80, title="History")
)

q1_kpi = (
    alt.Chart(q1_full)
    .transform_filter(q1_yr_sel)
    .transform_filter(q1_st_sel)
    .mark_text(color="#444", fontWeight="bold")
    .encode(text=alt.Text("sum(amt):Q", format="$.2s"))
    .properties(width=120, height=30)
)

final_q1 = q1_main | (q1_sub & q1_kpi)

# ==============================================================================
# Q2 (Top Right): GRANTS BY DIRECTORATE
# ==============================================================================
q2_yearly = (
    df_grants.groupby(["directorate", "year"])
    .agg(cnt=("award_id", "count"), amt=("award_amount", "sum"))
    .reset_index()
)
q2_total = (
    df_grants.groupby(["directorate"])
    .agg(cnt=("award_id", "count"), amt=("award_amount", "sum"))
    .reset_index()
)
q2_total["year"] = 0
q2_full = pd.concat([q2_yearly, q2_total], ignore_index=True)

q2_yr_sel = alt.selection_point(
    fields=["year"],
    bind=alt.binding_select(
        options=[0] + sorted(q2_yearly["year"].unique()), name="Q2 Year:"
    ),
    value=[{"year": 0}],
    name="q2_yr",
)
q2_dir_sel = alt.selection_point(fields=["directorate"], empty="all", name="q2_dir")

q2_main = (
    alt.Chart(q2_full)
    .mark_bar()
    .encode(
        x=alt.X("cnt:Q", title=None),
        y=alt.Y("directorate:N", sort="-x", title=None),
        # ✅ FIX: Used correct variable q2_dir_sel
        color=alt.condition(
            q2_dir_sel,
            alt.Color("cnt:Q", scale=alt.Scale(scheme="blues"), legend=None),
            alt.value("#eee"),
        ),
        tooltip=["directorate", "cnt"],
    )
    .add_params(q2_dir_sel, q2_yr_sel)
    .transform_filter(q2_yr_sel)
    .properties(width=200, height=220, title="Q2: By Directorate")
)

q2_sub = (
    alt.Chart(q2_yearly)
    .mark_line(point=True)
    .encode(
        x=alt.X("year:O", title=None),
        y=alt.Y("amt:Q", axis=alt.Axis(format="~s"), title=None),
        color=alt.value("#4c78a8"),
    )
    .transform_filter(q2_dir_sel)
    .properties(width=150, height=100, title="History")
)

final_q2 = q2_main | q2_sub

# ==============================================================================
# Q3 (Mid Left): CANCELLATIONS
# ==============================================================================
q3_base = (
    df_grants.groupby(["directorate"]).agg(base=("award_id", "count")).reset_index()
)
q3_cancel = (
    df_trump.groupby(["directorate"])
    .agg(cancel=("award_id", "count"), lost=("award_amount", "sum"))
    .reset_index()
)
q3_df = q3_base.merge(q3_cancel, on="directorate", how="outer").fillna(0)
q3_df["rate"] = q3_df["cancel"] / q3_df["base"]

q3_sel = alt.selection_point(fields=["directorate"], empty="all", name="q3_sel")

q3_bars = (
    alt.Chart(q3_df)
    .mark_bar()
    .encode(
        x=alt.X("cancel:Q", title="Cancelled"),
        y=alt.Y("directorate:N", sort="-x", title=None),
        color=alt.condition(
            q3_sel,
            alt.Color("cancel:Q", scale=alt.Scale(scheme="reds"), legend=None),
            alt.value("#eee"),
        ),
        tooltip=["directorate", "cancel", "lost"],
    )
    .add_params(q3_sel)
    .properties(width=160, height=220, title="Q3: Cancellations")
)

q3_scat = (
    alt.Chart(q3_df)
    .mark_circle()
    .encode(
        x=alt.X("base:Q", title="Size", axis=alt.Axis(format="~s")),
        y=alt.Y("cancel:Q", title="Hits"),
        size=alt.Size("lost:Q", legend=None),
        color=alt.condition(q3_sel, alt.value("red"), alt.value("#eee")),
        tooltip=["directorate", "base", "cancel"],
    )
    .transform_filter(q3_sel)
    .properties(width=200, height=200, title="Scale vs Hits")
    .interactive()
)

final_q3 = q3_bars | q3_scat

# ==============================================================================
# Q4 (Mid Right): FUNDING EVOLUTION
# ==============================================================================
q4_df = df_grants.groupby(["year"]).agg(amt=("award_amount", "sum")).reset_index()

final_q4 = (
    alt.Chart(q4_df)
    .mark_area(
        line={"color": "#4c78a8"},
        color=alt.Gradient(
            gradient="linear",
            stops=[
                alt.GradientStop(color="#4c78a8", offset=0),
                alt.GradientStop(color="white", offset=1),
            ],
            x1=1,
            x2=1,
            y1=1,
            y2=0,
        ),
        opacity=0.6,
    )
    .encode(
        x=alt.X("year:O", title=None),
        y=alt.Y("amt:Q", title="Total ($)", axis=alt.Axis(format="~s")),
        tooltip=["year", alt.Tooltip("amt", format="$,.2s")],
    )
    .properties(width=380, height=200, title="Q4: Funding Trend")
)

# ==============================================================================
# Q5 (Bot Left): TRUMP IMPACT
# ==============================================================================
all_states = sorted(df_grants["state"].unique())
years = [2017, 2018, 2019, 2020, 2021]
q5_data = pd.DataFrame(
    list(itertools.product(all_states, years)), columns=["state", "year"]
)
q5_data = q5_data.merge(
    df_grants.groupby(["state", "year"])["award_amount"]
    .sum()
    .rename("fund")
    .reset_index(),
    on=["state", "year"],
    how="left",
)
q5_data = q5_data.merge(
    df_trump.groupby(["state", "year"])["award_amount"]
    .sum()
    .rename("lost")
    .reset_index(),
    on=["state", "year"],
    how="left",
).fillna(0)

q5_sel = alt.selection_point(
    fields=["state"],
    bind=alt.binding_select(options=all_states, name="Q5 State:"),
    value=[{"state": "CA"}],
    name="q5_sel",
)

q5_base = (
    alt.Chart(q5_data).transform_filter(q5_sel).encode(x=alt.X("year:O", title=None))
)
q5_top = (
    q5_base.mark_line(color="#08519c")
    .encode(y=alt.Y("fund:Q", axis=alt.Axis(format="~s"), title="Funded"))
    .properties(width=250, height=100)
)
q5_bot = (
    q5_base.mark_bar(color="#de2d26")
    .encode(y=alt.Y("lost:Q", axis=alt.Axis(format="~s"), title="Lost"))
    .properties(width=250, height=80)
)

final_q5 = (
    alt.vconcat(q5_top, q5_bot, spacing=5)
    .add_params(q5_sel)
    .properties(title="Q5: Trump Impact")
)

# ==============================================================================
# Q6 (Bot Right): POPULATION
# ==============================================================================
# Simplified robust loading for compactness
q6_pop = df_pop_raw.melt(
    id_vars=["state"],
    value_vars=[c for c in df_pop_raw.columns if "pop_" in c],
    var_name="year",
    value_name="pop",
)
q6_pop["year"] = q6_pop["year"].str.replace("pop_", "").astype(int)
q6_pop["pop"] = pd.to_numeric(q6_pop["pop"], errors="coerce")
q6_pop = q6_pop[q6_pop["year"] == 2023][
    ["state", "pop"]
]  # Just use latest year for simplicity in summary

# Join via Abbr
df_abbr = df_abbr_raw.rename(
    columns={df_abbr_raw.columns[0]: "state_name", df_abbr_raw.columns[1]: "state"}
)
q6_pop = q6_pop.merge(
    df_abbr, left_on="state", right_on="state_name", suffixes=("", "_y")
)
if "state_y" in q6_pop.columns:
    q6_pop = q6_pop.rename(columns={"state_y": "abbr"}).drop(columns=["state"])
else:
    q6_pop["abbr"] = q6_pop["state"]  # Fallback

q6_funds = df_grants.groupby("state")["award_amount"].sum().reset_index()
q6_df = q6_funds.merge(q6_pop, left_on="state", right_on="abbr", how="inner")
q6_df["per_cap"] = q6_df["award_amount"] / q6_df["pop"]

q6_st_sel = alt.selection_point(fields=["state"], empty="all", name="q6_sel")

final_q6 = (
    alt.Chart(q6_df)
    .mark_circle(size=100)
    .encode(
        x=alt.X("pop:Q", axis=alt.Axis(format="~s"), title="Pop"),
        y=alt.Y("per_cap:Q", axis=alt.Axis(format="$~s"), title="$/Cap"),
        color=alt.condition(
            q6_st_sel,
            alt.Color("per_cap:Q", scale=alt.Scale(scheme="purples"), legend=None),
            alt.value("#eee"),
        ),
        tooltip=["state", "pop", "per_cap"],
    )
    .add_params(q6_st_sel)
    .properties(width=280, height=200, title="Q6: Efficiency")
    .interactive()
)

# ==============================================================================
# ASSEMBLE 2x3 GRID
# ==============================================================================
row1 = alt.hconcat(final_q1, final_q2).resolve_scale(color="independent")
row2 = alt.hconcat(final_q3, final_q4).resolve_scale(color="independent")
row3 = alt.hconcat(final_q5, final_q6).resolve_scale(color="independent")

dashboard = (
    alt.vconcat(row1, row2, row3)
    .configure_view(stroke=None)
    .configure_concat(spacing=30)
)
dashboard