In [None]:
#pip install vega_datasets


Collecting vega_datasets
  Using cached vega_datasets-0.9.0-py3-none-any.whl.metadata (5.5 kB)
Using cached vega_datasets-0.9.0-py3-none-any.whl (210 kB)
Installing collected packages: vega_datasets
Successfully installed vega_datasets-0.9.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import altair as alt
from vega_datasets import data as vega_data

# Load your data
df = pd.read_csv("shopping_behavior_updated.csv")

# Map full state names to FIPS codes used in the us_10m topojson
state_fips = {
    "Alabama": 1,
    "Alaska": 2,
    "Arizona": 4,
    "Arkansas": 5,
    "California": 6,
    "Colorado": 8,
    "Connecticut": 9,
    "Delaware": 10,
    "Florida": 12,
    "Georgia": 13,
    "Hawaii": 15,
    "Idaho": 16,
    "Illinois": 17,
    "Indiana": 18,
    "Iowa": 19,
    "Kansas": 20,
    "Kentucky": 21,
    "Louisiana": 22,
    "Maine": 23,
    "Maryland": 24,
    "Massachusetts": 25,
    "Michigan": 26,
    "Minnesota": 27,
    "Mississippi": 28,
    "Missouri": 29,
    "Montana": 30,
    "Nebraska": 31,
    "Nevada": 32,
    "New Hampshire": 33,
    "New Jersey": 34,
    "New Mexico": 35,
    "New York": 36,
    "North Carolina": 37,
    "North Dakota": 38,
    "Ohio": 39,
    "Oklahoma": 40,
    "Oregon": 41,
    "Pennsylvania": 42,
    "Rhode Island": 44,
    "South Carolina": 45,
    "South Dakota": 46,
    "Tennessee": 47,
    "Texas": 48,
    "Utah": 49,
    "Vermont": 50,
    "Virginia": 51,
    "Washington": 53,
    "West Virginia": 54,
    "Wisconsin": 55,
    "Wyoming": 56,
}

# Aggregate to state level
state_stats = (
    df.groupby("Location")
    .agg(
        avg_purchase=("Purchase Amount (USD)", "mean"),
        total_customers=("Customer ID", "nunique"),
        transaction_count=("Customer ID", "size"),
    )
    .reset_index()
)

# Add FIPS state id for joining to the topojson
state_stats["state_id"] = state_stats["Location"].map(state_fips)

# Drop any rows that did not map correctly
state_stats = state_stats.dropna(subset=["state_id"])

# Topojson for US states
states = alt.topo_feature(vega_data.us_10m.url, "states")

# Hover selection
hover = alt.selection_single(
    fields=["id"],
    on="mouseover",
    empty="none",
    clear="mouseout",
)

# Build the choropleth
choropleth = (
    alt.Chart(states)
    .mark_geoshape()
    .encode(
        color=alt.Color(
            "avg_purchase:Q",
            title="Average purchase (USD)",
            scale=alt.Scale(scheme="blues"),
        ),
        tooltip=[
            alt.Tooltip("Location:N", title="State"),
            alt.Tooltip("avg_purchase:Q", title="Average purchase", format=".2f"),
            alt.Tooltip("total_customers:Q", title="Total customers"),
            alt.Tooltip("transaction_count:Q", title="Transaction count"),
        ],
        stroke=alt.condition(hover, alt.value("black"), alt.value("white")),
        strokeWidth=alt.condition(hover, alt.value(2), alt.value(0.5)),
    )
    .transform_lookup(
        lookup="id",
        from_=alt.LookupData(
            state_stats,
            key="state_id",
            fields=["Location", "avg_purchase", "total_customers", "transaction_count"],
        ),
    )
    .add_selection(hover)
    .project(type="albersUsa")
    .properties(
        width=700,
        height=450,
        title="Average Purchase Amount by State",
    )
)

choropleth


Deprecated since `altair=5.0.0`. Use selection_point instead.
  hover = alt.selection_single(
Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(hover)
