# Analyse compl√®te : S√©ismes & Prix Immobiliers aux √âtats-Unis  
Ce notebook regroupe :
- Deux cartes US (s√©ismes & prix immobiliers)
- Un scatter avec r√©gression
- Une analyse High/Low n_earthquake group√©e
- Une visualisation centr√©e (taille de points)

In [1]:
import pandas as pd
import altair as alt
import numpy as np
import us
from vega_datasets import data
import os

# Load dataset
df_state_aggreg = pd.read_csv("data/agg_state_year.csv")
df_county_aggreg = pd.read_csv("data/agg_county_year.csv")

print(df_state_aggreg, df_county_aggreg)

              state  year  n_earthquakes  avg_magnitude  max_magnitude  \
0         Louisiana  2006            1.0       5.300000            5.3   
1           Georgia  1991           76.0       4.023684            7.0   
2     Massachusetts  1992            1.0       4.800000            4.8   
3           Indiana  2002            1.0       4.600000            4.6   
4            Alaska  1996         5138.0       2.369093            7.9   
...             ...   ...            ...            ...            ...   
2407      Wisconsin  2019            NaN            NaN            NaN   
2408      Wisconsin  2020            NaN            NaN            NaN   
2409      Wisconsin  2021            NaN            NaN            NaN   
2410      Wisconsin  2022            NaN            NaN            NaN   
2411      Wisconsin  2023            NaN            NaN            NaN   

      avg_depth  intensity_score  n_properties     avg_price  median_price  \
0      5.000000         5.300000 

## G√©n√©ration des codes FIPS pour les cartes US

In [2]:
# ---------------------------
# FIPS CODES (STATE LEVEL)
# ---------------------------

def get_fips(state_name):
    """Return 2-digit FIPS code or None if invalid."""
    st = us.states.lookup(state_name)
    return st.fips if st else None

df_state_aggreg["fips"] = df_state_aggreg["state"].apply(get_fips)

# Convert FIPS to zero-padded string (Altair requires strings)
df_state_aggreg["fips"] = df_state_aggreg["fips"].astype(str).str.zfill(2)



# ---------------------------
# CLEAN MISSING VALUES
# ---------------------------

df_state_aggreg["n_earthquakes"] = df_state_aggreg["n_earthquakes"].fillna(0)

# If your county dataframe is named df_county_aggreg:
df_county_aggreg["n_earthquakes"] = df_county_aggreg["n_earthquakes"].fillna(0)



# ---------------------------
# LOAD US STATES TOPOJSON
# ---------------------------

us_states = alt.topo_feature(data.us_10m.url, "states")

df_state_aggreg.head()

Unnamed: 0,state,year,n_earthquakes,avg_magnitude,max_magnitude,avg_depth,intensity_score,n_properties,avg_price,median_price,avg_bedrooms,avg_bathrooms,price_per_bedroom,price_per_bathroom,eq_per_100_properties,fips
0,Louisiana,2006,1.0,5.3,5.3,5.0,5.3,539.0,290772.7,235000.0,3.534323,2.476809,82271.128084,117398.126592,0.185529,22
1,Georgia,1991,76.0,4.023684,7.0,11.725,5.214211,189.0,573270.3,450000.0,3.592593,2.798942,159570.094256,204816.812854,40.21164,13
2,Massachusetts,1992,1.0,4.8,4.8,10.0,4.8,481.0,1387046.0,724900.0,3.893971,2.993763,356203.604912,463312.05,0.2079,25
3,Indiana,2002,1.0,4.6,4.6,16.05,4.6,100.0,320872.0,257450.0,3.18,2.35,100903.144654,136541.276596,1.0,18
4,Alaska,1996,5138.0,2.369093,7.9,39.403387,4.581456,,,,,,,,,2


## S√©lection de l‚Äôann√©e

In [3]:
year = 2015
df_state_aggreg_year = df_state_aggreg[df_state_aggreg["year"] == year]
df_county_aggreg_year = df_county_aggreg[df_county_aggreg["year"] == year]

print(df_state_aggreg_year.head(), df_county_aggreg_year.head())

          state  year  n_earthquakes  avg_magnitude  max_magnitude  avg_depth  \
88       Alaska  2015        32967.0       1.568790            6.8  40.964495   
125    Oklahoma  2015         2997.0       2.787000            4.7   5.332159   
130    Michigan  2015            4.0       3.075000            4.2   2.370000   
154    Nebraska  2015            3.0       3.366667            3.6   8.250000   
189  New Mexico  2015           13.0       2.992308            3.9   4.819231   

     intensity_score  n_properties      avg_price  median_price  avg_bedrooms  \
88          3.661274          54.0  406611.111111      339000.0      3.259259   
125         3.552200        1054.0  320944.660342      220000.0      3.399431   
130         3.525000        2434.0  299116.164339      225000.0      3.371816   
154         3.460000        1337.0  287675.089005      250000.0      3.362004   
189         3.355385         149.0  460353.671141      329000.0      3.308725   

     avg_bathrooms  price_

# Carte US : Intensit√© des S√©ismes et Prix M√©dian des Maisons

In [4]:
heatmap_eq = alt.Chart(us_states).mark_geoshape().encode(
    color=alt.Color(
        "n_earthquakes:Q",
        scale=alt.Scale(range=["#ffe6e6", "#800000"]),
        title="Number of earthquake"
    ),
    tooltip=["State:N", "n_earthquakes:Q"]
).transform_lookup(
    lookup="id",
    from_=alt.LookupData(df_state_aggreg_year, "fips", ["n_earthquakes", "State"])
).project("albersUsa").properties(
    title=f"USA earthquake map ({year})", width=400, height=300
)

heatmap_price = alt.Chart(us_states).mark_geoshape().encode(
    color=alt.Color(
        "avg_price:Q",
        scale=alt.Scale(range=["#e6f2ff", "#0055aa"]),
        title="Average real estate price"
    ),
    tooltip=["state:N", "avg_price:Q"]
).transform_lookup(
    lookup="id",
    from_=alt.LookupData(df_state_aggreg_year, "fips", ["avg_price", "State"])
).project("albersUsa").properties(
    title=f"USA Average real estate price ({year})", width=400, height=300
)

(heatmap_eq | heatmap_price).resolve_scale(color="independent")

# Corr√©lation S√©ismes ‚Üî Prix

## üîé Analyse de la Relation entre l‚ÄôActivit√© Sismique et les Prix Immobiliers  
### Une visualisation combinant √©chelle logarithmique, intensit√© sismique et tendance g√©n√©rale

Cette visualisation explore comment le nombre de s√©ismes dans un √âtat am√©ricain est associ√© au prix m√©dian de l‚Äôimmobilier.  
Elle combine plusieurs √©l√©ments compl√©mentaires :

- **Scatter plot** : chaque point repr√©sente un √âtat pour l‚Äôann√©e s√©lectionn√©e.  
- **√âchelle logarithmique sur l‚Äôaxe des X** : permet de visualiser correctement des niveaux de sismicit√© tr√®s diff√©rents (de 1 √† plusieurs centaines).  
- **Couleur continue (Viridis)** : encode l‚Äôintensit√© des s√©ismes, renfor√ßant la lecture des variations.  
- **Regression Lin√©aire** : une tendance lin√©aire qui met en √©vidence la relation g√©n√©rale.  

L‚Äôobjectif final est d‚Äôoffrir une lecture claire, √©quilibr√©e et robuste de la relation potentielle entre l‚Äôactivit√© sismique et les prix domiciliaires, malgr√© la grande variabilit√© des √âtats.

In [5]:
# Filter out invalid values
df_corr = df_state_aggreg_year[
    (df_state_aggreg_year["n_earthquakes"] > 0) &
    (df_state_aggreg_year["avg_price"] > 0)
].copy()

# Ensure numeric
df_corr["n_earthquakes"] = pd.to_numeric(df_corr["n_earthquakes"], errors="coerce")
df_corr["avg_price"] = pd.to_numeric(df_corr["avg_price"], errors="coerce")

# Color scale
color_scale = alt.Scale(type="log", scheme="viridis")

scatter = (
    alt.Chart(df_corr)
    .mark_circle(size=90, opacity=0.75)
    .encode(
        x=alt.X("n_earthquakes:Q", title="Earthquakes (log scale)", scale=alt.Scale(type="log")),
        y=alt.Y("avg_price:Q", title="Average House Price ($)", scale=alt.Scale(zero=False)),
        color=alt.Color("n_earthquakes:Q", title="Earthquake Count", scale=color_scale,
                        legend=alt.Legend(labelExpr="datum.label")),
        tooltip=[
            alt.Tooltip("state:N"),
            alt.Tooltip("n_earthquakes:Q", title="Earthquakes"),
            alt.Tooltip("avg_price:Q", title="Avg Price ($)")
        ]
    )
)

smooth = (
    alt.Chart(df_corr)
    .transform_regression("n_earthquakes", "avg_price", method="linear")
    .mark_line(color="black", size=3)
    .encode(
        x=alt.X("n_earthquakes:Q", scale=alt.Scale(type="log")),
        y="avg_price:Q"
    )
)

correlation = (scatter + smooth).properties(
    width=900,
    height=550,
    title="Earthquake Frequency vs House Prices (Log Scale)"
)

correlation

In [14]:
import altair as alt
import pandas as pd
import numpy as np

# --------------------------
# 1. FILTER AND CLEAN DATA
# --------------------------

df_corr = df_state_aggreg_year[
    (df_state_aggreg_year["n_earthquakes"] > 0) &
    (df_state_aggreg_year["avg_price"] > 0) &
    (df_state_aggreg_year["avg_magnitude"].notna())
].copy()

# Ensure numeric
df_corr["n_earthquakes"] = pd.to_numeric(df_corr["n_earthquakes"], errors="coerce")
df_corr["avg_price"] = pd.to_numeric(df_corr["avg_price"], errors="coerce")
df_corr["avg_magnitude"] = pd.to_numeric(df_corr["avg_magnitude"], errors="coerce")

# --------------------------
# 2. CREATE DYNAMIC EXCLUSIVE BINS
# --------------------------

# Determine min and max for avg_magnitude
min_mag = df_corr["avg_magnitude"].min()
max_mag = df_corr["avg_magnitude"].max()

# Create 5 equal-width bins between min and max
bins = np.linspace(min_mag, max_mag, num=6)  # 6 edges ‚Üí 5 bins
labels = [f"{round(bins[i],1)}-{round(bins[i+1],1)}" for i in range(len(bins)-1)]

# right=False ensures exclusive upper boundary
df_corr["magnitude_bin"] = pd.cut(df_corr["avg_magnitude"], bins=bins, labels=labels, right=False)

# --------------------------
# 3. DEFINE DARK COLOR SCALE
# --------------------------

color_scale = alt.Scale(
    domain=labels,
    range=["#7f3c8d", "#11a579", "#3969ac", "#f2b701", "#e73f74"]  # dark, distinct
)

# --------------------------
# 4. SCATTER PLOT
# --------------------------

scatter = (
    alt.Chart(df_corr)
    .mark_circle(size=90, opacity=0.75)
    .encode(
        x=alt.X("n_earthquakes:Q", title="Earthquakes (log scale)", scale=alt.Scale(type="log")),
        y=alt.Y("avg_price:Q", title="Average House Price ($)", scale=alt.Scale(zero=False)),
        color=alt.Color("magnitude_bin:N", title="Avg Magnitude Bin", scale=color_scale),
        tooltip=[
            alt.Tooltip("state:N"),
            alt.Tooltip("n_earthquakes:Q", title="Earthquakes"),
            alt.Tooltip("avg_price:Q", title="Avg Price ($)"),
            alt.Tooltip("avg_magnitude:Q", title="Avg Magnitude"),
            alt.Tooltip("magnitude_bin:N", title="Magnitude Bin")
        ]
    )
)

# --------------------------
# 5. REGRESSION TRENDLINE
# --------------------------

smooth = (
    alt.Chart(df_corr)
    .transform_regression("n_earthquakes", "avg_price", method="linear")
    .mark_line(color="black", size=3)
    .encode(
        x=alt.X("n_earthquakes:Q", scale=alt.Scale(type="log")),
        y="avg_price:Q"
    )
)

# --------------------------
# 6. COMBINE PLOT
# --------------------------

correlation = (scatter + smooth).properties(
    width=900,
    height=550,
    title="Earthquake Frequency vs House Prices (Log Scale, Dynamic Magnitude Bins)"
)

correlation


# Classification des √âtats : High vs Low n_earthquake
Seuls les √âtats dans les quantiles 20% et 80% sont conserv√©s.

In [7]:
# Sum of earthquakes per state
state_totals = df_state_aggreg_year.groupby('state')['n_earthquakes'].sum().sort_values()

threshold_high = state_totals.quantile(0.8)
threshold_low = state_totals.quantile(0.2)

def classify_state(state):
    total = state_totals[state]
    if total >= threshold_high:
        return 'High n_earthquake'
    elif total <= threshold_low:
        return 'Low n_earthquake'
    else:
        return 'Medium'

# Apply classification
df_state_aggreg_year['quake_group'] = df_state_aggreg_year['state'].apply(classify_state)

# Select only high and low groups
df_selected = df_state_aggreg_year[df_state_aggreg_year['quake_group'].isin(['High n_earthquake', 'Low n_earthquake'])]

df_selected.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_state_aggreg_year['quake_group'] = df_state_aggreg_year['state'].apply(classify_state)


Unnamed: 0,state,year,n_earthquakes,avg_magnitude,max_magnitude,avg_depth,intensity_score,n_properties,avg_price,median_price,avg_bedrooms,avg_bathrooms,price_per_bedroom,price_per_bathroom,eq_per_100_properties,fips,quake_group
88,Alaska,2015,32967.0,1.56879,6.8,40.964495,3.661274,54.0,406611.1,339000.0,3.259259,2.666667,124755.681818,152479.166667,61050.0,2,High n_earthquake
125,Oklahoma,2015,2997.0,2.787,4.7,5.332159,3.5522,1054.0,320944.7,220000.0,3.399431,2.42315,94411.295562,132449.362569,284.345351,40,High n_earthquake
458,Hawaii,2015,4944.0,1.426958,5.2,6.815867,2.936175,143.0,1536643.0,875000.0,2.993007,2.461538,513411.186916,624261.329545,3457.342657,15,High n_earthquake
508,California,2015,46149.0,0.965967,5.72,5.767649,2.86758,7174.0,1150948.0,699900.0,3.400753,2.678561,338439.224126,429688.892121,643.281294,6,High n_earthquake
549,Kansas,2015,1835.0,1.966458,4.1,5.054834,2.819875,499.0,298272.2,284900.0,3.577154,2.703407,83382.537815,110331.971831,367.735471,20,High n_earthquake


# Evolution des prix selon High / Low n_earthquake

In [8]:
import altair as alt
import pandas as pd

# --- Filter data after 1990 ---
df_state_aggreg_filtered = df_state_aggreg[df_state_aggreg['year'] > 1990].copy()

# Y-axis domain
buffer = 1_000_000
ymin = df_state_aggreg_filtered['avg_price'].min()
ymax = df_state_aggreg_filtered['avg_price'].max() - buffer

# --- Clip scatter points within domain ---
df_state_aggreg_filtered = df_state_aggreg_filtered[
    (df_state_aggreg_filtered['avg_price'] >= ymin) &
    (df_state_aggreg_filtered['avg_price'] <= ymax)
].copy()

# --- Separate Alaska & California ---
df_alaska = df_state_aggreg_filtered[df_state_aggreg_filtered['state'] == 'Alaska']
df_california = df_state_aggreg_filtered[df_state_aggreg_filtered['state'] == 'California']

# --- Scatter with variable size ---
scatter = (
    alt.Chart(df_state_aggreg_filtered)
    .mark_circle(opacity=0.7)
    .encode(
        x=alt.X('year:O', title='Year'),
        y=alt.Y('avg_price:Q', title='Average House Price ($)',
                scale=alt.Scale(domain=[ymin, ymax])),
        size=alt.Size('n_earthquakes:Q', title='Number of Earthquakes',
                      scale=alt.Scale(range=[30, 400])),
        color=alt.value("#999999"),  # scatter = neutral grey
        tooltip=['state:N', 'year:O', 'n_earthquakes:Q', 'avg_price:Q']
    )
)

# --- Lines with legend ---
line_data = pd.concat([df_alaska, df_california])
line_data['line_label'] = line_data['state']  # used for the legend

line = (
    alt.Chart(line_data)
    .mark_line(size=3)
    .encode(
        x='year:O',
        y='avg_price:Q',
        color=alt.Color(
            'line_label:N',
            title='States',
            scale=alt.Scale(
                domain=['Alaska', 'California'],
                range=['#1f77b4', '#d62728']  # blue, red
            )
        ),
        tooltip=['state:N', 'year:O', 'avg_price:Q']
    )
)

# --- Combine ---
chart = (
    (scatter + line)
    .properties(
        width=900,
        height=500,
        title='House Prices Over Time ‚Äì Alaska vs California (After 1990)'
    )
    .configure_axis(
        labelFontSize=12,
        titleFontSize=14,
        gridOpacity=0.2
    )
    .configure_legend(
        titleFontSize=14,
        labelFontSize=13,
        symbolSize=150
    )
    .configure_title(
        fontSize=18,
        anchor='start'
    )
)

chart

In [9]:
import altair as alt
import numpy as np

# --------------------------
# 1. CLEAN DATA
# --------------------------

df_corr = df_state_aggreg.copy()

# Remove invalid values
df_corr = df_corr[
    (df_corr["n_earthquakes"] > 0) &
    (df_corr["avg_price"] > 0)
].copy()

# Add log value for regression
df_corr["log_eq"] = np.log(df_corr["n_earthquakes"])

# --------------------------
# 2. SCATTER (log scale)
# --------------------------

color_scale = alt.Scale(scheme="viridis")

scatter_all = (
    alt.Chart(df_corr)
    .mark_circle(size=90, opacity=0.6)
    .encode(
        x=alt.X(
            "n_earthquakes:Q",
            title="Earthquakes (log scale)",
            scale=alt.Scale(type="log"),
            axis=alt.Axis(format="~s")
        ),
        y=alt.Y(
            "avg_price:Q",
            title="Median House Price ($)",
            scale=alt.Scale(zero=False, padding=10)
        ),
        color=alt.Color(
            "log_eq:Q",
            title="Earthquakes (log)",
            scale=color_scale
        ),
        tooltip=[
            alt.Tooltip("state:N", title="State"),
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("n_earthquakes:Q", title="Earthquakes"),
            alt.Tooltip("avg_price:Q", title="House Price ($)")
        ]
    )
)

# --------------------------
# 3. TRENDLINE with log transform
# --------------------------

smooth_all = (
    alt.Chart(df_corr)
    .transform_regression(
        "log_eq", "avg_price"
    )
    .mark_line(
        color="black",
        size=3
    )
    .encode(
        x=alt.X(
            "n_earthquakes:Q",
            scale=alt.Scale(type="log")
        ),
        y="avg_price:Q"
    )
)

# --------------------------
# 4. COMBINE BOTH
# --------------------------

correlation_all = (
    (scatter_all + smooth_all)
    .properties(
        width=900,
        height=550,
        title="Relationship between Earthquake Frequency and House Prices (All Years, Log Scale)"
    )
    .configure_axis(
        gridOpacity=0.20,
        labelFontSize=12,
        titleFontSize=14
    )
    .configure_title(
        fontSize=22,
        anchor="start"
    )
)

correlation_all

In [15]:
import altair as alt
import numpy as np
import pandas as pd

# --------------------------
# 1. CLEAN DATA
# --------------------------

df_corr = df_state_aggreg.copy()

# Remove invalid values
df_corr = df_corr[
    (df_corr["n_earthquakes"] > 0) &
    (df_corr["avg_price"] > 0) &
    (df_corr["avg_magnitude"].notna())
].copy()

# Add log value for regression
df_corr["log_eq"] = np.log(df_corr["n_earthquakes"])

# --------------------------
# 2. CREATE DYNAMIC EXCLUSIVE BINS
# --------------------------

# Determine min and max of avg_magnitude
min_mag = df_corr["avg_magnitude"].min()
max_mag = df_corr["avg_magnitude"].max()

# Create 5 equal-width bins
bins = np.linspace(min_mag, max_mag, num=6)  # 6 edges ‚Üí 5 bins
labels = [f"{round(bins[i],1)}-{round(bins[i+1],1)}" for i in range(len(bins)-1)]

# right=False ensures upper boundary is exclusive
df_corr["magnitude_bin"] = pd.cut(df_corr["avg_magnitude"], bins=bins, labels=labels, right=False)

# --------------------------
# 3. SCATTER WITH DARK COLORS
# --------------------------

color_scale = alt.Scale(
    domain=labels,
    range=["#7f3c8d", "#11a579", "#3969ac", "#f2b701", "#e73f74"]  # dark, distinct
)

scatter_all = (
    alt.Chart(df_corr)
    .mark_circle(size=90, opacity=0.7)
    .encode(
        x=alt.X(
            "n_earthquakes:Q",
            title="Earthquakes (log scale)",
            scale=alt.Scale(type="log"),
            axis=alt.Axis(format="~s")
        ),
        y=alt.Y(
            "avg_price:Q",
            title="Median House Price ($)",
            scale=alt.Scale(zero=False, padding=10)
        ),
        color=alt.Color(
            "magnitude_bin:N",
            title="Avg Magnitude Bin",
            scale=color_scale
        ),
        tooltip=[
            alt.Tooltip("state:N", title="State"),
            alt.Tooltip("year:O", title="Year"),
            alt.Tooltip("n_earthquakes:Q", title="Earthquakes"),
            alt.Tooltip("avg_price:Q", title="House Price ($)"),
            alt.Tooltip("avg_magnitude:Q", title="Avg Magnitude"),
            alt.Tooltip("magnitude_bin:N", title="Magnitude Bin")
        ]
    )
)

# --------------------------
# 4. TRENDLINE
# --------------------------

smooth_all = (
    alt.Chart(df_corr)
    .transform_regression("log_eq", "avg_price")
    .mark_line(color="black", size=3)
    .encode(
        x=alt.X("n_earthquakes:Q", scale=alt.Scale(type="log")),
        y="avg_price:Q"
    )
)

# --------------------------
# 5. COMBINE BOTH
# --------------------------

correlation_all = (
    (scatter_all + smooth_all)
    .properties(
        width=900,
        height=550,
        title="Earthquake Frequency vs House Prices (Dynamic Magnitude Bins)"
    )
    .configure_axis(
        gridOpacity=0.20,
        labelFontSize=12,
        titleFontSize=14
    )
    .configure_title(
        fontSize=22,
        anchor="start"
    )
    .configure_legend(
        titleFontSize=14,
        labelFontSize=13,
        symbolSize=150
    )
)

correlation_all



In [11]:
# Base scatter
base = alt.Chart(df_state_aggreg).mark_circle(size=60, opacity=0.5).encode(
    x=alt.X("n_earthquake:Q", scale=alt.Scale(type="log"), title="Earthquakes (log scale)"),
    y=alt.Y("avg_price:Q", title="Median House avg_price ($)"),
    tooltip=["State:N", "Year:O", "n_earthquake:Q", "avg_price:Q"]
)

# R√©gression lin√©aire par ann√©e
smooth = alt.Chart(df_state_aggreg).transform_regression(
    "n_earthquake", "avg_price", groupby=["Year"]  # par ann√©e
).mark_line(size=2, color="black").encode(
    x=alt.X("n_earthquake:Q", scale=alt.Scale(type="log")),
    y="avg_price:Q"
)

# Scatter + ligne
layered = base + smooth

# Facet par ann√©e
chart = layered.facet(
    column=alt.Column("Year:O", title="Year")
).properties(
    title="House Prices vs Earthquakes ‚Äì Linear Trend per Year"
).configure_axis(
    labelFontSize=12,
    titleFontSize=14,
    gridOpacity=0.2
).configure_title(
    fontSize=18,
    anchor="start"
)

chart

# Focus sur un √©tat (Californie ou autre)

In [12]:
import altair as alt
from vega_datasets import data

# TopoJSON
states = alt.topo_feature(data.us_10m.url, "states")
counties = alt.topo_feature(data.us_10m.url, "counties")

# Filtrer Alaska + California
df_state_AC = df_state_aggreg_year[df_state_aggreg_year["state"].isin(["Alaska", "California"])].copy()
df_county_AC = df_county_aggreg_year[df_county_aggreg_year["state"].isin(["Alaska", "California"])].copy()

# 1) --- STATE EARTHQUAKE MAP ---
map_state_eq = (
    alt.Chart(states)
    .mark_geoshape()
    .encode(
        color=alt.Color(
            "n_earthquakes:Q",
            scale=alt.Scale(range=["#ffe6e6", "#800000"]),
            title="Earthquakes"
        ),
        tooltip=["state:N", "n_earthquakes:Q"]
    )
    .transform_lookup(
        lookup="id",
        from_=alt.LookupData(df_state_AC, "fips", ["state", "n_earthquakes"])
    )
    .project("albersUsa")
    .properties(title="Earthquakes ‚Äì Alaska & California", width=400, height=300)
)

# 2) --- STATE PRICE MAP ---
map_state_price = (
    alt.Chart(states)
    .mark_geoshape()
    .encode(
        color=alt.Color(
            "avg_price:Q",
            scale=alt.Scale(range=["#e6f2ff", "#0055aa"]),
            title="Avg House Price"
        ),
        tooltip=["state:N", "avg_price:Q"]
    )
    .transform_lookup(
        lookup="id",
        from_=alt.LookupData(df_state_AC, "fips", ["state", "avg_price"])
    )
    .project("albersUsa")
    .properties(title="Avg Price ‚Äì Alaska & California", width=400, height=300)
)

# 3) --- COUNTY EARTHQUAKE MAP ---
map_county_eq = (
    alt.Chart(counties)
    .mark_geoshape(stroke="white", strokeWidth=0.25)
    .encode(
        color=alt.Color(
            "n_earthquakes:Q",
            scale=alt.Scale(range=["#ffe6e6", "#800000"]),
            title="Earthquakes"
        ),
        tooltip=["county:N", "state:N", "n_earthquakes:Q"]
    )
    .transform_lookup(
        lookup="id",
        from_=alt.LookupData(df_county_AC, "county_fips", ["county", "state", "n_earthquakes"])
    )
    .project("albersUsa")
    .properties(title="County Earthquakes ‚Äì Alaska & California", width=400, height=300)
)

# 4) --- COUNTY PRICE MAP ---
map_county_price = (
    alt.Chart(counties)
    .mark_geoshape(stroke="white", strokeWidth=0.25)
    .encode(
        color=alt.Color(
            "avg_price:Q",
            scale=alt.Scale(range=["#e6f2ff", "#0055aa"]),
            title="Avg House Price"
        ),
        tooltip=["county:N", "state:N", "avg_price:Q"]
    )
    .transform_lookup(
        lookup="id",
        from_=alt.LookupData(df_county_AC, "county_fips", ["county", "state", "avg_price"])
    )
    .project("albersUsa")
    .properties(title="County Prices ‚Äì Alaska & California", width=400, height=300)
)

# Combine ‚Äì 2√ó2 grid
final_maps = (map_state_eq | map_state_price) & (map_county_eq | map_county_price)
final_maps

KeyError: 'state'

In [18]:
df_county_aggreg_year.columns


Index(['county', 'year', 'n_earthquakes', 'avg_magnitude', 'max_magnitude',
       'avg_depth', 'intensity_score', 'n_properties', 'avg_price',
       'median_price', 'avg_bedrooms', 'avg_bathrooms', 'price_per_bedroom',
       'price_per_bathroom', 'eq_per_100_properties'],
      dtype='object')