# Obsidian Geochemical Sourcing – Anillo Source (Arequipa, Peru)
This notebook explores obsidian geochemistry using interactive maps, biplots, and ternary plots.

---

## How to Navigate

- Sections are collapsed by default where appropriate.
- Expand **Source Map** to select sources.
- Biplot and Ternary Plot sections use your selection.
- Collapse sections like Imports or Data Cleaning for easier focus.
- Use the **Outline panel** (JupyterLab left sidebar) to quickly jump between sections.


## 1. Imports & Configuration


In [2]:
!pip install jupyter-dash --quiet
# Dash allows for an interactive table showing Selected Sources


In [3]:
import pandas as pd
import numpy as np
from pathlib import Path

import plotly.express as px
import plotly.graph_objects as go

from dash import Dash, dash_table, dcc, html, Output, Input


## Configuration 
### The Google Sheets loader is toggled by `USE_GOOGLE_SHEETS`; set it to `False` to read the local CSV files instead

In [4]:
# -----------------------------
# Configuration
# -----------------------------

# Data-source switch read by the "Load Data" cell:
#   True  -> fetch the tabs of the Google Sheet over the network
#   False -> read the CSV files stored under DATA_DIR
USE_GOOGLE_SHEETS = True   # <- currently loads live Sheets; set to False to use the local CSVs
DATA_DIR = Path("../data") # <- path to local /data directory adjacent to /notebooks directory


## Data Loading from Local CSV or Google Sheets
### based on "Use Google Sheets" flag

In [5]:
def get_df_csv(filename):
    """Read one CSV file from the local data directory.

    Parameters
    ----------
    filename : str
        File name relative to ``DATA_DIR``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, using ``pd.read_csv`` defaults.
    """
    csv_path = DATA_DIR / filename
    return pd.read_csv(csv_path)


def get_df_sheets(sheet_id, sheet_name):
    """Load one tab of a Google Sheet as a DataFrame via the CSV export URL.

    Parameters
    ----------
    sheet_id : str
        Identifier of the spreadsheet (the part of its URL after ``/d/``).
    sheet_name : str
        Name of the tab to load. It is percent-encoded before being placed in
        the query string, so names containing spaces, ``&``, ``#`` and similar
        characters resolve to the intended tab.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_csv`` parses from the exported CSV.
    """
    from urllib.parse import quote  # local import: stdlib only, keeps the block self-contained

    # safe="" encodes every reserved character; plain names such as
    # "KRA21_sources" come out unchanged.
    encoded_name = quote(str(sheet_name), safe="")
    url = (
        f"https://docs.google.com/spreadsheets/d/"
        f"{sheet_id}/gviz/tq?tqx=out:csv&sheet={encoded_name}"
    )
    return pd.read_csv(url)


## Load Data

In [6]:
SHEET_ID = "1R4PlMACBn0l8ZguwtYDlZLnbvORzH5CHhKGFBezjSvk"

# The same three tables (srcs, srcs_locs, study) are loaded either way;
# USE_GOOGLE_SHEETS only decides which backend they come from.
if not USE_GOOGLE_SHEETS:
    srcs, srcs_locs, study = (
        get_df_csv(csv_name)
        for csv_name in ("KRA21_sources.csv", "source_coords.csv", "study_samples.csv")
    )
else:
    srcs, srcs_locs, study = (
        get_df_sheets(SHEET_ID, tab_name)
        for tab_name in ("KRA21_sources", "source_coords", "samples")
    )


## Data Cleaning

In [7]:
def clean_geochem_df(df):
    """Return a cleaned copy of a geochemistry table.

    The input frame is left untouched, so re-running the cell that calls this
    function (or keeping a reference to the raw frame) is safe.

    Steps
    -----
    1. Strip the ``Ka1`` / ``La1`` suffixes and all whitespace from column names.
    2. Cast the ``Group``, ``Sample`` and ``Name`` columns (when present) to the
       pandas ``string`` dtype.
    3. Convert every column that is not ``string``-typed to numeric; values that
       cannot be parsed become NaN.
    4. Drop columns that contain only NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table as loaded from CSV / Google Sheets.

    Returns
    -------
    pandas.DataFrame
        A new, cleaned frame.
    """
    cleaned = df.copy()

    # Remove Bruker artifacts and spaces
    cleaned.columns = cleaned.columns.str.replace(r"(Ka1|La1|\s+)", "", regex=True)

    # Ensure string columns
    for col in ["Group", "Sample", "Name"]:
        if col in cleaned.columns:
            cleaned[col] = cleaned[col].astype("string")

    # Convert remaining columns to numeric
    string_cols = cleaned.select_dtypes("string").columns
    numeric_cols = cleaned.columns.difference(string_cols)
    cleaned[numeric_cols] = cleaned[numeric_cols].apply(pd.to_numeric, errors="coerce")

    # Drop columns that are entirely NaN (returns a new frame; no inplace mutation)
    return cleaned.dropna(axis=1, how="all")


# Rebind `srcs` and `study` to the frames returned by clean_geochem_df.
# `srcs_locs` is not passed through the cleaner.
srcs = clean_geochem_df(srcs)
study = clean_geochem_df(study)


## Map Selection
### 4.1 Source Selection & Summary Table

In [15]:
# NOTE(review): this cell reads `app` and `fig_map`, neither of which is defined
# in the cells shown above (the recorded output of this cell is a NameError for
# `fig_map`). Presumably an earlier cell should create the Dash app and build
# the map figure from `srcs_locs` — confirm and restore that cell so the
# notebook survives Restart & Run All.
app.layout = html.Div(children=[
    # Source map
    dcc.Graph(id="map", figure=fig_map),

    # Holds the current selection; the summary-table callback takes it as its Input
    dcc.Store(id="selection-store"),

    # Heading for the summary table
    html.H4(
        children="Selected Sources Summary",
        style=dict(
            backgroundColor="white",
            textAlign="center",
            fontFamily="sans-serif",
            padding="4px",
        ),
    ),

    # Summary table of selected sources; starts empty
    dash_table.DataTable(
        id="summary-table",
        columns=[
            {"name": label, "id": column_id}
            for label, column_id in (("Source Name", "Name"), ("Rows in srcs", "Count"))
        ],
        data=[],
        style_cell=dict(
            textAlign="center",
            fontFamily="sans-serif",
            backgroundColor="white",
            color="black",
        ),
        style_header=dict(
            fontWeight="bold",
            fontFamily="sans-serif",
            backgroundColor="white",
            color="black",
        ),
        style_table=dict(
            overflowX="auto",
            border="1px solid lightgray",
        ),
    ),
])


NameError: name 'fig_map' is not defined

In [12]:
# Callback: rebuild the summary table whenever the stored selection changes
@app.callback(
    Output("summary-table", "data"),
    Input("selection-store", "data")
)
def update_summary_table(sel):
    """Return per-source row counts for the currently selected sources.

    Parameters
    ----------
    sel : dict or None
        Content of the ``selection-store`` component. The selected source
        names are read from its ``"selected_names"`` key.

    Returns
    -------
    list of dict
        One ``{"Name": ..., "Count": ...}`` record per selected source that has
        rows in ``srcs``; an empty list when nothing is selected.
    """
    names = (sel or {}).get("selected_names", [])
    if not names:
        return []

    # "Group" in srcs carries the same labels as "Name" in srcs_locs
    matching = srcs[srcs["Group"].isin(names)]

    # Rows per source, shaped directly into the table's Name / Count columns
    summary = (
        matching.groupby("Group")
        .size()
        .rename_axis("Name")
        .reset_index(name="Count")
    )
    return summary.to_dict("records")


# Run Dash inline in notebook: start the app and render it in this cell's output.
# NOTE(review): `app` is not defined in any cell shown above (the recorded
# output here is a NameError) — confirm the cell that creates it is present.
app.run(jupyter_mode="inline")


NameError: name 'app' is not defined

## Ellipse Utility

In [13]:
def confidence_ellipse(x, y, n_std=1.96, n_points=100):
    """Return points on the covariance ellipse of paired samples.

    The ellipse is centred on the sample mean, its axes follow the
    eigenvectors of the sample covariance matrix, and each semi-axis has
    length ``n_std * sqrt(eigenvalue)``.

    Parameters
    ----------
    x, y : array-like
        Paired observations (pandas Series or NumPy arrays). Pairs in which
        either value is NaN or infinite are ignored.
    n_std : float, default 1.96
        Number of standard deviations along each principal axis.
        NOTE(review): 1.96 is the one-dimensional 95 % quantile; a 95 % region
        for a bivariate normal would need about 2.448 — confirm which is intended.
    n_points : int, default 100
        Number of points along the outline. The first and last point coincide
        because the angle runs from 0 to 2*pi inclusive.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_points, 2)`` with x in column 0 and y in column 1.
        If fewer than two complete pairs are available the covariance is
        undefined and the array is filled with NaN, which plots as nothing.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Keep only complete pairs; a single NaN would otherwise poison the covariance.
    valid = np.isfinite(x) & np.isfinite(y)
    x, y = x[valid], y[valid]

    if x.size < 2:
        return np.full((n_points, 2), np.nan)

    cov = np.cov(x, y)
    mean = np.array([x.mean(), y.mean()])

    # eigh returns eigenvalues in ascending order; put the major axis first.
    eigvals, eigvecs = np.linalg.eigh(cov)
    order = eigvals.argsort()[::-1]
    # Round-off can make a (near-)zero eigenvalue slightly negative; clip before sqrt.
    eigvals = np.clip(eigvals[order], 0.0, None)
    eigvecs = eigvecs[:, order]

    theta = np.linspace(0, 2 * np.pi, n_points)
    unit_circle = np.column_stack((np.cos(theta), np.sin(theta)))

    # Scale the unit circle to the axis lengths, rotate into data space, then shift.
    ellipse = unit_circle @ np.diag(np.sqrt(eigvals) * n_std) @ eigvecs.T
    ellipse += mean

    return ellipse


## Geochemical Plot

In [91]:
x_col = "Sr"
y_col = "Zr"
group = "Group"

# `selected_srcs` is not created by any cell shown in this notebook. If no
# earlier step has put it in the kernel namespace, fall back to every source
# row so the cell still runs after Restart & Run All.
# NOTE(review): confirm which cell is meant to derive it from the map selection.
try:
    selected_srcs
except NameError:
    selected_srcs = srcs

fig_bi = go.Figure()

# Study ellipses: one outline per study group
for g in study[group].dropna().unique():
    # Keep only rows with both elements measured; NaN would poison the covariance.
    sub = study.loc[study[group] == g, [x_col, y_col]].dropna()
    if len(sub) < 2:
        # Covariance is undefined for fewer than two samples; skip the group.
        continue
    ell = confidence_ellipse(sub[x_col], sub[y_col])

    fig_bi.add_trace(go.Scatter(
        x=ell[:,0],
        y=ell[:,1],
        mode="lines",
        name=f"{g} (study)",
        showlegend=True
    ))

# Selected source points
fig_bi.add_trace(go.Scatter(
    x=selected_srcs[x_col],
    y=selected_srcs[y_col],
    mode="markers",
    name="Sources",
    marker=dict(symbol="x", size=6, color="black")
))

fig_bi.update_layout(
    title="Biplot: Selected Sources and Study Groups",
    xaxis_title=x_col,
    yaxis_title=y_col
)

fig_bi.show()


In [92]:
def normalize(df, cols):
    """Scale the chosen columns so that every row sums to 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the columns listed in ``cols``.
    cols : list of str
        Columns to turn into fractions.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), len(cols))`` holding each value divided by
        its row total.
    """
    raw = df[cols].values
    row_totals = raw.sum(axis=1, keepdims=True)
    return raw / row_totals

cols = ["Rb", "Sr", "Zr"]

# `selected_srcs` is not created by any cell shown in this notebook. If no
# earlier step has put it in the kernel namespace, fall back to every source
# row so the cell still runs after Restart & Run All.
# NOTE(review): confirm which cell is meant to derive it from the map selection.
try:
    selected_srcs
except NameError:
    selected_srcs = srcs

srcs_frac  = normalize(selected_srcs, cols)
study_frac = normalize(study, cols)

fig_tern = go.Figure()

# Sources as points
fig_tern.add_trace(go.Scatterternary(
    a=srcs_frac[:,0],
    b=srcs_frac[:,1],
    c=srcs_frac[:,2],
    mode="markers",
    name="Sources",
    marker=dict(symbol="x", size=6)
))

# Study ellipses by Group (projected): the ellipse is fitted in the
# (Rb, Sr) fraction plane and the Zr fraction is recovered as 1 - Rb - Sr.
# `group` is the grouping column name set in the biplot cell.
for g in study[group].dropna().unique():
    sub = study[study[group] == g]
    frac = normalize(sub, cols)

    # Drop samples with a missing element or a zero row total (NaN / inf fractions).
    frac = frac[np.isfinite(frac).all(axis=1)]
    if len(frac) < 2:
        # Covariance is undefined for fewer than two samples; skip the group.
        continue

    ell = confidence_ellipse(frac[:,0], frac[:,1])
    fig_tern.add_trace(go.Scatterternary(
        a=ell[:,0],
        b=ell[:,1],
        c=1 - ell[:,0] - ell[:,1],
        mode="lines",
        name=f"{g} (study)"
    ))

fig_tern.update_layout(
    ternary=dict(
        sum=1,
        aaxis_title="Rb",
        baxis_title="Sr",
        caxis_title="Zr"
    ),
    title="Ternary Plot: Selected Sources and Study Groups"
)

fig_tern.show()
