<span style="font-size: 13px;">

### Manual depth alignment: core plugs → well logs

This notebook manually aligns **core plug depths** to **well log depths** and exports an ML-ready dataset.

#### What it does
- Splits the dataset into **chunks** where there is a depth gap larger than **3 meters**.
- For each chunk, you define one or more **tie points** (depth matches) to compute the required shift.
- Applies the shift within the selected chunk(s) and exports the corrected data.

#### How to define tie points (Core → Log)
In the tie-point input area:
- Set the original depth with the **Core @** slider (core plug depth).
- Set the desired aligned depth with the **Log →** slider (target depth in the well logs).
- Click **Add/Update tie** to register the tie point.

You can define **multiple tie points** within the same chunk to improve alignment. 

#### Multiple ties
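If you want to add another tie point:
- Set the next **Core @** value,
- then set its corresponding **Log →** value,
- and click **Add/Update tie**.

The notebook uses these tie points to visualize and apply shifts across the chunk. With a single tie point the whole chunk is shifted by that tie's offset. With two or more tie points the shift is **not constant** over the chunk:
it varies **gradually (linearly) between tie points** and stays equal to the nearest tie's offset above the first and below the last tie.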

#### Visual check
After entering tie points, use the plotting/visualization section to confirm the ties on the depth track.

#### Export
1. Select the shifted chunk(s) by ticking their **Include in export** checkbox.
2. Click **Export selected chunks** (at the beginning of the notebook).

This exports the aligned dataset for downstream ML (e.g., porosity prediction from logs).
</span>

In [None]:
# Plotting and selective export of aligned chunks (multi-tie warping) 
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import VBox, HBox, FloatSlider, Output, Layout, Checkbox, Button, HTML, Label, Dropdown
from IPython.display import display, clear_output
from pathlib import Path
from datetime import datetime

# === Config ===
WELL_CSV = "data/IJS-57.csv"
PLUG_CSV = "data/IJS-57_coreplug_averaged.csv"
RHO_MA, RHO_FL = 2.65, 1.00  # matrix & fluid densities (g/cc)
CHUNK_GAP_M = 3.0  # a depth gap larger than this starts a new chunk

# === Load log data ===
# Only the plotted curves are kept; rows missing any of them are dropped.
log_data = (
    pd.read_csv(WELL_CSV)[['DEPTH', 'GR', 'RHOB', 'DT', 'NPHI']]
    .dropna()
    .sort_values('DEPTH')
    .reset_index(drop=True)
)

# Parse the plug file once; the UI subset and the export frame both derive from it.
_plug_raw = pd.read_csv(PLUG_CSV)

# === Load UI subset (only rows that can be plotted for core tracks) ===
plug_data = (
    _plug_raw[['DEPTH', 'POROSITY', 'GRAIN_DENSITY']]
    .dropna()
    .sort_values('DEPTH')
    .reset_index(drop=True)
)

# === Load full averaged file (all cols) for export ===
plug_full_raw = (
    _plug_raw
    .dropna(subset=['DEPTH'])
    .sort_values('DEPTH')
    .reset_index(drop=True)
)
# Remembered so the export can drop the helper columns added below.
_original_cols = plug_full_raw.columns.tolist()

# === Build CHUNK_ID ONCE from full file (drop only DEPTH NaNs) ===
plug_full = plug_full_raw.copy()
plug_full['DEPTH_DIFF'] = plug_full['DEPTH'].diff()
# The first diff is NaN (compares False), so the first row opens chunk 0.
plug_full['CHUNK_ID'] = (plug_full['DEPTH_DIFF'] > CHUNK_GAP_M).cumsum().astype(int)

# === Map CHUNK_ID to UI subset (so UI and export share same chunk IDs) ===
# Rows with the same depth are adjacent after sorting (gap 0), so they always
# share a chunk. Keeping one key row per depth stops the merge from multiplying
# UI rows when a depth occurs more than once in the full file.
_chunk_keys = plug_full[['DEPTH', 'CHUNK_ID']].drop_duplicates(subset='DEPTH')
plug_data = (
    plug_data.merge(_chunk_keys, on='DEPTH', how='left')
    .dropna(subset=['CHUNK_ID'])
    .copy()
)
plug_data['CHUNK_ID'] = plug_data['CHUNK_ID'].astype(int)

# Only iterate chunks that exist in UI subset (prevents empty UI chunks)
unique_chunks = plug_data['CHUNK_ID'].drop_duplicates().astype(int).values

# Per-chunk frames: plotting subset and full-column rows (used for export).
chunk_dfs = {cid: plug_data[plug_data['CHUNK_ID'] == cid].copy() for cid in unique_chunks}
chunk_full_dfs = {cid: plug_full[plug_full['CHUNK_ID'] == cid].copy() for cid in unique_chunks}

# === Helpers ===
def to_pct(a):
    """Return *a* as a float array expressed in percent.

    The unit is guessed from the data: when the largest non-NaN value is at
    most 2.0 the input is treated as a fraction and multiplied by 100;
    otherwise it is returned unscaled. Empty and all-NaN inputs are returned
    as-is because there is no value to judge the unit from.
    """
    values = np.asarray(a, float)
    if values.size == 0 or np.isnan(values).all():
        return values
    if np.nanmax(values) > 2.0:
        # Already looks like percent.
        return values
    return values * 100.0

def _sanitize_points(pts, tol=1e-6):
    """Return tie points as a list of ``(core, log)`` float pairs sorted by core depth.

    Consecutive points whose core depths differ by at most ``tol`` are merged;
    the later one (in sorted order) replaces the earlier one. An empty or
    ``None`` input yields an empty list. The input is not modified.
    """
    if not pts:
        return []
    cleaned = []
    for raw_core, raw_log in sorted(pts, key=lambda pair: float(pair[0])):
        tie = (float(raw_core), float(raw_log))
        if cleaned and abs(tie[0] - cleaned[-1][0]) <= tol:
            # Same core depth as the previous tie: keep only the newer pair.
            cleaned[-1] = tie
        else:
            cleaned.append(tie)
    return cleaned

def warp_depth(depth_series, points):
    """Return the depths of *depth_series* shifted according to tie *points*.

    Each tie ``(core, log)`` defines an offset ``log - core`` at depth
    ``core``. Offsets are linearly interpolated between ties and held at the
    first/last tie's offset outside the tie range. With no points the depths
    are returned unshifted (as a float array).

    Args:
        depth_series: Object exposing ``to_numpy()`` (e.g. a pandas Series).
        points: Iterable of ``(core, log)`` pairs, or an empty/None value.
    """
    depths = depth_series.to_numpy().astype(float)
    if not points:
        return depths
    ties = _sanitize_points(points)
    core_at, log_at = (np.array(column, dtype=float) for column in zip(*ties))
    offset_at_tie = log_at - core_at
    offset = np.interp(depths, core_at, offset_at_tie,
                       left=offset_at_tie[0], right=offset_at_tie[-1])
    return depths + offset

def format_points_html(pts):
    """Render tie points as a numbered, ``<br>``-separated HTML listing.

    Points are sanitized (sorted, near-duplicates merged) before numbering.
    Returns a placeholder message when there are no ties.
    """
    if not pts:
        return "<i>No ties yet</i>"
    return "<br>".join(
        f"{number}. core {core:.2f} → log {log:.2f} (Δ={log - core:+.2f} m)"
        for number, (core, log) in enumerate(_sanitize_points(pts), start=1)
    )

# === State ===
# Per-chunk state shared by the widget callbacks and the export, keyed by int chunk id.
current_shifts  = {int(cid): 0.0 for cid in unique_chunks}   # uniform shift; used only if no ties
selected_chunks = {int(cid): False for cid in unique_chunks}  # "Include in export" checkbox state
control_points  = {int(cid): [] for cid in unique_chunks}     # list of (core, log) tie pairs

# log arrays
depth_log = log_data['DEPTH'].to_numpy()
# Full depth extent of the logs; used as the range of each chunk's "Log →" slider.
dmin_all, dmax_all = float(depth_log.min()), float(depth_log.max())

# export UI
well_name = Path(WELL_CSV).stem  # file name without extension; becomes part of the export file name
btn_export = Button(description="Export selected chunks", layout=Layout(width='220px'))
export_status = HTML(value="")  # filled with the export result message

# One VBox per chunk is appended here by the per-chunk UI loop.
widgets = []

# === Build per-chunk UI ===
for raw_cid in unique_chunks:
    # Plain int so the id matches the keys of the state dictionaries.
    chunk_id = int(raw_cid)
    chunk_df = chunk_dfs[chunk_id]
    if chunk_df.empty:
        continue

    # Plot window: the chunk's core depth range padded by 10 depth units on each side.
    dmin_fixed = float(chunk_df['DEPTH'].min() - 10)
    dmax_fixed = float(chunk_df['DEPTH'].max() + 10)

    # main controls
    # Uniform-shift slider (±10); its value is only applied while the chunk has no ties.
    slider = FloatSlider(min=-10, max=10, step=0.01, value=0.0,
                         description=f"Chunk {chunk_id}", readout_format='.2f',
                         layout=Layout(width='45%'))
    chk = Checkbox(value=False, description="Include in export", indent=False)
    output = Output()  # receives this chunk's figure

    # tie controls
    # "Core @" is limited to the chunk's own depth range; "Log →" spans the whole log.
    # Both start at the chunk's median core depth.
    tie_core = FloatSlider(min=float(chunk_df['DEPTH'].min()), max=float(chunk_df['DEPTH'].max()),
                           step=0.01, value=float(chunk_df['DEPTH'].median()),
                           description="Core @", readout_format='.2f', layout=Layout(width='30%'))
    tie_log = FloatSlider(min=dmin_all, max=dmax_all,
                          step=0.01, value=float(chunk_df['DEPTH'].median()),
                          description="Log →", readout_format='.2f', layout=Layout(width='30%'))

    btn_add_tie = Button(description="Add/Update tie", layout=Layout(width='150px'))
    # Option value -1 is the "nothing selected" placeholder; real ties use their list index.
    dd_remove   = Dropdown(options=[("— select tie —", -1)], description="Delete", layout=Layout(width='36%'))
    btn_del_tie = Button(description="Delete tie", layout=Layout(width='120px'))
    btn_clear_ties = Button(description="Clear all ties (chunk)", layout=Layout(width='180px'))

    # Read-only status widgets, rewritten whenever the ties UI is refreshed.
    ties_count = Label(value="0 tie(s)")
    delta_preview = Label(value="Δ preview: +0.00 m")
    tie_debug = Label(value="")  # shows any UI refresh errors
    ties_html = HTML(value="<i>No ties yet</i>")

    def make_plot(shift, slider, chunk_df, chunk_id, output, dmin_fixed, dmax_fixed):
        """Redraw the log tracks and the depth-shifted core data for one chunk.

        The figure is rendered inside ``output``, which is cleared first. Core
        depths are warped with the chunk's tie points when ``control_points``
        holds any for ``chunk_id``; otherwise the uniform ``shift`` is added.

        Args:
            shift: Uniform depth shift; used only when the chunk has no ties.
            slider: Not used by this function; kept so existing callers still work.
            chunk_df: Core rows of the chunk (DEPTH, POROSITY, GRAIN_DENSITY).
            chunk_id: Key of the chunk in ``control_points``.
            output: ``Output`` widget that receives the figure or message.
            dmin_fixed: Upper (shallow) bound of the plotted depth window.
            dmax_fixed: Lower (deep) bound of the plotted depth window.
        """
        with output:
            output.clear_output(wait=True)

            # slice logs for window
            in_window = (log_data['DEPTH'] >= dmin_fixed) & (log_data['DEPTH'] <= dmax_fixed)
            log_chunk = log_data[in_window]
            if log_chunk.empty:
                print(f"No log samples in [{dmin_fixed:.2f}, {dmax_fixed:.2f}] for chunk {chunk_id}.")
                return

            depth_chunk = log_chunk['DEPTH'].to_numpy()
            rhob_chunk  = log_chunk['RHOB'].to_numpy()

            pts = control_points.get(chunk_id, [])
            has_warp = len(pts) > 0
            if has_warp:
                shifted_depth = warp_depth(chunk_df['DEPTH'], pts)
            else:
                shifted_depth = chunk_df['DEPTH'].to_numpy() + float(shift)

            # Five data tracks; the sixth subplot is left blank (switched off below).
            # The y-range of every track, including the twinned axes that share
            # it, is set once in the loop near the end.
            fig, axes = plt.subplots(1, 6, figsize=(18, 12), sharey=False)

            # GR
            axes[0].plot(log_chunk['GR'], log_chunk['DEPTH'], color='green')
            axes[0].set_title("GR"); axes[0].set_xlabel("GR")

            # RHOB + Grain Density
            axes[1].plot(log_chunk['RHOB'], log_chunk['DEPTH'], label='RHOB', color='blue')
            axes[1].set_title("RHOB + Grain Density"); axes[1].set_xlabel("RHOB (g/cc)")
            ax1_top = axes[1].twiny()
            ax1_top.plot(chunk_df['GRAIN_DENSITY'], shifted_depth, 'r--', label='Grain Density')
            ax1_top.set_xlabel("Grain Density (g/cc)")
            ax1_top.xaxis.set_label_position('top'); ax1_top.xaxis.tick_top()
            ax1_top.grid(False)
            # The two curves live on different axes; merge their handles so the
            # legend lists both, and draw it on the twin, which is on top.
            base_handles, base_labels = axes[1].get_legend_handles_labels()
            top_handles, top_labels = ax1_top.get_legend_handles_labels()
            ax1_top.legend(base_handles + top_handles, base_labels + top_labels, loc='lower left')

            # DT
            axes[2].plot(log_chunk['DT'], log_chunk['DEPTH'], color='purple')
            axes[2].set_title("DT"); axes[2].set_xlabel("DT")

            # NPHI + Core Porosity
            axes[3].plot(chunk_df['POROSITY'], shifted_depth, 'ro--', label='Core Porosity', markersize=4)
            axes[3].set_title("PORO + NPHI"); axes[3].set_xlabel("Porosity")
            ax3_top = axes[3].twiny()
            ax3_top.plot(log_chunk['NPHI'], log_chunk['DEPTH'], color='orange', label='NPHI')
            ax3_top.set_xlabel("NPHI"); ax3_top.xaxis.set_label_position('top'); ax3_top.xaxis.tick_top()
            ax3_top.grid(False)
            # Same handle merge as on the RHOB track so NPHI shows up in the legend.
            base_handles, base_labels = axes[3].get_legend_handles_labels()
            top_handles, top_labels = ax3_top.get_legend_handles_labels()
            ax3_top.legend(base_handles + top_handles, base_labels + top_labels, loc='lower left')

            # Porosity comparison
            axes[4].set_title("Porosity comparison")
            axes[4].set_xlabel("Core Plug Porosity (%)")

            core_phi_pct = to_pct(chunk_df['POROSITY'].to_numpy())
            if core_phi_pct.size > 0 and not np.isnan(core_phi_pct).all():
                axes[4].plot(core_phi_pct, shifted_depth, 'r^--', markersize=4, label='Core φ (plug)')
                cmin, cmax = np.nanmin(core_phi_pct), np.nanmax(core_phi_pct)
                if np.isfinite([cmin, cmax]).all() and cmin != cmax:
                    axes[4].set_xlim(cmin - 0.5, cmax + 0.5)
            else:
                axes[4].text(0.5, 0.5, "No POROSITY data", transform=axes[4].transAxes,
                             ha='center', va='center')

            ax_top = axes[4].twiny()
            # Density porosity: (rho_matrix - RHOB) / (rho_matrix - rho_fluid).
            phi_rhob_full_pct = to_pct((RHO_MA - rhob_chunk) / (RHO_MA - RHO_FL))
            ax_top.plot(phi_rhob_full_pct, depth_chunk, '-', label='RHOB φ (log-res)')
            # Same quantity sampled at the shifted plug depths (linear interpolation).
            rhob_at_plugs = np.interp(shifted_depth, depth_chunk, rhob_chunk)
            phi_rhob_plug_pct = to_pct((RHO_MA - rhob_at_plugs) / (RHO_MA - RHO_FL))
            ax_top.plot(phi_rhob_plug_pct, shifted_depth, 'o--', markersize=4, label='RHOB φ @ plug')
            rmin, rmax = np.nanmin(phi_rhob_full_pct), np.nanmax(phi_rhob_full_pct)
            if np.isfinite([rmin, rmax]).all() and rmin != rmax:
                ax_top.set_xlim(rmin - 0.5, rmax + 0.5)
            ax_top.set_xlabel("RHOB-Derived Porosity (%)")
            ax_top.xaxis.set_label_position("top"); ax_top.xaxis.tick_top()
            ax_top.grid(False)

            axes[4].legend(loc="lower left")
            ax_top.legend(loc="upper left")

            axes[5].axis("off")

            # Fixed depth window with 0.5-unit ticks, depth increasing downwards.
            for ax in axes[:5]:
                ax.set_ylim(dmin_fixed, dmax_fixed)
                ax.set_yticks(np.arange(dmin_fixed, dmax_fixed + 0.5, 0.5))
                ax.grid(True)
                ax.invert_yaxis()

            title = f"Chunk {chunk_id} | "
            if has_warp:
                pts2 = _sanitize_points(pts)
                first_delta = pts2[0][1] - pts2[0][0]
                last_delta  = pts2[-1][1] - pts2[-1][0]
                title += f"WARPING {len(pts2)} tie(s) | Δ(first)={first_delta:+.2f} m, Δ(last)={last_delta:+.2f} m"
            else:
                title += f"Uniform shift={float(shift):.2f} m"
            fig.suptitle(title, fontsize=14)
            fig.tight_layout()
            plt.show()

    # slider callback (uniform shift fallback)
    def bind_plot(slider, chunk_df, chunk_id, output, dmin_fixed, dmax_fixed):
        """Connect a chunk's uniform-shift slider to its plot and draw it once.

        Every slider change stores the new value in ``current_shifts`` and
        redraws the chunk. The handler is also invoked immediately with the
        slider's current value so the plot exists before any interaction.
        """
        def _apply_shift(change):
            new_shift = change['new']
            current_shifts[chunk_id] = float(new_shift)
            make_plot(new_shift, slider, chunk_df, chunk_id, output, dmin_fixed, dmax_fixed)

        slider.observe(_apply_shift, names='value')
        # Initial draw.
        _apply_shift({'new': slider.value})

    def on_chk_change(change, chunk_id=chunk_id):
        """Store the new checkbox state for this chunk in ``selected_chunks``.

        ``chunk_id`` is bound as a default so each chunk's handler keeps its own id.
        """
        is_included = bool(change['new'])
        selected_chunks[chunk_id] = is_included
    chk.observe(on_chk_change, names='value')

    # ---- ties UI (fully bound, cannot late-bind) ----
    def _refresh_ties_ui_bound(
        chunk_id=chunk_id,
        ties_html=ties_html,
        dd_remove=dd_remove,
        ties_count=ties_count,
        tie_core=tie_core,
        tie_log=tie_log,
        delta_preview=delta_preview,
        tie_debug=tie_debug
    ):
        """Rewrite this chunk's tie widgets from ``control_points``.

        Updates the tie listing, the tie counter, the Delete dropdown options
        and the Δ preview (log slider minus core slider). All widgets are bound
        as defaults, so the function always acts on the chunk it was created
        for. Any exception is reported in ``tie_debug`` instead of being raised.
        """
        try:
            pts = control_points.get(chunk_id, [])
            ties_html.value = format_points_html(pts)
            ties_count.value = f"{len(pts)} tie(s)"

            # Placeholder first, then one entry per sanitized tie (value = its index).
            dd_remove.options = [("— select tie —", -1)] + [
                (f"{pos + 1}: core {core:.2f} → log {log:.2f}", pos)
                for pos, (core, log) in enumerate(_sanitize_points(pts))
            ]

            preview = float(tie_log.value) - float(tie_core.value)
            delta_preview.value = f"Δ preview: {preview:+0.2f} m"
            tie_debug.value = ""
        except Exception as err:
            tie_debug.value = f"Refresh error: {type(err).__name__}: {err}"

    def _on_add_tie_bound(_,
        chunk_id=chunk_id, tie_core=tie_core, tie_log=tie_log, slider=slider,
        chunk_df=chunk_df, output=output, dmin_fixed=dmin_fixed, dmax_fixed=dmax_fixed,
        refresh_ui=_refresh_ties_ui_bound
    ):
        """Add the tie currently set on the sliders, or update a nearby one.

        A tie whose core depth is within 0.05 of the new core depth is
        overwritten; otherwise the tie is appended. The stored list is then
        sanitized, and this chunk's tie widgets and plot are refreshed.

        ``refresh_ui`` is bound as a default on purpose: these functions are
        redefined on every loop iteration at module level, so looking the
        refresh function up by name at click time would always find the one
        belonging to the last chunk and leave this chunk's tie list stale.
        """
        x = float(tie_core.value)
        y = float(tie_log.value)
        pts = list(control_points.get(chunk_id, []))

        # upsert (overwrite) if very close in core depth
        for i, (xi, _yi) in enumerate(pts):
            if abs(x - xi) <= 0.05:
                pts[i] = (x, y)
                break
        else:
            pts.append((x, y))

        control_points[chunk_id] = _sanitize_points(pts)
        refresh_ui()
        # make_plot receives everything it needs as arguments, so a by-name lookup is safe here.
        make_plot(slider.value, slider, chunk_df, chunk_id, output, dmin_fixed, dmax_fixed)

    def _on_del_tie_bound(_,
        chunk_id=chunk_id, dd_remove=dd_remove, slider=slider,
        chunk_df=chunk_df, output=output, dmin_fixed=dmin_fixed, dmax_fixed=dmax_fixed,
        refresh_ui=_refresh_ties_ui_bound
    ):
        """Delete the tie selected in the Delete dropdown, then refresh UI and plot.

        The dropdown value is an index into the sanitized tie list; the
        placeholder entry (-1) or no selection is ignored.

        ``refresh_ui`` is bound as a default on purpose: these functions are
        redefined on every loop iteration at module level, so looking the
        refresh function up by name at click time would always find the one
        belonging to the last chunk and leave this chunk's tie list stale.
        """
        idx = dd_remove.value
        if idx is None or idx < 0:
            return
        pts = _sanitize_points(control_points.get(chunk_id, []))
        if 0 <= idx < len(pts):
            del pts[idx]
        control_points[chunk_id] = pts
        refresh_ui()
        # make_plot receives everything it needs as arguments, so a by-name lookup is safe here.
        make_plot(slider.value, slider, chunk_df, chunk_id, output, dmin_fixed, dmax_fixed)

    def _on_clear_ties_bound(_,
        chunk_id=chunk_id, slider=slider, chunk_df=chunk_df, output=output,
        dmin_fixed=dmin_fixed, dmax_fixed=dmax_fixed,
        refresh_ui=_refresh_ties_ui_bound
    ):
        """Remove every tie of this chunk, then refresh its tie widgets and plot.

        With no ties left the plot falls back to the uniform slider shift.

        ``refresh_ui`` is bound as a default on purpose: these functions are
        redefined on every loop iteration at module level, so looking the
        refresh function up by name at click time would always find the one
        belonging to the last chunk and leave this chunk's tie list stale.
        """
        control_points[chunk_id] = []
        refresh_ui()
        # make_plot receives everything it needs as arguments, so a by-name lookup is safe here.
        make_plot(slider.value, slider, chunk_df, chunk_id, output, dmin_fixed, dmax_fixed)

    # update preview when sliders move
    # The refresh function is captured as a lambda default so each chunk's
    # sliders keep calling the function created in this iteration.
    tie_core.observe(lambda ch, _f=_refresh_ties_ui_bound: _f(), names='value')
    tie_log.observe(lambda ch, _f=_refresh_ties_ui_bound: _f(), names='value')

    btn_add_tie.on_click(_on_add_tie_bound)
    btn_del_tie.on_click(_on_del_tie_bound)
    btn_clear_ties.on_click(_on_clear_ties_bound)
    # Populate the tie widgets once so they reflect the (empty) initial state.
    _refresh_ties_ui_bound()

    # Hooks the uniform-shift slider to the plot and draws the initial figure.
    bind_plot(slider, chunk_df, chunk_id, output, dmin_fixed, dmax_fixed)

    # Layout: shift + export toggle, tie entry, tie management, tie listing, plot.
    controls_row1 = HBox([slider, chk])
    controls_row2 = HBox([tie_core, tie_log, btn_add_tie, delta_preview])
    controls_row3 = HBox([dd_remove, btn_del_tie, btn_clear_ties, ties_count, tie_debug])
    ties_box = VBox([HTML("<b>Tie points (core → log)</b>"), ties_html])

    # One bordered panel per chunk; all panels are displayed together after the loop.
    widgets.append(
        VBox([controls_row1, controls_row2, controls_row3, ties_box, output],
             layout=Layout(border='1px solid #ddd', padding='6px', margin='6px 0'))
    )

# === Export ===
def on_export_clicked(_):
    """Write the depth-corrected rows of all selected chunks to one CSV.

    For every chunk ticked for export, the full-column rows get their DEPTH
    replaced by the tie-point warp when the chunk has ties, or by the uniform
    slider shift otherwise. Only the columns of the original plug file are
    written. The result, or the reason nothing was written, is shown in
    ``export_status``.
    """
    dfs = []
    for raw_cid in unique_chunks:
        cid = int(raw_cid)
        if not selected_chunks.get(cid, False) or cid not in chunk_full_dfs:
            continue

        df_full = chunk_full_dfs[cid].copy()
        pts = control_points.get(cid, [])

        if pts:
            df_full['DEPTH'] = warp_depth(df_full['DEPTH'], pts)
        else:
            df_full['DEPTH'] = df_full['DEPTH'] + float(current_shifts.get(cid, 0.0))

        # Drop the helper columns (DEPTH_DIFF, CHUNK_ID) added for chunking.
        dfs.append(df_full[_original_cols])

    if not dfs:
        export_status.value = "<b>No chunks selected.</b>"
        return

    out_df = pd.concat(dfs, ignore_index=True)
    out_path = Path("../data1") / f"{well_name}_shifted_chunks11.csv"
    try:
        # Create the target folder if needed; a missing folder would otherwise
        # make the write fail inside the click callback, unseen by the user.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_df.to_csv(out_path, index=False)
    except OSError as err:
        export_status.value = f"<b>Export failed:</b> {type(err).__name__}: {err}"
        return
    export_status.value = f"<b>Saved:</b> {out_path} ({len(out_df)} rows)"

# Run the export when the button is clicked.
btn_export.on_click(on_export_clicked)

# === Show UI ===
# Clear the cell's previous output, then show a build timestamp followed by
# the export row and the stacked per-chunk panels.
clear_output(wait=True)
display(HTML(f"<b>UI built at:</b> {datetime.now().strftime('%H:%M:%S')}"))
display(VBox([
    HBox([btn_export, export_status]),
    VBox(widgets)
]))
