In [1]:
# In 01_exploratory_cruise_eda.ipynb ─ first code cell
import pandas as pd
from ydata_profiling import ProfileReport
from pathlib import Path

# Project root is taken to be one level above the current working directory.
project_root = Path.cwd().parents[0]

# Read the interim water_co2 table.
DATA_DIR = project_root.joinpath("data", "interim")
df = pd.read_parquet(DATA_DIR.joinpath("water_co2.parquet"))

# ---------- quick profile (original note: ~5–20 s depending on rows) ----------
profile = ProfileReport(df, title="Cruise EDA – water_co2", explorative=True)

# Create the report folder (and any missing parents) before writing into it.
REPORT_DIR = project_root.joinpath("results", "reports")
REPORT_DIR.mkdir(parents=True, exist_ok=True)

# Write the HTML report and tell the reader where it went.
profile_path = REPORT_DIR.joinpath("water_co2_profile.html")
profile.to_file(profile_path)

print("✔ Profile saved to:", profile_path)

import seaborn as sns
import matplotlib.pyplot as plt

# Output folder for figures. parents=True creates any missing intermediate
# folders, so this step does not rely on something else having already
# created <project root>/results.
FIG_DIR = Path.cwd().parents[0] / "results" / "figures"
FIG_DIR.mkdir(parents=True, exist_ok=True)

# 3-a  depth profile (temp vs depth)
# Depth is the independent variable of a profile, so seaborn is told to sort
# and aggregate along y (orient="y", requires seaborn >= 0.12). With the
# default orientation the line is sorted by temperature and depth is averaged
# at repeated temperatures, which joins points in temperature order instead
# of following the water column.
fig1, ax1 = plt.subplots()
sns.lineplot(data=df, x="temp_wat", y="depth_m",
             marker="o", hue="cruise", orient="y", ax=ax1)
ax1.invert_yaxis()  # larger depth_m values are drawn lower on the axis
ax1.set_xlabel("Water temperature (°C)")
ax1.set_ylabel("Depth (m)")
ax1.set_title("Temperature–depth profile by cruise")
fig1.savefig(FIG_DIR / "temp_depth_profile.png", dpi=300, bbox_inches="tight")

# 3-b  θ-S diagram
# Points are coloured by depth_m using the reversed viridis palette.
# NOTE(review): the section is labelled θ-S but temp_wat is plotted as-is;
# confirm whether that column is potential or in-situ temperature.
fig2, ax2 = plt.subplots()
sns.scatterplot(data=df, x="sal_wat", y="temp_wat",
                hue="depth_m", palette="viridis_r", ax=ax2)
ax2.set_xlabel("Salinity (PSU)")
ax2.set_ylabel("Temperature (°C)")
ax2.set_title("Temperature–salinity diagram")
fig2.savefig(FIG_DIR / "theta_s_diagram.png", dpi=300, bbox_inches="tight")

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 1808.01it/s][A


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

✔ Profile saved to: C:\Users\OA_2023-03\Documents\dev\ghana_carbonate_OMI\results\reports\water_co2_profile.html
