This notebook reproduces the **interfacial tension analysis** of droplets described in the manuscript.  

## Analysis Workflow

- **Display each measurement**

- **Find proper fitting**
  - Several models were tried; an exponential decay was used in the end
  - Identify the fitted plateaus

- **Visualize**

## Outputs
- Raw measured values
- Fitted values
- Fitting parameters

In [2]:
# import
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from sklearn.metrics import mean_squared_error, r2_score

### Step 1: define file paths

In [None]:
# Input workbook that is read with pd.read_excel.
excel_path = "input_datasheet.xlsx"
# Output folder for the figure and the fit-parameter table; it is created if missing.
save_path = "output_folder-name"

# Worksheet holding the IF values (presumably interfacial tension - confirm):
# the first column is the time index, every further column is one measurement,
# and each row is one time point.
sheet_name = "sheet1"

### Step 2: set figure cosmetics

In [None]:
# Text placed on the figure.
title = "All Measurements_HFE7700+2%KP600 Oil"
x_unit_label = "Time (min)"
y_unit_label = "Interfacial tension (mN/m)"

# how far to extend the fitted curve (in minutes) beyond your data
fit_extend_minutes = 500  # adjust if you want longer/shorter tails
dense_points = 500       # resolution for smooth fitted lines
dot_size = 5             # scatter marker size for the raw data
line_width = 1.0         # line width of the fitted curves

# plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# plt.get_cmap accepts the same (name, lut) arguments and exists in old and
# new releases alike.
cmap = plt.get_cmap('tab10', 10)  # up to 10 distinct colors

### Step 3: visualize each measurement and its fit in one plot, and compute the fitted plateau values

In [None]:
# Load the worksheet without a header row, so every row is read as data.
df = pd.read_excel(excel_path, sheet_name=sheet_name, header=None)

# First column is the x axis: coerce to numbers (non-numeric cells become NaN)
# and divide by 6 to get minutes (measurement time interval: 10 s).
x_raw = pd.to_numeric(df.iloc[:, 0], errors="coerce")
x = x_raw.div(6.0).to_numpy()

# Every other column is a replicate; columns with no numeric value at all
# are skipped.
numeric_columns = (
    pd.to_numeric(df.iloc[:, j], errors="coerce") for j in range(1, df.shape[1])
)
replicate_cols = [
    series.to_numpy() for series in numeric_columns if series.notna().any()
]

if not replicate_cols:
    raise ValueError("No non-empty replicate columns found in the worksheet.")

# Make sure the output folder exists before anything is written to it.
os.makedirs(save_path, exist_ok=True)

# ---------- **model & helpers** ----------
def exp_decay(x, a, k, c):
    """Evaluate the exponential-decay model ``a * exp(-k * x) + c``.

    Parameters
    ----------
    x : scalar or array
        Points at which the model is evaluated.
    a : float
        Amplitude of the decaying term.
    k : float
        Decay rate, in units of 1 / x.
    c : float
        Constant offset the curve approaches for large ``x`` when ``k > 0``.

    Returns
    -------
    Model values with the same shape as ``x``.
    """
    decaying_term = a * np.exp(-k * x)
    return decaying_term + c

def initial_guess(y):
    """Build starting parameters ``(a0, k0, c0)`` for the exponential-decay fit.

    Parameters
    ----------
    y : numpy.ndarray
        One measurement series; NaN entries are ignored.

    Returns
    -------
    tuple
        ``(a0, k0, c0)``. ``a0`` is the spread (max - min) of the non-NaN
        values, or ``max(|first value|, 1.0)`` when the spread is zero.
        ``k0`` is always 0.1. ``c0`` is the median of the tail of the series.
        When every entry is NaN the fixed guess ``(1.0, 0.1, 0.0)`` is returned.
    """
    observed = y[~np.isnan(y)]
    if observed.size == 0:
        return (1.0, 0.1, 0.0)

    spread = np.nanmax(observed) - np.nanmin(observed)
    if spread != 0:
        amplitude = spread
    else:
        # Flat series: fall back to the magnitude of the first point, at least 1.
        amplitude = max(abs(observed[0]), 1.0)

    # Plateau guess: median of the last fifth of the points (never fewer than 3).
    tail_length = max(3, observed.size // 5)
    plateau = np.nanmedian(observed[-tail_length:])

    decay_rate = 0.1  # a modest decay
    return (amplitude, decay_rate, plateau)

def fit_one_series(x, y):
    """Fit the exponential-decay model to one measurement series.

    Points where either ``x`` or ``y`` is NaN are dropped before fitting.

    Parameters
    ----------
    x : numpy.ndarray
        Time values.
    y : numpy.ndarray
        Measured values, same length as ``x``.

    Returns
    -------
    dict or None
        ``None`` when fewer than 3 usable points remain or when the fit
        raises. Otherwise a dict with keys ``"params"`` (fitted ``a, k, c``),
        ``"cov"`` (covariance from ``curve_fit``), ``"mse"``, ``"r2"`` and
        ``"n"`` (number of points used).
    """
    usable = ~(np.isnan(x) | np.isnan(y))
    xs, ys = x[usable], y[usable]
    if xs.size < 3:
        # Three parameters need at least three points.
        return None

    start = initial_guess(ys)
    # a and c are unconstrained; k is restricted to the range [0, 10].
    lower = (-np.inf, 0.0, -np.inf)
    upper = (np.inf, 10.0, np.inf)

    try:
        popt, pcov = curve_fit(
            exp_decay,
            xs,
            ys,
            p0=start,
            bounds=(lower, upper),
            maxfev=10000,
            check_finite=True,
        )
    except Exception as e:
        # Best effort: report the failure and let the caller plot the raw
        # scatter without a fitted curve.
        print(f"Fit failed for a series: {e}")
        return None

    fitted = exp_decay(xs, *popt)
    return {
        "params": popt,
        "cov": pcov,
        "mse": mean_squared_error(ys, fitted),
        "r2": r2_score(ys, fitted),
        "n": xs.size,
    }

# ---------- **do fits** ----------
# One shared figure for all replicates; fit_rows gathers one table row each.
fig, ax = plt.subplots(figsize=(9, 6), dpi=150)
fit_rows = []

# Dense x grid for smooth fitted curves, running from the first time point to
# fit_extend_minutes past the last one.
xmin = np.nanmin(x)
xmax = np.nanmax(x)
xmax_plot = xmax + fit_extend_minutes
x_dense = np.linspace(xmin, xmax_plot, dense_points)

for idx, y in enumerate(replicate_cols, start=1):
    color = cmap((idx - 1) % 10)  # colours repeat after 10 replicates

    # Raw data first, so the scatter is drawn even if the fit fails.
    ax.scatter(x, y, s=dot_size, color=color, alpha=0.85, label=f"Rep {idx} data")

    fit = fit_one_series(x, y)
    if fit is None:
        # No fit available: keep the replicate in the table with NaN parameters.
        fit_rows.append({
            "replicate": idx,
            "a": np.nan, "k": np.nan, "c": np.nan,
            "MSE": np.nan, "R2": np.nan,
            "n_points": np.count_nonzero(~np.isnan(y)),
        })
        continue

    a, k, c = fit["params"]
    y_line = exp_decay(x_dense, a, k, c)
    ax.plot(
        x_dense, y_line,
        color=color, lw=line_width,
        label=f"Rep {idx} fit (k={k:.3g}, c={c:.3g}, R²={fit['r2']:.3f})",
    )
    fit_rows.append({
        "replicate": idx,
        "a": a, "k": k, "c": c,
        "MSE": fit["mse"], "R2": fit["r2"],
        "n_points": fit["n"],
    })

# ---------- axes cosmetics ----------
# Title and axis labels come from the figure-cosmetics settings.
ax.set_title(title, fontsize=14)
ax.set_xlabel(x_unit_label, fontsize=12)
ax.set_ylabel(y_unit_label, fontsize=12)
# Fixed x window: the fitted curves are computed out to xmax_plot, but only
# 0-200 is displayed. NOTE(review): hard-coded; confirm it suits other datasets.
ax.set_xlim(0, 200)   

# Pool every non-NaN measurement and place y ticks every 0.5, from the data
# minimum rounded down to the data maximum rounded up (both to the nearest 0.5).
all_y = np.concatenate([col[~np.isnan(col)] for col in replicate_cols])
if all_y.size > 0:
    y_min = float(np.floor(np.nanmin(all_y) * 2) / 2.0)
    y_max = float(np.ceil(np.nanmax(all_y) * 2) / 2.0)
    # np.arange excludes its stop value; the +0.001 keeps y_max as a tick.
    ax.set_yticks(np.arange(y_min, y_max + 0.001, 0.5))

ax.grid(True, which="both", color="#dcdcdc", lw=0.7, alpha=0.7)
ax.legend(ncol=1, fontsize=9, frameon=True)

# Fixed y window, applied after the ticks are set. NOTE(review): hard-coded
# 6-14; ticks only span the data range and data outside this window is not
# shown - confirm this is intended for other datasets.
ax.set_ylim(6.0, 14.0)
fig.tight_layout()

# ---------- save outputs ----------
# Output files: the combined figure (PDF) and the per-replicate fit table (CSV).
fig_path = os.path.join(save_path, "all_scatter_and_fits.pdf")
csv_path = os.path.join(save_path, "fit_parameters.csv")

# Save through the Figure object rather than plt.savefig: pyplot saves
# whichever figure is "current", which is not guaranteed to be this one if
# another figure has been created or selected in the meantime.
fig.savefig(fig_path, dpi=300)
plt.show()
plt.close(fig)  # release the figure once it has been saved and displayed

# One row per replicate; failed fits appear with NaN parameters.
pd.DataFrame(fit_rows).to_csv(csv_path, index=False)

print(f"Saved plot to: {fig_path}")
print(f"Saved fit parameters to: {csv_path}")