In [2]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os
import re
import pickle

from itertools import combinations
from tqdm import tqdm
from pathlib import Path

from scipy import signal



matplotlib.use("Agg")     
matplotlib.rcParams["agg.path.chunksize"] = 10_000  

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

RAW_DATA_DIR = Path("./data/raw")
PROCESSED_DATA_DIR = Path("./data/processed")
IMAGES_DIR = Path("./images")
DUMP_DIR = Path("./dump")

# Sections to Run
column_permutation_plots = False
oscillator_parameter_space = True


# Column Permutation Plotting

In [14]:
def generate_permutation_plots():
    for device_dir in tqdm(list(PROCESSED_DATA_DIR.iterdir()), desc="Device folders", unit="folder"):
        if not device_dir.is_dir():
            continue
        csv_files = sorted(device_dir.glob("*.csv"))
        if not csv_files:
            continue

        dfs = {
            csv.name: {
                col: pd.read_csv(csv)[col].to_numpy(dtype=np.float64, copy=False)[::10]
                for col in pd.read_csv(csv).columns
            }
            for csv in csv_files
        }
        cols = list(next(iter(dfs.values())).keys())

        out_dir = IMAGES_DIR / "permutation_plots" / device_dir.name
        out_dir.mkdir(parents=True, exist_ok=True)

        fig, ax = plt.subplots()

        for x, y in tqdm(combinations(cols, 2), desc=device_dir.name, unit="pair", leave=False):
            ax.clear()
            for arrays in dfs.values():
                ax.plot(arrays[x], arrays[y])
            ax.set_xlabel(x)
            ax.set_ylabel(y)
            ax.set_title(f"{y} vs {x}")
            ax.grid(True)
            fname = f"permutation_plots_{x}_{y}.png".replace(" ", "")
            fig.savefig(out_dir / fname, dpi=300, bbox_inches="tight")
            fig.canvas.draw_idle()

        plt.close(fig)

if column_permutation_plots:
    generate_permutation_plots()


# Oscillator Parameter Space

In [6]:
def hilbert_frequency(t, x, amp_thresh=0.3, detrend=True):

    if detrend:
        x = signal.detrend(x, type="linear")

    fs = 1.0 / np.median(np.diff(t))

    analytic = signal.hilbert(x)
    envelope = np.abs(analytic)
    phase = np.unwrap(np.angle(analytic))

    inst_freq = np.diff(phase) / (2 * np.pi) * fs   

    use = envelope[:-1] > (amp_thresh * np.median(envelope))
    if not np.any(use):
        return np.nan

    return np.nanmedian(inst_freq[use])


FILENAME_RE = re.compile(
    r"^[A-Za-z0-9]+-"               
    r"(?P<device>[A-Za-z0-9]+)-"
    r"(?P<R>[\d.]+)kOhm-"
    r"(?P<C>[\d.]+)nF-"
    r"(?P<T>\d+)K-"
    r"(?P<V>[\d.]+)V$",
    re.IGNORECASE,
)

def generate_parameter_space_df():
    rows = []

    for csv_path in tqdm(
        PROCESSED_DATA_DIR.rglob("*.csv"),
        desc="CSV files",
        unit=" files",
    ):
        match = FILENAME_RE.match(csv_path.stem)
        if match is None:               
            tqdm.write(f"Skipped {csv_path.name} (name pattern mismatch)")
            continue

        device_name   = match["device"]
        resistance    = float(match["R"]) * 1_000     
        capacitance   = float(match["C"]) * 1e-9        
        temperature   = int(match["T"])     
        voltage       = float(match["V"])        

        try:
            raw = pd.read_csv(csv_path)
            t = raw.iloc[:, 0].to_numpy(dtype=float)
            x = raw.iloc[:, 1].to_numpy(dtype=float)
            frequency = hilbert_frequency(t, x)
        except Exception as e:
            tqdm.write(f"{csv_path.name}: {e}")
            frequency = np.nan

        rows.append(
            {
                "device_name":  device_name,
                "resistance": resistance,
                "capacitance":  capacitance,
                "temperature":   temperature,
                "voltage":       voltage,
                "frequency":    frequency,
                "file_name":       csv_path.name, 
            }
        )

    return pd.DataFrame(rows)

if oscillator_parameter_space:
    parameter_space_df = generate_parameter_space_df()

    with open(str(DUMP_DIR / "parameter_space/parameter_space_df.pkl"), "wb") as f:
        pickle.dump(parameter_space_df, f)
        parameter_space_df.to_csv(
            str(DUMP_DIR / "parameter_space/parameter_space_df.csv"), 
            index=False
        )

CSV files: 180 files [01:09,  2.60 files/s]


In [3]:
with open(str(DUMP_DIR / "parameter_space/parameter_space_df.pkl"), "rb") as f:
    parameter_space_df = pickle.load(f)

devices = parameter_space_df["device_name"].unique()

for device in devices:
    device_df = parameter_space_df[
        parameter_space_df["device_name"] == device
    ]

    plt.scatter(
        device_df["voltage"], 
        device_df["frequency"],
    )
    plt.xlabel("Voltage")
    plt.ylabel("Frequency")
    plt.title(f"{device} Frequency vs Voltage")
    plt.savefig(str(IMAGES_DIR / f"parameter_space/{device}_frequency_vs_voltage.png"))
    plt.close()

In [4]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

parameter_space_df.head()

Unnamed: 0,device_name,resistance,capacitance,temperature,voltage,frequency,file_path
0,e12,800.0,1e-09,300,0.935,24134.523687,data/processed/dev-e12/LZ5G-e12-0.8kOhm-1nF-30...
1,e12,800.0,1e-09,300,0.93,19298.801516,data/processed/dev-e12/SVBN-e12-0.8kOhm-1nF-30...
2,e12,800.0,1e-08,300,0.9,61131.624871,data/processed/dev-e12/H93D-e12-0.8kOhm-10nF-3...
3,e12,800.0,1e-09,300,0.91,23565.414142,data/processed/dev-e12/XAFB-e12-0.8kOhm-1nF-30...
4,e12,800.0,1e-08,300,0.98,65591.181799,data/processed/dev-e12/N48K-e12-0.8kOhm-10nF-3...
