In [None]:
import pandas as pd
import numpy as np


# --- Configuration -----------------------------------------------------------
RAW_FILE = "merged_dataset.csv"        # input CSV read below
OUT_FILE = "verilog_ready_dataset.csv" # output CSV name
SYSTEM_FREQ = 50                       # divisor used to derive samples per cycle
BITS = 16                              # word width used for quantization


df = pd.read_csv(RAW_FILE)


# The sampling interval is taken from the first two timestamps only.
# NOTE(review): this assumes column 't' is uniformly sampled -- confirm.
dt = df['t'].iloc[1] - df['t'].iloc[0]
if not dt > 0:
    # Also catches NaN; avoids an infinite or negative sampling rate below.
    raise ValueError(
        f"Column 't' must be strictly increasing; got t[1] - t[0] = {dt!r}"
    )
fs = 1 / dt

# Round to nearest instead of truncating: floating-point error in dt can make
# fs / SYSTEM_FREQ land just below an integer (e.g. 199.9999999), and int()
# alone would then shorten every window by one sample.
samples_per_cycle = int(round(fs / SYSTEM_FREQ))

print("Sampling frequency:", fs)
print("Samples per cycle:", samples_per_cycle)



# Columns of df that are packed into each feature row, in output order.
signals = ['Va', 'Vb', 'Vc', 'Ia', 'Ib', 'Ic']

windows = []
labels = []

# Cut the record into consecutive, non-overlapping windows of
# samples_per_cycle rows. The "+ 1" keeps the final window when len(df) is an
# exact multiple of the window size (without it that window is dropped).
for start in range(0, len(df) - samples_per_cycle + 1, samples_per_cycle):
    window = df.iloc[start:start + samples_per_cycle]

    # Keep only windows whose 'Fault' value is the same on every row.
    if window['Fault'].nunique() == 1:
        # window[signals] has shape (sample, signal). Transpose before
        # flattening so each row is laid out signal by signal
        # (Va_0..Va_{N-1}, Vb_0.., ...), which is the order of the column
        # names generated for these rows. A plain row-major flatten would
        # interleave the signals (Va_0, Vb_0, Vc_0, ...) and mislabel them.
        windows.append(window[signals].to_numpy().T.flatten())
        labels.append(window['Fault'].iloc[0])

# Fixed-width reshape keeps X two-dimensional even when no window qualified.
X = np.array(windows).reshape(-1, len(signals) * samples_per_cycle)
y = np.array(labels)


# Feature column names, grouped by signal: every sample index of the first
# signal, then every sample index of the next one, and so on
# ("<signal>_<sample index>").
# NOTE(review): these names presume each row of X is laid out signal by
# signal in this same order -- confirm against how X is flattened.
cols = [
    f"{name}_{idx}"
    for name in signals
    for idx in range(samples_per_cycle)
]

# One row per window, with the window's label appended as the last column.
proc_df = pd.DataFrame(X, columns=cols).assign(Fault=y)


# Largest positive value of a signed BITS-wide integer (32767 when BITS == 16).
Qmax = 2**(BITS - 1) - 1

# Scale each signal independently so that its largest absolute value over the
# whole dataset maps to +/-Qmax, then round to integers.
for sig in signals:
    # Match on "<sig>_" rather than the bare name so a signal whose name is a
    # prefix of another one cannot capture the other's columns.
    prefix = f"{sig}_"
    sig_cols = [c for c in proc_df.columns if c.startswith(prefix)]

    values = proc_df[sig_cols].to_numpy(dtype=float)
    if values.size == 0:
        # No rows or no columns for this signal: nothing to scale.
        continue

    max_val = np.max(np.abs(values))
    if max_val == 0:
        # An all-zero signal would divide by zero and yield NaN, which cannot
        # be cast to int; it is already "quantized", so only fix the dtype.
        proc_df[sig_cols] = proc_df[sig_cols].astype(int)
        continue

    proc_df[sig_cols] = np.round(
        proc_df[sig_cols] / max_val * Qmax
    ).astype(int)

proc_df.to_csv(OUT_FILE, index=False)
print("Saved:", OUT_FILE)


Sampling frequency: 10000.0
Samples per cycle: 200
Saved: verilog_ready_dataset.csv
