In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# **Data Loading**

In [None]:
# Root folder of the raw UORED-VAFCLS CSV files on the mounted Google Drive.
base_path = "/content/drive/MyDrive/University of Ottawa Rolling-element Dataset – Vibration and Acoustic Faults under Constant Load and Speed conditions (UORED-VAFCLS)/1_CSV_Raw_Data_Files (.csv)"

# Maps each class sub-folder name under base_path to the short label used as
# the key in `folders`.
label_map = {
    "1_Healthy": "Healthy",
    "2_Inner_Race_Faults": "InnerRace",
    "3_Outer_Race_Faults": "OuterRace",
    "4_Ball_Faults": "BallFault",
    "5_Cage_Faults": "CageFault"
}

# Dictionary to store data separately by class:
# folders[label][csv_file_name] -> DataFrame with the full contents of that file.
folders = {}

for folder, label in label_map.items():
    folder_path = os.path.join(base_path, folder)

    # os.listdir returns entries in arbitrary order; sort them so the
    # per-class dictionaries (and any positional lookup into their keys)
    # come out the same on every run.
    csv_names = sorted(
        name for name in os.listdir(folder_path) if name.endswith(".csv")
    )

    folders[label] = {
        name: pd.read_csv(os.path.join(folder_path, name)) for name in csv_names
    }




In [None]:
# Sanity check of the loaded data: the class labels, the file names of the
# Healthy class, and the first ten rows of the second Healthy file.
print("Available classes:", list(folders.keys()))

healthy_files = folders["Healthy"]
healthy_names = list(healthy_files.keys())
print("Files in 'Healthy':", healthy_names)

print("One sample file head:")
sample_name = healthy_names[1]
print(healthy_files[sample_name].head(10))

Available classes: ['Healthy', 'InnerRace', 'OuterRace', 'BallFault', 'CageFault']
Files in 'Healthy': ['H_10_0.csv', 'H_14_0.csv', 'H_12_0.csv', 'H_16_0.csv', 'H_13_0.csv', 'H_17_0.csv', 'H_3_0.csv', 'H_19_0.csv', 'H_1_0.csv', 'H_20_0.csv', 'H_6_0.csv', 'H_4_0.csv', 'H_2_0.csv', 'H_9_0.csv', 'H_5_0.csv', 'H_8_0.csv', 'H_7_0.csv', 'H_15_0.csv', 'H_11_0.csv', 'H_18_0.csv']
One sample file head:
   Accelerometer  Acoustic  Speed  Load  Temperature Difference
0       4.938235  0.003684   1736   400                5.864023
1       2.488559  0.005328      0     0                5.538230
2       4.583677  0.004999      0     0                4.884955
3      -0.347907  0.004671      0     0                6.027968
4      -0.380140  0.003356      0     0                5.538230
5      -0.154512  0.003684      0     0                6.682000
6      -1.250419  0.004342      0     0                6.853341
7       0.490140  0.003356      0     0                5.864023
8       0.200047  0.004013 

In [None]:
# Report how many CSV files were loaded for each class label.
for label, class_files in folders.items():
    print(f"{label}: {len(class_files)}")

Healthy: 20
InnerRace: 10
OuterRace: 10
BallFault: 10
CageFault: 10


# **Accelerometer signals to Scalograms**

In [None]:
import pywt

fs = 42000  # Hz (dataset uses 42 kHz sampling rate)
segment_size = 2048  # samples per scalogram window (2048 / 42000 ~ 48.8 ms)

def morlet_scales_for_band(fs, fmin=100, fmax=15000, n_scales=128, fc=0.8125):
    """Build CWT scales whose target frequencies are log-spaced over a band.

    Parameters
    ----------
    fs : float
        Sampling rate in Hz.
    fmin, fmax : float
        First and last target frequency in Hz of the logarithmic grid.
    n_scales : int
        Number of target frequencies (and therefore scales) to generate.
    fc : float
        Wavelet centre frequency used in ``scale = fc * fs / frequency``.
        NOTE(review): 0.8125 is presumably the centre frequency of the
        PyWavelets "morl" wavelet - confirm with pywt.central_frequency.

    Returns
    -------
    (scales, freqs) : tuple of numpy.ndarray
        ``freqs`` holds ``n_scales`` values running from ``fmin`` to ``fmax``;
        ``scales[k]`` is the scale paired with ``freqs[k]``.
    """
    log_lo = np.log10(fmin)
    log_hi = np.log10(fmax)
    band_freqs = np.logspace(log_lo, log_hi, n_scales)

    # scale and frequency are inversely related through fc * fs
    scale_numerator = fc * fs
    band_scales = scale_numerator / band_freqs
    return band_scales, band_freqs

# CWT scales covering 100 Hz - 15 kHz (128 log-spaced target frequencies).
scales, target_freqs = morlet_scales_for_band(fs=fs, fmin=100, fmax=15000, n_scales=128)
wavelet = "morl"

# NOTE: this cell converts a single class folder (1_Healthy); point
# input_dir / output_dir at another class folder to convert that class.
input_dir = "/content/drive/MyDrive/University of Ottawa Rolling-element Dataset – Vibration and Acoustic Faults under Constant Load and Speed conditions (UORED-VAFCLS)/1_CSV_Raw_Data_Files (.csv)/1_Healthy"
output_dir = "/content/drive/MyDrive/UORED_scalograms_2048ss/1_Healthy"
os.makedirs(output_dir, exist_ok=True)

# Loop through all CSV files in the folder (sorted so the processing order is
# the same on every run; os.listdir order is arbitrary).
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(".csv"):
        continue

    df = pd.read_csv(os.path.join(input_dir, filename))

    # Same guard as the acoustic cell: skip files without the needed channel
    # instead of aborting the whole conversion with a KeyError.
    if "Accelerometer" not in df.columns:
        print(f"⚠️ Skipping (no 'Accelerometer' column): {filename}")
        continue

    # Deliberately not called `signal`: the spectrogram cell binds that global
    # name to the scipy.signal module, and rebinding it to an array here would
    # break that cell's function if this cell is re-run afterwards.
    vibration = df["Accelerometer"].to_numpy()
    stem = os.path.splitext(filename)[0]

    # Non-overlapping windows of segment_size samples. The "+ 1" keeps the
    # final full window when the signal length is an exact multiple of
    # segment_size; a trailing partial window is still discarded.
    for i in range(0, len(vibration) - segment_size + 1, segment_size):
        segment = vibration[i:i + segment_size]

        # Compute CWT scalogram (rows follow `scales`, i.e. ascending target frequency)
        coeffs, _cwt_freqs = pywt.cwt(segment, scales, wavelet, sampling_period=1/fs)

        # Plot and save. The figure canvas is 224x224 px (2.24 in at 100 dpi).
        # NOTE(review): bbox_inches="tight" crops to the drawn content, so the
        # saved PNG may be smaller than 224x224 - confirm the downstream
        # loader resizes images before feeding the ViT.
        fig, ax = plt.subplots(figsize=(2.24, 2.24), dpi=100)
        try:
            ax.imshow(np.abs(coeffs),
                      extent=[0, segment_size/fs, target_freqs.min(), target_freqs.max()],
                      cmap="jet", aspect="auto", origin="lower")
            ax.axis("off")

            # Save with segment index
            out_name = f"{stem}_{i//segment_size}.png"
            fig.savefig(os.path.join(output_dir, out_name),
                        bbox_inches="tight", pad_inches=0)
        finally:
            # Always release the figure, even if saving fails, so a long run
            # does not accumulate open figures.
            plt.close(fig)

print("Successfully Converted Scalograms")

# **Acoustic signals to Spectrograms**

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import signal

# Settings for turning the "Acoustic" channel into STFT spectrogram images.
# NOTE: `fs` and `segment_size` reuse the names set in the scalogram cell, so
# running this cell overwrites them (segment_size becomes 512).
fs = 42000                 # Hz
segment_size = 512         # samples per image (~12.19 ms)
nperseg = 505              # STFT window length in samples
noverlap = 460             # STFT overlap in samples (~91% of nperseg, i.e. a 45-sample hop)
cmap = "viridis"           # colormap
fig_px = 224               # side of the square figure canvas in pixels (used as fig_px/100 inches at dpi=100)

# Only the 4_Ball_Faults class folder is converted by this cell; point both
# paths at another class folder to convert that class.
input_dir  = "/content/drive/MyDrive/University of Ottawa Rolling-element Dataset – Vibration and Acoustic Faults under Constant Load and Speed conditions (UORED-VAFCLS)/1_CSV_Raw_Data_Files (.csv)/4_Ball_Faults"
output_dir = "/content/drive/MyDrive/UORED_acoustic_spectrograms_512/4_Ball_Faults"
os.makedirs(output_dir, exist_ok=True)

def save_stft_image(segment, out_path):
    """Save the STFT magnitude (in dB) of one signal segment as an image file.

    The transform and rendering settings are read from the module-level
    names ``fs``, ``nperseg``, ``noverlap``, ``cmap`` and ``fig_px``.

    Parameters
    ----------
    segment : array-like
        1-D run of samples to transform.
    out_path : str
        Destination path handed to ``Figure.savefig``.

    Returns
    -------
    None
        The only effect is the image written to ``out_path``.
    """
    # Imported locally on purpose: the scalogram cell assigns a NumPy array to
    # the global name `signal`, so looking up `signal.stft` at call time would
    # fail if that cell is re-run after this function has been defined.
    from scipy.signal import stft

    # STFT
    f, t, Zxx = stft(segment, fs=fs, nperseg=nperseg, noverlap=noverlap)

    # Magnitude in dB; the 1e-12 floor avoids log10(0) on silent bins.
    S_db = 20 * np.log10(np.maximum(np.abs(Zxx), 1e-12))

    # Plot on a fig_px x fig_px canvas.
    # NOTE(review): bbox_inches="tight" crops to the drawn content, so the
    # saved file may be smaller than fig_px pixels per side - confirm.
    fig, ax = plt.subplots(figsize=(fig_px/100, fig_px/100), dpi=100)
    try:
        ax.imshow(S_db, aspect="auto", origin="lower",
                  extent=[t.min(), t.max(), f.min(), f.max()],
                  cmap=cmap)
        ax.axis("off")
        fig.savefig(out_path, bbox_inches="tight", pad_inches=0)
    finally:
        # Release the figure even when saving raises, so repeated calls do
        # not pile up open figures.
        plt.close(fig)

# Convert every CSV in input_dir into one spectrogram image per
# non-overlapping window of segment_size acoustic samples.
print(f"📂 Processing folder: {input_dir}")

# Sorted so files are processed in the same order on every run
# (os.listdir order is arbitrary).
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(".csv"):
        continue

    df = pd.read_csv(os.path.join(input_dir, filename))

    if "Acoustic" not in df.columns:
        print(f"⚠️ Skipping (no 'Acoustic' column): {filename}")
        continue

    acoustic = df["Acoustic"].to_numpy()
    stem = os.path.splitext(filename)[0]

    # The "+ 1" keeps the final full window when the signal length is an exact
    # multiple of segment_size; a trailing partial window is still discarded.
    for i in range(0, len(acoustic) - segment_size + 1, segment_size):
        seg = acoustic[i:i+segment_size]
        # Output name: <csv stem>_<window index>.png
        out_name = f"{stem}_{i//segment_size}.png"
        out_path = os.path.join(output_dir, out_name)
        save_stft_image(seg, out_path)

print("Successfully Converted Spectrograms ")