## This is starter code for single-point prediction with CNNs

In [None]:
import os
import glob
import joblib

# common math imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# common torch imports
import torch
import torch.nn.functional as F

# common sklearn imports 
from sklearn.preprocessing import LabelEncoder, StandardScaler

## Load and process data

In [None]:
# Root directory that holds every dataset split.
main_path = 'data'

# Folders whose X_*/Y_* shards are merged into the training set.
# Optional extra sources, currently disabled:
#   f'{main_path}/rain-sounds', f'{main_path}/colored-noise'
train_folders = [f'{main_path}/train']

# Folder whose shards form the validation / test set.
test_folder = f'{main_path}/val'

def _load_xy_shards(folders):
    """Load every paired ``X_*.npy`` / ``Y_*.npy`` shard found in *folders*.

    Files inside each folder are paired by sorted file name (for example
    ``X_1000.npy`` with ``Y_1000.npy``).

    Args:
        folders: iterable of directory paths to scan, in order.

    Returns:
        A ``(x_arrays, y_arrays)`` tuple of lists holding one loaded array per
        shard, ordered by folder and then by sorted file name.

    Raises:
        ValueError: if a folder holds a different number of X and Y shards,
            or a paired X/Y file name differs after its leading letter.
        FileNotFoundError: if no shard is found in any of the folders.
    """
    x_arrays, y_arrays = [], []

    for folder in folders:
        x_files = sorted(glob.glob(os.path.join(folder, "X_*.npy")))
        y_files = sorted(glob.glob(os.path.join(folder, "Y_*.npy")))

        # zip() stops at the shorter list, so a missing shard would be
        # dropped silently; fail loudly instead.
        if len(x_files) != len(y_files):
            raise ValueError(
                f"{folder}: found {len(x_files)} X shards "
                f"but {len(y_files)} Y shards"
            )

        for xf, yf in zip(x_files, y_files):
            # Everything after the leading "X"/"Y" must agree, otherwise
            # features would be paired with the wrong labels.
            if os.path.basename(xf)[1:] != os.path.basename(yf)[1:]:
                raise ValueError(f"Mismatched shard pair: {xf} / {yf}")
            x_arrays.append(np.load(xf))
            y_arrays.append(np.load(yf))

    if not x_arrays:
        # Clearer than numpy's "need at least one array to concatenate".
        raise FileNotFoundError(
            f"No X_*.npy / Y_*.npy shards found in {list(folders)}"
        )

    return x_arrays, y_arrays


# ---------- TRAINING DATA ----------
X_list, Y_list = _load_xy_shards(train_folders)

# Join the shards along the first axis
X_train = np.vstack(X_list)
Y_train = np.concatenate(Y_list)

# Drop the per-shard references so only the stacked arrays stay referenced
del X_list, Y_list


# ---------- VALIDATION / TEST DATA ----------
X_test_list, Y_test_list = _load_xy_shards([test_folder])

X_test = np.vstack(X_test_list)
Y_test = np.concatenate(Y_test_list)

del X_test_list, Y_test_list

# --- SCALE DATA ---
# BA, FR, TI are the three axes of the stacked training array
# (presumably batch, frequency and time -- TODO confirm against the data).
BA, FR, TI = X_train.shape

# Fit the scaler on the training samples flattened to (BA, FR * TI), undo the
# flattening, then insert a singleton axis at position 1.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(BA, FR * TI)).reshape(BA, FR, TI)
X_train = np.expand_dims(X_train, axis=1)

# The test split is transformed with the scaler fitted on the training split.
n_test = X_test.shape[0]
X_test = scaler.transform(X_test.reshape(n_test, -1)).reshape(n_test, FR, TI)
X_test = np.expand_dims(X_test, axis=1)

# --- TO NUMPY WITH 16-bit PRECISION ---
# Features become half-precision floats, labels become 16-bit integers.
feature_dtype = np.float16
label_dtype = np.int16

X_train = X_train.astype(feature_dtype)
X_test = X_test.astype(feature_dtype)
Y_train, Y_test = Y_train.astype(label_dtype), Y_test.astype(label_dtype)

# --- SAVE NUMPY ARRAYS ---
# Each array is written next to the raw data, under main_path.
for npy_name, npy_data in (
    ("X_train.npy", X_train),
    ("Y_train.npy", Y_train),
    ("X_test.npy", X_test),
    ("Y_test.npy", Y_test),
):
    np.save(os.path.join(main_path, npy_name), npy_data)

# --- SAVE SCALER ---
# Persist the fitted scaler in the same directory as the arrays.
scaler_path = os.path.join(main_path, "standard_scaler.joblib")
joblib.dump(scaler, scaler_path)
print(f"Saved scaler to {scaler_path}")


# --- PRINT SHAPES ---
# One line per array; sep="\n" gives the same output as four separate prints.
print(
    f"X_train shape: {X_train.shape}",
    f"Y_train shape: {Y_train.shape}",
    f"X_test shape: {X_test.shape}",
    f"Y_test shape: {Y_test.shape}",
    sep="\n",
)


Training data size: 52314
Testing data size: 13013


In [None]:
# Visual sanity check: show the first training sample as an image.
idx = 0
# Indexing with (idx, 0) selects sample idx and entry 0 of the singleton
# axis, leaving the remaining two axes as the 2-D image.
plt.imshow(X_train[idx, 0])