# Training

## Libraries

In [1]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
from glob import glob
import os

## Dataset loading

In [2]:
def load_dataset(type="train"):
    """Index the ``.wav`` files of one dataset split, grouped by class.

    Expected layout (one sub-folder per class)::

        data/
        ├── train
        │   ├── <class>
        │   │   ├── 1.wav
        │   │   ├── 2.wav
        │   │   ├── ...
        │   └── ...

    Args:
        type: Name of the split folder under ``data/`` (e.g. ``"train"``).
            The name shadows the builtin but is kept for caller compatibility.

    Returns:
        ``(audio_files, classes)`` where ``classes`` is the sorted list of
        class folder names and ``audio_files`` maps each class name to the
        sorted list of its ``*.wav`` paths.
    """
    # Path to the split folder
    audio_folder = "data/{}".format(type)
    # Only sub-directories are classes; stray files such as ".DS_Store" or a
    # README would otherwise become bogus (empty) classes and shift label ids.
    classes = sorted(
        name
        for name in os.listdir(audio_folder)
        if os.path.isdir(os.path.join(audio_folder, name))
    )
    # Collect the audio files of every class as a separate sorted list
    audio_files = {
        c: sorted(glob(os.path.join(audio_folder, c, "*.wav"))) for c in classes
    }

    print("Classes: ", classes)
    print(audio_files)
    return audio_files, classes

# load_dataset("train")


## Feature extraction

In [3]:
def zcr(frame, frame_length):
    """Return the zero-crossing rate of ``frame``.

    The rate is the number of sign changes between consecutive samples
    divided by ``frame_length``. A sample that is exactly zero contributes
    half a crossing on each side, because ``np.sign`` maps it to 0.
    """
    signs = np.sign(frame)
    # A direct +1 <-> -1 transition has |diff| == 2, hence the halving.
    sign_changes = np.abs(np.diff(signs)).sum() / 2
    return sign_changes / frame_length

def rms(frame):
    """Return the root-mean-square amplitude of ``frame``."""
    # Square every sample, average, then take the square root.
    mean_square = np.mean(frame ** 2)
    return np.sqrt(mean_square)

def temporal_entropy(frame):
    """Return the Shannon entropy (in bits) of the frame's amplitude histogram.

    Sample values are binned into 8 equal-width bins spanning the frame's own
    minimum and maximum; the entropy of the resulting bin probabilities is
    returned, so the result lies between 0 and 3 bits.
    """
    lowest, highest = np.min(frame), np.max(frame)
    counts, _ = np.histogram(frame, bins=8, range=(lowest, highest))
    probabilities = counts / counts.sum()
    # Empty bins are dropped so that log2(0) is never evaluated.
    occupied = probabilities[probabilities > 0]
    return -np.sum(occupied * np.log2(occupied))

def compute_fft(y, frame_length, hop_length):
    """Compute the Short-Time Fourier Transform (STFT) of ``y`` via librosa.

    Thin wrapper around ``librosa.stft`` using a Hann window,
    ``n_fft=frame_length``, ``hop_length=hop_length`` and ``center=False``.
    The result of ``librosa.stft`` is returned unchanged; unlike the
    commented-out NumPy variant below, no ``np.abs`` is applied here.
    """
    # Earlier hand-rolled NumPy variant, kept for reference:
    # frames = librosa.util.frame(y, frame_length=frame_length, hop_length=hop_length)
    # window = np.hanning(frame_length)  # Apply a Hanning window
    # fft_result = np.fft.rfft(frames * window[:, None], axis=0)  # Compute FFT
    # return np.abs(fft_result)  # Return the magnitude spectrum
    return librosa.stft(y, n_fft=frame_length, hop_length=hop_length, window='hann', center=False)

def spectral_centroid(S, sr):
    """Compute the spectral centroid (magnitude-weighted mean frequency).

    Thin wrapper: forwards the spectrogram ``S`` and sample rate ``sr`` to
    ``librosa.feature.spectral_centroid`` and returns its result unchanged.
    """
    # Earlier hand-rolled NumPy variant, kept for reference:
    # freqs = np.fft.rfftfreq(S.shape[0] * 2 - 1, d=1/sr)
    # magnitude = np.sum(S, axis=1)
    # centroid = np.sum(freqs * magnitude) / np.sum(magnitude)
    # return centroid
    return librosa.feature.spectral_centroid(S=S, sr=sr)

def spectral_rolloff(S, sr, roll_percent=0.85):
    """Compute the spectral rolloff frequency.

    The rolloff is the frequency below which ``roll_percent`` of the total
    spectral energy is contained. Thin wrapper: forwards ``S``, ``sr`` and
    ``roll_percent`` to ``librosa.feature.spectral_rolloff`` and returns its
    result unchanged.
    """
    # Earlier hand-rolled NumPy variant, kept for reference:
    # freqs = np.fft.rfftfreq(S.shape[0] * 2 - 1, d=1/sr)
    # total_energy = np.sum(S)
    # cumulative_energy = np.cumsum(S)
    # rolloff_idx = np.where(cumulative_energy >= roll_percent * total_energy)[0]
    # if len(rolloff_idx) == 0:  # Handle case where no index satisfies the condition
    #     return freqs[-1]  # Return the highest frequency
    # return freqs[rolloff_idx[0]]
    # Using librosa's built-in function for simplicity
    return librosa.feature.spectral_rolloff(S=S, sr=sr, roll_percent=roll_percent)

def spectral_flatness(S):
    """Compute the spectral flatness of the whole spectrogram ``S``.

    Flatness is the ratio of the geometric mean to the arithmetic mean of the
    values in ``S``. Both means are taken over every element of ``S``, so a
    single scalar is returned (not one value per frame).

    The same small offset is applied to both means. This keeps the ratio
    finite for an all-zero (silent) spectrogram, where it evaluates to ~1.0,
    and guarantees a result in (0, 1] by the AM-GM inequality. With the offset
    only in the numerator, silence divided by zero and near-silent input
    produced values far above 1.
    """
    eps = 1e-10  # avoids log(0) and division by zero
    geometric_mean = np.exp(np.mean(np.log(S + eps)))
    arithmetic_mean = np.mean(S) + eps
    return geometric_mean / arithmetic_mean

def band_ratio(S, sr, frame_length):
    """Compute the per-frame mid-band / low-band ratio of ``S``.

    Rows of ``S`` are mapped to frequencies with
    ``librosa.fft_frequencies(sr=sr, n_fft=frame_length)``. For every column
    the values in the mid band (1000 Hz <= f < 4000 Hz) are summed and
    divided by the sum of the values in the low band (100 Hz <= f < 1000 Hz).
    """
    bin_freqs = librosa.fft_frequencies(sr=sr, n_fft=frame_length)
    in_low_band = (bin_freqs >= 100) & (bin_freqs < 1000)
    in_mid_band = (bin_freqs >= 1000) & (bin_freqs < 4000)
    low_sum = S[in_low_band, :].sum(axis=0)
    mid_sum = S[in_mid_band, :].sum(axis=0)
    # The small offset keeps the division defined when the low band is empty.
    return mid_sum / (low_sum + 1e-10)
    # Add small value to avoid division by zero

In [None]:
def extract_audio_features(file_path, sr, frame_length=2048, hop_length=512):
    """Extract the 9-value aggregate feature vector used by the HDC encoder.

    Args:
        file_path: Path of the audio file to analyse.
        sr: Sample rate passed to ``librosa.load`` (the file is resampled).
        frame_length: Analysis frame size in samples (also the STFT ``n_fft``).
        hop_length: Hop between consecutive frames in samples.

    Returns:
        A list of nine scalars, in this order: mean ZCR, std ZCR, mean RMS,
        max RMS, mean temporal entropy, mean spectral centroid, mean spectral
        rolloff, mean spectral flatness, mean band ratio.
    """
    # Load at most the first 5 seconds, resampled to the requested rate.
    y, sr = librosa.load(file_path, sr=sr, duration=5.0)

    # librosa.util.frame rejects signals shorter than one frame, so a short
    # (or empty) clip is zero-padded up to exactly one frame instead of crashing.
    if len(y) < frame_length:
        y = np.pad(y, (0, frame_length - len(y)))

    # Frame-based processing for the time-domain features
    frames = librosa.util.frame(y, frame_length=frame_length, hop_length=hop_length)
    num_frames = frames.shape[1]

    # Per-frame buffers exist only for the features filled in the loop below;
    # the spectral entries are assigned as whole arrays further down.
    features = {
        name: np.zeros(num_frames)
        for name in ('zcr', 'rms', 'temporal_entropy')
    }

    # Time-domain features
    for i in range(num_frames):
        frame = frames[:, i]
        features['zcr'][i] = zcr(frame, frame_length)
        features['rms'][i] = rms(frame)
        features['temporal_entropy'][i] = temporal_entropy(frame)

    # Frequency-domain features on the magnitude spectrogram.
    # NOTE(review): this STFT call does not pass center=False (unlike
    # compute_fft), so its frames are presumably not aligned one-to-one with
    # the time-domain frames above. Only aggregate statistics are used, so
    # nothing breaks; confirm whether the mismatch is intended.
    S = np.abs(librosa.stft(y, n_fft=frame_length, hop_length=hop_length))
    features['spectral_centroid'] = spectral_centroid(S, sr)
    features['spectral_rolloff'] = spectral_rolloff(S, sr)
    features['spectral_flatness'] = spectral_flatness(S)

    # Band energy ratio (mid vs low frequencies)
    features['band_ratio'] = band_ratio(S, sr, frame_length)

    # Aggregate statistics for HDC encoding
    feature_vector = [
        np.mean(features['zcr']), np.std(features['zcr']),
        np.mean(features['rms']), np.max(features['rms']),
        np.mean(features['temporal_entropy']),
        np.mean(features['spectral_centroid']),
        np.mean(features['spectral_rolloff']),
        np.mean(features['spectral_flatness']),
        np.mean(features['band_ratio'])
    ]

    return feature_vector

# extract_audio_features("data/test/metal/artemis_recording_31.wav", sr=8000)

[0.2763671875,
 0.03773341123035959,
 0.0019981977669522167,
 0.004150137305259705,
 0.9842715292482215,
 1786.3950209271018,
 2956.43789556962,
 0.5788753,
 2.9733303]

## HDC operations

In [5]:
# --- Feature Setup ---
# Symbolic names of the nine aggregate features. The order has to match the
# order of the list returned by extract_audio_features, because
# encode_feature_vector pairs codebook keys with feature values positionally.
feature_names = [
    'zcr_mean', 'zcr_std', 'rms_mean', 'rms_max',
    'entropy_mean', 'spectral_centroid_mean', 'spectral_rolloff_mean',
    'spectral_flatness_mean', 'band_ratio_mean'
]

# Hand-picked feature pairs whose bound hypervectors are XOR-ed together and
# added to the bundle in encode_feature_vector (read there as a global).
# Every name used here must also appear in feature_names.
important_pairs = [
    ('zcr_mean', 'entropy_mean'),
    ('rms_mean', 'spectral_rolloff_mean'),
    ('spectral_flatness_mean', 'spectral_centroid_mean'),
    ('rms_max', 'band_ratio_mean')
]

# --- 1. Feature name codebook (via permutation) ---
def generate_feature_codebook(feature_names, D):
    """Build one binary key hypervector per feature name.

    A single random 0/1 vector of length ``D`` is drawn from NumPy's global
    random state; the key for the i-th name (0-based) is that vector
    cyclically rotated by ``i + 1`` positions.

    Returns:
        Dict mapping each feature name to its ``uint8`` hypervector.
    """
    seed_hv = np.random.randint(0, 2, D, dtype=np.uint8)
    codebook = {}
    for shift, name in enumerate(feature_names, start=1):
        codebook[name] = np.roll(seed_hv, shift)
    return codebook

# --- 2. Pre-generate value level hypervectors ---
def generate_value_level_hvs(levels, D):
    """Generate similarity-preserving level hypervectors for quantized values.

    Level ``k`` has its first ``k * D // levels`` positions (taken from one
    shared random ordering of the ``D`` positions) set to 1. Because every
    level's set of ones contains the previous level's set, the Hamming
    distance between two levels equals the difference of their bit counts.
    Neighbouring quantization levels therefore get similar hypervectors and
    distant levels get dissimilar ones.

    Drawing the positions independently for each level does not give that
    property: two adjacent levels would share ones only by chance and could
    be farther apart than a level and the all-zero level 0.

    Args:
        levels: Number of quantization levels.
        D: Hypervector dimensionality.

    Returns:
        List of ``levels`` ``uint8`` arrays of length ``D``.
    """
    # One shared ordering (drawn from NumPy's global random state) so that
    # the levels are nested.
    order = np.random.permutation(D)
    level_hvs = []
    for level in range(levels):
        hv = np.zeros(D, dtype=np.uint8)
        n_bits = level * D // levels
        hv[order[:n_bits]] = 1
        level_hvs.append(hv)
    return level_hvs

# --- 3. Map value to nearest level HV ---
def get_value_hv(levels, value, level_hvs):
    """Return the level hypervector for a value expected to lie in [0, 1].

    The level index is ``int(value * levels)`` clamped to
    ``[0, levels - 1]``, so values outside the unit interval saturate at the
    first or last level.
    """
    index = int(value * levels)
    index = max(index, 0)
    if index > levels - 1:
        index = levels - 1
    return level_hvs[index]

# --- 4. Encode single audio feature vector ---
def encode_feature_vector(features, codebook, level_hvs, D, levels):
    """Encode one feature vector into a single binary hypervector.

    Steps:
      1. Each feature value is mapped to its level hypervector and bound
         (XOR) with the feature's key from ``codebook``.
      2. For every pair in the module-level ``important_pairs`` the two bound
         hypervectors are XOR-ed into an interaction hypervector.
      3. All of the above are bundled by a bitwise majority vote.

    ``features`` is matched to ``codebook`` positionally (dict insertion
    order). ``D`` is accepted for signature symmetry and is not read here.

    Returns:
        ``uint8`` array holding the bundled hypervector.
    """
    assert len(features) == len(codebook)

    feature_dict = dict(zip(codebook.keys(), features))

    # Bind every key with its value level once; the pair step reuses these.
    bound = {
        name: np.bitwise_xor(codebook[name], get_value_hv(levels, value, level_hvs))
        for name, value in feature_dict.items()
    }

    stack = [hv.astype(np.int16) for hv in bound.values()]
    stack.extend(
        np.bitwise_xor(bound[first], bound[second]).astype(np.int16)
        for first, second in important_pairs
    )

    # Majority vote: a bit is set when more than half of the stacked
    # hypervectors have it set.
    votes = np.array(stack).sum(axis=0)
    majority = len(stack) // 2
    return (votes > majority).astype(np.uint8)

#######


## Model training

In [6]:
def train_hd_classifier(dataset, labels, codebook, level_hvs, D, levels, epochs):
    """Train binary class hypervectors with a mistake-driven update rule.

    Args:
        dataset: array-like of already encoded binary (0/1) hypervectors,
            shape (N, D).
        labels: array-like of non-negative integer class ids, shape (N,).
        codebook, level_hvs, levels: accepted for signature compatibility;
            not read by this function.
        D: hypervector dimensionality.
        epochs: number of passes over the data. The first pass uses the given
            order; the data is reshuffled (NumPy global random state) after
            every pass.

    Returns:
        ``uint8`` array of shape (num_classes, D) with the binarized class
        hypervectors, where ``num_classes`` is ``max(labels) + 1``.

    Each class keeps a signed integer accumulator that is binarized with
    ``>= 0``. On a misprediction the query is added to the true class and
    subtracted from the predicted class in bipolar form (0 -> -1, 1 -> +1).
    With raw 0/1 values the zero bits of a query would never contribute, so a
    class could not learn "this position should be 0" from its own samples.
    """
    # np.array copies, so the caller's data is not reordered by the shuffle.
    dataset = np.array(dataset)
    labels = np.array(labels)

    N = len(dataset)

    # Size by the largest id rather than the number of distinct ids, so a
    # class id with no samples (e.g. an empty class folder) cannot cause an
    # out-of-range index.
    num_classes = int(labels.max()) + 1 if labels.size else 0
    # int32 so that many epochs of +/-1 updates cannot overflow.
    real_class_hvs = np.zeros((num_classes, D), dtype=np.int32)
    bin_class_hvs = (real_class_hvs >= 0).astype(np.uint8)

    for epoch in range(epochs):
        for i in range(N):
            query_hv = dataset[i]
            y_true = labels[i]

            # Predict using Hamming distance to the binarized class HVs
            distances = np.sum(query_hv != bin_class_hvs, axis=1)
            y_pred = np.argmin(distances)

            if y_pred != y_true:
                bipolar = 2 * query_hv.astype(np.int32) - 1
                real_class_hvs[y_true] += bipolar
                real_class_hvs[y_pred] -= bipolar
                # Only the two touched rows need re-binarizing.
                bin_class_hvs[y_true] = real_class_hvs[y_true] >= 0
                bin_class_hvs[y_pred] = real_class_hvs[y_pred] >= 0

        # Shuffle for the next epoch
        indices = np.random.permutation(N)
        dataset = dataset[indices]
        labels = labels[indices]

    return bin_class_hvs

def predict_hd(query_hv, class_hvs):
    """Return the index of the class hypervector closest to ``query_hv``.

    Closeness is the Hamming distance (number of differing positions); on a
    tie the lowest class index wins.
    """
    hamming = []
    for class_hv in class_hvs:
        mismatches = query_hv != class_hv
        hamming.append(np.sum(mismatches))
    return np.argmin(hamming)

def evaluate_hd(vectors, labels, class_hvs):
    correct = 0
    for query_hv, y_true in zip(vectors, labels, strict=True):

        # query_hv = encode_feature_vector(x, codebook, level_hvs, D, levels)

        y_pred = predict_hd(query_hv, class_hvs)
        if y_pred == y_true:
            correct += 1
        # else:
            # print(f"Predicted: {y_pred}, True: {y_true}")
    return correct / len(labels)



In [34]:
# --- Main execution: extract features, encode, train and save the HDC model ---

# Containers filled while walking the training split
dataset = [] # one 9-value feature vector per audio file
labels = [] # integer class id per audio file

audio_files, classes = load_dataset("train")

# Class name <-> integer id mappings (ids follow the sorted class order)
label_to_id = {class_name: idx for idx, class_name in enumerate(classes)}
id_to_label = {idx: class_name for class_name, idx in label_to_id.items()}

# Extract features for every file of every class
for _, class_name in enumerate(classes):
    for file_path in audio_files[class_name]:
        features = extract_audio_features(file_path, sr=8000)
        dataset.append(features)
        labels.append(label_to_id[class_name])


# NOTE(review): feature-wise 0-1 normalization is disabled here. get_value_hv
# computes int(value * levels) and clamps the result, so every raw feature
# value >= 1 (e.g. the spectral centroid in Hz) lands on the top level.
# Re-enabling it would also require storing min/max for inference - confirm intent.
# dataset = np.array(dataset)
# dataset = (dataset - dataset.min(axis=0)) / (dataset.max(axis=0) - dataset.min(axis=0))

# Generate the codebooks; the seed makes both random draws reproducible
D = 250  # Hypervector dimensionality
LEVELS = 128 # quantization levels
np.random.seed(42)
codebook = generate_feature_codebook(feature_names, D)
value_level_hvs = generate_value_level_hvs(LEVELS, D)

vectors = [] # encoded hypervector per audio file

for features in dataset:
    encoded_hv = encode_feature_vector(features, codebook, value_level_hvs, D, LEVELS)
    vectors.append(encoded_hv)

vectors = np.array(vectors)
labels = np.array(labels)


# Create the weights folder if it doesn't exist
if not os.path.exists("weights"):
    os.makedirs("weights")

# Train the classifier (single run of 20 epochs)
class_hvs = train_hd_classifier(vectors, labels, codebook, value_level_hvs, D, LEVELS, epochs=20)

# Disabled alternative: store the class hypervectors as booleans
# class_hvs = (class_hvs >= 0).astype(np.dtypes.BoolDType())
# Save the trained model. codebook and label_to_id are dicts; the test cell
# reads them back with allow_pickle=True and .item().
np.save("weights/class_hvs.npy", class_hvs) # class hypervectors
np.save("weights/codebook.npy", codebook) # feature-name codebook
np.save("weights/value_level_hvs.npy", value_level_hvs) # value level hypervectors
np.save("weights/label_to_id.npy", label_to_id) # label_to_id mapping

# Accuracy on the same data the model was trained on
acc = evaluate_hd(vectors, labels, class_hvs)
print(f"Training accuracy: {acc * 100:.2f}%")



Classes:  ['cloth', 'grass', 'metal', 'sandpaper', 'wood']
{'cloth': ['data/train/cloth/artemis_recording_01.wav', 'data/train/cloth/artemis_recording_02.wav', 'data/train/cloth/artemis_recording_03.wav', 'data/train/cloth/artemis_recording_04.wav', 'data/train/cloth/artemis_recording_05.wav', 'data/train/cloth/artemis_recording_06.wav', 'data/train/cloth/artemis_recording_07.wav', 'data/train/cloth/artemis_recording_08.wav', 'data/train/cloth/artemis_recording_09.wav', 'data/train/cloth/artemis_recording_10.wav', 'data/train/cloth/artemis_recording_11.wav', 'data/train/cloth/artemis_recording_12.wav', 'data/train/cloth/artemis_recording_13.wav', 'data/train/cloth/artemis_recording_14.wav', 'data/train/cloth/artemis_recording_15.wav', 'data/train/cloth/artemis_recording_16.wav', 'data/train/cloth/artemis_recording_17.wav', 'data/train/cloth/artemis_recording_18.wav', 'data/train/cloth/artemis_recording_19.wav', 'data/train/cloth/artemis_recording_20.wav', 'data/train/cloth/artemis_reco

In [35]:
# --- Main execution: evaluate the saved HDC model on the test split ---

# Containers filled while walking the test split
dataset = [] # one 9-value feature vector per audio file
labels = [] # integer class id per audio file

audio_files, classes = load_dataset("test")

# Reuse the class-name -> id mapping saved by the training cell so that the
# ids match the rows of the saved class hypervectors
label_to_id = np.load("weights/label_to_id.npy", allow_pickle=True).item()

# Extract features (label_idx from enumerate is not used; ids come from label_to_id)
for label_idx, class_name in enumerate(classes):
    for file_path in audio_files[class_name]:
        features = extract_audio_features(file_path, sr=8000)
        dataset.append(features)
        labels.append(label_to_id[class_name])


# Feature-wise 0-1 normalization is disabled, as in the training cell.
# NOTE(review): if it is re-enabled, the min/max must come from the training
# data, not from this test set.
# dataset = np.array(dataset)
# dataset = (dataset - dataset.min(axis=0)) / (dataset.max(axis=0) - dataset.min(axis=0))

# D and LEVELS are not redefined in this cell; the values still in scope
# from an earlier cell are used below.
# D = 10000  # Hypervector dimensionality
# LEVELS = 256 # quantization levels
np.random.seed(42)
# Load the codebooks saved by the training cell instead of regenerating them
codebook = np.load("weights/codebook.npy", allow_pickle=True).item()
value_level_hvs = np.load("weights/value_level_hvs.npy", allow_pickle=True)

vectors = [] # encoded hypervector per audio file

for features in dataset:
    encoded_hv = encode_feature_vector(features, codebook, value_level_hvs, D, LEVELS)
    vectors.append(encoded_hv)

vectors = np.array(vectors)
labels = np.array(labels)

# Load the trained class hypervectors
class_hvs = np.load("weights/class_hvs.npy", allow_pickle=True)

# Evaluate test performance
acc = evaluate_hd(vectors, labels, class_hvs)
print(f"Testing accuracy: {acc * 100:.2f}%")


Classes:  ['cloth', 'grass', 'metal', 'sandpaper', 'wood']
{'cloth': ['data/test/cloth/artemis_recording_31.wav', 'data/test/cloth/artemis_recording_32.wav', 'data/test/cloth/artemis_recording_33.wav', 'data/test/cloth/artemis_recording_34.wav', 'data/test/cloth/artemis_recording_35.wav'], 'grass': ['data/test/grass/artemis_recording_31.wav', 'data/test/grass/artemis_recording_32.wav', 'data/test/grass/artemis_recording_33.wav', 'data/test/grass/artemis_recording_34.wav', 'data/test/grass/artemis_recording_35.wav'], 'metal': ['data/test/metal/artemis_recording_31.wav', 'data/test/metal/artemis_recording_32.wav', 'data/test/metal/artemis_recording_33.wav', 'data/test/metal/artemis_recording_34.wav', 'data/test/metal/artemis_recording_35.wav'], 'sandpaper': ['data/test/sandpaper/artemis_recording_31.wav', 'data/test/sandpaper/artemis_recording_32.wav', 'data/test/sandpaper/artemis_recording_33.wav', 'data/test/sandpaper/artemis_recording_34.wav', 'data/test/sandpaper/artemis_recording_35

## Export weights to C 

In [36]:
# Export class_hvs, codebook, value_level_hvs and label_to_id to a C header file
def save_to_c_header(class_hvs, codebook, value_level_hvs, label_to_id, output_path="weights.h"):
    """Write the trained HDC model as a C header.

    The header defines ``D``, ``NUM_CLASSES``, ``NUM_FEATURES`` and ``LEVELS``
    and four constant tables: ``class_hvs``, ``codebook`` (rows in the
    dict's iteration order, each annotated with its feature name),
    ``value_level_hvs`` and ``label_names`` (ordered by class id).

    Args:
        class_hvs: non-empty sequence of class hypervectors; the first row's
            length defines ``D``.
        codebook: dict mapping feature name -> hypervector.
        value_level_hvs: sequence of level hypervectors.
        label_to_id: dict mapping class name -> integer id.
        output_path: destination file; defaults to ``"weights.h"`` in the
            current working directory.
    """
    def format_row(hv, trailer=""):
        # One brace-enclosed initializer row, e.g. "  {1, 0, 1},"
        return "  {" + ", ".join(map(str, hv)) + "}," + trailer + "\n"

    parts = [
        "#ifndef HD_WEIGHTS_H\n",
        "#define HD_WEIGHTS_H\n\n",
        "#include <stdint.h>\n\n",
        # Constants
        "#define D {}\n".format(len(class_hvs[0])),
        "#define NUM_CLASSES {}\n".format(len(class_hvs)),
        "#define NUM_FEATURES {}\n".format(len(codebook)),
        "#define LEVELS {}\n\n".format(len(value_level_hvs)),
    ]

    # Class hypervectors
    parts.append("const uint8_t class_hvs[NUM_CLASSES][D] = {\n")
    parts.extend(format_row(hv) for hv in class_hvs)
    parts.append("};\n\n")

    # Codebook, one row per feature with the feature name as a trailing comment
    parts.append("const uint8_t codebook[NUM_FEATURES][D] = {\n")
    parts.extend(
        format_row(hv, " // {}".format(feature_name))
        for feature_name, hv in codebook.items()
    )
    parts.append("};\n\n")

    # Value level hypervectors
    parts.append("const uint8_t value_level_hvs[LEVELS][D] = {\n")
    parts.extend(format_row(hv) for hv in value_level_hvs)
    parts.append("};\n\n")

    # Label names, ordered by class id so that index == id
    parts.append("const char* label_names[NUM_CLASSES] = {\n")
    parts.extend(
        '  "{}",\n'.format(label_name)
        for label_name, _ in sorted(label_to_id.items(), key=lambda item: item[1])
    )
    parts.append("};\n\n")

    parts.append("#endif // HD_WEIGHTS_H\n")

    with open(output_path, "w", encoding="utf-8") as f:
        f.write("".join(parts))

# Save to C header file
save_to_c_header(class_hvs, codebook, value_level_hvs, label_to_id)

In [54]:
import librosa
import numpy as np
import soundfile as sf
import sys

def convert_to_pcm16(input_file, output_file, target_sr=8000):
    """Dump the first 5 seconds of an audio file as a C float array.

    The file is loaded with ``librosa.load`` at ``target_sr`` and its samples
    are written to ``audio_data.h`` as ``audio_data_vector`` with six
    decimals each, together with an ``AUDIO_LENGTH`` define.

    Despite the name, no PCM16 conversion takes place and ``output_file`` is
    not read by this function.
    """
    y, sr = librosa.load(input_file, sr=target_sr, duration=5.0)

    sample_lines = ",\n".join(f"    {sample:.6f}" for sample in y)
    header_text = "".join([
        "#ifndef AUDIO_DATA_H\n",
        "#define AUDIO_DATA_H\n\n",
        "#include <stdint.h>\n\n",
        f"#define AUDIO_LENGTH {len(y)}\n\n",
        "const float audio_data_vector[AUDIO_LENGTH] = {\n",
        sample_lines,
        "\n};\n\n",
        "#endif // AUDIO_DATA_H\n",
    ])

    # Save the samples into a C header file
    with open("audio_data.h", "w") as f:
        f.write(header_text)

if __name__ == '__main__':
    # if len(sys.argv) != 3:
    #     print("Usage: python convert_to_pcm16.py <input_audio_file> <output_wav_file>")
    # else:
    # input_file = sys.argv[1]
    # output_file = sys.argv[2]
    convert_to_pcm16("data/test/sandpaper/artemis_recording_33.wav", "converted.wav")


In [49]:
extract_audio_features("data/test/metal/artemis_recording_33.wav", sr=8000)

[0.2797721354166667,
 0.045330173831118946,
 0.0020035209568838278,
 0.00376133993268013,
 0.9683661591924575,
 1782.7016346292512,
 2965.7337816455697,
 0.58610415,
 2.9254842]

# Other Models

In [1]:
import os
from glob import glob
import numpy as np
import torchaudio
import torch
import torchaudio.transforms as T
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle
import tempfile
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

# Step 1: Load dataset
def load_dataset(type="train"):
    """Index the ``.wav`` files of one dataset split, grouped by class.

    Args:
        type: Name of the split folder under ``data/`` (e.g. ``"train"``).
            The name shadows the builtin but is kept for caller compatibility.

    Returns:
        ``(audio_files, classes)`` where ``classes`` is the sorted list of
        class folder names and ``audio_files`` maps each class name to the
        sorted list of its ``*.wav`` paths.
    """
    audio_folder = f"data/{type}"
    # Only sub-directories are classes; stray files such as ".DS_Store" would
    # otherwise become bogus (empty) classes and shift the label ids.
    classes = sorted(
        name
        for name in os.listdir(audio_folder)
        if os.path.isdir(os.path.join(audio_folder, name))
    )
    audio_files = {c: sorted(glob(os.path.join(audio_folder, c, "*.wav"))) for c in classes}
    return audio_files, classes

# Step 2: Extract features
def extract_features(file_path, transform, max_len=250):
    """Load an audio file and return a fixed-length, flattened feature vector.

    The waveform is down-mixed to mono when it has several channels,
    resampled to 16 kHz when needed, passed through ``transform`` and then
    zero-padded or truncated to exactly ``max_len`` time steps.

    Args:
        file_path: Path of the audio file.
        transform: Callable applied to the (1, time) waveform; the notebook
            passes a ``torchaudio.transforms.MFCC`` instance.
        max_len: Number of time steps kept.

    Returns:
        1-D NumPy array of length ``max_len * n_coefficients``.
    """
    waveform, sr = torchaudio.load(file_path)
    # Without the down-mix, a multi-channel file keeps its channel axis
    # through squeeze(0) and the flattened vector has the wrong length.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    if sr != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
        waveform = resampler(waveform)
    # (1, n_coeff, time) -> (time, n_coeff)
    mfcc = transform(waveform).squeeze(0).transpose(0, 1)
    if mfcc.shape[0] < max_len:
        pad = torch.zeros(max_len - mfcc.shape[0], mfcc.shape[1], dtype=mfcc.dtype)
        mfcc = torch.cat((mfcc, pad), dim=0)
    else:
        mfcc = mfcc[:max_len, :]
    return mfcc.flatten().numpy()

# Step 3: ML and DNN benchmarking
def model_size_mb(model):
    """Return the size of ``model``'s pickle serialization in MiB.

    The object is pickled in memory and the byte count is measured directly.
    Measuring a temporary file with ``os.path.getsize`` before flushing it
    under-reports the size (0 for small models), because the pickled data
    may still sit in the write buffer.
    """
    return len(pickle.dumps(model)) / (1024 * 1024)

# Step 4: Torch NN Models
class FeedforwardNN(nn.Module):
    """Two-layer perceptron: input -> 128 hidden units (ReLU) -> class logits."""

    def __init__(self, input_size, num_classes):
        super().__init__()
        hidden_units = 128
        self.fc1 = nn.Linear(input_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return unnormalized class scores for a batch of flat feature vectors."""
        hidden = F.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return logits

class TinyCNN(nn.Module):
    """One conv layer + 2x2 max-pool + linear classifier for 2-D feature maps.

    Args:
        num_classes: Number of output logits.
        input_shape: ``(height, width)`` of the single-channel input; the
            default matches a 13-coefficient x 250-frame MFCC map.

    The linear layer is sized from ``input_shape``. ``MaxPool2d(2)`` halves
    both spatial axes (rounding down), so a 13 x 250 input yields
    8 x 6 x 125 features; a hard-coded 8 * 13 * 125 does not match that and
    fails in ``forward``.
    """

    def __init__(self, num_classes, input_shape=(13, 250)):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2)
        height, width = input_shape
        # padding=1 with a 3x3 kernel keeps H x W; the pool then halves both.
        self.fc1 = nn.Linear(8 * (height // 2) * (width // 2), num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # [B, 8, H // 2, W // 2]
        x = x.view(x.size(0), -1)
        return self.fc1(x)

# Run everything: build the MFCC dataset and benchmark the classical models
audio_files, classes = load_dataset("train")
label_to_id = {name: i for i, name in enumerate(classes)}
id_to_label = {i: name for name, i in label_to_id.items()}

# 13 MFCCs per frame; extract_features pads/truncates to 250 frames and flattens
transform = T.MFCC(sample_rate=16000, n_mfcc=13, melkwargs={"n_fft": 400, "hop_length": 160, "n_mels": 23})

# X: one flattened MFCC vector per file, y: integer class id per file
X, y = [], []
for label, files in audio_files.items():
    for f in files:
        X.append(extract_features(f, transform))
        y.append(label_to_id[label])
X, y = np.array(X), np.array(y)

# NOTE(review): the scaler is fitted on all samples before the train/validation
# split below, so validation statistics leak into the scaling - confirm intent.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ML models
# No random_state is passed, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, stratify=y)
results = {}

# Fit each classical model and record validation accuracy and pickled size
for name, model in {
    "SVM": SVC(),
    "RandomForest": RandomForestClassifier(n_estimators=100)
}.items():
    model.fit(X_train, y_train)
    acc = model.score(X_test, y_test)
    size_mb = model_size_mb(model)
    results[name] = {"accuracy": acc, "model_size_MB": size_mb}


In [3]:

# DNN models
# Tensors built from the scaled split created in the classical-ML cell
X_train_torch = torch.tensor(X_train, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.long)
X_test_torch = torch.tensor(X_test, dtype=torch.float32)
y_test_torch = torch.tensor(y_test, dtype=torch.long)

train_loader = DataLoader(TensorDataset(X_train_torch, y_train_torch), batch_size=16, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test_torch, y_test_torch), batch_size=16)

ffnn = FeedforwardNN(X_train.shape[1], len(classes))
optimizer = torch.optim.Adam(ffnn.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

# 10 epochs of mini-batch training
for epoch in range(10):
    for xb, yb in train_loader:
        optimizer.zero_grad()
        loss = loss_fn(ffnn(xb), yb)
        loss.backward()
        optimizer.step()

# Validation accuracy and on-disk size of the saved state_dict
ffnn.eval()
with torch.no_grad():
    acc = (ffnn(X_test_torch).argmax(1) == y_test_torch).float().mean().item()
    torch.save(ffnn.state_dict(), "ffnn_model.pt")
    size_mb = os.path.getsize("ffnn_model.pt") / (1024 * 1024)
    results["FeedforwardNN"] = {"accuracy": acc, "model_size_MB": size_mb}

# TinyCNN using reshaped MFCC
# Undo the flattening: [B, 250 * 13] -> [B, 250, 13], then swap the last two
# axes. Note that this uses the unscaled X, not X_scaled.
X_cnn = X.reshape(-1, 250, 13)
X_cnn = np.transpose(X_cnn, (0, 2, 1))  # [B, 13, 250]
X_cnn = X_cnn[:, :, :250]  # no-op here: the time axis already has length 250
X_cnn = X_cnn[:, :, ::2]   # keep every second frame -> [B, 13, 125]
# Update the CNN class to match the actual shape of the data
class TinyCNN(nn.Module):
    """One conv layer + 2x2 max-pool + linear classifier for [B, 1, 13, 125] input."""

    def __init__(self, num_classes):
        super().__init__()
        out_channels = 8
        # Spatial size after the 2x2 pool: 13 // 2 = 6 rows, 125 // 2 = 62 columns
        pooled_rows, pooled_cols = 6, 62
        self.conv1 = nn.Conv2d(1, out_channels, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(out_channels * pooled_rows * pooled_cols, num_classes)

    def forward(self, x):
        """Return class logits for a batch of single-channel feature maps."""
        feature_maps = self.pool(F.relu(self.conv1(x)))
        flat = feature_maps.view(feature_maps.size(0), -1)  # one row per sample
        return self.fc1(flat)

# Separate split for the CNN. No random_state is passed, so this split is
# independent of the one used for the classical models and the FFNN.
X_train_cnn, X_test_cnn, y_train_cnn, y_test_cnn = train_test_split(X_cnn, y, test_size=0.2, stratify=y)
# Insert the channel axis: [B, 13, 125] -> [B, 1, 13, 125]
X_train_cnn = torch.tensor(X_train_cnn[:, np.newaxis, :, :], dtype=torch.float32)
X_test_cnn = torch.tensor(X_test_cnn[:, np.newaxis, :, :], dtype=torch.float32)
y_train_cnn = torch.tensor(y_train_cnn, dtype=torch.long)
y_test_cnn = torch.tensor(y_test_cnn, dtype=torch.long)

train_loader_cnn = DataLoader(TensorDataset(X_train_cnn, y_train_cnn), batch_size=16, shuffle=True)
cnn = TinyCNN(num_classes=len(classes))
optimizer = torch.optim.Adam(cnn.parameters(), lr=0.001)

# 10 epochs of mini-batch training; loss_fn is the one created for the FFNN
for epoch in range(10):
    for xb, yb in train_loader_cnn:
        optimizer.zero_grad()
        loss = loss_fn(cnn(xb), yb)
        loss.backward()
        optimizer.step()

# Validation accuracy and on-disk size of the saved state_dict
cnn.eval()
with torch.no_grad():
    acc = (cnn(X_test_cnn).argmax(1) == y_test_cnn).float().mean().item()
    torch.save(cnn.state_dict(), "tinycnn_model.pt")
    size_mb = os.path.getsize("tinycnn_model.pt") / (1024 * 1024)
    results["TinyCNN"] = {"accuracy": acc, "model_size_MB": size_mb}

# Print the validation results of all four models
import pprint
pprint.pprint(results)




In [4]:
# === LOAD AND EVALUATE ON TEST DATA ===

# Load the held-out test split and extract the same MFCC features
test_audio_files, _ = load_dataset("test")
X_test_real, y_test_real = [], []
for label, files in test_audio_files.items():
    for f in files:
        X_test_real.append(extract_features(f, transform))
        y_test_real.append(label_to_id[label])
X_test_real = np.array(X_test_real)
y_test_real = np.array(y_test_real)

# === Apply same scaler (fitted on the training data) ===
X_test_real_scaled = scaler.transform(X_test_real)

# === Evaluate SVM and RF ===
# Fresh models are fitted here on the complete training set; the instances
# from the benchmarking cell are not reused.
results_test = {}
for name, model in {
    "SVM": SVC(),
    "RandomForest": RandomForestClassifier(n_estimators=100)
}.items():
    model.fit(X_scaled, y)  # train on full training set
    acc = model.score(X_test_real_scaled, y_test_real)
    results_test[name] = {"test_accuracy": acc}

# === Evaluate Feedforward NN ===
# The network trained in the DNN cell is evaluated as-is (no retraining).
X_test_real_torch = torch.tensor(X_test_real_scaled, dtype=torch.float32)
y_test_real_torch = torch.tensor(y_test_real, dtype=torch.long)

ffnn.eval()
with torch.no_grad():
    acc = (ffnn(X_test_real_torch).argmax(1) == y_test_real_torch).float().mean().item()
    results_test["FeedforwardNN"] = {"test_accuracy": acc}

# === Evaluate TinyCNN ===
# Same preprocessing as for CNN training: unscaled features reshaped to
# [B, 13, 250], then every second frame is kept.
X_test_real_cnn = X_test_real.reshape(-1, 250, 13)
X_test_real_cnn = np.transpose(X_test_real_cnn, (0, 2, 1))[:, :, :250]
X_test_real_cnn = X_test_real_cnn[:, :, ::2]  # Downsample

X_test_real_cnn_tensor = torch.tensor(X_test_real_cnn[:, np.newaxis, :, :], dtype=torch.float32)
y_test_real_cnn_tensor = torch.tensor(y_test_real, dtype=torch.long)

cnn.eval()
with torch.no_grad():
    acc = (cnn(X_test_real_cnn_tensor).argmax(1) == y_test_real_cnn_tensor).float().mean().item()
    results_test["TinyCNN"] = {"test_accuracy": acc}

print("\n=== FINAL TEST ACCURACIES ===")
pprint.pprint(results_test)




In [9]:
# query HV is compared to all the class HVs in the item memory using Hamming distance

# Plots

## Accuracy vs Epoch for different D values

In [19]:
# --- Main execution: training accuracy vs. epochs for a hyperparameter sweep ---

# Containers filled while walking the training split
dataset = [] # one 9-value feature vector per audio file
labels = [] # integer class id per audio file

audio_files, classes = load_dataset("train")

# Class name <-> integer id mappings
label_to_id = {class_name: idx for idx, class_name in enumerate(classes)}
id_to_label = {idx: class_name for class_name, idx in label_to_id.items()}

# Extract features
for _, class_name in enumerate(classes):
    for file_path in audio_files[class_name]:
        features = extract_audio_features(file_path, sr=8000)
        dataset.append(features)
        labels.append(label_to_id[class_name])


# Normalize feature-wise (0-1 scaling). Unlike the main training cell, the
# normalization is active here.
# NOTE(review): a feature that is constant over the whole dataset makes the
# denominator zero and produces NaN/inf - confirm this cannot happen.
dataset = np.array(dataset)
dataset = (dataset - dataset.min(axis=0)) / (dataset.max(axis=0) - dataset.min(axis=0))

# generate codebooks
D = 100  # Hypervector dimensionality
LEVELS = 256 # quantization levels (overwritten by the loop variable below)

# Despite its name, D_values currently holds the LEVELS values to sweep; the
# commented alternatives are other sweeps used previously.
D_values = [2**2, 2**4, 2**6, 2**8] # levels
# D_values = [2**6, 2**8, 2**10, 2**12]
# D_values = [10, 50, 100, 1000] # D
accuracy_values = [] # one list of 20 accuracies (in %) per swept value

for LEVELS in D_values:
    # Re-seed so every swept value starts from the same random state
    np.random.seed(42)
    codebook = generate_feature_codebook(feature_names, D)
    value_level_hvs = generate_value_level_hvs(LEVELS, D)

    vectors = [] # encode dataset vectors

    for features in dataset:
        encoded_hv = encode_feature_vector(features, codebook, value_level_hvs, D, LEVELS)
        vectors.append(encoded_hv)

    vectors = np.array(vectors)
    labels = np.array(labels)


    # create weights folder if it doesn't exist
    if not os.path.exists("weights"):
        os.makedirs("weights")

    accuracys = []
    for i in range(20):
        # Train from scratch for i + 1 epochs; each point on the curve is an
        # independent training run, not a checkpoint of one run.
        class_hvs = train_hd_classifier(vectors, labels, codebook, value_level_hvs, D, LEVELS, epochs=i+1)

        # Saving is disabled for the sweep:
        # np.save("weights/class_hvs.npy", class_hvs) # save class hypervectors
        # np.save("weights/codebook.npy", codebook) # save codebook
        # np.save("weights/value_level_hvs.npy", value_level_hvs) # save value level hypervectors
        # np.save("weights/label_to_id.npy", label_to_id) # save label_to_id mapping

        # Accuracy on the training data itself, stored in percent
        acc = evaluate_hd(vectors, labels, class_hvs)
        accuracys.append(acc*100)
        # print(f"{i+1}: Training accuracy: {acc * 100:.2f}%")
    accuracy_values.append(accuracys)





In [20]:
# --- Plotting the results ---
# One curve per swept value. The sweep cell iterates `for LEVELS in D_values`
# with D fixed, so the legend is labelled LEVELS. A loop variable other than
# `D` is used so that plotting does not overwrite the global dimensionality D.
plt.figure(figsize=(10, 6))
for swept_value, curve in zip(D_values, accuracy_values):
    plt.plot(range(1, 21), curve, label=f'LEVELS = {swept_value}')
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
# plt.title('Training Accuracy vs Epochs for Different D Values')
plt.xticks(range(1, 21))
plt.grid()
plt.legend()
plt.tight_layout()



## Memory Calculator

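Assuming one byte per hypervector element (`uint8_t`, as in the exported C header):

- Codebook: D * f bytes (f = number of features)
- Class HVs: D * n bytes (n = number of classes)
- Level HVs: D * l bytes (l = number of quantization levels)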

In [47]:
# calculate the memory usage for a given D, LEVELS, number of classes, number of features

def calculate_memory_usage(D, LEVELS, num_classes, num_features):
    """Return the total size in KB of the three hypervector tables.

    Every hypervector element is counted as one byte. The total is the sum
    of the class hypervectors (``D * num_classes``), the feature codebook
    (``D * num_features``) and the value-level hypervectors (``LEVELS * D``).
    """
    bytes_per_element = 1
    bytes_per_kb = 1024 ** 1

    class_kb = D * num_classes * bytes_per_element / bytes_per_kb
    codebook_kb = D * num_features * bytes_per_element / bytes_per_kb
    levels_kb = LEVELS * D * bytes_per_element / bytes_per_kb

    return class_kb + codebook_kb + levels_kb

# Example usage
D = 250
LEVELS = 128
num_classes = 5
num_features = 9

memory_usage = calculate_memory_usage(D, LEVELS, num_classes, num_features)
print(f"Total Memory Usage: {memory_usage:.2f} KB")

Total Memory Usage: 34.67 KB
