In [None]:
#@title Load pitch dataset

import numpy as np

data = np.load("pitch_tempo.npz")

X = data["X"]          # (N, 64) log-mel, đã scale
y = data["pitch"]      # (N,) pitch class [0..50]

print("X:", X.shape)
print("y:", y.shape)

# bỏ unvoiced (class = 0) nếu muốn
mask = y > 0
X = X[mask]
y = y[mask]

print("After voiced-only:")
print("X:", X.shape)
print("y:", y.shape)

NUM_CLASSES = int(y.max() + 1)

X: (944904, 64)
y: (944904,)
After voiced-only:
X: (618376, 64)
y: (618376,)


In [None]:
#@title Export mean and scale for TinyML
from sklearn.preprocessing import StandardScaler
import numpy as np

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

mean = scaler.mean_
scale = scaler.scale_

print("Mean:")
print(mean)

print("\nScale (std):")
print(scale)

Mean:
[-31.47932516 -31.619726   -34.58918788 -31.43537355 -29.15595713
 -29.38718413 -31.50365041 -33.58297018 -34.13655313 -34.96699259
 -36.43684129 -37.82619215 -39.41558779 -40.53129804 -42.09631899
 -42.42878572 -43.5009231  -46.13483026 -48.68916782 -49.24920747
 -49.13421082 -51.26572491 -52.63556974 -52.29285503 -53.11413171
 -52.14604688 -52.12687569 -51.38349772 -52.05142022 -50.65060592
 -50.57307541 -52.74523542 -54.91302744 -54.85145829 -56.98773544
 -53.41452484 -53.41389753 -51.60816136 -52.3967419  -52.68249512
 -51.82719158 -52.67357656 -53.2603045  -55.27142623 -54.17020182
 -57.95720256 -56.94443876 -58.20819817 -57.87664787 -57.94370646
 -57.26015645 -57.07221509 -58.74828378 -57.61877876 -57.02966454
 -56.835714   -56.72278572 -56.50173418 -55.72662125 -56.0367207
 -56.04366191 -55.51121848 -54.8771185  -56.70047583]

Scale (std):
[13.35365875 13.69935291 15.74900158 15.6477659  15.31074646 15.5268877
 15.59203407 14.83102993 14.54253457 15.16858525 15.53326505 15

In [None]:
#@title Convert mean and scale to C array
def to_c_array(name, arr, per_line=8):
    print(f"static const float {name}[{len(arr)}] = {{")
    for i in range(0, len(arr), per_line):
        chunk = ", ".join(f"{x:.8f}f" for x in arr[i:i+per_line])
        print("  " + chunk + ("," if i + per_line < len(arr) else ""))
    print("};\n")

to_c_array("PITCH_MEAN", scaler.mean_)
to_c_array("PITCH_SCALE", scaler.scale_)


static const float PITCH_MEAN[64] = {
  -31.47932516f, -31.61972600f, -34.58918788f, -31.43537355f, -29.15595713f, -29.38718413f, -31.50365041f, -33.58297018f,
  -34.13655313f, -34.96699259f, -36.43684129f, -37.82619215f, -39.41558779f, -40.53129804f, -42.09631899f, -42.42878572f,
  -43.50092310f, -46.13483026f, -48.68916782f, -49.24920747f, -49.13421082f, -51.26572491f, -52.63556974f, -52.29285503f,
  -53.11413171f, -52.14604688f, -52.12687569f, -51.38349772f, -52.05142022f, -50.65060592f, -50.57307541f, -52.74523542f,
  -54.91302744f, -54.85145829f, -56.98773544f, -53.41452484f, -53.41389753f, -51.60816136f, -52.39674190f, -52.68249512f,
  -51.82719158f, -52.67357656f, -53.26030450f, -55.27142623f, -54.17020182f, -57.95720256f, -56.94443876f, -58.20819817f,
  -57.87664787f, -57.94370646f, -57.26015645f, -57.07221509f, -58.74828378f, -57.61877876f, -57.02966454f, -56.83571400f,
  -56.72278572f, -56.50173418f, -55.72662125f, -56.03672070f, -56.04366191f, -55.51121848f, -54.87711850f, -

In [None]:
#@title Train / Val split + downsample

from sklearn.model_selection import train_test_split
import numpy as np

X_tr, X_va, y_tr, y_va = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Downsample train set
MAX_TRAIN = 200000
idx = np.random.choice(len(X_tr), MAX_TRAIN, replace=False)

X_tr = X_tr[idx]
y_tr = y_tr[idx]

print("Train:", X_tr.shape)
print("Val:", X_va.shape)

Train: (200000, 64)
Val: (123676, 64)


In [None]:
#@title Train teacher model

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

teacher = MLPClassifier(
    hidden_layer_sizes=(512, 512, 256),
    activation="relu",
    solver="adam",
    batch_size=1024,
    max_iter=20,
    random_state=42,
    verbose=True
)

teacher.fit(X_tr, y_tr)

y_pred = teacher.predict(X_va)
print("Teacher Val ACC:", accuracy_score(y_va, y_pred))

Iteration 1, loss = 2.08548552
Iteration 2, loss = 1.08500554
Iteration 3, loss = 0.99435794
Iteration 4, loss = 0.93958723
Iteration 5, loss = 0.89683094
Iteration 6, loss = 0.86163016
Iteration 7, loss = 0.83205723
Iteration 8, loss = 0.81132963
Iteration 9, loss = 0.79039115
Iteration 10, loss = 0.77546890
Iteration 11, loss = 0.75723983
Iteration 12, loss = 0.73441222
Iteration 13, loss = 0.72844275
Iteration 14, loss = 0.71359600
Iteration 15, loss = 0.69948239
Iteration 16, loss = 0.68741290
Iteration 17, loss = 0.67780142
Iteration 18, loss = 0.66728184
Iteration 19, loss = 0.65770030
Iteration 20, loss = 0.64974706




Teacher Val ACC: 0.7887787444613344


In [None]:
#@title Generate soft labels from teacher

T = 4.0  # temperature

logits = teacher.predict_proba(X_tr)
soft_labels = np.log(logits + 1e-9) / T
soft_labels = np.exp(soft_labels)
soft_labels /= soft_labels.sum(axis=1, keepdims=True)

print("Soft labels:", soft_labels.shape)

Soft labels: (200000, 49)


In [None]:
#@title Train student model
import tensorflow as tf
from tensorflow.keras import layers, models

NUM_CLASSES = soft_labels.shape[1]
input_shape = X_tr.shape[1]

student = models.Sequential([
    layers.Input(shape=(input_shape,)),
    layers.Dense(64, activation='tanh'),
    layers.Dense(32, activation='tanh'),
    layers.Dense(NUM_CLASSES, activation='softmax')
])

student.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

student.fit(
    X_tr, soft_labels,
    validation_data=(X_va, teacher.predict_proba(X_va)),
    batch_size=512,
    epochs=30
)

Epoch 1/30
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.2295 - loss: 3.5393 - val_accuracy: 0.4872 - val_loss: 2.3988
Epoch 2/30
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.5211 - loss: 3.2149 - val_accuracy: 0.5661 - val_loss: 2.1823
Epoch 3/30
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.5771 - loss: 3.1634 - val_accuracy: 0.5871 - val_loss: 2.1209
Epoch 4/30
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.5959 - loss: 3.1420 - val_accuracy: 0.6053 - val_loss: 2.0911
Epoch 5/30
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.6123 - loss: 3.1264 - val_accuracy: 0.6185 - val_loss: 2.0717
Epoch 6/30
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.6225 - loss: 3.1192 - val_accuracy: 0.6227 - val_loss: 2.0437
Epoch 7/30
[1m391/391[0m 

<keras.src.callbacks.history.History at 0x7f6e262d3a10>

In [None]:
#@title Baseline small MLP (no distillation)

baseline = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    activation="tanh",
    max_iter=30,
    random_state=42
)

baseline.fit(X_tr, y_tr)
pred = baseline.predict(X_va)

print("Baseline ACC:", accuracy_score(y_va, pred))



Baseline ACC: 0.6702513017885443


In [None]:
#@title Finetune teacher model
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import numpy as np

teacher_configs = [
    (512, 512, 256),
    (256, 256, 128),
    (512, 256, 128),
    (256, 128, 64)
]
activations = ["relu", "tanh"]

best_val_acc_teacher = 0
best_teacher = None
best_teacher_cfg = None

for cfg in teacher_configs:
    for act in activations:
        print("\nTraining teacher with hidden layers:", cfg, "activation:", act)
        teacher = MLPClassifier(
            hidden_layer_sizes=cfg,
            activation=act,
            solver="adam",
            batch_size=1024,
            max_iter=30,
            random_state=42,
            verbose=True
        )
        teacher.fit(X_tr, y_tr)
        y_pred = teacher.predict(X_va)
        val_acc = accuracy_score(y_va, y_pred)
        print("Validation ACC:", val_acc)

        if val_acc > best_val_acc_teacher:
            best_val_acc_teacher = val_acc
            best_teacher = teacher
            best_teacher_cfg = (cfg, act)

print("\nBest teacher ACC:", best_val_acc_teacher, "Config:", best_teacher_cfg)


Training teacher with hidden layers: (512, 512, 256) activation: relu
Iteration 1, loss = 2.08548552
Iteration 2, loss = 1.08500554
Iteration 3, loss = 0.99435794
Iteration 4, loss = 0.93958723
Iteration 5, loss = 0.89683094
Iteration 6, loss = 0.86163016
Iteration 7, loss = 0.83205723
Iteration 8, loss = 0.81132963
Iteration 9, loss = 0.79039115
Iteration 10, loss = 0.77546890
Iteration 11, loss = 0.75723983
Iteration 12, loss = 0.73441222
Iteration 13, loss = 0.72844275
Iteration 14, loss = 0.71359600
Iteration 15, loss = 0.69948239
Iteration 16, loss = 0.68741290
Iteration 17, loss = 0.67780142
Iteration 18, loss = 0.66728184
Iteration 19, loss = 0.65770030
Iteration 20, loss = 0.64974706
Iteration 21, loss = 0.64230746
Iteration 22, loss = 0.63083667
Iteration 23, loss = 0.61848188
Iteration 24, loss = 0.61214731
Iteration 25, loss = 0.60254083
Iteration 26, loss = 0.59167045
Iteration 27, loss = 0.58471286
Iteration 28, loss = 0.58602224
Iteration 29, loss = 0.57037575
Iteration 



Validation ACC: 0.8016268313981694

Training teacher with hidden layers: (512, 512, 256) activation: tanh
Iteration 1, loss = 1.37320279
Iteration 2, loss = 1.08179211
Iteration 3, loss = 1.00457538
Iteration 4, loss = 0.95489028
Iteration 5, loss = 0.92557809
Iteration 6, loss = 0.90312783
Iteration 7, loss = 0.87776977
Iteration 8, loss = 0.86027657
Iteration 9, loss = 0.84851354
Iteration 10, loss = 0.83227906
Iteration 11, loss = 0.80940130
Iteration 12, loss = 0.80552523
Iteration 13, loss = 0.80256619
Iteration 14, loss = 0.79150293
Iteration 15, loss = 0.77720119
Iteration 16, loss = 0.76964709
Iteration 17, loss = 0.75972127
Iteration 18, loss = 0.75183690
Iteration 19, loss = 0.74657642
Iteration 20, loss = 0.74175399
Iteration 21, loss = 0.72773606
Iteration 22, loss = 0.72882911
Iteration 23, loss = 0.72165385
Iteration 24, loss = 0.70982985
Iteration 25, loss = 0.70670781
Iteration 26, loss = 0.69854386
Iteration 27, loss = 0.68255695
Iteration 28, loss = 0.68897746
Iterati



Validation ACC: 0.7775397005077784

Training teacher with hidden layers: (256, 256, 128) activation: relu
Iteration 1, loss = 2.33621798
Iteration 2, loss = 1.21456274
Iteration 3, loss = 1.10774108
Iteration 4, loss = 1.04512478
Iteration 5, loss = 1.00165264
Iteration 6, loss = 0.97313011
Iteration 7, loss = 0.94194041
Iteration 8, loss = 0.91454878
Iteration 9, loss = 0.89628177
Iteration 10, loss = 0.87576891
Iteration 11, loss = 0.85821362
Iteration 12, loss = 0.84369273
Iteration 13, loss = 0.83304874
Iteration 14, loss = 0.81730018
Iteration 15, loss = 0.80472065
Iteration 16, loss = 0.79865918
Iteration 17, loss = 0.78735859
Iteration 18, loss = 0.77488347
Iteration 19, loss = 0.76805844
Iteration 20, loss = 0.76274418
Iteration 21, loss = 0.75432699
Iteration 22, loss = 0.74945368
Iteration 23, loss = 0.73587553
Iteration 24, loss = 0.73288495
Iteration 25, loss = 0.72183366
Iteration 26, loss = 0.71779635
Iteration 27, loss = 0.71131720
Iteration 28, loss = 0.70477091
Iterati



Validation ACC: 0.7852453184126266

Training teacher with hidden layers: (256, 256, 128) activation: tanh
Iteration 1, loss = 1.58077913
Iteration 2, loss = 1.17852096
Iteration 3, loss = 1.09742080
Iteration 4, loss = 1.04680324
Iteration 5, loss = 1.00381716
Iteration 6, loss = 0.97325030
Iteration 7, loss = 0.95191224
Iteration 8, loss = 0.93163005
Iteration 9, loss = 0.91309808
Iteration 10, loss = 0.89430497
Iteration 11, loss = 0.88625468
Iteration 12, loss = 0.86618109
Iteration 13, loss = 0.85970082
Iteration 14, loss = 0.84947184
Iteration 15, loss = 0.84076360
Iteration 16, loss = 0.82688193
Iteration 17, loss = 0.82172988
Iteration 18, loss = 0.81648525
Iteration 19, loss = 0.80242793
Iteration 20, loss = 0.80045171
Iteration 21, loss = 0.79477160
Iteration 22, loss = 0.79176809
Iteration 23, loss = 0.78049736
Iteration 24, loss = 0.77614983
Iteration 25, loss = 0.77355421
Iteration 26, loss = 0.76777115
Iteration 27, loss = 0.76919737
Iteration 28, loss = 0.75388738
Iterati



Validation ACC: 0.768039069827614

Training teacher with hidden layers: (512, 256, 128) activation: relu
Iteration 1, loss = 2.27007595
Iteration 2, loss = 1.18516670
Iteration 3, loss = 1.07213848
Iteration 4, loss = 1.01519860
Iteration 5, loss = 0.97341418
Iteration 6, loss = 0.93881310
Iteration 7, loss = 0.91292475
Iteration 8, loss = 0.88778767
Iteration 9, loss = 0.87073592
Iteration 10, loss = 0.85554443
Iteration 11, loss = 0.84015995
Iteration 12, loss = 0.82603215
Iteration 13, loss = 0.81508800
Iteration 14, loss = 0.80128473
Iteration 15, loss = 0.79237047
Iteration 16, loss = 0.78218713
Iteration 17, loss = 0.77496752
Iteration 18, loss = 0.76531684
Iteration 19, loss = 0.75905323
Iteration 20, loss = 0.75102676
Iteration 21, loss = 0.73977100
Iteration 22, loss = 0.73515301
Iteration 23, loss = 0.72497888
Iteration 24, loss = 0.71743335
Iteration 25, loss = 0.71341258
Iteration 26, loss = 0.70543704
Iteration 27, loss = 0.70397754
Iteration 28, loss = 0.69457904
Iteratio



Validation ACC: 0.7881804068695624

Training teacher with hidden layers: (512, 256, 128) activation: tanh
Iteration 1, loss = 1.51173116
Iteration 2, loss = 1.13726816
Iteration 3, loss = 1.06478600
Iteration 4, loss = 1.01600739
Iteration 5, loss = 0.98750845
Iteration 6, loss = 0.96185603
Iteration 7, loss = 0.93599231
Iteration 8, loss = 0.91476059
Iteration 9, loss = 0.90717996
Iteration 10, loss = 0.89206332
Iteration 11, loss = 0.87802240
Iteration 12, loss = 0.86649242
Iteration 13, loss = 0.86286698
Iteration 14, loss = 0.85016628
Iteration 15, loss = 0.84038320
Iteration 16, loss = 0.83556033
Iteration 17, loss = 0.83184871
Iteration 18, loss = 0.82623577
Iteration 19, loss = 0.82418541
Iteration 20, loss = 0.81542632
Iteration 21, loss = 0.81499353
Iteration 22, loss = 0.80752890
Iteration 23, loss = 0.79918580
Iteration 24, loss = 0.79953755
Iteration 25, loss = 0.79255912
Iteration 26, loss = 0.78264291
Iteration 27, loss = 0.77868018
Iteration 28, loss = 0.78235436
Iterati



Validation ACC: 0.7661793719072415

Training teacher with hidden layers: (256, 128, 64) activation: relu
Iteration 1, loss = 2.93392836
Iteration 2, loss = 1.48127064
Iteration 3, loss = 1.28927515
Iteration 4, loss = 1.19691178
Iteration 5, loss = 1.13973864
Iteration 6, loss = 1.09817005
Iteration 7, loss = 1.06639074
Iteration 8, loss = 1.04042693
Iteration 9, loss = 1.02025087
Iteration 10, loss = 0.99938891
Iteration 11, loss = 0.98549908
Iteration 12, loss = 0.96173514
Iteration 13, loss = 0.95142966
Iteration 14, loss = 0.93865591
Iteration 15, loss = 0.92631792
Iteration 16, loss = 0.91622416
Iteration 17, loss = 0.90186557
Iteration 18, loss = 0.89556848
Iteration 19, loss = 0.88810435
Iteration 20, loss = 0.87505786
Iteration 21, loss = 0.86825297
Iteration 22, loss = 0.86335825
Iteration 23, loss = 0.85304974
Iteration 24, loss = 0.84630558
Iteration 25, loss = 0.83879973
Iteration 26, loss = 0.83259728
Iteration 27, loss = 0.82750056
Iteration 28, loss = 0.81983524
Iteratio



Validation ACC: 0.7549807561693457

Training teacher with hidden layers: (256, 128, 64) activation: tanh
Iteration 1, loss = 1.83758717
Iteration 2, loss = 1.26944036
Iteration 3, loss = 1.17164194
Iteration 4, loss = 1.12360003
Iteration 5, loss = 1.08616006
Iteration 6, loss = 1.05353065
Iteration 7, loss = 1.02744562
Iteration 8, loss = 1.00601125
Iteration 9, loss = 0.98833718
Iteration 10, loss = 0.97481088
Iteration 11, loss = 0.96276821
Iteration 12, loss = 0.95373769
Iteration 13, loss = 0.94757520
Iteration 14, loss = 0.93553848
Iteration 15, loss = 0.92675426
Iteration 16, loss = 0.92552785
Iteration 17, loss = 0.91680703
Iteration 18, loss = 0.91332831
Iteration 19, loss = 0.90852079
Iteration 20, loss = 0.89970875
Iteration 21, loss = 0.89491765
Iteration 22, loss = 0.89317546
Iteration 23, loss = 0.88962301
Iteration 24, loss = 0.88368651
Iteration 25, loss = 0.88386771
Iteration 26, loss = 0.87917807
Iteration 27, loss = 0.87239521
Iteration 28, loss = 0.87235902
Iteratio



Validation ACC: 0.7468870273941589

Best teacher ACC: 0.8016268313981694 Config: ((512, 512, 256), 'relu')


In [None]:
#@title Generate soft labels from best teacher
T = 4.0  # temperature

logits = best_teacher.predict_proba(X_tr)
soft_labels = np.log(logits + 1e-9) / T
soft_labels = np.exp(soft_labels)
soft_labels /= soft_labels.sum(axis=1, keepdims=True)

val_logits = best_teacher.predict_proba(X_va)
soft_labels_val = np.log(val_logits + 1e-9) / T
soft_labels_val = np.exp(soft_labels_val)
soft_labels_val /= soft_labels_val.sum(axis=1, keepdims=True)

print("Soft labels train:", soft_labels.shape)
print("Soft labels val:", soft_labels_val.shape)

Soft labels train: (200000, 49)
Soft labels val: (123676, 49)


In [None]:
#@title Finetune student model
import tensorflow as tf
from tensorflow.keras import layers, models

NUM_CLASSES = soft_labels.shape[1]
input_shape = X_tr.shape[1]

student_configs = [
    [64],
    [64, 32],
    [128, 64, 32]
]
activations_student = ["tanh", "relu"]

best_val_acc_student = 0
best_student = None
best_student_cfg = None

for cfg in student_configs:
    for act in activations_student:
        print("\nTraining student with layers:", cfg, "activation:", act)
        student = models.Sequential()
        student.add(layers.Input(shape=(input_shape,)))
        for u in cfg:
            student.add(layers.Dense(u, activation=act))
        student.add(layers.Dense(NUM_CLASSES, activation='softmax'))

        student.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        history = student.fit(
            X_tr, soft_labels,
            validation_data=(X_va, soft_labels_val),
            batch_size=512,
            epochs=30,
            verbose=1
        )

        val_acc = max(history.history['val_accuracy'])
        print("Student val ACC:", val_acc)

        if val_acc > best_val_acc_student:
            best_val_acc_student = val_acc
            best_student = student
            best_student_cfg = (cfg, act)

print("\nBest student val ACC:", best_val_acc_student, "Config:", best_student_cfg)


Training student with layers: [64] activation: tanh
Epoch 1/30
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.1746 - loss: 3.5687 - val_accuracy: 0.4229 - val_loss: 3.2338
Epoch 2/30
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.4598 - loss: 3.1845 - val_accuracy: 0.5386 - val_loss: 3.1010
Epoch 3/30
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.5471 - loss: 3.0884 - val_accuracy: 0.5771 - val_loss: 3.0577
Epoch 4/30
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.5756 - loss: 3.0499 - val_accuracy: 0.5877 - val_loss: 3.0423
Epoch 5/30
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.5903 - loss: 3.0335 - val_accuracy: 0.5995 - val_loss: 3.0268
Epoch 6/30
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.5924 - loss: 3.0250 - val_accuracy: 

In [None]:
#@title Finetune baseline MLP
baseline_configs = [
    (64, 32),
    (128, 64),
    (256, 128, 64)
]
activations_baseline = ["tanh", "relu"]

best_val_acc_baseline = 0
best_baseline = None
best_baseline_cfg = None

for cfg in baseline_configs:
    for act in activations_baseline:
        print("\nTraining baseline with layers:", cfg, "activation:", act)
        baseline = MLPClassifier(
            hidden_layer_sizes=cfg,
            activation=act,
            max_iter=30,
            random_state=42,
            early_stopping = True,
            verbose = True
        )
        baseline.fit(X_tr, y_tr)
        pred = baseline.predict(X_va)
        val_acc = accuracy_score(y_va, pred)
        print("Baseline val ACC:", val_acc)

        if val_acc > best_val_acc_baseline:
            best_val_acc_baseline = val_acc
            best_baseline = baseline
            best_baseline_cfg = (cfg, act)

print("\nBest baseline val ACC:", best_val_acc_baseline, "Config:", best_baseline_cfg)


Training baseline with layers: (64, 32) activation: tanh
Iteration 1, loss = 2.17087872
Validation score: 0.553600
Iteration 2, loss = 1.61118615
Validation score: 0.578400
Iteration 3, loss = 1.51412658
Validation score: 0.603700
Iteration 4, loss = 1.45650640
Validation score: 0.610250
Iteration 5, loss = 1.42074393
Validation score: 0.614850
Iteration 6, loss = 1.39438086
Validation score: 0.625900
Iteration 7, loss = 1.37441565
Validation score: 0.628850
Iteration 8, loss = 1.35326158
Validation score: 0.631000
Iteration 9, loss = 1.34425216
Validation score: 0.633150
Iteration 10, loss = 1.33041534
Validation score: 0.638150
Iteration 11, loss = 1.31325060
Validation score: 0.644200
Iteration 12, loss = 1.31013708
Validation score: 0.654050
Iteration 13, loss = 1.29824610
Validation score: 0.648400
Iteration 14, loss = 1.29367896
Validation score: 0.645700
Iteration 15, loss = 1.28189944
Validation score: 0.643650
Iteration 16, loss = 1.27963620
Validation score: 0.647100
Iterati



Baseline val ACC: 0.6657961124227821

Training baseline with layers: (64, 32) activation: relu
Iteration 1, loss = 2.68398143
Validation score: 0.542050
Iteration 2, loss = 1.54712364
Validation score: 0.601650
Iteration 3, loss = 1.39205333
Validation score: 0.632650
Iteration 4, loss = 1.30088841
Validation score: 0.649800
Iteration 5, loss = 1.24836077
Validation score: 0.650950
Iteration 6, loss = 1.20948451
Validation score: 0.661050
Iteration 7, loss = 1.17059196
Validation score: 0.677850
Iteration 8, loss = 1.13981065
Validation score: 0.683650
Iteration 9, loss = 1.11892492
Validation score: 0.688900
Iteration 10, loss = 1.09700543
Validation score: 0.695550
Iteration 11, loss = 1.08351749
Validation score: 0.700850
Iteration 12, loss = 1.06794333
Validation score: 0.695600
Iteration 13, loss = 1.05484462
Validation score: 0.699850
Iteration 14, loss = 1.04666295
Validation score: 0.708350
Iteration 15, loss = 1.03803979
Validation score: 0.708300
Iteration 16, loss = 1.028089



Baseline val ACC: 0.7253307028041011

Training baseline with layers: (128, 64) activation: tanh
Iteration 1, loss = 1.78665419
Validation score: 0.603900
Iteration 2, loss = 1.40063561
Validation score: 0.631400
Iteration 3, loss = 1.32438565
Validation score: 0.643050
Iteration 4, loss = 1.28337698
Validation score: 0.650800
Iteration 5, loss = 1.25667911
Validation score: 0.654500
Iteration 6, loss = 1.22953114
Validation score: 0.657750
Iteration 7, loss = 1.21370072
Validation score: 0.663250
Iteration 8, loss = 1.20164697
Validation score: 0.667350
Iteration 9, loss = 1.18178704
Validation score: 0.669200
Iteration 10, loss = 1.17729666
Validation score: 0.668800
Iteration 11, loss = 1.17238577
Validation score: 0.679300
Iteration 12, loss = 1.15721857
Validation score: 0.688250
Iteration 13, loss = 1.14824891
Validation score: 0.681550
Iteration 14, loss = 1.14724765
Validation score: 0.681650
Iteration 15, loss = 1.14587670
Validation score: 0.677300
Iteration 16, loss = 1.14017



Baseline val ACC: 0.6915731427277726

Training baseline with layers: (128, 64) activation: relu
Iteration 1, loss = 2.06409938
Validation score: 0.633000
Iteration 2, loss = 1.26837826
Validation score: 0.669050
Iteration 3, loss = 1.16872843
Validation score: 0.676500
Iteration 4, loss = 1.11138396
Validation score: 0.692200
Iteration 5, loss = 1.06630247
Validation score: 0.703050
Iteration 6, loss = 1.03743659
Validation score: 0.698850
Iteration 7, loss = 1.01248348
Validation score: 0.721900
Iteration 8, loss = 0.98943968
Validation score: 0.712750
Iteration 9, loss = 0.97362714
Validation score: 0.725300
Iteration 10, loss = 0.96335984
Validation score: 0.731250
Iteration 11, loss = 0.94495634
Validation score: 0.721000
Iteration 12, loss = 0.93732030
Validation score: 0.733800
Iteration 13, loss = 0.92699007
Validation score: 0.738950
Iteration 14, loss = 0.91764378
Validation score: 0.743500
Iteration 15, loss = 0.90841855
Validation score: 0.742650
Iteration 16, loss = 0.90191



Baseline val ACC: 0.7531372295352372

Training baseline with layers: (256, 128, 64) activation: tanh
Iteration 1, loss = 1.61158930
Validation score: 0.613100
Iteration 2, loss = 1.31614159
Validation score: 0.644150
Iteration 3, loss = 1.25508029
Validation score: 0.662600
Iteration 4, loss = 1.22645305
Validation score: 0.673300
Iteration 5, loss = 1.20397580
Validation score: 0.672000
Iteration 6, loss = 1.16112086
Validation score: 0.672100
Iteration 7, loss = 1.15611683
Validation score: 0.677850
Iteration 8, loss = 1.13686688
Validation score: 0.681050
Iteration 9, loss = 1.12817175
Validation score: 0.679450
Iteration 10, loss = 1.12528787
Validation score: 0.676800
Iteration 11, loss = 1.11589856
Validation score: 0.678250
Iteration 12, loss = 1.11863576
Validation score: 0.678300
Iteration 13, loss = 1.10265782
Validation score: 0.682850
Iteration 14, loss = 1.09646532
Validation score: 0.682600
Iteration 15, loss = 1.09234324
Validation score: 0.678750
Iteration 16, loss = 1.



Baseline val ACC: 0.7050195672563796

Training baseline with layers: (256, 128, 64) activation: relu
Iteration 1, loss = 1.80567255
Validation score: 0.644700
Iteration 2, loss = 1.19027189
Validation score: 0.680550
Iteration 3, loss = 1.08375042
Validation score: 0.704200
Iteration 4, loss = 1.01983941
Validation score: 0.710400
Iteration 5, loss = 0.98385671
Validation score: 0.724750
Iteration 6, loss = 0.95174610
Validation score: 0.725100
Iteration 7, loss = 0.92625217
Validation score: 0.739350
Iteration 8, loss = 0.90597851
Validation score: 0.738100
Iteration 9, loss = 0.89175025
Validation score: 0.746200
Iteration 10, loss = 0.87444646
Validation score: 0.745200
Iteration 11, loss = 0.86432308
Validation score: 0.746550
Iteration 12, loss = 0.85465026
Validation score: 0.752000
Iteration 13, loss = 0.84428132
Validation score: 0.747850
Iteration 14, loss = 0.83205903
Validation score: 0.758400
Iteration 15, loss = 0.82696149
Validation score: 0.744350
Iteration 16, loss = 0.



Baseline val ACC: 0.7722274329700184

Best baseline val ACC: 0.7722274329700184 Config: ((256, 128, 64), 'relu')


In [None]:
#@title Export best baseline MLP weight to .h
from sklearn.preprocessing import StandardScaler
import numpy as np

HEADER_FILE = "mlp_baseline_weights.h"
PREFIX = "MLP_BASELINE"

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
x_max = np.percentile(np.abs(X_scaled), 99.9)
INPUT_SCALE = 127.0 / x_max

mlp_best = best_baseline

coefs = mlp_best.coefs_            # list of (in_dim, out_dim)
intercepts = mlp_best.intercepts_  # list of (out_dim,)

with open(HEADER_FILE, "w") as f:
    f.write("#pragma once\n\n")
    f.write("#include <stdint.h>\n\n")

    f.write("// ===== Quantization config =====\n")
    f.write(f"#define {PREFIX}_INPUT_SCALE {INPUT_SCALE:.8f}f\n\n")

    prev_scale = INPUT_SCALE

    for layer_idx, (W, b) in enumerate(zip(coefs, intercepts)):
        in_dim, out_dim = W.shape

        # transpose cho C: (out_dim, in_dim)
        W = W.T

        # ---- Quantize weights sang int8 ----
        W_max = np.max(np.abs(W))
        W_scale = 127.0 / W_max if W_max != 0 else 1.0
        W_q = np.round(W * W_scale).astype(np.int8)

        # ---- Quantize bias sang int32 ----
        b_scale = prev_scale * W_scale
        b_q = np.round(b * b_scale).astype(np.int32)

        # ---- Write metadata ----
        f.write(f"// ===== Layer {layer_idx} =====\n")
        f.write(f"#define {PREFIX}_L{layer_idx}_IN  {in_dim}\n")
        f.write(f"#define {PREFIX}_L{layer_idx}_OUT {out_dim}\n")
        f.write(f"#define {PREFIX}_L{layer_idx}_W_SCALE {W_scale:.8f}f\n")
        f.write(f"#define {PREFIX}_L{layer_idx}_B_SCALE {b_scale:.8f}f\n\n")

        # ---- Write weights ----
        f.write(f"const int8_t {PREFIX}_W{layer_idx}[{out_dim}][{in_dim}] = {{\n")
        for row in W_q:
            f.write("  { " + ", ".join(map(str, row)) + " },\n")
        f.write("};\n\n")

        # ---- Write biases ----
        f.write(f"const int32_t {PREFIX}_B{layer_idx}[{out_dim}] = {{\n")
        f.write("  " + ", ".join(map(str, b_q)) + "\n")
        f.write("};\n\n")

        prev_scale = b_scale

print(f"Exported quantized baseline MLP weights to {HEADER_FILE}")

Exported quantized baseline MLP weights to mlp_baseline_weights.h


  b_q = np.round(b * b_scale).astype(np.int32)
