In [None]:
import pandas as pd

# Load the dataset used by the cells below, which read its "Word_Vec",
# "Phoneme" and "Mean_AoA" columns.
# NOTE(review): unpickling can execute arbitrary code -- only load a data.pkl
# that comes from a trusted source.
data = pd.read_pickle("data.pkl")

## Create Model Below

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model, Input

def build_model(bert_dim, phon_dim):
    """Assemble the two-input regression network (returned uncompiled).

    Args:
        bert_dim: Length of the vector fed to the "bert_input" input.
        phon_dim: Length of the vector fed to the "phoneme_input" input.

    Returns:
        A Keras ``Model`` taking ``[bert_input, phoneme_input]`` and producing
        a single ReLU-activated output unit per sample.
    """

    def _branch(features, prefix, first_units, last_units):
        # One per-input stack: Dense -> bias-free Dense -> BatchNorm -> ReLU
        # -> Dense -> Dropout. Layers are named "<prefix>_1" .. "<prefix>_6";
        # train_phase relies on these prefixes to freeze a whole branch.
        h = layers.Dense(first_units, activation="relu", name=f"{prefix}_1")(features)
        h = layers.Dense(32, use_bias=False, name=f"{prefix}_2")(h)
        h = layers.BatchNormalization(name=f"{prefix}_3")(h)
        h = layers.ReLU(name=f"{prefix}_4")(h)
        h = layers.Dense(last_units, activation="relu", name=f"{prefix}_5")(h)
        return layers.Dropout(0.4, name=f"{prefix}_6")(h)

    bert_in = Input(shape=(bert_dim,), name="bert_input")
    phon_in = Input(shape=(phon_dim,), name="phoneme_input")

    bert_out = _branch(bert_in, "bert", first_units=128, last_units=64)
    phon_out = _branch(phon_in, "phon", first_units=64, last_units=32)

    # Shared head on top of the concatenated branch outputs. These layers are
    # left unnamed, so they never match the "bert_"/"phon_" prefixes.
    head = layers.Concatenate()([bert_out, phon_out])
    head = layers.Dense(64, activation="relu")(head)
    head = layers.Dropout(0.3)(head)
    head = layers.Dense(32, use_bias=False)(head)
    head = layers.BatchNormalization()(head)
    head = layers.ReLU()(head)
    prediction = layers.Dense(1, activation="relu")(head)

    return Model(inputs=[bert_in, phon_in], outputs=prediction)


## Create Model Training Function Below

In [None]:
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

def train_phase(
    model,
    Bert_Train, Bert_Test, Phon_Train, Phon_Test, mean_aoa_train, mean_aoa_test,
    freeze_bert: bool,
    freeze_pho: bool,
    initial_lr: float = 1e-4,
    epochs: int = 2000,
    batch_size: int = 32
):
    """Run one training phase on ``model`` in place and return its test MAE.

    Args:
        model: Keras model whose branch layers are named with the "bert_" and
            "phon_" prefixes.
        Bert_Train, Phon_Train, mean_aoa_train: Training inputs and targets.
        Bert_Test, Phon_Test, mean_aoa_test: Held-out inputs and targets used
            only for the final evaluation.
        freeze_bert: When true, layers whose name starts with "bert_" are
            made non-trainable for this phase.
        freeze_pho: When true, layers whose name starts with "phon_" are
            made non-trainable for this phase.
        initial_lr: Starting learning rate for the AdamW optimizer.
        epochs: Upper bound on epochs; early stopping usually ends sooner.
        batch_size: Mini-batch size passed to ``fit``.

    Returns:
        The "mae" metric reported by ``model.evaluate`` on the test data.
    """
    # Apply the freeze flags by layer-name prefix; every other layer trains.
    for lyr in model.layers:
        is_frozen = (
            freeze_bert if lyr.name.startswith("bert_")
            else freeze_pho if lyr.name.startswith("phon_")
            else False
        )
        lyr.trainable = not is_frozen

    # Compile after changing `trainable` so the new flags take effect.
    optimizer = tf.keras.optimizers.AdamW(learning_rate=initial_lr)
    model.compile(optimizer=optimizer, loss="mse", metrics=["mae"])

    # Both callbacks watch the validation loss: halve the learning rate after
    # 5 stagnant epochs, stop (and roll back to the best weights) after 15.
    training_callbacks = [
        EarlyStopping(
            monitor='val_loss',
            patience=15,
            restore_best_weights=True
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-6
        ),
    ]

    # 20% of the training data is held out by Keras as the validation set.
    model.fit(
        [Bert_Train, Phon_Train],
        mean_aoa_train,
        validation_split=0.2,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=training_callbacks,
        verbose=0
    )

    # evaluate() returns [loss, mae] for the compiled loss and metric.
    _test_loss, test_mae = model.evaluate(
        [Bert_Test, Phon_Test],
        mean_aoa_test,
        verbose=0
    )
    return test_mae


## Train Model Below

In [None]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
import numpy as np

# Length of the "bert_input" vector; passed to build_model as bert_dim.
BERT_DIM = 1024
# Length of the "phoneme_input" vector; passed to build_model as phon_dim.
PHON_DIM = 32
# Number of trials run by the training loop, each on a differently seeded split.
TRIALS = 2000

import os

# Text file holding the best test MAE per phase, one value per line.
BEST_MAE_FILE = "best_maes.txt"

def load_best_maes(filepath):
    """Read the best-so-far MAE of each of the three phases from a text file.

    The file is expected to contain one number per line (phase 1, 2, 3), as
    written by ``save_best_maes``.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A 3-tuple of floats. An entry is ``inf`` ("no record yet") when the
        file does not exist, the line is missing, the line is not a number,
        or the line is NaN.
    """
    import math

    no_record = float('inf')
    if not os.path.exists(filepath):
        return no_record, no_record, no_record

    with open(filepath, 'r') as f:
        lines = f.read().strip().splitlines()

    bests = []
    for line in lines[:3]:
        try:
            value = float(line)
        except ValueError:
            # Only a malformed number is tolerated; anything else (including
            # KeyboardInterrupt) must propagate instead of being swallowed.
            value = no_record
        if math.isnan(value):
            # Every `mae < nan` comparison is False, so a stored NaN would
            # block all future updates for that phase.
            value = no_record
        bests.append(value)

    # Pad short files so callers can always unpack three values.
    bests.extend([no_record] * (3 - len(bests)))
    return tuple(bests)

def save_best_maes(filepath, phase1_mae, phase2_mae, phase3_mae):
    """Overwrite ``filepath`` with the three phase MAEs, one value per line.

    Args:
        filepath: Destination text file; any existing content is replaced.
        phase1_mae, phase2_mae, phase3_mae: Values written in that order.
    """
    with open(filepath, 'w') as out:
        rows = [f"{mae}\n" for mae in (phase1_mae, phase2_mae, phase3_mae)]
        out.write("".join(rows))


# Best test MAE per phase carried over from earlier runs (inf when none is recorded).
best_mae_phase1, best_mae_phase2, best_mae_phase3 = load_best_maes(BEST_MAE_FILE)

# Checkpoint of the best model found so far for each phase.
best_model_path_phase1 = "AoA_model_phase1.keras"
best_model_path_phase2 = "AoA_model_phase2.keras"
best_model_path_phase3 = "AoA_model_phase3.keras"

# The feature and target arrays are identical for every trial, so build them
# once instead of re-stacking the whole dataset on each iteration.
bert_features = np.stack(data["Word_Vec"].values).astype(np.float32)
phoneme_features = np.vstack(data["Phoneme"].values)
aoa_targets = data["Mean_AoA"]

# NOTE(review): each trial uses a different split (random_state=42 + i), yet a
# phase that does not beat the record continues from a checkpoint trained on an
# earlier split. That checkpoint may have seen rows of the current test set, and
# the test MAE is also used for model selection -- treat the recorded MAEs as
# optimistic and confirm on data that was never part of any split.
for i in range(TRIALS):
    # Building many models in one process accumulates Keras global state;
    # clearing it per trial keeps memory use flat across the run.
    tf.keras.backend.clear_session()

    Bert_Train, Bert_Test, Phon_Train, Phon_Test, mean_aoa_train, mean_aoa_test = train_test_split(
        bert_features,
        phoneme_features,
        aoa_targets,
        test_size=0.2,
        random_state=42 + i
    )

    AoA_model = build_model(BERT_DIM, PHON_DIM)

    # Phase 1: freeze BERT
    phase1_mae = train_phase(
        AoA_model, Bert_Train, Bert_Test, Phon_Train, Phon_Test, mean_aoa_train, mean_aoa_test,
        freeze_bert=True, freeze_pho=False, initial_lr=1e-4
    )
    print(f"{phase1_mae:.4f}", end="")
    if phase1_mae < best_mae_phase1:
        best_mae_phase1 = phase1_mae
        AoA_model.save(best_model_path_phase1)
        save_best_maes(BEST_MAE_FILE, best_mae_phase1, best_mae_phase2, best_mae_phase3)
    elif os.path.exists(best_model_path_phase1):
        # Not a new record: continue from the best phase-1 model instead.
        AoA_model = tf.keras.models.load_model(best_model_path_phase1)
    # Otherwise no checkpoint exists on disk (e.g. the MAE file survived but the
    # model file did not, or the MAE was NaN); keep training the current model.

    # Phase 2: unfreeze BERT, freeze phoneme
    phase2_mae = train_phase(
        AoA_model, Bert_Train, Bert_Test, Phon_Train, Phon_Test, mean_aoa_train, mean_aoa_test,
        freeze_bert=False, freeze_pho=True, initial_lr=1e-4
    )
    print(f", {phase2_mae:.4f}", end="")
    if phase2_mae < best_mae_phase2:
        best_mae_phase2 = phase2_mae
        AoA_model.save(best_model_path_phase2)
        save_best_maes(BEST_MAE_FILE, best_mae_phase1, best_mae_phase2, best_mae_phase3)
    elif os.path.exists(best_model_path_phase2):
        AoA_model = tf.keras.models.load_model(best_model_path_phase2)

    # Phase 3: unfreeze both, with a smaller learning rate
    phase3_mae = train_phase(
        AoA_model, Bert_Train, Bert_Test, Phon_Train, Phon_Test, mean_aoa_train, mean_aoa_test,
        freeze_bert=False, freeze_pho=False, initial_lr=1e-5
    )
    print(f", {phase3_mae:.4f}", end="")
    if phase3_mae < best_mae_phase3:
        best_mae_phase3 = phase3_mae
        AoA_model.save(best_model_path_phase3)
        save_best_maes(BEST_MAE_FILE, best_mae_phase1, best_mae_phase2, best_mae_phase3)
        print(" <- New Best!", end="")
    elif os.path.exists(best_model_path_phase3):
        AoA_model = tf.keras.models.load_model(best_model_path_phase3)

    print()


In [5]:
# Re-save the best phase-3 checkpoint under the final model file name.
best_phase3_model = tf.keras.models.load_model(best_model_path_phase3)
best_phase3_model.save("AoA_model.keras")