# Design

This is a decoder model that takes a roundness value as input and outputs a pseudoword corresponding to that value.

In [1]:
# NOTE: original import order is kept so that name shadowing from the star imports is unchanged.
from utils.pseudoword_generator import *
from utils.word_tokenizer import *
from dotenv import load_dotenv
from utils.dataset import *
import pandas as pd
import torch
import json
import os


# Load variables from a local .env file into the process environment
# (presumably including GEN, which the save/load cells read via os.getenv).
load_dotenv()

# Never truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Single seed for the notebook. It is applied to PyTorch here so that weight
# initialisation and any torch-driven shuffling are repeatable between runs.
state = 42
torch.manual_seed(state)

# Prefer the GPU when one is available. Kept as the last statement so the
# cell ends on an assignment and produces no output.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Dataset

In [2]:
# Import dataset
# Read the normalized stimuli and rename the columns to the names used in the
# rest of the notebook. Chaining .rename (instead of inplace=True) keeps the
# cell a single expression that can be re-run safely.
data = pd.read_csv("datasets/normalized.csv").rename(
    columns={"Stimuli": "Pseudoword", "ExperimentalRoundScore": "Roundness"}
)
data

Unnamed: 0,Pseudoword,Roundness
0,bebi,0.815217
1,bibe,0.913043
2,bobou,0.815217
3,boubo,1.000000
4,chechi,0.184783
...,...,...
119,outou,0.347826
120,uku,0.239130
121,ulu,0.913043
122,umu,0.913043


In [3]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns of the dataset.
data.describe()

Unnamed: 0,Roundness
count,124.0
mean,0.562675
std,0.316366
min,0.0
25%,0.26087
50%,0.543478
75%,0.902174
max,1.0


In [4]:
# Build the train / validation / test splits that the grid-search and training
# cells consume (constructors presumably come from utils.dataset - confirm).
trn, val, tst = trainDataset(), valDataset(), testDataset()

# Hyperparam tuning

This section performs a grid search to determine the best hyperparameters for training the model. The intermediary models are trained with fewer epochs and a shorter early-stopping patience, since the goal here is only to compare hyperparameter combinations. Once the best combination is found, it is used to train a final model with more epochs and a longer patience, so that training can continue until the model converges.

In [5]:
# Search space for the grid search. Each key maps to the candidate values for
# that hyperparameter; the full grid is 3 * 2 * 2 * 1 * 2 * 1 * 2 = 48 combinations.
param_grid = dict(
    d_model=[32, 64, 128],
    nhead=[4, 8],
    num_layers=[4, 8],
    learning_rate=[0.1],
    weight_decay=[0.01, 0.1],
    batch_size=[8],
    max_length=[12, 16],
)

In [6]:
# Coarse search: each configuration gets only a short epoch budget and a small
# early-stopping patience, since the goal is to rank configurations, not to converge.
search_budget = {"epochs": 15, "patience": 3}

# result["parameters"] is read by the training cell further down.
result = grid_search(trn=trn, val=val, tst=tst, param_grid=param_grid, **search_budget)


[1/48] Testing parameters: {'d_model': 32, 'nhead': 4, 'num_layers': 4, 'learning_rate': 0.1, 'weight_decay': 0.01, 'batch_size': 8, 'max_length': 12}
Using decoupled weight decay
Epoch 1: Average Training Loss: 3.3078, Average Validation Loss: 3.2076
Epoch 2: Average Training Loss: 3.2766, Average Validation Loss: 3.0911
Epoch 3: Average Training Loss: 2.8234, Average Validation Loss: 2.5038
Epoch 4: Average Training Loss: 2.4685, Average Validation Loss: 2.3828
Epoch 5: Average Training Loss: 2.3320, Average Validation Loss: 2.3830
Epoch 6: Average Training Loss: 2.2654, Average Validation Loss: 2.2408
Epoch 7: Average Training Loss: 2.2038, Average Validation Loss: 2.2492
Epoch 8: Average Training Loss: 2.1329, Average Validation Loss: 2.3433
Epoch 9: Average Training Loss: 2.0791, Average Validation Loss: 2.1598
Epoch 10: Average Training Loss: 2.0263, Average Validation Loss: 2.1434
Epoch 11: Average Training Loss: 1.9781, Average Validation Loss: 2.2489
Epoch 12: Average Trainin

# Training

In [7]:
# Final fit: reuse the parameters selected by the grid search, but with a much
# larger epoch budget and early-stopping patience than the search itself used.
train_result = train(
    trn,
    val,
    tst,
    params=result["parameters"],
    epochs=100,
    patience=10,
)

Using decoupled weight decay
Epoch 1: Average Training Loss: 3.4873, Average Validation Loss: 3.3733
Epoch 2: Average Training Loss: 3.1850, Average Validation Loss: 2.6892
Epoch 3: Average Training Loss: 2.6327, Average Validation Loss: 2.1683
Epoch 4: Average Training Loss: 2.2704, Average Validation Loss: 2.1586
Epoch 5: Average Training Loss: 2.1139, Average Validation Loss: 1.9989
Epoch 6: Average Training Loss: 2.0129, Average Validation Loss: 2.0594
Epoch 7: Average Training Loss: 1.9254, Average Validation Loss: 2.2337
Epoch 8: Average Training Loss: 1.9074, Average Validation Loss: 2.0872
Epoch 9: Average Training Loss: 1.8534, Average Validation Loss: 2.1617
Epoch 10: Average Training Loss: 1.8616, Average Validation Loss: 2.1720
Epoch 11: Average Training Loss: 1.8451, Average Validation Loss: 2.3368
Epoch 12: Average Training Loss: 1.8268, Average Validation Loss: 2.1504
Epoch 13: Average Training Loss: 1.8280, Average Validation Loss: 2.2554
Epoch 14: Average Training Loss

In [8]:
# Display the full training result: the loss metrics plus the repr of the trained model.
train_result

{'best_val_loss': 1.9988694787025452,
 'final_test_loss': 2.4038931131362915,
 'model': WordTransformer(
   (input_embed): Sequential(
     (0): Linear(in_features=1, out_features=128, bias=True)
     (1): ReLU()
     (2): Linear(in_features=128, out_features=128, bias=True)
   )
   (token_embed): Embedding(29, 128)
   (transformer_decoder): TransformerDecoder(
     (layers): ModuleList(
       (0-7): 8 x TransformerDecoderLayer(
         (self_attn): MultiheadAttention(
           (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
         )
         (multihead_attn): MultiheadAttention(
           (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
         )
         (linear1): Linear(in_features=128, out_features=2048, bias=True)
         (dropout): Dropout(p=0.1, inplace=False)
         (linear2): Linear(in_features=2048, out_features=128, bias=True)
         (norm1): LayerNorm((128,), eps=1e-05, elementw

# Testing

In [9]:
# Qualitative check: draw ten rows from the dataset and compare each original
# pseudoword with the word the model generates for the same roundness value.
# The draw uses the notebook-wide seed `state` instead of a second hard-coded 42.
random_sample = data.sample(n=10, random_state=state)
for _, row in random_sample.iterrows():
    print(f"Roundness Value : {row['Roundness']}")
    print(f"Original Word   : {row['Pseudoword']}")
    predicted = inference(train_result['model'], row['Roundness'], train_result['tokenizer'])
    print(f"Predicted word  : {predicted}")
    print()

Roundness Value : 0.4565217391304347
Original Word   : guegui
Predicted word  : toui

Roundness Value : 0.3695652173913043
Original Word   : sise
Predicted word  : toui

Roundness Value : 0.9130434782608696
Original Word   : nonou
Predicted word  : louoo

Roundness Value : 0.8695652173913043
Original Word   : minlin
Predicted word  : louoo

Roundness Value : 0.0
Original Word   : zize
Predicted word  : keke

Roundness Value : 0.8695652173913043
Original Word   : ama
Predicted word  : louoo

Roundness Value : 0.3695652173913043
Original Word   : kantan
Predicted word  : toui

Roundness Value : 0.9130434782608696
Original Word   : umu
Predicted word  : louoo

Roundness Value : 0.9130434782608696
Original Word   : ulu
Predicted word  : louoo

Roundness Value : 0.1847826086956521
Original Word   : chechi
Predicted word  : kiui



In [10]:
# Sweep the roundness input from 0.0 to 1.0 in steps of 0.1 and print the
# pseudoword the trained model generates at each step.
roundness_list = [step / 10 for step in range(11)]

for roundness in roundness_list:
    generated = inference(train_result['model'], roundness, train_result['tokenizer'])
    print(f"Roundness Value: {roundness}")
    print(f"Predicted word: {generated}")
    print()

Roundness Value: 0.0
Predicted word: keke

Roundness Value: 0.1
Predicted word: keki

Roundness Value: 0.2
Predicted word: tiui

Roundness Value: 0.3
Predicted word: toui

Roundness Value: 0.4
Predicted word: toui

Roundness Value: 0.5
Predicted word: toui

Roundness Value: 0.6
Predicted word: louo

Roundness Value: 0.7
Predicted word: louo

Roundness Value: 0.8
Predicted word: louoo

Roundness Value: 0.9
Predicted word: louoo

Roundness Value: 1.0
Predicted word: louoo



# Save and load model

In [13]:
# Persist the trained model; the GEN environment variable supplies the version suffix.
# Single quotes inside the f-string keep this valid on Python versions before 3.12.
# NOTE(review): os.getenv returns None when GEN is unset, which would yield "...v0None.pth".
save_model(train_result['model'], path=f"outputs/pseudoword_generator_v0{os.getenv('GEN')}.pth")

In [21]:
# Store the hyperparameters chosen by the grid search next to the saved model.
# Single quotes inside the f-string keep this valid on Python versions before 3.12.
with open(f"outputs/params_for_model_v0{os.getenv('GEN')}.json", "w") as f:
    json.dump(result["parameters"], f)

In [None]:
# Reload the saved model for the current GEN version.
# Single quotes inside the f-string keep this valid on Python versions before 3.12.
# NOTE(review): the save cell writes under "outputs/" while this passes a bare filename -
# presumably load_model resolves the directory itself; confirm against its definition.
model = load_model(filename=f"pseudoword_generator_v0{os.getenv('GEN')}.pth")