# Design

This is a decoder model that takes a roundness value as input and outputs a pseudoword corresponding to that value.

In [1]:
from utils.pseudoword_generator import *
from torch.utils.data import DataLoader
from dotenv import load_dotenv
from prodigyopt import Prodigy
from utils.dataset import *
import pandas as pd
import torch
import os


# Load variables from a local .env file into the process environment
# (GEN is read later via os.getenv when naming the checkpoint).
load_dotenv()
# Never truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Notebook-wide seed. Seeding torch's global RNG makes weight initialisation
# and DataLoader shuffling repeatable across Restart & Run All.
state = 42
torch.manual_seed(state)
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Dataset

In [None]:
# Read the normalized dataset and rename the "Stimuli" and
# "ExperimentalRoundScore" columns to the names used in the rest of the notebook.
data = (
    pd.read_csv("datasets/normalized.csv")
    .rename(columns={"Stimuli": "Pseudoword", "ExperimentalRoundScore": "Roundness"})
)
data

In [None]:
# Display pandas' default summary statistics for the loaded dataset.
data.describe()

In [4]:
# Instantiate the train / validation / test dataset objects
# (the constructors come from the star imports at the top of the notebook).
trn, val, tst = trainDataset(), valDataset(), testDataset()

In [5]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# imports cell at the top so all dependencies are visible in one place.
from utils.word_tokenizer import wordTokenizer

# Tokenizer instance: its collate_fn is given to the DataLoaders, and the
# object itself is passed to train() and inference() further down.
tokenizer = wordTokenizer()

In [6]:
# Mini-batch size shared by all three loaders.
batch = 8

# Wrap each dataset split in a DataLoader; tokenizer.collate_fn assembles the batches.
# Only the training split is shuffled. Validation and test are iterated in a
# fixed order so evaluation runs are deterministic and comparable between runs.
# NOTE(review): trn/val/tst are rebound from datasets to loaders here, so
# re-running this cell without first re-running the dataset cell would wrap a
# DataLoader inside a DataLoader.
trn = DataLoader(trn, batch_size=batch, shuffle=True, collate_fn=tokenizer.collate_fn)
val = DataLoader(val, batch_size=batch, shuffle=False, collate_fn=tokenizer.collate_fn)
tst = DataLoader(tst, batch_size=batch, shuffle=False, collate_fn=tokenizer.collate_fn)

# Model

In [7]:
# Hyperparameters forwarded to WordTransformer as keyword arguments.
model_config = {
    "d_model": 64,
    "nhead": 4,
    "num_layers": 4,
    "max_length": 12,
}
model = WordTransformer(**model_config)

In [8]:
# Prodigy optimizer over all model parameters, with no weight decay.
# NOTE(review): lr=1.0 presumably relies on Prodigy estimating the step size
# itself — confirm against the installed prodigyopt version.
optimizer = Prodigy(
    model.parameters(),
    lr=1.0,
    weight_decay=0,
    slice_p=1,
)

# Training

In [None]:
# Group the three loaders so the call below reads as "what to train" + "on what data".
# NOTE(review): the test loader is handed to train() together with the
# validation loader — confirm it is only used for reporting, not model selection.
loaders = {"trainLoader": trn, "testLoader": tst, "valLoader": val}

# Run training for 20 epochs with a patience of 10.
train(
    model=model,
    optimizer=optimizer,
    tokenizer=tokenizer,
    device=device,
    epochs=20,
    patience=10,
    **loaders,
)

# Testing

In [None]:
# Spot-check the model: draw 10 rows with a fixed seed and print each row's
# roundness and original pseudoword next to the word the model generates for
# that roundness value.
random_sample = data.sample(n=10, random_state=42)
for _, row in random_sample.iterrows():
    print(f"Roundness Value : {row['Roundness']}")
    print(f"Original Word   : {row['Pseudoword']}")
    # Call inference outside the f-string: nesting double quotes inside a
    # double-quoted f-string is a SyntaxError on Python versions before 3.12.
    predicted_word = inference(model, row["Roundness"], tokenizer)
    print(f"Predicted word  : {predicted_word}")
    print()

In [None]:
# Sweep roundness over 0.0, 0.1, ..., 1.0 and print the word generated for each value.
roundness_list = [step / 10 for step in range(11)]

for roundness in roundness_list:
    print(f"Roundness Value: {roundness}")
    print(f"Predicted word: {inference(model, roundness, tokenizer)}")
    print()

# Save and load model

In [None]:
# Save the trained model; the file name embeds the GEN environment variable.
# os.getenv returns None when GEN is unset, which would produce
# "pseudoword_generator_v0None.pth" — make sure GEN is defined (e.g. in .env).
# Single quotes inside the f-string keep this cell valid on Python < 3.12.
save_model(model, filename=f"pseudoword_generator_v0{os.getenv('GEN')}.pth")

In [None]:
# Reload the model from the checkpoint named after the GEN environment variable.
# os.getenv returns None when GEN is unset, which would look for
# "pseudoword_generator_v0None.pth" — make sure GEN is defined (e.g. in .env).
# Single quotes inside the f-string keep this cell valid on Python < 3.12.
model = load_model(filename=f"pseudoword_generator_v0{os.getenv('GEN')}.pth")