# Design

This is a decoder model that takes a roundness value as input and outputs a pseudoword corresponding to that roundness value.

In [1]:
from utils.pseudoword_generator import *
import pandas as pd
import torch

# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Single seed for the notebook: used by the pandas sampling calls and,
# via torch.manual_seed, by PyTorch's random number generator so that
# weight initialisation and training are reproducible as well.
state = 42
torch.manual_seed(state)

# Version number embedded in the checkpoint filename when saving/loading.
VERSION = 1

  from .autonotebook import tqdm as notebook_tqdm


# Dataset

In [2]:
# Load the normalized pseudoword dataset from CSV and display it.
csv_path = 'datasets/normalized_v2.csv'
data = pd.read_csv(csv_path)
data

Unnamed: 0,Stimuli,ExperimentalRoundScore
0,timloe,0.438869
1,cestop,0.774481
2,soniel,0.598640
3,wiky,0.677876
4,lyll,0.472680
...,...,...
5635,lestest,0.866536
5636,uncing,0.214013
5637,smocy,0.715396
5638,plynching,0.999060


In [3]:
# Summary statistics (count, mean, std, quartiles) of the full dataset.
data.describe()

Unnamed: 0,ExperimentalRoundScore
count,5640.0
mean,0.580606
std,0.303741
min,0.000923
25%,0.40512
50%,0.628009
75%,0.803006
max,0.999966


In [4]:
# Split data into train (80%), validation (10%) and test (10%) sets.
# Every sampling call is seeded with `state`, so the split is reproducible.

trn = data.sample(frac=0.8, random_state=state)

# Rows not selected for training; computed once and shared by val and tst.
holdout = data.drop(trn.index)
val = holdout.sample(frac=0.5, random_state=state)
tst = holdout.drop(val.index)

# Renumber each split from 0, binding new frames instead of mutating in place.
trn, val, tst = (split.reset_index(drop=True) for split in (trn, val, tst))

# The three splits must partition the dataset exactly.
assert len(trn) + len(val) + len(tst) == len(data), "splits do not partition the dataset"

In [5]:
# Report the number of rows in each split as a quick sanity check.
print(f"Train set: {len(trn)} samples, Validation set: {len(val)} samples, Test set: {len(tst)} samples")

Train set: 4512 samples, Validation set: 564 samples, Test set: 564 samples


In [6]:
# Summary statistics of the test split, for comparison with the full dataset.
tst.describe()

Unnamed: 0,ExperimentalRoundScore
count,564.0
mean,0.579113
std,0.304238
min,0.000923
25%,0.408357
50%,0.619963
75%,0.802358
max,0.999965


# Model

In [6]:
# Build the roundness-to-text model.
# NOTE(review): freeze_byt5=False presumably leaves the ByT5 weights trainable;
# confirm against RoundnessToTextModel in utils.pseudoword_generator.
model = RoundnessToTextModel(freeze_byt5=False)

In [7]:
# AdamW over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-3)

# Multiply the learning rate by 0.8 after every 5 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=5,
    gamma=0.8,
)

# Training

In [None]:
# Run training. Arguments are grouped by role; all are passed by keyword.
train(
    # Model and optimisation objects
    model=model,
    optimizer=optimizer,
    scheduler=scheduler,
    # Training split
    trn_texts=trn["Stimuli"],
    trn_roundness=trn["ExperimentalRoundScore"],
    # Validation split
    val_texts=val["Stimuli"],
    val_roundness=val["ExperimentalRoundScore"],
    # Test split
    # NOTE(review): how train() uses the test split is not visible here — confirm
    # it is only used for final reporting, not for model selection.
    tst_texts=tst["Stimuli"],
    tst_roundness=tst["ExperimentalRoundScore"],
    # Batch size is capped at 200 and never exceeds the validation set size.
    batch_size=min(len(val), 200),
    epochs=100,
    # NOTE(review): presumably the early-stopping patience in epochs — confirm in train().
    patience=10,
)

# Save and load model

In [None]:
# Save the model under a filename that embeds VERSION
# (e.g. "pseudoword_generator_v01.pth" when VERSION is 1).
save_model(model, filename=f"pseudoword_generator_v0{VERSION}.pth")

In [7]:
# Load the model from the same versioned filename, rebinding `model`.
# The recorded output shows the file being resolved under outputs/.
model = load_model(filename=f"pseudoword_generator_v0{VERSION}.pth")

  model.load_state_dict(torch.load(path))


Model loaded from outputs/pseudoword_generator_v01.pth


# Testing

In [9]:
# Draw a reproducible sample of 10 test rows, seeded with the notebook-wide `state`.
random_sample = tst.sample(n=10, random_state=state)

# For each sampled roundness score, print the score and the word the model generates for it.
for _, row in random_sample.iterrows():
    # Read the score once; this also avoids nesting double quotes inside a
    # double-quoted f-string, which is a SyntaxError before Python 3.12.
    roundness = row['ExperimentalRoundScore']
    print(f"Roundness Value: {roundness}")
    print(f"Predicted word: {inference(model, roundness)}")
    print()

Roundness Value: 0.3784743
Predicted word: apgor

Roundness Value: 0.0056608072
Predicted word: impails

Roundness Value: 0.030585967
Predicted word: flowesh

Roundness Value: 0.019359555
Predicted word: floal

Roundness Value: 0.99559695
Predicted word: minnip

Roundness Value: 0.0025887461
Predicted word: niaps

Roundness Value: 0.8004737
Predicted word: disrusts

Roundness Value: 0.9937313
Predicted word: tarxi

Roundness Value: 0.66056865
Predicted word: uncing

Roundness Value: 0.49427915
Predicted word: levrees

