# Grid Search

In [1]:
import sys
from dataclasses import dataclass, replace
from pathlib import Path

import pandas as pd
import plotly.express as px

In [4]:
# Make the parent directory importable so that the `part2` package resolves.
# This only extends sys.path; it does not change the working directory, and
# '..' is interpreted relative to the directory the kernel is running in.
sys.path.append('..')
from part2.test import evaluate

[nltk_data] Downloading package punkt_tab to /Users/oriol/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [5]:
# Model artifacts and evaluation data, all given as relative paths.
CHECKPOINT = Path("nmt.model")
CONFIG = Path("..") / "part2" / "model_config.yaml"
VOCAB = Path("..") / "part2" / "vocab" / "vocab.json"
ENCODER = Path("..") / "part2" / "vocab" / "spm.model"
SOURCE_FILE = Path("..") / "multi30k_data" / "test_2016_flickr.en"
TARGET_FILE = Path("..") / "multi30k_data" / "test_2016_flickr.fr"
OUTPUT_FILE = Path("outputs") / "results.json"

# Decoding settings reused by every evaluation run in this notebook.
DECODING_STEP = 70
SAMPLE = True

In [6]:
@dataclass
class Args:
    """Bundle of settings handed to `evaluate` as a single argument object.

    The instance is mutable (plain ``@dataclass``), so callers can override
    individual fields. Field meanings below are taken from how this notebook
    fills them in and from the run log; anything marked "presumably" should be
    confirmed against `part2.test.evaluate`.
    """
    checkpoint_path: Path  # model checkpoint file; set from CHECKPOINT here
    model_config: Path  # YAML model configuration; set from CONFIG here
    vocab_file: Path  # the run log prints "Load vocab from" this path
    encoder: Path  # set from ENCODER ("spm.model"); presumably a SentencePiece model -- confirm
    test_source_file: Path  # the run log prints "Load test source sentences from" this path
    test_target_file: Path  # the run log prints "Load test target sentences from" this path
    output_file: Path  # the run log prints "Saving predictions to" this path
    max_decoding_time_step: int  # presumably the per-sentence decoding step limit -- confirm
    do_sample: bool  # presumably switches sampling-based decoding on -- confirm
    top_k: int | None  # None in DEFAULT_ARGS; swept over TOP_K_VALUES in the grid search
    top_p: float | None  # None in DEFAULT_ARGS; swept over TOP_P_VALUES in the grid search
    temperature: float  # 1.0 in DEFAULT_ARGS; swept over TEMPERATURE_VALUES in the grid search

In [7]:
# Baseline configuration: the paths and decoding settings come from the
# constants defined earlier, top_k/top_p are left unset (None) and the
# temperature is 1.0.
DEFAULT_ARGS = Args(
    checkpoint_path=CHECKPOINT,
    model_config=CONFIG,
    vocab_file=VOCAB,
    encoder=ENCODER,
    test_source_file=SOURCE_FILE,
    test_target_file=TARGET_FILE,
    output_file=OUTPUT_FILE,
    max_decoding_time_step=DECODING_STEP,
    do_sample=SAMPLE,
    top_k=None,
    top_p=None,
    temperature=1.0
)

In [8]:
# Baseline run with DEFAULT_ARGS. The cell displays the value `evaluate`
# returns, which the run log reports as the corpus BLEU score.
evaluate(DEFAULT_ARGS)

Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:39<00:00, 25.24it/s]


Corpus BLEU score = 42.49
Saving predictions to outputs/results.json


42.4922523289291

In [9]:
# Candidate values for each decoding hyperparameter swept by the grid search.
TOP_K_VALUES = [50, 100, 150, 200]
TOP_P_VALUES = [0.2, 0.4, 0.6, 0.8]
TEMPERATURE_VALUES = [0.25, 0.5, 0.75, 1.0]

In [12]:
# Cartesian product of the three sweeps as (top_k, top_p, temperature) tuples,
# ordered with temperature varying fastest and top_k slowest. Building it with
# a comprehension keeps the loop variables out of the notebook namespace.
grid = [
    (k, p, t)
    for k in TOP_K_VALUES
    for p in TOP_P_VALUES
    for t in TEMPERATURE_VALUES
]

In [70]:
# Evaluate every (top_k, top_p, temperature) combination and collect the
# returned scores in the same order as `grid`.
results = []
for k, p, t in grid:
    # Derive a fresh Args for this configuration. A plain assignment
    # (`args = DEFAULT_ARGS`) would only alias the shared instance, so the
    # overrides would leak into DEFAULT_ARGS and change any later baseline run.
    args = replace(DEFAULT_ARGS, top_k=k, top_p=p, temperature=t)
    score = evaluate(args)
    results.append(score)
# NOTE(review): every run logs "Saving predictions to outputs/results.json", so
# all configurations share one output file; presumably only the last run's
# predictions remain on disk -- confirm whether per-run files are wanted.

Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:38<00:00, 25.73it/s]


Corpus BLEU score = 50.20
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:37<00:00, 26.49it/s]


Corpus BLEU score = 50.21
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:38<00:00, 25.98it/s]


Corpus BLEU score = 50.13
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:37<00:00, 26.39it/s]


Corpus BLEU score = 49.85
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:37<00:00, 26.78it/s]


Corpus BLEU score = 50.22
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.12it/s]


Corpus BLEU score = 50.27
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.34it/s]


Corpus BLEU score = 50.36
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:38<00:00, 26.15it/s]


Corpus BLEU score = 50.30
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:38<00:00, 26.13it/s]


Corpus BLEU score = 50.36
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:37<00:00, 26.73it/s]


Corpus BLEU score = 50.13
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:38<00:00, 26.16it/s]


Corpus BLEU score = 49.73
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:35<00:00, 27.79it/s]


Corpus BLEU score = 49.63
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:35<00:00, 27.99it/s]


Corpus BLEU score = 50.07
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.76it/s]


Corpus BLEU score = 50.17
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [01:07<00:00, 14.74it/s]


Corpus BLEU score = 49.00
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.22it/s]


Corpus BLEU score = 47.68
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.76it/s]


Corpus BLEU score = 50.23
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:35<00:00, 27.82it/s]


Corpus BLEU score = 50.16
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [01:22<00:00, 12.07it/s]


Corpus BLEU score = 50.17
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.77it/s]


Corpus BLEU score = 50.40
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:35<00:00, 27.81it/s]


Corpus BLEU score = 50.21
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:35<00:00, 27.88it/s]


Corpus BLEU score = 50.32
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [02:35<00:00,  6.45it/s] 


Corpus BLEU score = 50.22
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:35<00:00, 28.01it/s]


Corpus BLEU score = 50.24
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:35<00:00, 28.09it/s]


Corpus BLEU score = 50.45
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:35<00:00, 28.06it/s]


Corpus BLEU score = 50.04
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [01:21<00:00, 12.25it/s]


Corpus BLEU score = 49.71
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:37<00:00, 26.72it/s]


Corpus BLEU score = 48.84
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:38<00:00, 25.86it/s]


Corpus BLEU score = 49.91
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.66it/s]


Corpus BLEU score = 49.75
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.77it/s]


Corpus BLEU score = 49.06
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [02:00<00:00,  8.30it/s]


Corpus BLEU score = 47.67
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.07it/s]


Corpus BLEU score = 50.21
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.47it/s]


Corpus BLEU score = 50.08
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:37<00:00, 26.68it/s]


Corpus BLEU score = 50.01
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.76it/s]


Corpus BLEU score = 50.22
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.68it/s]


Corpus BLEU score = 50.00
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:37<00:00, 26.96it/s]


Corpus BLEU score = 50.00
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:37<00:00, 26.76it/s]


Corpus BLEU score = 50.14
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.47it/s]


Corpus BLEU score = 49.60
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.67it/s]


Corpus BLEU score = 50.40
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.69it/s]


Corpus BLEU score = 50.11
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.74it/s]


Corpus BLEU score = 49.72
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.67it/s]


Corpus BLEU score = 49.46
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.53it/s]


Corpus BLEU score = 50.00
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:37<00:00, 26.71it/s]


Corpus BLEU score = 49.38
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.47it/s]


Corpus BLEU score = 48.77
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.77it/s]


Corpus BLEU score = 47.66
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.39it/s]


Corpus BLEU score = 50.22
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.60it/s]


Corpus BLEU score = 50.03
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.55it/s]


Corpus BLEU score = 50.15
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.68it/s]


Corpus BLEU score = 50.32
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.67it/s]


Corpus BLEU score = 50.22
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.73it/s]


Corpus BLEU score = 50.38
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:37<00:00, 26.86it/s]


Corpus BLEU score = 49.96
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:37<00:00, 26.60it/s]


Corpus BLEU score = 49.99
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.05it/s]


Corpus BLEU score = 50.08
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.11it/s]


Corpus BLEU score = 50.20
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.25it/s]


Corpus BLEU score = 49.53
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:38<00:00, 26.05it/s]


Corpus BLEU score = 49.08
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:36<00:00, 27.09it/s]


Corpus BLEU score = 50.50
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:38<00:00, 26.28it/s]


Corpus BLEU score = 50.23
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:37<00:00, 26.99it/s]


Corpus BLEU score = 49.88
Saving predictions to outputs/results.json
Load test source sentences from ../multi30k_data/test_2016_flickr.en
Load test target sentences from ../multi30k_data/test_2016_flickr.fr
Load vocab from ../part2/vocab/vocab.json
Loading model
Number of parameters: 8455296 (8.46M)


Generating Predictions: 100%|██████████| 1000/1000 [00:35<00:00, 27.82it/s]


Corpus BLEU score = 47.76
Saving predictions to outputs/results.json


## ChatGPT Prompt:
Write the code for a plot to visualize the above results with their k p t parameters. Use plotly for the plot and don't make it 3d

In [13]:
# Pair each grid point with its score: one record per evaluated configuration.
# zip() stops at the shorter of `grid` and `results`.
data = [
    {"top_k": k, "top_p": p, "temperature": t, "score": score}
    for (k, p, t), score in zip(grid, results)
]

df = pd.DataFrame(data)

# Store temperature as strings (not a pandas categorical dtype) so that it is
# treated as a discrete label when used for facets.
df["temperature"] = df["temperature"].astype(str)

NameError: name 'results' is not defined

In [14]:
# Load the results table `df` from a local pickle file.
# NOTE(review): no cell visible in this notebook writes "save.pkl", so a fresh
# Restart & Run All depends on that file already existing -- confirm its origin.
# Unpickling can execute arbitrary code; only load files from a trusted source.
df = pd.read_pickle("save.pkl")

In [24]:
# One panel per temperature: top-k on the x axis, top-p on the y axis,
# marker colour encodes the score.
fig = px.scatter(
    data_frame=df,
    x="top_k",
    y="top_p",
    color="score",
    facet_col="temperature",
    color_continuous_scale="Viridis",
    labels=dict(
        top_k="Top-K",
        top_p="Top-P",
        temperature="Temperature",
        score="Evaluation Score",
    ),
    title="Hyperparameter Grid Search Results (k, p, t)",
)

# Enlarge the markers and fix the figure size, then render.
fig.update_traces(marker={"size": 12}).update_layout(height=400, width=1000)
fig.show()

In [25]:
# One panel per top-k: top-p on the x axis, temperature on the y axis,
# marker colour encodes the score.
fig = px.scatter(
    data_frame=df,
    x="top_p",
    y="temperature",
    color="score",
    facet_col="top_k",
    color_continuous_scale="Viridis",
    labels=dict(
        top_k="Top-K",
        top_p="Top-P",
        temperature="Temperature",
        score="Evaluation Score",
    ),
    title="Hyperparameter Grid Search Results (k, p, t)",
)

# Enlarge the markers and fix the figure size, then render.
fig.update_traces(marker={"size": 12}).update_layout(height=400, width=1000)
fig.show()

In [26]:
# One panel per top-k: top-p on the x axis, score on the y axis,
# marker colour encodes the temperature.
# NOTE(review): color_continuous_scale only takes effect when the colour column
# is numeric; if `temperature` is stored as strings in the loaded table, plotly
# uses its discrete colour sequence instead -- confirm which case applies.
fig = px.scatter(
    data_frame=df,
    x="top_p",
    y="score",
    color="temperature",
    facet_col="top_k",
    color_continuous_scale="Viridis",
    labels=dict(
        top_k="Top-K",
        top_p="Top-P",
        temperature="Temperature",
        score="Evaluation Score",
    ),
    title="Hyperparameter Grid Search Results (k, p, t)",
)

# Enlarge the markers and fix the figure size, then render.
fig.update_traces(marker={"size": 12}).update_layout(height=400, width=1000)
fig.show()

In [22]:
# Show the configurations ordered from highest to lowest score.
df.sort_values(by="score", ascending=False)

Unnamed: 0,top_k,top_p,temperature,score
60,200,0.8,0.25,50.499699
24,100,0.6,0.25,50.445455
40,150,0.6,0.25,50.398585
19,100,0.2,1.0,50.398502
53,200,0.4,0.5,50.375165
...,...,...,...,...
46,150,0.8,0.75,48.768582
63,200,0.8,1.0,47.761060
15,50,0.8,1.0,47.677308
31,100,0.8,1.0,47.666317
