## Load data helper function

In [None]:
import pickle


def load_loss_values(
    folder_name: str, is_train: bool, base_dir: str = "../loss_values"
) -> list[float]:
    """Load the pickled loss values recorded for one experiment.

    The file read is ``{base_dir}/{folder_name}/train_losses.pkl`` when
    ``is_train`` is true and ``{base_dir}/{folder_name}/val_losses.pkl``
    otherwise.

    Args:
        folder_name: Name of the experiment folder inside ``base_dir``.
        is_train: Selects the ``train`` file when true, the ``val`` file
            otherwise.
        base_dir: Directory that contains the experiment folders. The default
            is relative to the current working directory.

    Returns:
        The object stored in the pickle file, returned unchanged
        (presumably a list of floats, one per evaluation step; the content
        is not validated here).

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    file_prefix = "train" if is_train else "val"
    file_path = f"{base_dir}/{folder_name}/{file_prefix}_losses.pkl"
    # NOTE(review): unpickling can execute arbitrary code, so only point this
    # helper at loss files produced by trusted training runs.
    with open(file_path, "rb") as f:
        return pickle.load(f)

## Comparisons

### 1. Positional encoding

#### 1.1. Load the data

In [5]:
# Load the train and validation losses of every positional-encoding run,
# keyed by a short label. Each pair is (label, folder name passed to
# load_loss_values).
positional_encodings = {
    label: {
        "train": load_loss_values(folder, True),
        "val": load_loss_values(folder, False),
    }
    for label, folder in (
        ("absolute", "absolute_positional_encoding"),
        ("relative", "relative_positional_encoding"),
        ("sinusoidal", "sinusoidal_positional_encoding"),
        ("none", "no_positional_encoding"),
        ("rope", "rope"),
    )
}

#### 1.2. Helper function to plot the data

In [28]:
from typing import Optional

import numpy as np
import plotly.graph_objects as go

from plotly.graph_objs._figure import Figure
from plotly.subplots import make_subplots


def plot_positional_encodings_losses(
    line_configs: list[dict],
    iterations: np.ndarray,
    title: str,
    xaxis_title: str,
    yaxis_title: str,
    width: int = 1200,
    height: int = 700,
    yaxis_range: Optional[list[float]] = None,
    save_figure: bool = False,
    svg_path: str = "plot.svg",
) -> Figure:
    """Draw one loss curve per entry of ``line_configs`` on shared axes.

    Args:
        line_configs: One dict per curve. Each dict must provide the keys
            ``"data"`` (y values), ``"name"`` (legend label), ``"color"``,
            ``"width"``, ``"dash"`` and ``"marker_symbol"``.
        iterations: x values shared by every curve.
        title: Figure title, centred above the plot.
        xaxis_title: Label of the x axis.
        yaxis_title: Label of the y axis.
        width: Figure width passed to the layout.
        height: Figure height passed to the layout.
        yaxis_range: ``[lower, upper]`` bounds of the y axis. ``None`` or an
            empty sequence leaves the range unset so Plotly autoscales.
        save_figure: When true, also export the figure as SVG to ``svg_path``.
        svg_path: Destination of the SVG export; only used when
            ``save_figure`` is true.

    Returns:
        The configured figure. It is not shown by this function.

    Raises:
        KeyError: If a config dict lacks one of the required keys.
    """
    # The figure is created with a secondary y axis available, but every
    # trace below is attached to the primary one.
    figure = make_subplots(specs=[[{"secondary_y": True}]])

    for config in line_configs:
        figure.add_trace(
            go.Scatter(
                x=iterations,
                y=config["data"],
                mode="lines+markers",
                name=config["name"],
                hovertemplate="<b>Iteration</b>: %{x}<br><b>Loss</b>: %{y:.4f}<br>",
                line=dict(
                    color=config["color"], width=config["width"], dash=config["dash"]
                ),
                marker=dict(
                    symbol=config["marker_symbol"],
                    size=8,
                    color=config["color"],
                    line=dict(width=1, color="black"),
                ),
            ),
            secondary_y=False,
        )

    yaxis = dict(
        title=yaxis_title,
        showgrid=True,
        gridcolor="lightgrey",
        zeroline=True,
        zerolinecolor="black",
        zerolinewidth=1,
    )
    # Plotly expects a two-element range, so an absent or empty range is
    # never forwarded; the axis then autoscales to the data.
    if yaxis_range is not None and len(yaxis_range) > 0:
        yaxis["range"] = list(yaxis_range)

    figure.update_layout(
        font=dict(family="Arial", size=14, color="black"),
        title={
            "text": title,
            "y": 0.95,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
            "font": dict(size=18),
        },
        autosize=False,
        width=width,
        height=height,
        margin=dict(l=80, r=50, b=150, t=100, pad=10),
        showlegend=True,
        xaxis=dict(
            title=xaxis_title,
            showgrid=True,
            gridcolor="lightgrey",
            tickmode="linear",
            tick0=0,
            # Tick spacing is a tenth of the number of points, but at least 1.
            dtick=max(1, len(iterations) // 10),
        ),
        yaxis=yaxis,
        legend=dict(
            yanchor="top",
            y=1,
            xanchor="right",
            x=1,
            bgcolor="rgba(255, 255, 255, 0.7)",
            bordercolor="lightgrey",
            borderwidth=1,
        ),
        plot_bgcolor="white",
    )

    if save_figure:
        figure.write_image(svg_path, format="svg")
    return figure

#### 1.3. Plot the training data

In [30]:
# One solid, width-2 curve per positional-encoding variant; the marker symbol
# is the variant's position in the table below (0-4).
line_configs = [
    {
        "name": label,
        "data": positional_encodings[key]["train"],
        "color": color,
        "dash": "solid",
        "width": 2,
        "marker_symbol": symbol,
    }
    for symbol, (label, key, color) in enumerate(
        [
            ("Absolute positional encoding", "absolute", "#1f77b4"),
            ("Sinusoidal positional encoding", "sinusoidal", "#ff7f0e"),
            ("Relative positional encoding", "relative", "#2ca02c"),
            ("No positional encoding", "none", "#d62728"),
            ("Rotary positional encoding (RoPE)", "rope", "#9467bd"),
        ]
    )
]
# The x axis is 1-based and sized from the "none" run.
iterations = np.arange(len(positional_encodings["none"]["train"])) + 1

figure = plot_positional_encodings_losses(
    title="Training losses for different positional encodings",
    xaxis_title="Iterations",
    yaxis_title="Training loss",
    line_configs=line_configs,
    iterations=iterations,
    yaxis_range=[4.5, 7],
    width=1200,
    height=700,
    save_figure=False,
)
figure.show()

#### 1.4. Plot the validation data

In [32]:
# One solid, width-2 curve per positional-encoding variant; the marker symbol
# is the variant's position in the table below (0-4).
line_configs = [
    {
        "name": label,
        "data": positional_encodings[key]["val"],
        "color": color,
        "dash": "solid",
        "width": 2,
        "marker_symbol": symbol,
    }
    for symbol, (label, key, color) in enumerate(
        [
            ("Absolute positional encoding", "absolute", "#1f77b4"),
            ("Sinusoidal positional encoding", "sinusoidal", "#ff7f0e"),
            ("Relative positional encoding", "relative", "#2ca02c"),
            ("No positional encoding", "none", "#d62728"),
            ("Rotary positional encoding (RoPE)", "rope", "#9467bd"),
        ]
    )
]
# The x axis is 1-based and sized from the "none" run.
iterations = np.arange(len(positional_encodings["none"]["val"])) + 1

figure = plot_positional_encodings_losses(
    title="Validation losses for different positional encodings",
    xaxis_title="Iterations",
    yaxis_title="Validation loss",
    line_configs=line_configs,
    iterations=iterations,
    yaxis_range=[4, 6],
    width=1200,
    height=700,
    save_figure=False,
)
figure.show()

#### 1.5. Conclusion

Rotary Positional Encoding (RoPE) and Relative Positional Encoding (RPE) showed strong performance on this benchmark. RoPE slightly outperformed RPE on the validation set, but both significantly outperformed the other approaches.

In contrast, Sinusoidal Positional Encoding and using no Positional Encoding at all yielded the weakest results, with Sinusoidal Positional Encoding performing the worst overall.