In [10]:
import wandb
wandb.login()

True

In [11]:
import os
import yaml
import sys
import time
from copy import deepcopy
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F


In [12]:
with open('/home/quanhhh/Downloads/bo.yml') as f:
  sweep_config = yaml.safe_load(f)

In [13]:
# sweep_id = wandb.sweep(sweep_config, project="rul-project")
sweep_



Create sweep with ID: ey2demty
Sweep URL: https://wandb.ai/longmeow/rul-project/sweeps/ey2demty


# Mục mới

In [14]:
#model.py


class TransformerEncoderLayer(nn.Module):
    def __init__(self, d_model, nhead, dff, dropout):
        super().__init__()
        self.d_model = d_model
        self.nhead = nhead
        self.dff = dff
        self.dropout = dropout
        self.encoderlayer = nn.TransformerEncoderLayer(
            d_model=self.d_model,
            nhead=self.nhead,
            dim_feedforward=self.dff,
            dropout=self.dropout,
            batch_first=True,
        )

    def forward(self, src, src_mask=None, src_key_padding_mask=None):
        out = self.encoderlayer(src)
        return out


class TransformerEncoder(nn.Module):
    def __init__(self, encoder_layer, num_layers):
        super().__init__()
        self.encoder_layer = encoder_layer
        self.num_layer = num_layers
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers)

    def forward(self, src):
        out = self.transformer_encoder(src)
        return out


class TransformerModel(nn.Module):
    def __init__(self, encoder, linear):
        super().__init__()
        self.encoder = encoder
        self.linear = linear

    def forward(self, src):
        out = F.relu(self.linear(self.encoder(src)))
        return out


def create_transformer(d_model, nhead, dff, num_layers, dropout, l_win):
    linear = nn.Sequential(
        nn.Flatten(), nn.Dropout(dropout), nn.Linear(d_model * l_win, 1)
    )
    model = TransformerModel(
        TransformerEncoder(
            TransformerEncoderLayer(d_model, nhead, dff, dropout), num_layers
        ),
        linear,
    )

    for p in model.parameters():
      if p.dim() > 1:
        nn.init.xavier_uniform_(p)
        
    return model


In [15]:
#dataloader.py

class TimeSeriesDataset(Dataset):
    def __init__(self, config, mode):
        super().__init__()
        self.config = config
        self.mode = mode
        self.load_dataset(config)

    def __getitem__(self, idx):
        if self.mode == 'train':
            data = self.data[idx, :, :]
            label = self.label[idx]
            return data, label
        else:
            data = self.data[idx, :, :]
            label = self.label[idx]
            return data, label

    def __len__(self):
        return self.data.shape[0]

    def load_dataset(self, config):
        if self.mode == 'train':
            train_df = pd.read_csv('/home/quanhhh/Documents/RUL_Transformer/preprocessed_data/train_004.csv')

            def gen_sequence(id_df, seq_length, seq_cols):
                data_array = id_df[seq_cols].values
                num_elements = data_array.shape[0]
                for start, stop in zip(range(0, num_elements-seq_length), range(seq_length, num_elements)):
                    yield data_array[start:stop, :]
            
            sensor_cols = [
                "s1",
                "s2",
                "s3",
                "s4",
                "s5",
                "s7",
                "s8",
                "s9",
                "s10",
                "s11",
                "s12",
                "s13",
                "s14",
                "s15",
                "s16",
                "s17",
                "s18",
                "s19",
                "s20",
                "s21",
            ]
            sequence_cols = ["setting1", "setting2", "setting3"]
            sequence_cols.extend(sensor_cols)
            # generator for the sequences
            seq_gen = (list(gen_sequence(train_df[train_df['id']==id], self.config['l_win'], sequence_cols)) 
                      for id in train_df['id'].unique())

            # generate sequences and convert to numpy array
            seq_array = np.concatenate(list(seq_gen)).astype(np.float32)

            # function to generate labels
            def gen_labels(id_df, seq_length, label):
                data_array = id_df[label].values
                num_elements = data_array.shape[0]
                return data_array[seq_length:num_elements, :]

            # generate labels
            label_gen = [gen_labels(train_df[train_df['id']==id], self.config['l_win'], ['RUL']) 
                        for id in train_df['id'].unique()]
            label_array = np.concatenate(label_gen).astype(np.float32)

            self.data = seq_array
            self.label = label_array

        else:
            test_df = pd.read_csv('/home/quanhhh/Documents/RUL_Transformer/preprocessed_data/test_004.csv')

            sensor_cols = [
                "s1",
                "s2",
                "s3",
                "s4",
                "s5",
                "s7",
                "s8",
                "s9",
                "s10",
                "s11",
                "s12",
                "s13",
                "s14",
                "s15",
                "s16",
                "s17",
                "s18",
                "s19",
                "s20",
                "s21",
            ]
            sequence_cols = ["setting1", "setting2", "setting3"]
            sequence_cols.extend(sensor_cols)

            seq_array_test_last = [test_df[test_df['id']==id][sequence_cols].values[-config['l_win']:] 
                                  for id in test_df['id'].unique() if len(test_df[test_df['id']==id]) >= config['l_win']]

            seq_array_test_last = np.asarray(seq_array_test_last).astype(np.float32)


            y_mask = [len(test_df[test_df['id']==id]) >= config['l_win'] for id in test_df['id'].unique()]

            label_array_test_last = test_df.groupby('id')['RUL'].nth(-1)[y_mask].values
            label_array_test_last = label_array_test_last.reshape(label_array_test_last.shape[0],1).astype(np.float32)

            self.data = seq_array_test_last
            self.label = label_array_test_last

In [16]:
#trainer.py

class ModelTrainer():
    def __init__(self, model, train_data, criterion, optimizer, device, config):
        self.model = model
        self.train_data = train_data
        self.device = device
        self.config = config
        self.train_loss_list = list()
        self.min_loss = float('inf')
        self.best_model = None
        self.best_optimizer = None
        self.optimizer = optimizer
        self.criterion = criterion

    def train_epoch(self, epoch):
        train_loss = 0.0
        self.model.train()
        for x, rul in self.train_data:
            self.model.zero_grad()
            out = self.model(x.to(self.device).float())
            loss = torch.sqrt(self.criterion(out.float(), rul.to(self.device).float()))
            loss.backward()
            self.optimizer.step()
            train_loss += loss
            

        train_loss = train_loss / len(self.train_data)
        wandb.log({"train loss": train_loss})
        self.train_loss_list.append(train_loss)

        if train_loss < self.min_loss:
            self.min_loss = train_loss
            self.best_model = deepcopy(self.model.state_dict())
            self.best_optimizer = deepcopy(self.optimizer.state_dict())
            self.best_epoch_in_round = epoch

    def train(self):
        self.model.to(self.device)

        for epoch in range(1, self.config['n_epochs'] + 1):
            self.train_epoch(epoch)
            wandb.log({"epoch": epoch})


        self.config['train_loss_list'] = self.train_loss_list

    def update_config(self):
        return self.config


In [17]:
#train.py
def training():
    with wandb.init(config=sweep_config):
        config = wandb.config

        torch.manual_seed(42)
        start = time.perf_counter()

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        train_data = TimeSeriesDataset(config, mode='train')
        train_loader = DataLoader(train_data,
                                  batch_size=config['batch_size'],
                                  shuffle=True,
                                  num_workers=config['num_workers'])

        model = create_transformer(d_model=config['d_model'],
                                    nhead=config['n_head'],
                                    dff=config['dff'],
                                    num_layers=config['num_layers'],
                                    dropout=config['dropout'],
                                    l_win=config['l_win'])

        optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config['weight_decay'])
        criterion = nn.MSELoss()
        trainer = ModelTrainer(model, train_loader, criterion, optimizer, device, config)

        trainer.train()

        #inference.py

        test_data = TimeSeriesDataset(config, mode='test')
        test_loader = DataLoader(test_data,
                                    batch_size=1,
                                    shuffle=False,
                                    num_workers=config['num_workers'])

        model.to(device)
        test_loss = 0.0
        criterion = nn.MSELoss()
        test_loss_list = list()
        pred_list = list()
        with torch.no_grad():
            for x, rul in test_loader:
                out = model(x.to(device).float())
                loss = torch.sqrt(criterion(out.float(), rul.to(device).float()))
                test_loss += loss
                test_loss_list.append(loss)
                pred_list.append(out.float())

        test_loss_avg = test_loss / len(test_loader)
        truth_list = [rul.float().item() for x, rul in test_loader]
        config['truth_list'] = truth_list
        config['pred_list'] = pred_list
        config['test_loss_avg'] = test_loss_avg
        config['test_loss_list_per_id'] = test_loss_list
        wandb.log({"test_loss_avg": test_loss_avg})
        if test_loss_avg < 14:
            wandb.alert(
                title='longmeow come first with ey2demty',
                text=f'test_loss_avg {test_loss_avg} is below the theshold 14',
            )
            print('Alert triggered')    


In [18]:
!date
wandb.agent(sweep_id, training, count=10000)

Tue Oct 11 08:26:12 PM +07 2022


[34m[1mwandb[0m: Agent Starting Run: 4rtbmdpz with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.10103048881130716
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.011125475497445598
[34m[1mwandb[0m: 	n_epochs: 95
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.013988386067450589


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂█▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,95.0
test_loss_avg,42.84369
train loss,66.2261


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kbjmqj3l with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.09212436205913924
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.010338135019563831
[34m[1mwandb[0m: 	n_epochs: 93
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01778645144494601


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▄▂▂▂▂▂▂▁▁▂▁▁▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,93.0
test_loss_avg,18.98571
train loss,33.95824


[34m[1mwandb[0m: Agent Starting Run: 0ev0v3ry with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.10568006700653387
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.007839306811914979
[34m[1mwandb[0m: 	n_epochs: 69
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.007054852137086005


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,███▇▆▆▆▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,69.0
test_loss_avg,15.49689
train loss,29.65663


[34m[1mwandb[0m: Agent Starting Run: pzlylew5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.07845886510251329
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.016948596728483645
[34m[1mwandb[0m: 	n_epochs: 98
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01231297244710003


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,████▇▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,98.0
test_loss_avg,23.18166
train loss,34.67806


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xikzfxzx with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.06856240346213868
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.012110869466657556
[34m[1mwandb[0m: 	n_epochs: 96
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.006891361109828158


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▁█▇▇▇▇▇▇▇▇▇▇▇▇

0,1
epoch,96.0
test_loss_avg,45.04942
train loss,66.28462


[34m[1mwandb[0m: Agent Starting Run: nbtvl9vk with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.09216613898243184
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.012963297635072092
[34m[1mwandb[0m: 	n_epochs: 93
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01405244426348658


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,93.0
test_loss_avg,47.95015
train loss,65.80538


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zr5i1jtb with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.09985506848409642
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007714285167892083
[34m[1mwandb[0m: 	n_epochs: 75
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014867775594996436


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▆▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,75.0
test_loss_avg,15.38435
train loss,29.87829


[34m[1mwandb[0m: Agent Starting Run: xlknsve3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.13224170092156584
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.009720423945113167
[34m[1mwandb[0m: 	n_epochs: 92
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.0074747048766849


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▃▃▃▂▃▂▂▂▂▃▂▃▂▂▃▃▄▂▃▃▃▂▄▃▃▂▃▃▂▃▃▃▁▃▂▂▂▃

0,1
epoch,92.0
test_loss_avg,44.69027
train loss,66.60779


[34m[1mwandb[0m: Agent Starting Run: iprv6iqd with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.1455261132880047
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.008778764314391058
[34m[1mwandb[0m: 	n_epochs: 82
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.006989954953044188


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,▁▁▁▁▁▁▁▁▆█████████▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,82.0
test_loss_avg,44.89585
train loss,66.02467


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qswvvu8m with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.09605062547880722
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.009707709972942425
[34m[1mwandb[0m: 	n_epochs: 75
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.00850494208897301


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,▁▁█▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,75.0
test_loss_avg,44.8908
train loss,65.89095


[34m[1mwandb[0m: Agent Starting Run: k4nkyc56 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.12080542835506368
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.01350828691153049
[34m[1mwandb[0m: 	n_epochs: 89
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.0099389385173707


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,▄▆█▇▄▄▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,89.0
test_loss_avg,20.88017
train loss,34.65499


[34m[1mwandb[0m: Agent Starting Run: pvgxen8l with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.13127559161834007
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.009855422963587612
[34m[1mwandb[0m: 	n_epochs: 66
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01263265120181765


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,▇▇▇▇▇▇▇▇▇▇▇█▇▆▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,66.0
test_loss_avg,16.01845
train loss,32.8225


[34m[1mwandb[0m: Agent Starting Run: 2q3beszh with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.05835945109235688
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.007813595654223359
[34m[1mwandb[0m: 	n_epochs: 76
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.008761263774122998


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▆▂▂▂▂▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,76.0
test_loss_avg,16.85305
train loss,28.52446


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4pq0syn3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.05899615267357166
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.011356683046655236
[34m[1mwandb[0m: 	n_epochs: 67
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014080988962377604


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,▄▄▄▄▇█████████▄▄▄▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,67.0
test_loss_avg,17.8786
train loss,31.97034


[34m[1mwandb[0m: Agent Starting Run: is1uy5bo with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.1227774704615579
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.00727131176603186
[34m[1mwandb[0m: 	n_epochs: 65
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.009022040495664622


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁██▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,65.0
test_loss_avg,44.41776
train loss,66.1023


[34m[1mwandb[0m: Agent Starting Run: l0yqsru7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.13836548610010987
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.008016093022750036
[34m[1mwandb[0m: 	n_epochs: 82
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01503540296760844


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,82.0
test_loss_avg,19.00154
train loss,31.75364


[34m[1mwandb[0m: Agent Starting Run: wqidb55k with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.057502816057241224
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007213022059497413
[34m[1mwandb[0m: 	n_epochs: 82
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.00786887083767008


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█████████████████▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,82.0
test_loss_avg,14.37671
train loss,27.1487


[34m[1mwandb[0m: Agent Starting Run: 6cr78q0q with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.05945663261433666
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.0176752895604455
[34m[1mwandb[0m: 	n_epochs: 84
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.013148140246308056


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▅▄▄▄▅▃▄▄▄▁▂▂▂▃▃▂▃▅▃▂▄▃▄▂▅▃▁▃▄▂▃▂▂▃▄▂▃▅

0,1
epoch,84.0
test_loss_avg,43.1621
train loss,66.40949


[34m[1mwandb[0m: Agent Starting Run: rbe9h118 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.14572011140336333
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.012304461595369536
[34m[1mwandb[0m: 	n_epochs: 69
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.018098264982135855


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▄▄▃▃▃▃▃▂▂▂▂▃▃▄▂▅▃▂▂▂▃▂▂▄▃▃▃▃▂▁▃▄▅▂▂▃▂▃▄

0,1
epoch,69.0
test_loss_avg,46.41128
train loss,66.05827


[34m[1mwandb[0m: Agent Starting Run: 78u3xbda with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.06862854486184733
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.00721622357239225
[34m[1mwandb[0m: 	n_epochs: 83
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.017654479408542246


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,83.0
test_loss_avg,19.29158
train loss,30.77639


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mcidi1ap with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.1317160405634459
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.012818566120584908
[34m[1mwandb[0m: 	n_epochs: 77
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.016835797040238736


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,77.0
test_loss_avg,46.56389
train loss,66.11669


[34m[1mwandb[0m: Agent Starting Run: ds3kugxv with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.10816509387118448
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.009032470192718687
[34m[1mwandb[0m: 	n_epochs: 67
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014684797980247716


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▆▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,67.0
test_loss_avg,21.09677
train loss,30.59764


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7belz72n with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.13101995259787907
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007846289684724042
[34m[1mwandb[0m: 	n_epochs: 72
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.011913454180900784


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▄▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,72.0
test_loss_avg,17.30502
train loss,29.8144


[34m[1mwandb[0m: Agent Starting Run: y6kaaczy with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.11305067329669284
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.009466043507101752
[34m[1mwandb[0m: 	n_epochs: 63
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014911843668077776


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
test_loss_avg,▁
train loss,██▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,63.0
test_loss_avg,18.96123
train loss,32.28838


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jvfdg14d with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.09886280608639313
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007581180706336779
[34m[1mwandb[0m: 	n_epochs: 70
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.011876201163157164


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,███▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,70.0
test_loss_avg,14.56367
train loss,28.76513


[34m[1mwandb[0m: Agent Starting Run: pk4fd02q with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.07029800379217369
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.008480179126200403
[34m[1mwandb[0m: 	n_epochs: 69
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.008225702956555805


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▆▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,69.0
test_loss_avg,18.06107
train loss,30.15289


[34m[1mwandb[0m: Agent Starting Run: 9s7e7ov1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.06440641989251336
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.006982990433220642
[34m[1mwandb[0m: 	n_epochs: 67
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.013259453135241368


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,67.0
test_loss_avg,16.22618
train loss,30.18279


[34m[1mwandb[0m: Agent Starting Run: wasthqek with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.087676374136647
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007410553666192858
[34m[1mwandb[0m: 	n_epochs: 77
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01629220781333772


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▇▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,77.0
test_loss_avg,16.64274
train loss,30.55338


[34m[1mwandb[0m: Agent Starting Run: mee7rrmo with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.07918453677087933
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.007926530470687775
[34m[1mwandb[0m: 	n_epochs: 66
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.00963651250007067


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,66.0
test_loss_avg,17.23168
train loss,30.74706


[34m[1mwandb[0m: Agent Starting Run: w688rlya with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.05919459137393467
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.006958001403412599
[34m[1mwandb[0m: 	n_epochs: 67
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.010577392109502384


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██▆▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,67.0
test_loss_avg,14.16951
train loss,28.37402


[34m[1mwandb[0m: Agent Starting Run: ppd36qzv with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.05718030340200057
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007504405675357048
[34m[1mwandb[0m: 	n_epochs: 64
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.008457916742789015


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,64.0
test_loss_avg,14.94027
train loss,29.21795


[34m[1mwandb[0m: Agent Starting Run: xec4afgu with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.07610651146607231
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.007247391529624726
[34m[1mwandb[0m: 	n_epochs: 76
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.010559770191676636


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▇▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,76.0
test_loss_avg,17.67624
train loss,30.65846


[34m[1mwandb[0m: Agent Starting Run: gnlfm98c with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.05253021320536677
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.008200877707262917
[34m[1mwandb[0m: 	n_epochs: 69
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.013731012268879673


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,69.0
test_loss_avg,16.51367
train loss,28.34118


[34m[1mwandb[0m: Agent Starting Run: z9yb0dwr with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.06075340037735871
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.010961891613765757
[34m[1mwandb[0m: 	n_epochs: 63
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01339734239308095


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
test_loss_avg,▁
train loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,63.0
test_loss_avg,18.19048
train loss,32.08892


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: eof4xdhs with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.14728542532865585
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.008195435662253258
[34m[1mwandb[0m: 	n_epochs: 77
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.012817321507037543


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,77.0
test_loss_avg,18.51218
train loss,30.82714


[34m[1mwandb[0m: Agent Starting Run: cfs28gq6 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.05790948273467831
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.0069337971076589055
[34m[1mwandb[0m: 	n_epochs: 71
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.010796695253588692


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██████▆▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,71.0
test_loss_avg,16.71841
train loss,28.82207


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mmat4rwx with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.05843943705772624
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.0075005726655277645
[34m[1mwandb[0m: 	n_epochs: 66
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.015503319811168264


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,66.0
test_loss_avg,16.07674
train loss,30.74192


[34m[1mwandb[0m: Agent Starting Run: rcji4hcy with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.0603046259765864
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.0068558668295040925
[34m[1mwandb[0m: 	n_epochs: 68
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.0074703854865560675


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,68.0
test_loss_avg,15.08496
train loss,28.51533


[34m[1mwandb[0m: Agent Starting Run: 7zph7r8c with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.05789714103358688
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007154837066129263
[34m[1mwandb[0m: 	n_epochs: 69
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.007629702256709168


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▃▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,69.0
test_loss_avg,17.99965
train loss,27.43601


[34m[1mwandb[0m: Agent Starting Run: w6b8j8vq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.14486314974236508
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.007035475269246294
[34m[1mwandb[0m: 	n_epochs: 63
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.007057856190850382


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
test_loss_avg,▁
train loss,█▅▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,63.0
test_loss_avg,15.91151
train loss,30.52356


[34m[1mwandb[0m: Agent Starting Run: ydcqxo1f with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.08560263131333934
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.008226700765585704
[34m[1mwandb[0m: 	n_epochs: 78
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.010037812590079633


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,78.0
test_loss_avg,16.72533
train loss,30.22069


[34m[1mwandb[0m: Agent Starting Run: k1bytvz4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.0518046049911311
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007692919584870413
[34m[1mwandb[0m: 	n_epochs: 72
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.012231127614937764


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,72.0
test_loss_avg,15.9841
train loss,27.69139


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qk87txac with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.06596552420080452
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.006856047531657936
[34m[1mwandb[0m: 	n_epochs: 61
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.011012050048862772


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▇▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,61.0
test_loss_avg,18.36443
train loss,31.22084


[34m[1mwandb[0m: Agent Starting Run: p3rgmxwm with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.09225047550839616
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.0071796803135493045
[34m[1mwandb[0m: 	n_epochs: 73
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.008876992995021387


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,73.0
test_loss_avg,15.51343
train loss,29.89466


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cq42gznq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.1038181368960018
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.007761046719601109
[34m[1mwandb[0m: 	n_epochs: 75
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.013063117712577154


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▄▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,75.0
test_loss_avg,15.58972
train loss,30.77905


[34m[1mwandb[0m: Agent Starting Run: iv292ah7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.1023747822297824
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.007985682619758513
[34m[1mwandb[0m: 	n_epochs: 77
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.012943237016249163


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,77.0
test_loss_avg,17.86595
train loss,30.77317


[34m[1mwandb[0m: Agent Starting Run: laq9qj9v with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.06833867016651181
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.007091082614662497
[34m[1mwandb[0m: 	n_epochs: 72
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.00827907486948941


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,72.0
test_loss_avg,17.43745
train loss,29.97658


[34m[1mwandb[0m: Agent Starting Run: j270ctbp with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.1372372729152696
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.010312836913245667
[34m[1mwandb[0m: 	n_epochs: 84
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01234350065824426


[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,84.0
test_loss_avg,20.67636
train loss,32.55338


[34m[1mwandb[0m: Agent Starting Run: v539v4eb with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.06650286310990128
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007085205681197431
[34m[1mwandb[0m: 	n_epochs: 71
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.009208375961490418


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,71.0
test_loss_avg,17.09688
train loss,29.33002


[34m[1mwandb[0m: Agent Starting Run: mhomyafj with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.05113439396702257
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.0068167689670573595
[34m[1mwandb[0m: 	n_epochs: 70
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.006977658651536962


Alert triggered


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,▇█▇▇▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,70.0
test_loss_avg,12.89527
train loss,27.93391


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sf3qit64 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.09025983585937954
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.009586675685751032
[34m[1mwandb[0m: 	n_epochs: 83
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01595532817572243


[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▇▄▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,83.0
test_loss_avg,20.10617
train loss,32.31012


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qewxr06y with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.1214609135845733
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.012170849786519182
[34m[1mwandb[0m: 	n_epochs: 75
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.013217900141790789


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,███▆▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,75.0
test_loss_avg,19.60854
train loss,34.55446


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 027jg73b with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.05052938187672711
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.006866409904154494
[34m[1mwandb[0m: 	n_epochs: 64
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.011473713417060997


[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▇▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,64.0
test_loss_avg,18.4447
train loss,27.72557


[34m[1mwandb[0m: Agent Starting Run: 1wb0cnyq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.06339512584683428
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007531438252282399
[34m[1mwandb[0m: 	n_epochs: 74
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.00878515821039902


[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▆▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,74.0
test_loss_avg,14.44086
train loss,28.58265


[34m[1mwandb[0m: Agent Starting Run: vhxzbeio with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.14548162676284992
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.01009351088897074
[34m[1mwandb[0m: 	n_epochs: 86
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.0154896806856064


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▆▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,86.0
test_loss_avg,19.72188
train loss,33.27084


[34m[1mwandb[0m: Agent Starting Run: 9ermseqr with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.054996912401373055
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.0068146878084606455
[34m[1mwandb[0m: 	n_epochs: 66
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.010010789246610531


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▇▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,66.0
test_loss_avg,16.70896
train loss,28.34887


[34m[1mwandb[0m: Agent Starting Run: v4j8naxn with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.05235851508604397
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.006812410178630224
[34m[1mwandb[0m: 	n_epochs: 69
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.008892446991976251


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,69.0
test_loss_avg,17.30093
train loss,28.13899


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7zd5646g with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.05174138345133261
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007198249435083525
[34m[1mwandb[0m: 	n_epochs: 65
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.008613531561634092


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,██▄▃▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,65.0
test_loss_avg,17.42472
train loss,28.55519


[34m[1mwandb[0m: Agent Starting Run: nmtq39tx with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.051559951233541824
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.006858225570723732
[34m[1mwandb[0m: 	n_epochs: 72
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014119766803553669


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,72.0
test_loss_avg,18.36097
train loss,29.3942


[34m[1mwandb[0m: Agent Starting Run: 1a4kka9l with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.12911092420600906
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.007629646232084642
[34m[1mwandb[0m: 	n_epochs: 83
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.016856538583598233


[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁

0,1
epoch,83.0
test_loss_avg,20.34368
train loss,32.24007


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8senyxbi with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.14786016273228786
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.016327146570077243
[34m[1mwandb[0m: 	n_epochs: 79
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.016299936740421225


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,████▃▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,79.0
test_loss_avg,21.67382
train loss,37.71258


[34m[1mwandb[0m: Agent Starting Run: obof5xx2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.05551902512294931
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.008103055709572938
[34m[1mwandb[0m: 	n_epochs: 73
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01241915307635004


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██▇▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,73.0
test_loss_avg,17.95127
train loss,28.86063


[34m[1mwandb[0m: Agent Starting Run: k583jtke with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.12659521169356214
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.010385145267722294
[34m[1mwandb[0m: 	n_epochs: 81
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.012386395736980374


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▆▃▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,81.0
test_loss_avg,21.34602
train loss,32.92562


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: odv9w9z1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.05749579153405518
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007334135098171
[34m[1mwandb[0m: 	n_epochs: 71
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.007944069374957065


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██▇▆▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,71.0
test_loss_avg,15.74102
train loss,29.39372


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9a5qpi62 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.06085307035740241
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.00872935389626205
[34m[1mwandb[0m: 	n_epochs: 77
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.015049313629582192


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,77.0
test_loss_avg,18.65224
train loss,30.63181


[34m[1mwandb[0m: Agent Starting Run: d0c7f3y7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.12508183048455476
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007468766564557402
[34m[1mwandb[0m: 	n_epochs: 87
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.013882708643050462


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,87.0
test_loss_avg,16.90945
train loss,31.50734


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jfd49itq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.05770778636075633
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.007731307937653572
[34m[1mwandb[0m: 	n_epochs: 68
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.013091678704612178


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██▄▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,68.0
test_loss_avg,15.34201
train loss,28.37421


[34m[1mwandb[0m: Agent Starting Run: l2jw899r with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.07875118969362702
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.006869285724071075
[34m[1mwandb[0m: 	n_epochs: 74
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.010291657524885624


Alert triggered


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▄▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,74.0
test_loss_avg,12.90782
train loss,28.4262


[34m[1mwandb[0m: Agent Starting Run: 50tsnpl9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.13605002393436355
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.009227356417047916
[34m[1mwandb[0m: 	n_epochs: 79
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01075836956358017


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▇▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,79.0
test_loss_avg,20.03505
train loss,32.60411


[34m[1mwandb[0m: Agent Starting Run: 8rsodg8m with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.07680498547759937
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.00712518069362375
[34m[1mwandb[0m: 	n_epochs: 90
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.017412399475351645


[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,90.0
test_loss_avg,17.661
train loss,30.15057


[34m[1mwandb[0m: Agent Starting Run: nrszngzt with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.05147931343362932
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.006800593593332486
[34m[1mwandb[0m: 	n_epochs: 77
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.011765836605406131


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,77.0
test_loss_avg,14.85698
train loss,28.04032


[34m[1mwandb[0m: Agent Starting Run: 8syfnmvu with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.10182215877341688
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.0067735607341762125
[34m[1mwandb[0m: 	n_epochs: 84
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01152469730687278


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,84.0
test_loss_avg,15.28707
train loss,29.94977


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c0ylo07j with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.14932165134463785
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.008213046283483986
[34m[1mwandb[0m: 	n_epochs: 92
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01663912028084143


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,92.0
test_loss_avg,17.43171
train loss,31.61691


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: x86b4jz7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.125056835859746
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.008110461695109372
[34m[1mwandb[0m: 	n_epochs: 88
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.017813869650363308


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,88.0
test_loss_avg,16.65815
train loss,30.51773


[34m[1mwandb[0m: Agent Starting Run: tvzl33s7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.12710090020896675
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007840760878109063
[34m[1mwandb[0m: 	n_epochs: 80
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.018293893881258117


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,80.0
test_loss_avg,17.56607
train loss,31.60453


[34m[1mwandb[0m: Agent Starting Run: yq2dj80w with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.08865935994637966
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.006807989736693833
[34m[1mwandb[0m: 	n_epochs: 64
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.006931903917831005


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,64.0
test_loss_avg,16.76573
train loss,27.8175


[34m[1mwandb[0m: Agent Starting Run: fa9gf9hk with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.1379040666947271
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.00883353806185537
[34m[1mwandb[0m: 	n_epochs: 90
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.0114819068025539


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,▅█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,90.0
test_loss_avg,17.97646
train loss,32.76267


[34m[1mwandb[0m: Agent Starting Run: jvhbkrrt with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.13153731636967952
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.008005822110536883
[34m[1mwandb[0m: 	n_epochs: 86
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.013770165509661888


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,86.0
test_loss_avg,16.86991
train loss,32.23537


[34m[1mwandb[0m: Agent Starting Run: w4vzyqml with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.14445911712142978
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.00909522167454658
[34m[1mwandb[0m: 	n_epochs: 99
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.015448449351435072


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▇▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
test_loss_avg,16.43827
train loss,31.98061


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: u1qh9a2u with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.1280335223037254
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.006830797033960679
[34m[1mwandb[0m: 	n_epochs: 90
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.016758956042616485


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,90.0
test_loss_avg,18.63153
train loss,30.44566


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9m6vkyj2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.11719766059840668
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.008116188109054855
[34m[1mwandb[0m: 	n_epochs: 91
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.012122340820958406


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
test_loss_avg,▁
train loss,█▅▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,91.0
test_loss_avg,18.15936
train loss,30.87224


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zv5qzao9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.12056472736394548
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.010061967843951736
[34m[1mwandb[0m: 	n_epochs: 87
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01297323682022188


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▁▂▁▁▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▂▁▁▁

0,1
epoch,87.0
test_loss_avg,26.65742
train loss,33.18514


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: oj1q9puf with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.14771909211285272
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.01097258036939693
[34m[1mwandb[0m: 	n_epochs: 87
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.015109580326974011


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,87.0
test_loss_avg,18.31115
train loss,33.81263


[34m[1mwandb[0m: Agent Starting Run: hy4pizls with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.11903166114426704
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.007592029052780069
[34m[1mwandb[0m: 	n_epochs: 79
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.0156329807782043


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,79.0
test_loss_avg,19.75203
train loss,30.59836


[34m[1mwandb[0m: Agent Starting Run: pq125a9m with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.13914329857657198
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.00736289950506286
[34m[1mwandb[0m: 	n_epochs: 92
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.011237803870440871


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▆▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,92.0
test_loss_avg,17.81932
train loss,30.54021


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0rqef533 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.1439297565382529
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.009248213394905771
[34m[1mwandb[0m: 	n_epochs: 80
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.016978218127218978


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,80.0
test_loss_avg,18.73574
train loss,32.29803


[34m[1mwandb[0m: Agent Starting Run: ym4o6axe with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.10697538344713596
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.007136958602147801
[34m[1mwandb[0m: 	n_epochs: 98
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.017608614694491743


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▄▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,98.0
test_loss_avg,18.91894
train loss,30.51912


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: co8f2j28 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.13669418829398147
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.008545713002169265
[34m[1mwandb[0m: 	n_epochs: 89
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.013489149467322655


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,89.0
test_loss_avg,17.20826
train loss,31.16121


[34m[1mwandb[0m: Agent Starting Run: 4408ttfs with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.13552735280491446
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007128695113778653
[34m[1mwandb[0m: 	n_epochs: 84
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01128637366898138


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,84.0
test_loss_avg,16.84088
train loss,29.58258


[34m[1mwandb[0m: Agent Starting Run: uhrj7a21 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.1370666974230303
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007417831540124065
[34m[1mwandb[0m: 	n_epochs: 89
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014990909423289612
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,89.0
test_loss_avg,15.94487
train loss,30.82325


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 163egmgr with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.12908255203827887
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.007256864767844138
[34m[1mwandb[0m: 	n_epochs: 85
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.017098534644334223


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,85.0
test_loss_avg,21.22038
train loss,31.46596


[34m[1mwandb[0m: Agent Starting Run: sy90zl71 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.14276283858231698
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.007223368181898595
[34m[1mwandb[0m: 	n_epochs: 91
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.009784150927989508


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
test_loss_avg,▁
train loss,█▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,91.0
test_loss_avg,19.33633
train loss,30.31313


[34m[1mwandb[0m: Agent Starting Run: 6hvlgs8z with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.10753542545993836
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.00724511363688908
[34m[1mwandb[0m: 	n_epochs: 81
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014377438990691872


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,81.0
test_loss_avg,18.54757
train loss,30.58659


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fahxbkqu with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.12656954287633243
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.00709663725913009
[34m[1mwandb[0m: 	n_epochs: 93
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.015276457358230182


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,93.0
test_loss_avg,15.10581
train loss,30.19496


[34m[1mwandb[0m: Agent Starting Run: 732hlhee with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.10725447925530178
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007383880411982073
[34m[1mwandb[0m: 	n_epochs: 68
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.007982055899933454


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,68.0
test_loss_avg,19.09602
train loss,28.76669


[34m[1mwandb[0m: Agent Starting Run: p43ivwup with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.13192941375819234
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.01013893416333841
[34m[1mwandb[0m: 	n_epochs: 84
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.010735537691388313


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▇▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,84.0
test_loss_avg,27.0713
train loss,32.17313


[34m[1mwandb[0m: Agent Starting Run: koet9pkg with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.13961928122839018
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.00848714647876784
[34m[1mwandb[0m: 	n_epochs: 80
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.015582291238818753


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,80.0
test_loss_avg,18.99593
train loss,31.44221


[34m[1mwandb[0m: Agent Starting Run: q3ln897u with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.050140664656617775
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007130323684896949
[34m[1mwandb[0m: 	n_epochs: 62
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.015750933239164416


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,62.0
test_loss_avg,16.51788
train loss,28.40294


[34m[1mwandb[0m: Agent Starting Run: eufzlu9d with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.1366048793324168
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.008675060601402154
[34m[1mwandb[0m: 	n_epochs: 88
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01618346411702794


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,88.0
test_loss_avg,18.37468
train loss,32.45173


[34m[1mwandb[0m: Agent Starting Run: 6jkqkd4i with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.138444816069776
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007354278859233347
[34m[1mwandb[0m: 	n_epochs: 99
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014288946398465672


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▇▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
test_loss_avg,16.59179
train loss,30.68973


[34m[1mwandb[0m: Agent Starting Run: 4mrtwqo2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.09383746257604808
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.011318712552965888
[34m[1mwandb[0m: 	n_epochs: 89
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.015025170855832546


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,89.0
test_loss_avg,20.97755
train loss,33.84179


[34m[1mwandb[0m: Agent Starting Run: alof1uet with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.14914537945405226
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.009006317666643114
[34m[1mwandb[0m: 	n_epochs: 89
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.016048942397897524


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,89.0
test_loss_avg,18.77682
train loss,31.32802


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wc12dtta with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.12967971760618846
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.00746526045945472
[34m[1mwandb[0m: 	n_epochs: 91
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01079624896653354


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
test_loss_avg,▁
train loss,█▅▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,91.0
test_loss_avg,15.06471
train loss,30.5108


[34m[1mwandb[0m: Agent Starting Run: jnotwinf with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.10963122064328108
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.00744262183772354
[34m[1mwandb[0m: 	n_epochs: 67
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01754857634535032


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,67.0
test_loss_avg,14.782
train loss,29.53759


[34m[1mwandb[0m: Agent Starting Run: 33gi109z with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.10952983039031912
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.00931435734325334
[34m[1mwandb[0m: 	n_epochs: 93
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01481269298310169


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,93.0
test_loss_avg,19.27527
train loss,32.60307


[34m[1mwandb[0m: Agent Starting Run: rvsvob7t with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.10700934696796482
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.007782686331864248
[34m[1mwandb[0m: 	n_epochs: 89
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014249231968706809


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,89.0
test_loss_avg,17.07782
train loss,30.73082


[34m[1mwandb[0m: Agent Starting Run: axeoopuw with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.08724887327263971
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.006858143442651876
[34m[1mwandb[0m: 	n_epochs: 99
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.011136835940371709


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
test_loss_avg,16.03382
train loss,29.09769


[34m[1mwandb[0m: Agent Starting Run: tscu27he with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.1443506095862891
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.009076365043109408
[34m[1mwandb[0m: 	n_epochs: 99
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01800014457153849


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,99.0
test_loss_avg,17.72059
train loss,32.69041


[34m[1mwandb[0m: Agent Starting Run: 5ho0jn7m with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.06299656862800813
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.00825605820852455
[34m[1mwandb[0m: 	n_epochs: 92
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.006803581867255203


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,92.0
test_loss_avg,20.20073
train loss,29.64512


[34m[1mwandb[0m: Agent Starting Run: eh5dcx3d with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.14913278860580467
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.010068963485528538
[34m[1mwandb[0m: 	n_epochs: 60
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014923710993609248


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
test_loss_avg,18.47977
train loss,32.47557


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 08a16izi with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.10696102774627196
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.008115511300962124
[34m[1mwandb[0m: 	n_epochs: 92
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.011224485219196795


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,92.0
test_loss_avg,16.0941
train loss,30.85021


[34m[1mwandb[0m: Agent Starting Run: vi7ylil9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.12444794970018531
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.010641960275615687
[34m[1mwandb[0m: 	n_epochs: 100
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014193015704157763


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,100.0
test_loss_avg,21.1645
train loss,33.23277


[34m[1mwandb[0m: Agent Starting Run: wy7unjuk with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.145828786194011
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.00712410129050329
[34m[1mwandb[0m: 	n_epochs: 80
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.010475231332500872


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,80.0
test_loss_avg,16.48215
train loss,29.96483


[34m[1mwandb[0m: Agent Starting Run: vscmv1p7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.14183932604245195
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.006751517695727128
[34m[1mwandb[0m: 	n_epochs: 80
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01181621437093833


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,80.0
test_loss_avg,16.39444
train loss,29.87555


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: iekko4yw with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.12132574290612774
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007695085065332553
[34m[1mwandb[0m: 	n_epochs: 91
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.010375685747479912


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
test_loss_avg,▁
train loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▅▂▁▁▁▂

0,1
epoch,91.0
test_loss_avg,17.15
train loss,32.34944


[34m[1mwandb[0m: Agent Starting Run: llvyja1o with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.081420887932115
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.01236368027254104
[34m[1mwandb[0m: 	n_epochs: 81
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01350563478687421


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,81.0
test_loss_avg,19.94997
train loss,34.20106


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 314u1jqv with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.14426995229962652
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.00984696768850506
[34m[1mwandb[0m: 	n_epochs: 93
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.012270990185822676


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,93.0
test_loss_avg,30.30318
train loss,32.90264


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: biz1j2vl with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.10190733499242588
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007133579392588528
[34m[1mwandb[0m: 	n_epochs: 85
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.007849542759381547


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,85.0
test_loss_avg,16.42685
train loss,29.64372


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: luppy0ss with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.06429467979287123
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.009213336527949789
[34m[1mwandb[0m: 	n_epochs: 62
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.016163653423254654


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,████████████▇▄▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,62.0
test_loss_avg,17.71054
train loss,30.69343


[34m[1mwandb[0m: Agent Starting Run: c0g3oqsz with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.13200314869949714
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.016248449148355368
[34m[1mwandb[0m: 	n_epochs: 84
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01767408502345203


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▆▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,84.0
test_loss_avg,23.25384
train loss,35.98449


[34m[1mwandb[0m: Agent Starting Run: rrcusu5t with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.06254914554029814
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.01165456043743282
[34m[1mwandb[0m: 	n_epochs: 60
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.013267482641209928


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▇▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
test_loss_avg,16.90784
train loss,33.23056


[34m[1mwandb[0m: Agent Starting Run: r5x33sq1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.1304243934357569
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.013911116956365112
[34m[1mwandb[0m: 	n_epochs: 64
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.013635843450663095


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,██▄▃▂▁▁▁▁▁▁▁▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁

0,1
epoch,64.0
test_loss_avg,19.3393
train loss,34.99045


[34m[1mwandb[0m: Agent Starting Run: xso55mdr with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.12800440311732675
[34m[1mwandb[0m: 	l_win: 123
[34m[1mwandb[0m: 	lr: 0.0069327174058129645
[34m[1mwandb[0m: 	n_epochs: 79
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014755497119988735


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,79.0
test_loss_avg,16.43329
train loss,30.90613


[34m[1mwandb[0m: Agent Starting Run: 15cir1ar with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.10772330814396268
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.011033091105461633
[34m[1mwandb[0m: 	n_epochs: 78
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.016159761311797635


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,78.0
test_loss_avg,20.29513
train loss,34.0415


[34m[1mwandb[0m: Agent Starting Run: 5pjfdr0t with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.09335619285982535
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.00705875104576239
[34m[1mwandb[0m: 	n_epochs: 76
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.008067024766595531


[34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▆▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,76.0
test_loss_avg,16.34611
train loss,28.28363


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 187hvnip with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.09855375203936018
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.0067498954811569456
[34m[1mwandb[0m: 	n_epochs: 82
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.014115638355620543


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,82.0
test_loss_avg,15.3422
train loss,29.78478


[34m[1mwandb[0m: Agent Starting Run: nmgkrlcc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.14091645759888766
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.0068766376942713744
[34m[1mwandb[0m: 	n_epochs: 90
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.016789798002549025


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,90.0
test_loss_avg,16.2681
train loss,31.20696


[34m[1mwandb[0m: Agent Starting Run: ty4tju3s with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.12493144445804388
[34m[1mwandb[0m: 	l_win: 120
[34m[1mwandb[0m: 	lr: 0.007604293451394121
[34m[1mwandb[0m: 	n_epochs: 64
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.015357680211254904


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▄▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,64.0
test_loss_avg,18.39503
train loss,31.23905


[34m[1mwandb[0m: Agent Starting Run: g6tmtsyt with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.07179324962554684
[34m[1mwandb[0m: 	l_win: 122
[34m[1mwandb[0m: 	lr: 0.008140249153567904
[34m[1mwandb[0m: 	n_epochs: 77
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.012149036964975017


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_loss_avg,▁
train loss,█▆▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,77.0
test_loss_avg,15.12482
train loss,30.13968


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4a56dqfe with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.07928805653069226
[34m[1mwandb[0m: 	l_win: 121
[34m[1mwandb[0m: 	lr: 0.006883131465551419
[34m[1mwandb[0m: 	n_epochs: 75
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.00952435719548358


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,75.0
test_loss_avg,18.21297
train loss,30.56708


[34m[1mwandb[0m: Agent Starting Run: seupkr6q with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 256
[34m[1mwandb[0m: 	dropout: 0.10901653703783508
[34m[1mwandb[0m: 	l_win: 125
[34m[1mwandb[0m: 	lr: 0.007633291525355519
[34m[1mwandb[0m: 	n_epochs: 60
[34m[1mwandb[0m: 	n_head: 1
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.01727859723493584


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
test_loss_avg,▁
train loss,█▅▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
test_loss_avg,18.11633
train loss,30.11304


[34m[1mwandb[0m: Agent Starting Run: euh2au9q with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	d_model: 23
[34m[1mwandb[0m: 	dff: 128
[34m[1mwandb[0m: 	dropout: 0.08063104636512208
[34m[1mwandb[0m: 	l_win: 124
[34m[1mwandb[0m: 	lr: 0.007784511059591378
[34m[1mwandb[0m: 	n_epochs: 78
[34m[1mwandb[0m: 	n_head: 23
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	num_workers: 2
[34m[1mwandb[0m: 	weight_decay: 0.0108208346314236


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [None]:
!date

In [None]:
#inference.py

test_data = TimeSeriesDataset(config, mode='test')
test_loader = DataLoader(test_data,
                             batch_size=1,
                             shuffle=False,
                             num_workers=config['num_workers'])

model.to(device)
test_loss = 0.0
criterion = nn.MSELoss()
test_loss_list = list()
pred_list = list()
with torch.no_grad():
    for x, rul in test_loader:
        out = model(x.to(device).float())
        loss = criterion(out.float(), rul.to(device).float())
        test_loss += loss
        test_loss_list.append(loss)
        pred_list.append(out.float())

test_loss_avg = test_loss / len(test_loader)

# print('DONE.')
# total = (time.perf_counter() - start) / 60
# print('Inference time: {}'.format(total))
# print('-----Test loss avg-----')
# print(test_loss_avg)
# print('-----Test loss list-----')
# print(test_loss_list, len(test_loss_list))

# #Plot Test loss per id
# plot1 = plt.figure(1)
# plt.plot(test_loss_list)
# plt.xlabel("Machine id")
# plt.ylabel("Test_Loss_Per_id")

# #Plot pred vs  truth
# truth = [rul for x, rul in test_loader]
# plot2 = plt.figure(2)
# plt.plot(pred_list, color = 'red', label='Prediction')
# plt.plot(truth, color='blue', label='Ground Truth')
# plt.xlabel("Machine id")
# plt.ylabel("Remaining Useful Lifetime")
# plt.show()

In [None]:
import yaml
import matplotlib.pyplot as plt

with open('/home/quanhhh/Downloads/fd004.yaml', 'r') as f:
  config = yaml.safe_load(f)

config2 = [ float(i) for i in config['pred_list']['value'] ]

plt_train_loss = plt.figure(1)
plt.plot(config['train_loss_list']['value'])
plt.xlabel("Epochs")
plt.ylabel("Loss")
# plt.savefig("/content/drive/MyDrive/Config/cen_003/loss_cen_003.pdf")

plt_compare = plt.figure(2)
plt.plot(config2, color = 'red', label='Prediction')
plt.plot(config['truth_list']['value'], color='blue', label='Ground Truth')
plt.legend( loc='upper right', borderaxespad=0.5)
plt.xlabel("Machine ID")
plt.ylabel("Remaining Useful Lifetime")
# plt.savefig("/content/drive/MyDrive/Config/cen_003/rul_cen_003.pdf")
plt.show()





In [None]:
import yaml
import matplotlib.pyplot as plt
import numpy as np

down, up = list(), list()
count = 0
with open('/b_rul.txt', 'r') as f:
  text = f.readlines()
  for line in text:
    data = line.split(' ')
    up.append(float(data[0])/8000)
    down.append(float(data[1])/8000)
    count += 1
    if count == 1000:
      break

x = np.arange(len(up))
plt.plot(x, down)
plt.ylabel("Rate (KiB/s)")
plt.xlabel("Time (second)")
title = 'Downlink Bandwidth'
plt.title(title)
# plt.savefig(str(sys.argv[1]).rstrip(".txt") + '-' + 'ram' + '.eps', format='eps', dpi=300)
# plt.savefig(str(sys.argv[1]).rstrip(".txt") + '-' + 'ram' + '.pdf', dpi=300)
plt.show()
plt.savefig("/content/drive/MyDrive/Figs/downlink.pdf")

plt.plot(x, up)
plt.ylabel("Rate (KiB/s)")
plt.xlabel("Time (second)")
title = 'Uplink Bandwidth'
plt.title(title)
# plt.savefig(str(sys.argv[1]).rstrip(".txt") + '-' + 'gpu' + '.eps', format='eps', dpi=300)
# plt.savefig(str(sys.argv[1]).rstrip(".txt") + '-' + 'gpu' + '.pdf', dpi=300)
plt.show()
plt.savefig("/content/drive/MyDrive/Figs/uplink.pdf")



In [None]:
# Plot 
import sys
import statistics
import numpy as np
import matplotlib.pyplot as plt

count = 0
ram, cpu, gpu = list(), list(), list()
with open('/t_rul.txt', 'r') as f:
	text = f.readlines()
	for line in text:
		data = line.split(' ')
		ram.append(data[1])
		cpu.append(data[12])
		gpu.append(data[16])
		count += 1
		if count == 1000:
			break
for step in range(len(ram)):
  ram[step] = float(ram[step][:-7])/3956*100
  cpu[step] = cpu[step][1:-4].split(',')
  if len(gpu[step]) == 7:
    gpu[step] = float(gpu[step][:2])
  elif len(gpu[step]) == 6:
    gpu[step] = float(gpu[step][:1])
  else:
    gpu[step] = float(gpu[step][:1])

  for core in range(4):
    cpu[step][core] = int(cpu[step][core].split('%@')[0])
  cpu[step] = statistics.mean(cpu[step])

#print('\n--------RAM--------\n', ram)
#print('\n--------GPU--------\n', gpu)
#print('\n--------CPU--------\n', cpu)

ram = np.array(ram)
cpu = np.array(cpu)
gpu = np.array(gpu)

x = np.arange(len(ram))
plt.plot(x, ram)
plt.ylabel("RAM Usage (%)")
plt.xlabel("Time (second)")
title = 'Jetson Nano - RAM Usage - Training Phase'
plt.title(title)
# plt.savefig(str(sys.argv[1]).rstrip(".txt") + '-' + 'ram' + '.eps', format='eps', dpi=300)
# plt.savefig(str(sys.argv[1]).rstrip(".txt") + '-' + 'ram' + '.pdf', dpi=300)
plt.show()
plt.savefig("/content/drive/MyDrive/Figs/ram.pdf")



plt.plot(x, gpu)
plt.ylabel("GPU Usage (%)")
plt.xlabel("Time (second)")
title = 'Jetson Nano - GPU Usage - Training Phase'
plt.title(title)
# plt.savefig(str(sys.argv[1]).rstrip(".txt") + '-' + 'gpu' + '.eps', format='eps', dpi=300)
# plt.savefig(str(sys.argv[1]).rstrip(".txt") + '-' + 'gpu' + '.pdf', dpi=300)
plt.show()
plt.savefig("/content/drive/MyDrive/Figs/gpu.pdf")



plt.plot(x, cpu)
plt.ylabel("CPU Usage (%)")
plt.xlabel("Time (second)")
title = 'Jetson Nano - CPU Usage - Training Phase'
plt.title(title)
plt.show()
# plt.savefig(str(sys.argv[1]).rstrip(".txt") + '-' + 'cpu' + '.eps', format='eps', dpi=300)
# plt.savefig(str(sys.argv[1]).rstrip(".txt") + '-' + 'cpu' + '.pdf', dpi=300)
plt.savefig("/content/drive/MyDrive/Figs/cpu.pdf")


    

In [None]:
# Plot
import yaml
import matplotlib.pyplot as plt
import numpy as np

power = list()
count = 0
with open('/t_rul.txt', 'r') as f:
  text = f.readlines()
  for line in text:
    data = line.split(' ')
    power.append(data[27])
    count += 1 
    if count == 1000:
      break

for step in range(len(power)):
  if len(power[step]) == 9:
    power[step] = float(power[step][:4])
  else:
    power[step] = float(power[step][:3])

print(power)
x = np.arange(len(power))

plt.plot(x, power)
plt.ylabel("Power Consumption (mW)")
plt.xlabel("Time (second)")
title = 'Power Consumption - Training Phase'
plt.title(title)
# plt.savefig(str(sys.argv[1]).rstrip(".txt") + '-' + 'ram' + '.eps', format='eps', dpi=300)
# plt.savefig(str(sys.argv[1]).rstrip(".txt") + '-' + 'ram' + '.pdf', dpi=300)
plt.show()
plt.savefig("/content/drive/MyDrive/Figs/power.pdf")
