In [1]:
import glob
import numpy as np
import pandas as pd
import os
import time
import torch
import torch.nn as nn
from torch import optim
import numpy as np
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, random_split
import matplotlib.pyplot as plt
import yaml
import re
import sys
sys.path.append('..')
from APPLICATION.model.tokenization import PrettyMidiTokenizer, BCI_TOKENS, SILENCE_TOKEN
from APPLICATION.model.model import TCN

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

pd.set_option('display.max_rows',500)
pd.set_option('display.max_columns',504)
pd.set_option('display.width',1000)


# MODEL PARAMETERS
EPOCHS = 500 # 500
LEARNING_RATE = 1 # 4
BATCH_SIZE = 4 # 16
TRAIN_MODEL = True
FEEDBACK = False
EMPHASIZE_EEG = True
EARLY_STOP = True

pwd = os.getcwd()
print(pwd)

DIRECTORY_PATH = ''


cpu
c:\Users\Gianni\Desktop\MARCO\UNI\Magistrale\TESI\Code\TCN


In [2]:
'''
Assumptions:
Sequences described as input_#.mid and output_#.mid in the corresponding folders
'''
DATASET_PATH = os.path.join(DIRECTORY_PATH, 'dataset')

print(DATASET_PATH)

input_filenames = sorted(glob.glob(os.path.join(DATASET_PATH, 'input/*.mid')))
print('Number of input files:', len(input_filenames))

output_filenames = sorted(glob.glob(os.path.join(DATASET_PATH, 'output/*.mid')))
print('Number of output files:', len(output_filenames), '\n')


INPUT_TOK = PrettyMidiTokenizer(eeg = True)
OUTPUT_TOK = PrettyMidiTokenizer()

for i, (in_file, out_file) in enumerate(zip(input_filenames, output_filenames)):

    in_file_name = os.path.basename(in_file)
    out_file_name = os.path.basename(out_file)
    print(f'{i + 1}: {in_file_name} -> {out_file_name}')

    if 'RELAX' in in_file_name:
        emotion_token = BCI_TOKENS['relaxed']
    elif 'EXCITED' in in_file_name:
        emotion_token = BCI_TOKENS['concentrated']
    else:
        raise Exception('Emotion not found in file name. Please add the emotion to the file name.')

    in_seq, in_df = INPUT_TOK.midi_to_tokens(in_file, update_vocab=True, update_sequences=True, emotion_token = emotion_token)
    out_seq, out_df = OUTPUT_TOK.midi_to_tokens(out_file, update_vocab=True, update_sequences=True)

    if i == 0:
        print(in_df)

    print(f'Input sequence length: {len(in_seq)}')
    print(f'Emotion token: {emotion_token}\n')

print(f'\nNumber of input sequences: {len(INPUT_TOK.sequences)}')
print(f'Input sequence length: {len(INPUT_TOK.sequences[0])}')
print(f'Input vocabulars size: {len(INPUT_TOK.VOCAB)}')
print(f'\nNumber of output sequences: {len(OUTPUT_TOK.sequences)}')
print(f'Output sequence length: {len(OUTPUT_TOK.sequences[0])}')
print(f'Output vocabulars size: {len(OUTPUT_TOK.VOCAB)}')

print('\nInput vocab:', INPUT_TOK.VOCAB.word2idx)
print('Output vocab:', OUTPUT_TOK.VOCAB.word2idx)

with open('training_seq.txt', 'w') as f:    
    for seq in INPUT_TOK.sequences:
        for tok in seq[:48]:
            f.write('\"' + INPUT_TOK.VOCAB.idx2word[tok] + '\", ')
        f.write('\n')




dataset
Number of input files: 6
Number of output files: 6 

1: 0_Drum_HardRock_EXCITED.mid -> 0_Bass_HardRock_EXCITED.mid
[Note(start=0.000000, end=0.250000, pitch=36, velocity=127), Note(start=0.208333, end=0.250000, pitch=42, velocity=48), Note(start=0.458333, end=0.750000, pitch=38, velocity=127), Note(start=0.708333, end=0.750000, pitch=42, velocity=54), Note(start=0.958333, end=1.000000, pitch=42, velocity=125), Note(start=0.958333, end=1.250000, pitch=36, velocity=127), Note(start=1.208333, end=1.250000, pitch=42, velocity=59), Note(start=1.458333, end=1.750000, pitch=38, velocity=127), Note(start=1.708333, end=1.750000, pitch=42, velocity=67), Note(start=1.958333, end=2.000000, pitch=42, velocity=127), Note(start=1.958333, end=2.250000, pitch=36, velocity=127), Note(start=2.458333, end=2.500000, pitch=42, velocity=127), Note(start=2.458333, end=2.750000, pitch=38, velocity=127), Note(start=2.708333, end=2.750000, pitch=42, velocity=40), Note(start=2.958333, end=3.000000, pitch=

In [29]:
# Create the dataset
dataset = TensorDataset(torch.LongTensor(INPUT_TOK.sequences).to(device),
                        torch.LongTensor(OUTPUT_TOK.sequences).to(device))

# Split the dataset into training, evaluation and test sets
train_set, eval_set, test_set = random_split(dataset, [0.8, 0.1, 0.1])

In [33]:
# Augment the training set
def data_augmentation_shift(dataset, shifts):
    '''
    Shifts the sequences by a number of ticks to create new sequences.
    '''
    augmented_sequences = []
    labels = []

    for ticks in shifts:
        for sequence, label in dataset:
            sequence = sequence.cpu().numpy().copy()

            # remove the first token since it is the emotion token
            emotion_token = sequence[0]
            sequence = sequence[1:]

            # shift the sequence
            new_sequence = np.roll(sequence, ticks)

            # add the emotion token back to the sequence
            new_sequence = np.concatenate(([emotion_token], new_sequence))

            # add the new sequence to the augmented sequences
            augmented_sequences.append(new_sequence)
            labels.append(label.cpu().numpy().copy())
    
    augmented_dataset = TensorDataset(torch.LongTensor(augmented_sequences).to(device), 
                                      torch.LongTensor(labels).to(device))
    
    # Concatenate the original and the augmented dataset
    concatenated_dataset = torch.utils.data.ConcatDataset([dataset, augmented_dataset])

    return concatenated_dataset


def data_augmentation_transposition(dataset, transpositions):
    '''
    Transpose the sequences by a number of semitones to create new sequences.

    Parameters:
    - transpositions: a list of integers representing the number of semitones to transpose the sequences.

    NB: The transposition is done by adding the number of semitones to the pitch of each note in the sequence.
    '''

    augmented_sequences = []
    labels = []

    for transposition in transpositions:
        for sequence, label in dataset:

            augmented_sequence = sequence.cpu().numpy().copy()
            label = label.cpu().numpy().copy()

            for i in range(len(augmented_sequence)):

                token = augmented_sequence[i]
                word = INPUT_TOK.VOCAB.idx2word[token]

                # check if the token is a note
                if word != SILENCE_TOKEN and word != BCI_TOKENS['relaxed'] and word != BCI_TOKENS['concentrated']:

                    # extract all the pitches from the token 
                    pitches = re.findall('\d+', word) # NB: pitches is a string list

                    # transpose each pitch in the token 
                    for pitch in pitches:
                        new_pitch = str(int(pitch) + transposition)
                        word = word.replace(pitch, new_pitch)

                    # add the new token to the vocabulary
                    INPUT_TOK.VOCAB.add_word(word) 

                    # update the sequence with the new token
                    augmented_sequence[i] = INPUT_TOK.VOCAB.word2idx[word]
            
            # update sequence with the new tokens
            augmented_sequences.append(augmented_sequence)
            labels.append(label)

    augmented_dataset = TensorDataset(torch.LongTensor(augmented_sequences).to(device), 
                                      torch.LongTensor(labels).to(device))
    
    # Concatenate the original and the augmented dataset
    concatenated_dataset = torch.utils.data.ConcatDataset([dataset, augmented_dataset])

    return concatenated_dataset

train_set_augmented = data_augmentation_shift(train_set, [-3, -2, -1, 1, 2, 3])
train_set_augmented = data_augmentation_transposition(train_set_augmented, [3,5])

print(f'Training set size before augmentation: {len(train_set)}')
print(f'Training set size after augmentation: {len(train_set_augmented)}')

print(train_set[0][0])
print(train_set_augmented[137][0])

Training set size before augmentation: 137
Training set size after augmentation: 2877
tensor([ 2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  7,  7,  7,  7,  7,  8,  0,
         0,  0,  0,  0,  3,  4,  4,  4,  4,  4, 13,  0,  0,  0,  0,  0,  6,  7,
         7,  7,  7,  7,  7, 22, 23, 23, 23, 23, 75, 17,  4,  3,  4,  4,  4, 22,
        23, 23, 23, 23, 76, 77, 67, 67, 67, 67, 78, 23,  0,  0,  0,  0,  3,  4,
         4,  4,  4,  4, 28, 22, 23, 23, 23, 23, 26, 11, 12, 12, 12, 12, 19, 20,
         0,  0,  0,  0,  3, 21,  4,  3,  4,  4, 22, 23,  0,  0,  0,  0,  6, 11,
        12, 12, 12, 12, 44, 19,  0,  0,  0,  0,  0,  9,  4,  4,  4,  4,  4, 13,
         0,  0,  0,  0,  0, 10,  7,  7,  7,  7,  7,  8,  0,  0,  0,  0,  3, 21,
        15,  9, 15, 15, 19, 20,  0,  0,  0,  0, 10,  7,  7,  7,  7,  7,  8,  0,
         0,  0,  0,  0,  3,  4,  4,  4,  4,  4, 32,  7,  7,  7,  7,  7,  6,  7,
         7,  7,  7,  7, 31,  4,  4,  4,  4, 28,  0,  0])
tensor([ 2,  0,  0,  0,  0,  0,  0,  6,  7,  7,  7,  7,  

In [None]:
def initialize_dataset():

  # Create the dataloaders
  train_sampler = RandomSampler(train_set_augmented)
  train_dataloader = DataLoader(train_set_augmented, sampler=train_sampler, batch_size=BATCH_SIZE)

  eval_sampler = RandomSampler(eval_set)
  eval_dataloader = DataLoader(eval_set, sampler=eval_sampler, batch_size=BATCH_SIZE)

  test_sampler = RandomSampler(test_set)
  test_dataloader = DataLoader(test_set, sampler=test_sampler, batch_size=BATCH_SIZE)

  return train_dataloader, eval_dataloader, test_dataloader

train_dataloader, eval_dataloader, test_dataloader = initialize_dataset()

print(f'Train set size: {len(train_set)}')
print(f'Evaluation set size: {len(eval_set)}')
print(f'Test set size: {len(test_set)}')

In [7]:
# Set the hyperparameters
SEED = 1111
torch.manual_seed(SEED)

'''
IMPORTANT:
to cover all the sequence of tokens k * d must be >= hidden units (see the paper)
k = kernel_size
d = dilation = 2 ^ (n_levels - 1)
'''

OUTPUT_SIZE = len(OUTPUT_TOK.VOCAB)

if FEEDBACK:
    INPUT_SIZE = len(INPUT_TOK.VOCAB) + OUTPUT_SIZE
    LEVELS = 8
    HIDDEN_UNITS = INPUT_TOK.SEQ_LENGTH * 2 # 192 * 2 = 384
else:
    INPUT_SIZE = len(INPUT_TOK.VOCAB)
    LEVELS = 7
    HIDDEN_UNITS = INPUT_TOK.SEQ_LENGTH # 192


EMBEDDING_SIZE = 20 # size of word embeddings -> Embedding() is used to encode input token into [192, 20] real value vectors (see model.py)
NUM_CHANNELS = [HIDDEN_UNITS] * (LEVELS - 1) + [EMBEDDING_SIZE] # [192, 192, 192, 192, 192, 192, 20]
GRADIENT_CLIP = 0.35


# balance the loss function by assigning a weight to each token related to its frequency
LOSS_WEIGTHS = torch.ones([OUTPUT_SIZE], dtype=torch.float, device = device)
OUTPUT_TOK.VOCAB.compute_weights()
for i, weigth in enumerate(OUTPUT_TOK.VOCAB.weights):
    LOSS_WEIGTHS[i] = 1 - weigth
    print(f'{OUTPUT_TOK.VOCAB.idx2word[i]}: {LOSS_WEIGTHS[i]}')


def initialize_model():
  # create the model
  model = TCN(input_size = INPUT_SIZE,
              embedding_size = EMBEDDING_SIZE,
              output_size = OUTPUT_SIZE,
              num_channels = NUM_CHANNELS,
              emphasize_eeg = EMPHASIZE_EEG,
              dropout = 0.45,
              emb_dropout = 0.25,
              kernel_size = 3,
              tied_weights = False) # tie encoder and decoder weights (legare)

  model.to(device)

  # May use adaptive softmax to speed up training
  criterion = nn.CrossEntropyLoss(weight = LOSS_WEIGTHS)
  optimizer = getattr(optim, 'SGD')(model.parameters(), lr=LEARNING_RATE)

  return model, criterion, optimizer

model, criterion, optimizer = initialize_model()

print(f'\nModel created: {model}')
print(model.encoder.weight[0])


O: 0.7447054982185364
52fS: 0.9925934672355652
52f: 0.9568337202072144
55fS: 0.9973382949829102
55f: 0.9865756034851074
45fS: 0.9981483817100525
45f: 0.9897002577781677
48fS: 0.9946765303611755
48f: 0.9550977945327759
47fS: 0.9942136406898499
47f: 0.9685221910476685
43fS: 0.9954866170883179
43f: 0.9571808576583862
57fS: 0.997569739818573
57f: 0.9818308353424072
50fS: 0.9929406046867371
50f: 0.9333410263061523
40fS: 0.9968753457069397
40f: 0.9849554300308228
52pS: 0.9897002577781677
52p: 0.9525517821311951
48pS: 0.9937507510185242
48p: 0.9606527090072632
43pS: 0.9950237274169922
43p: 0.9653975367546082
47pS: 0.9960652589797974
47p: 0.9798634648323059
67pS: 0.9990741610527039
67p: 0.9974539875984192
50pS: 0.9946765303611755
50p: 0.9538248181343079
40pS: 0.9986112713813782
40p: 0.9906260967254639
55pS: 0.9962967038154602
55p: 0.9863441586494446
59pS: 0.9961810111999512
59p: 0.9812521934509277
62pS: 0.9982640743255615
62p: 0.9930563569068909
60pS: 0.998842716217041
60p: 0.9958338141441345


In [17]:
def save_parameters():

    # plot the losses over the epochs

    plt.plot(train_losses, label='train')
    plt.plot(eval_losses, label='eval')
    plt.legend()
    plt.savefig(os.path.join(RESULTS_PATH, 'losses.png'))
    plt.clf()

    # save the vocabularies
    INPUT_TOK.VOCAB.save(os.path.join(RESULTS_PATH, 'input_vocab.txt'))
    OUTPUT_TOK.VOCAB.save(os.path.join(RESULTS_PATH, 'output_vocab.txt'))

     # save the model hyperparameters in a file txt
    with open(os.path.join(RESULTS_PATH, 'model_hyperparameters.txt'), 'w') as f:

        f.write(f'DATE: {time.strftime("%Y%m%d-%H%M%S")}\n\n')

        f.write(f'-----------------DATASET------------------\n')
        f.write(f'DATASET_PATH: {DATASET_PATH}\n')
        f.write(f'TRAIN_SET_SIZE: {len(train_set)}\n')
        f.write(f'EVAL_SET_SIZE: {len(eval_set)}\n')
        f.write(f'TEST_SET_SIZE: {len(test_set)}\n\n')


        f.write(f'----------OPTIMIZATION PARAMETERS----------\n')
        f.write(f'GRADIENT_CLIP: {GRADIENT_CLIP}\n')
        f.write(f'FEEDBACK: {FEEDBACK}\n')
        f.write(f'EARLY STOPPING: {EARLY_STOP}\n')
        f.write(f'EMPHASIZE_EEG: {EMPHASIZE_EEG}\n')
        f.write(f'LEARNING_RATE: {LEARNING_RATE}\n')
        f.write(f'BATCH_SIZE: {BATCH_SIZE}\n')
        f.write(f'EPOCHS: {EPOCHS}\n\n')


        f.write(f'------------MODEL PARAMETERS--------------\n')
        f.write(f'SEED: {SEED}\n')
        f.write(f'INPUT_SIZE: {INPUT_SIZE}\n')
        f.write(f'EMBEDDING_SIZE: {EMBEDDING_SIZE}\n')
        f.write(f'LEVELS: {LEVELS}\n')
        f.write(f'HIDDEN_UNITS: {HIDDEN_UNITS}\n')
        f.write(f'NUM_CHANNELS: {NUM_CHANNELS}\n')
        f.write(f'OUTPUT_SIZE: {OUTPUT_SIZE}\n')
        f.write(f'LOSS_WEIGTHS: {LOSS_WEIGTHS}\n\n')



        f.write(f'-------------------RESULTS----------------\n')
        f.write(f'TRAIN_LOSSES: {best_train_loss}\n')
        f.write(f'BEST_EVAL_LOSS: {best_eval_loss}\n')
        f.write(f'TEST_LOSS: {test_loss}\n')
        f.write(f'BEST_MODEL_EPOCH: {best_model_epoch}\n')

    data = {
        'DATE': time.strftime("%Y%m%d-%H%M%S"),
        'INPUT_SIZE': INPUT_SIZE,
        'EMBEDDING_SIZE': EMBEDDING_SIZE,
        'NUM_CHANNELS': NUM_CHANNELS,
        'OUTPUT_SIZE': OUTPUT_SIZE,
        'KERNEL_SIZE': 3
    }

    path = os.path.join(RESULTS_PATH, 'config.yaml')
    with open(path, 'w') as file:
        yaml.safe_dump(data, file)

In [18]:
BAR_LENGTH = INPUT_TOK.BAR_LENGTH

def epoch_step(dataloader, mode):

    if FEEDBACK:
        prev_output = torch.zeros([BATCH_SIZE, INPUT_TOK.SEQ_LENGTH], dtype=torch.long, device=device)

    if mode == 'train':
        model.train()
    else:
        model.eval() # disable dropout

    total_loss = 0

    # iterate over the training data
    for batch_idx, (data, targets) in enumerate(dataloader):

        batch_idx += 1

        # mask the last bar of the input data
        batch_size = data.size(0)
        data_masked = torch.cat((data[:, :BAR_LENGTH*3], torch.ones([batch_size, BAR_LENGTH], dtype=torch.long, device = device)), dim = 1)

        if FEEDBACK:
            input = torch.cat((data_masked, prev_output[:batch_size, :]), dim = 1)
        else:
            input = data_masked

        # reset model gradients to zero
        optimizer.zero_grad()

        # make the prediction
        output = model(input)[:, :INPUT_TOK.SEQ_LENGTH]
        prev_output = torch.argmax(output, 2)# batch, seq_len (hidden units), vocab_size

        # flatten the output sequence
        # NB: the size -1 is inferred from other dimensions
        # NB: contiguous() is used to make sure the tensor is stored in a contiguous chunk of memory, necessary for view() to work

        final_target = targets.contiguous().view(-1)
        final_output = output.contiguous().view(-1, OUTPUT_SIZE)

        # calculate the loss
        loss = criterion(final_output, final_target)

        if mode == 'train':
            # calculate the gradients
            loss.backward()

            # clip the gradients to avoid exploding gradients
            if GRADIENT_CLIP > 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), GRADIENT_CLIP)

            # update the weights
            optimizer.step()

        total_loss += loss.data.item()

    return total_loss / len(dataloader)


In [19]:
def train(results_path = None):

    global RESULTS_PATH, MODEL_PATH
    global best_eval_loss, best_train_loss, best_model_epoch, train_losses, eval_losses

    if results_path is None:
        RESULTS_PATH = os.path.join('results', time.strftime("%Y%m%d_%H%M%S"))
    else:
        RESULTS_PATH = results_path
    
    if not os.path.exists(RESULTS_PATH):
        os.makedirs(RESULTS_PATH)

    MODEL_PATH = os.path.join(RESULTS_PATH, 'model_state_dict.pth')

    best_eval_loss = 1e8
    best_train_loss = 1e8
    best_model_epoch = 0
    eval_losses = []
    train_losses = []
    lr = LEARNING_RATE

    for epoch in range(1, EPOCHS+1):

        start_time = time.time()

        train_loss = epoch_step(train_dataloader, 'train')

        eval_loss = epoch_step(eval_dataloader, 'eval')

        # Save the model if the validation loss is the best we've seen so far.
        if eval_loss < best_eval_loss:
            torch.save(model.state_dict(), MODEL_PATH)
            best_eval_loss = eval_loss
            best_model_epoch = epoch

        if train_loss < best_train_loss:
            best_train_loss = train_loss

        # # Anneal the learning rate if the validation loss plateaus
        # if epoch > 5 and eval_loss >= max(eval_losses[-5:]):
        #     lr = lr / 2.
        #     if lr < 0.1:
        #         lr = 2
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr


        eval_losses.append(eval_loss)
        train_losses.append(train_loss)

        # Early stopping
        if EARLY_STOP:
          if epoch > 15:
              if min(eval_losses[-15:]) > best_eval_loss:
                  break

        # print the loss and the progress
        elapsed = time.time() - start_time
        print('| epoch {:3d}/{:3d} | lr {:02.5f} | ms/epoch {:5.5f} | train_loss {:5.2f} | eval_loss {:5.2f}' \
                .format(epoch, EPOCHS, lr, elapsed * 1000, train_loss, eval_loss))


    print('\n\n TRAINING FINISHED:\n\n\tBest Loss: {:5.2f}\tBest Model saved at epoch: {:3d} \n\n' \
            .format(best_eval_loss, best_model_epoch))


    # test the model
    global test_loss
    test_loss = epoch_step(test_dataloader, 'eval')
    print(f'\n\nTEST LOSS: {test_loss}')

    save_parameters()

In [20]:
# MODEL PARAMETERS
TRAIN_MODEL = True

EPOCHS = 500 # 500
LEARNING_RATE = 2 # 4
BATCH_SIZE = 16 # 16
EARLY_STOP = True

FEEDBACK = False
EMPHASIZE_EEG = False
model, criterion, optimizer = initialize_model()
train('results/model')

FEEDBACK = False
EMPHASIZE_EEG = True
model, criterion, optimizer = initialize_model()
train('results/model_EEG')

FEEDBACK = True
EMPHASIZE_EEG = False
model, criterion, optimizer = initialize_model()
train('results/model_feedback')

FEEDBACK = True
EMPHASIZE_EEG = True
model, criterion, optimizer = initialize_model()
train('results/model_EEG_feedback')

# if TRAIN_MODEL:

#   for i in range(2):

#     if i == 0:
#       FEEDBACK = False
#     else:
#       FEEDBACK = True

#     BATCH_SIZE = 4
#     LEARNING_RATE = 1.0
#     model, criterion, optimizer = initialize_model()
#     train()

#     LEARNING_RATE = 2.0
#     model, criterion, optimizer = initialize_model()
#     train()

#     LEARNING_RATE = 4.0
#     model, criterion, optimizer = initialize_model()
#     train()

#     LEARNING_RATE = 1.0
#     BATCH_SIZE = 8
#     train_dataloader, eval_dataloader, test_dataloader = initialize_dataset()
#     model, criterion, optimizer = initialize_model()
#     train()

#     BATCH_SIZE = 16
#     train_dataloader, eval_dataloader, test_dataloader = initialize_dataset()
#     model, criterion, optimizer = initialize_model()
#     train()

#     BATCH_SIZE = 32
#     train_dataloader, eval_dataloader, test_dataloader = initialize_dataset()
#     model, criterion, optimizer = initialize_model()
#     train()

| epoch   1/500 | lr 2.00000 | ms/epoch 2705.51252 | train_loss  3.48 | eval_loss  3.53
| epoch   2/500 | lr 2.00000 | ms/epoch 2687.58941 | train_loss  3.34 | eval_loss  3.28
| epoch   3/500 | lr 2.00000 | ms/epoch 2597.70441 | train_loss  3.15 | eval_loss  3.13
| epoch   4/500 | lr 2.00000 | ms/epoch 2639.49633 | train_loss  3.01 | eval_loss  3.37
| epoch   5/500 | lr 2.00000 | ms/epoch 2740.63635 | train_loss  2.95 | eval_loss  3.10
| epoch   6/500 | lr 2.00000 | ms/epoch 2636.67393 | train_loss  2.86 | eval_loss  2.82
| epoch   7/500 | lr 2.00000 | ms/epoch 2643.39328 | train_loss  2.75 | eval_loss  2.69
| epoch   8/500 | lr 2.00000 | ms/epoch 2591.90536 | train_loss  2.63 | eval_loss  2.79
| epoch   9/500 | lr 2.00000 | ms/epoch 2671.51642 | train_loss  2.54 | eval_loss  2.45
| epoch  10/500 | lr 2.00000 | ms/epoch 2709.18822 | train_loss  2.47 | eval_loss  2.43
| epoch  11/500 | lr 2.00000 | ms/epoch 2692.83223 | train_loss  2.41 | eval_loss  2.26
| epoch  12/500 | lr 2.00000 | m

  return F.conv1d(input, weight, bias, self.stride,


| epoch   1/500 | lr 2.00000 | ms/epoch 2789.24727 | train_loss  3.46 | eval_loss  3.44
| epoch   2/500 | lr 2.00000 | ms/epoch 2625.35381 | train_loss  3.29 | eval_loss  3.40
| epoch   3/500 | lr 2.00000 | ms/epoch 2722.38803 | train_loss  3.15 | eval_loss  3.21
| epoch   4/500 | lr 2.00000 | ms/epoch 2641.48498 | train_loss  3.00 | eval_loss  3.07
| epoch   5/500 | lr 2.00000 | ms/epoch 2584.58400 | train_loss  2.91 | eval_loss  2.83
| epoch   6/500 | lr 2.00000 | ms/epoch 2547.05811 | train_loss  2.80 | eval_loss  2.99
| epoch   7/500 | lr 2.00000 | ms/epoch 4374.59540 | train_loss  2.73 | eval_loss  2.63
| epoch   8/500 | lr 2.00000 | ms/epoch 4579.25010 | train_loss  2.65 | eval_loss  2.55
| epoch   9/500 | lr 2.00000 | ms/epoch 4403.08499 | train_loss  2.58 | eval_loss  2.54
| epoch  10/500 | lr 2.00000 | ms/epoch 4579.23818 | train_loss  2.52 | eval_loss  2.52
| epoch  11/500 | lr 2.00000 | ms/epoch 4430.42421 | train_loss  2.43 | eval_loss  2.29
| epoch  12/500 | lr 2.00000 | m

<Figure size 640x480 with 0 Axes>