# 能運作

## 此 install 是用來debug的。install 過後，要重新啟動工作階段才能運行成功

In [None]:
!pip install accelerate==0.23.0

## 使用 wandb 繪製訓練圖

In [19]:
!pip install wandb
%env WANDB_LOG_MODEL=true
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mnrnmnrn[0m. Use [1m`wandb login --relogin`[0m to force relogin


env: WANDB_LOG_MODEL=true


True

## 載入資料

In [None]:
#載入資料
import pandas as pd
import torch
input_20_songs_sequence = ['song_id_13 song_id_16 song_id_17 song_id_6 song_id_17 song_id_2 song_id_17 song_id_14 song_id_13 song_id_2 song_id_11 song_id_18 song_id_13 song_id_12 song_id_16 song_id_1 song_id_6 song_id_15 song_id_10 song_id_16', 'song_id_18 song_id_19 song_id_5 song_id_7 song_id_17 song_id_12 song_id_6 song_id_15 song_id_19 song_id_1 song_id_20 song_id_19 song_id_17 song_id_19 song_id_19 song_id_8 song_id_8 song_id_5 song_id_3 song_id_7']
output_5_songs_sequence = ['song_id_7 song_id_12 song_id_18 song_id_5 song_id_18', 'song_id_6 song_id_19 song_id_8 song_id_15 song_id_14']
data = {'input_20_songs_sequence': input_20_songs_sequence, 'output_5_songs_sequence': output_5_songs_sequence}
df = pd.DataFrame(data)
song_to_int = {f'song_id_{i}': i+1 for i in range(21)}
int_to_song = {i+1: f'song_id_{i+1}' for i in range(21)}
int_to_song[0] = '[PAD]'

## 描述

In [None]:
#使用Huggingface的生成式模型，並訓練模型
#輸入為使用者聽的前20首歌，預測接下來會聽的5首歌
#預測的5首歌都要不同。若預測的5首歌有重複，則視為預測錯誤
#輸入的20首歌中，有可能有重複的歌曲

## 載入模型

In [None]:
#載入模型
from transformers import AutoTokenizer, AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

## 載入訓練資料

In [None]:
#載入訓練資料
from torch.utils.data import Dataset

class SongDataset(Dataset):
    def __init__(self, df, tokenizer, song_to_int):
        self.df = df
        self.tokenizer = tokenizer
        self.song_to_int = song_to_int
        self.max_len = 25

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        input_20_songs_sequence = self.df.iloc[idx]['input_20_songs_sequence']
        output_5_songs_sequence = self.df.iloc[idx]['output_5_songs_sequence']
        input_20_songs_sequence = [self.song_to_int[song] for song in input_20_songs_sequence.split(' ')]
        output_5_songs_sequence = [self.song_to_int[song] for song in output_5_songs_sequence.split(' ')]
        input_20_songs_sequence = [0] + input_20_songs_sequence
        output_5_songs_sequence = [0] + output_5_songs_sequence
        input_20_songs_sequence = input_20_songs_sequence + [0] * (self.max_len - len(input_20_songs_sequence))
        output_5_songs_sequence = output_5_songs_sequence + [0] * (self.max_len - len(output_5_songs_sequence))
        input_20_songs_sequence = torch.tensor(input_20_songs_sequence)
        output_5_songs_sequence = torch.tensor(output_5_songs_sequence)
        return input_20_songs_sequence, output_5_songs_sequence

train_dataset = SongDataset(df, tokenizer, song_to_int)

## 訓練模型


In [1]:
#訓練模型
from transformers import Trainer, TrainingArguments
from transformers import default_data_collator

def my_data_collator(features):
    # Your custom data collation logic here
    input_20_songs_sequence, output_5_songs_sequence = zip(*features)

    # Padding sequences to the maximum length
    input_20_songs_sequence = torch.nn.utils.rnn.pad_sequence(input_20_songs_sequence, batch_first=True, padding_value=0)
    output_5_songs_sequence = torch.nn.utils.rnn.pad_sequence(output_5_songs_sequence, batch_first=True, padding_value=0)

    return {
        'input_ids': input_20_songs_sequence,  # Rename to 'input_ids'
        'labels': output_5_songs_sequence,  # Rename to 'labels'
    }


training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=1,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_steps=500,
    save_total_limit=1,
    evaluation_strategy='steps',
    eval_steps=500,
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss',
    greater_is_better=False,
    report_to="wandb"
)

# Update the Trainer instantiation to include the callback
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=my_data_collator,
)

trainer.train()

ImportError: ignored

## 使用範例

In [None]:
input_20_songs = 'song_id_13 song_id_16 song_id_17 song_id_6 song_id_17 song_id_2 song_id_17 song_id_14 song_id_13 song_id_2 song_id_11 song_id_18 song_id_13 song_id_12 song_id_16 song_id_1 song_id_6 song_id_15 song_id_10 song_id_16' #20首歌
input_20_songs = [song_to_int[song] for song in input_20_songs.split(' ')]
input_20_songs = torch.tensor(input_20_songs)
input_20_songs = input_20_songs.unsqueeze(0)
input_20_songs = input_20_songs.to(model.device)

output_5_songs = model.generate(input_ids=input_20_songs, max_length=25, num_beams=5, no_repeat_ngram_size=2, early_stopping=True)
output_5_songs = output_5_songs.tolist()[0][:5]
output_5_songs = [int_to_song[song] for song in output_5_songs]
print(output_5_songs)

---
# 底下為測試用

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class SongRecommendationModel(nn.Module):
    def __init__(self, input_size, output_size, hidden_size, num_layers=2):
        super(SongRecommendationModel, self).__init__()

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.transformer = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model=hidden_size, nhead=1), num_layers=num_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, src):
        src_embedded = self.embedding(src)
        src_embedded = src_embedded.permute(1, 0, 2)  # Change the sequence length and batch size dimensions
        output = self.transformer(src_embedded)
        output = self.fc(output[-1, :, :])  # Take the output of the last time step
        return output



# 將字符串映射到整數
song_id_mapping = {}
reverse_song_id_mapping = {}
unique_songs = set()

def map_songs(data):
    for row in data:
        for song_id in row:
            if song_id not in unique_songs:
                unique_songs.add(song_id)
                idx = len(unique_songs) - 1
                song_id_mapping[song_id] = idx
                reverse_song_id_mapping[idx] = song_id

input_data = [['6027767fad949f3ca5e772df04924949', '041547bddb0a3e730f32db84c65868ca',
               '041547bddb0a3e730f32db84c65868ca', '041547bddb0a3e730f32db84c65868ca',
               '8b32f88104ecf859be934d9b45f30cd1', 'e4a125e3163e4c1bd40060614c79bd53',
               '8b32f88104ecf859be934d9b45f30cd1', '5ef6718f4517d2d3c316fc45226f41dc',
               'e4a125e3163e4c1bd40060614c79bd53', '041547bddb0a3e730f32db84c65868ca',
               'e7efab54028017e35a35d1b1637e210c', '3f8e8cbe4b5d55f07ba4c7ddfab624b7',
               '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7',
               '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7',
               'a97177f0f37a2bae91d8e67831949392', '6027767fad949f3ca5e772df04924949',
               '6027767fad949f3ca5e772df04924949', '6027767fad949f3ca5e772df04924949'],
              ['883d4ffa999d2f7c8f5293d85112da49', '883d4ffa999d2f7c8f5293d85112da49',
               '883d4ffa999d2f7c8f5293d85112da49', 'ecfed150865a7129690805286222656d',
               'd36c6cf30154e18e6c972704206d6b1e', 'd36c6cf30154e18e6c972704206d6b1e',
               'c7170f4c6488a8f9013f0e4eadf9b385', 'c7170f4c6488a8f9013f0e4eadf9b385',
               '940d87a98fef6e456a3f59ecd7e88f63', '883d4ffa999d2f7c8f5293d85112da49',
               'f6407930f4a8e921df43911dad3847a3', '4917c1184063708092051859415be029',
               '3419b303ba51124a091cde496c6a0c16', 'f57c28ff61e365a82c7a00267d21c96e',
               '0d488acd5aa820a96e84f9488f03e335', '807653562fa6eb36cf75dee0279fb124',
               '33441a5f6fb494f0d0021f2585c91305', 'fb9b6b981cc1996542d5d81d47b459af',
               '65719c6edaa80d0880940c0e20c5e499', '7c4bd89cc6d7c6c91a38d58c2808b1b9']]

map_songs(input_data)

# 將字符串轉換為整數標識符
input_data = [[song_id_mapping[song_id] for song_id in row] for row in input_data]

# 將輸出的字符串轉換為整數標識符
output_data = [['75c2aa348888f982d85e3f870e6ba5b2', '0cab8863e5440551c7b37e59635ec18e',
                '4d5aceee5c9731151ca69f0946ffa71f', '929b07d69451684f4f0f6e3bcc2a62d6',
                '12ae4e616d3e5c7bd53ec771797f596b'],
               ['34f1a786e245f2886ab99b0062de906c', 'd8ec0f80ee6b4457f12e74aa469335d6',
                'd63dbd5214a39f50100c8d59f1c24d6a', 'c1550c264fb083b3acffe619bd02d75e',
                '61a3b37f326394081b95196a5eb676b8']]
map_songs(output_data)

# 將輸出的字符串轉換為整數標識符
output_data = [[song_id_mapping[song_id] for song_id in row] for row in output_data]

output_size = 5
hidden_size = 128
num_layers = 2

# 初始化模型
model = SongRecommendationModel(input_size=len(unique_songs),
                                output_size=output_size,
                                hidden_size=hidden_size,
                                num_layers=num_layers)

# 定義損失函數和優化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 將你提供的input和output轉換成PyTorch的tensor
input_data = torch.tensor(input_data, dtype=torch.long)
output_data = torch.tensor(output_data, dtype=torch.long)

# 訓練模型
num_epochs = 1000
for epoch in range(num_epochs):
    # 正向傳播
    outputs = model(input_data)
    loss = criterion(outputs, output_data.view(-1))  # Flatten the output_data tensor

    # 反向傳播和優化
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# 測試模型
test_input = torch.tensor(input_data, dtype=torch.long)
with torch.no_grad():
    model.eval()
    predicted_output = model(test_input)
    _, predicted_indices = torch.topk(predicted_output, output_size, dim=1)

# 將預測的索引轉換為歌曲ID
predicted_song_ids = [[reverse_song_id_mapping[idx] for idx in row] for row in predicted_indices.numpy()]

# 打印輸出
print("Predicted Song IDs:")
print(predicted_song_ids)


ValueError: ignored

In [None]:
# 使用模型進行預測
input_test = [['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T']]
input_test_indices = torch.tensor([convert_songs_to_indices(songs, {song: i for i, song in enumerate(total_song)}) for songs in input_test])
input_test_indices = input_test_indices.squeeze(0)  # 移除批次維度
output_logits = model(input_test_indices)

# 將預測轉換為歌曲
output_indices = output_logits.argmax(dim=-1).tolist()
output_songs = [total_song[idx] for idx in output_indices]
print(output_songs)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the Transformer model
class TransformerModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(TransformerModel, self).__init__()

        # Assuming the input and output embeddings have the same size
        embed_size = 128
        self.embedding = nn.Embedding(input_size, embed_size)
        self.transformer = nn.Transformer(
            d_model=embed_size,
            nhead=2,
            num_encoder_layers=2,
            num_decoder_layers=2
        )
        self.fc = nn.Linear(embed_size, output_size)

    def forward(self, x):
        x = self.embedding(x)
        x = x.permute(1, 0, 2)  # Change the shape to (seq_len, batch_size, embed_size)
        output = self.transformer(x, x)
        output = self.fc(output[-1, :, :])  # Take the last layer's output for classification
        return output

total_song = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','A2', 'B2', 'C2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2', 'J2', 'K2', 'L2', 'M2', 'N2', 'O2', 'P2', 'Q2', 'R2', 'S2', 'T2','U2','V2','W2','X2','Y2']
# Convert song names to indices
def song_to_index(song_names):
    return [total_song.index(song) for song in total_song]

# Example usage
input_data = [['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T'],['A2', 'B2', 'C2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2', 'J2', 'K2', 'L2', 'M2', 'N2', 'O2', 'P2', 'Q2', 'R2', 'S2', 'T2']]
label_data = [['U', 'V', 'W', 'X', 'Y'], ['U2', 'V2', 'W2', 'X2', 'Y2']]
input_size = 200
output_size = 5

# Create the model
model = TransformerModel(input_size, output_size)

# Convert input and label to indices
input_data = [song_to_index(input_list) for input_list in input_data]
label_data = [song_to_index(label_list) for label_list in label_data]

input_tensor = torch.tensor(input_data).unsqueeze(0)  # Add batch dimension
label_tensor = torch.tensor(label_data).unsqueeze(0)  # Add batch dimension

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 1
for epoch in range(epochs):
    # Forward pass
    output = model(input_tensor)
    loss = criterion(output, label_tensor)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item()}')

# Test the model
model.eval()
with torch.no_grad():
    test_input = torch.tensor(song_to_index(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T'], total_song)).unsqueeze(0)
    output = model(test_input)
    _, predicted = torch.max(output, 1)
    predicted_songs = [total_song[idx] for idx in predicted.tolist()[0]]
    print(f'Predicted songs: {predicted_songs}')




RuntimeError: ignored

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the Transformer model
class TransformerModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(TransformerModel, self).__init__()

        # Assuming the input and output embeddings have the same size
        embed_size = 128
        self.embedding = nn.Embedding(input_size, embed_size)
        self.transformer = nn.Transformer(
            d_model=embed_size,
            nhead=2,
            num_encoder_layers=2,
            num_decoder_layers=2
        )
        self.fc = nn.Linear(embed_size, output_size)

    def forward(self, x):
        x = self.embedding(x)
        x = x.permute(1, 0, 2)  # Change the shape to (seq_len, batch_size, embed_size)
        output = self.transformer(x, x)
        output = self.fc(output[-1, :, :])  # Take the last layer's output for classification
        return output

total_songs = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','A2', 'B2', 'C2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2', 'J2', 'K2', 'L2', 'M2', 'N2', 'O2', 'P2', 'Q2', 'R2', 'S2', 'T2','U2','V2','W2','X2','Y2']
# Convert song names to indices
def song_to_index(song_names, total_songs):
    return [total_songs.index(song) for song in song_names]

# Example usage
input_data = [['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T'],['A2', 'B2', 'C2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2', 'J2', 'K2', 'L2', 'M2', 'N2', 'O2', 'P2', 'Q2', 'R2', 'S2', 'T2']]
label_data = [['U', 'V', 'W', 'X', 'Y'], ['U2', 'V2', 'W2', 'X2', 'Y2']]
input_size = len(total_songs)
output_size = 5

# Create the model
model = TransformerModel(input_size, output_size)

# Convert input and label to indices
input_data = [song_to_index(input_list, total_songs) for input_list in input_data]
label_data = [song_to_index(label_list, total_songs) for label_list in label_data]

input_tensor = torch.tensor(input_data).unsqueeze(0)  # Add batch dimension
label_tensor = torch.tensor(label_data).squeeze()  # Remove the batch dimension from labels

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 10
for epoch in range(epochs):
    # Forward pass
    output = model(input_tensor)
    loss = criterion(output, label_tensor)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 1 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item()}')

# Test the model
model.eval()
with torch.no_grad():
    test_input = torch.tensor(song_to_index(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T'], total_songs)).unsqueeze(0)
    output = model(test_input)
    _, predicted = torch.max(output, 1)
    predicted_songs = [total_songs[idx] for idx in predicted.tolist()]
    print(f'Predicted songs: {predicted_songs}')


RuntimeError: ignored

In [None]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer, BertConfig

# 定義BERT模型
class MyBertModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(MyBertModel, self).__init__()
        self.bert = BertModel(
            BertConfig(
                vocab_size=input_size,
                hidden_size=768,  # You can adjust the hidden size as needed
                num_hidden_layers=12,  # You can adjust the number of layers as needed
                num_attention_heads=12,  # You can adjust the number of attention heads as needed
                intermediate_size=3072,  # You can adjust the intermediate size as needed
            )
        )
        self.fc = nn.Linear(768, output_size)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        logits = self.fc(outputs.pooler_output)
        return logits

# 設定輸入和輸出大小
input_size = 20000  # Adjust based on your vocabulary size
output_size = 5

# 初始化模型
bert_model = MyBertModel(input_size, output_size)

# 初始化tokenizer
total_songs = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','A2', 'B2', 'C2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2', 'J2', 'K2', 'L2', 'M2', 'N2', 'O2', 'P2', 'Q2', 'R2', 'S2', 'T2','U2','V2','W2','X2','Y2']
# Convert song names to indices
def song_to_index(song_names):
    string = ''
    for song in song_names:
      string += total_songs.index(song)
    return [total_songs.index(song) for song in song_names]

# 定義輸入
input_text = ['A B C D E F G H I J K L M N O P Q R S T', 'A2 B2 C2 D2 E2 F2 G2 H2 I2 J2 K2 L2 M2 N2 O2 P2 Q2 R2 S2 T2']
input_ids_list = [song_to_index for text in input_text]

# 定義標籤
label_text = ['U V W X Y', 'U2 V2 W2 X2 Y2']
label_ids_list = [tokenizer.encode(label_text, return_tensors='pt', padding=False, truncation=True, max_length=10) for text in label_text]

# 訓練模型
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(bert_model.parameters(), lr=0.001)

# 訓練迴圈
epochs = 3
for epoch in range(epochs):
    for input_ids, label_ids in zip(input_ids_list, label_ids_list):
        optimizer.zero_grad()
        outputs = bert_model(input_ids, attention_mask=(input_ids != 0))
        loss = criterion(outputs, label_ids.squeeze())
        loss.backward()
        optimizer.step()

# 測試模型
with torch.no_grad():
    for input_ids in input_ids_list:
        output_ids = bert_model(input_ids, attention_mask=(input_ids != 0)).topk(output_size).indices[0].tolist()
        output_songs = [total_song[i] for i in output_ids]

        # 打印結果
        print("Input Songs:", tokenizer.decode(input_ids[0], skip_special_tokens=True).split())
        print("Generated Songs:", output_songs)


AttributeError: ignored

In [None]:
import torch
from transformers import BertModel, BertTokenizer

# 使用BERT模型和标记器
model = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# 定义新的输入形状
max_seq_len = 128
input_ids = torch.randint(0, 1000, (1, max_seq_len))
attention_mask = torch.ones((1, max_seq_len))

# 用新的输入调用model
output = model(input_ids, attention_mask)

# 将模型的输出堆叠成固定的形状
output_shape = (5, 768)
fixed_output = torch.zeros(output_shape)
fixed_output[:, :] = output[0][:, 0, :]  # 将输出的第一个位置的 [CLS] token 的隐藏状态复制到 fixed_output

# 显示 fixed_output 的形状
print(fixed_output.shape)  # 输出：torch.Size([1, 768])


torch.Size([5, 768])


In [None]:
from datetime import datetime, timedelta
import torch
from transformers import BertModel, BertTokenizer

# 使用BERT模型和标记器
model = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# 假设 song_ids 是包含20首歌曲id的列表
song_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

# 将歌曲id转换为字符串，并使用空格连接
song_ids_str = ' '.join(map(str, song_ids))

# 定义新的输入形状
max_seq_len = 128  # 可根据需要调整
input_ids = tokenizer.encode(song_ids_str, add_special_tokens=True, max_length=max_seq_len, pad_to_max_length=True, truncation=True)
attention_mask = torch.ones((1, max_seq_len))

# 用新的输入调用model
output = model(torch.tensor([input_ids]), attention_mask)

# 提取 [CLS] token 的隐藏状态
cls_embedding = output[0][:, 0, :]

# 将隐藏状态转换为字符串
final_output_str = ' '.join(map(str, cls_embedding.tolist()))

# 显示 final_output_str
print(final_output_str)


[-0.3195570111274719, 0.4280405044555664, 0.5585589408874512, 0.4973354935646057, -0.007525980938225985, -0.44585809111595154, 0.2989266812801361, -0.01971552148461342, 0.1744040697813034, -0.14784805476665497, -0.2903769612312317, -0.48642697930336, -0.07700340449810028, 0.4377892315387726, 0.4711739420890808, 1.070137619972229, -0.3097708523273468, 0.5950692296028137, -0.3302118480205536, 0.258063405752182, 0.5387468934059143, 0.3174436092376709, 0.782282829284668, 0.06433650851249695, -0.6479944586753845, 0.02694805897772312, 0.1207866370677948, -0.9755364656448364, -0.49601444602012634, 0.20584125816822052, -0.6337382197380066, 0.3621390759944916, 0.15745748579502106, -0.2028491199016571, 0.0149107426404953, -0.22828947007656097, -0.3661006689071655, -0.10994458943605423, 0.6241545677185059, 0.2743743360042572, 0.1105203926563263, -0.5393616557121277, 0.5848770141601562, -0.05721202492713928, 0.20648832619190216, -0.4382016956806183, -3.2039926052093506, 0.2949097156524658, -0.0121

In [None]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer, BertConfig

# 定義BERT模型
class MyBertModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(MyBertModel, self).__init__()
        self.bert = BertModel(
            BertConfig(
                vocab_size=input_size,
                hidden_size=768,  # You can adjust the hidden size as needed
                num_hidden_layers=12,  # You can adjust the number of layers as needed
                num_attention_heads=12,  # You can adjust the number of attention heads as needed
                intermediate_size=3072,  # You can adjust the intermediate size as needed
            )
        )
        self.fc = nn.Linear(768, output_size)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        logits = self.fc(outputs.pooler_output)
        return logits
# 初始化tokenizer
total_songs = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','A2', 'B2', 'C2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2', 'J2', 'K2', 'L2', 'M2', 'N2', 'O2', 'P2', 'Q2', 'R2', 'S2', 'T2','U2','V2','W2','X2','Y2']
# Convert song names to indices
def song_to_index(song_names):
    string = ''
    for song in song_names:
      string += total_songs.index(song)
    return [total_songs.index(song) for song in song_names]
# 設定輸入和輸出大小
input_size = 20000  # Adjust based on your vocabulary size
output_size = 5

# 初始化模型
bert_model = MyBertModel(input_size, output_size)

# 初始化tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# 定義輸入
input_text = 'A B C D E F G H I J K L M N O P Q R S T'
input_ids = tokenizer.encode(input_text, return_tensors='pt')

# 生成輸出
attention_mask = torch.ones_like(input_ids)  # 簡單使用全1的attention mask
with torch.no_grad():
    output_ids = bert_model(input_ids, attention_mask=attention_mask).topk(output_size).indices[0].tolist()

# 將輸出轉換為歌曲名稱
output_songs = [total_songs[i] for i in output_ids]

# 打印結果
print("Input Songs:", input_text.split())
print("Generated Songs:", output_songs)


Input Songs: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T']
Generated Songs: ['B', 'D', 'E', 'C', 'A']


In [None]:
# 导入必要的库
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.nn.functional import softmax

# 加载预训练的BERT模型和分词器
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')

# 设置设备
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# 定义输入和输出的歌曲数量
num_input_songs = 20
num_output_songs = 5

# 定义输入歌曲id和输出歌曲id的字符串
input_song = 'id_of_song_1 id_of_song_2 id_of_song_3 id_of_song_4 id_of_song_5 id_of_song_6 id_of_song_7 id_of_song_8 id_of_song_9 id_of_song_10 id_of_song_11 id_of_song_12 id_of_song_13 id_of_song_14 id_of_song_15 id_of_song_16 id_of_song_17 id_of_song_18 id_of_song_19 id_of_song_20'

# 将输入歌曲id分词并转换为模型输入
input_tokens = tokenizer.encode_plus(input_song, add_special_tokens=True, return_tensors='pt')
input_tokens = input_tokens.to(device)

# 使用BERT模型进行推断
with torch.no_grad():
    logits = model(**input_tokens).logits

# 对模型输出进行softmax，得到每首歌的概率分布
probs = softmax(logits, dim=1)[0]

# 获取概率最高的5首歌的索引
top5_song_indices = torch.topk(probs, num_output_songs).indices

# 打印输出歌曲id
output_song = ' '.join([f'id_of_song_{i+1}' for i in top5_song_indices])
print("Output Song IDs:", output_song)


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RuntimeError: ignored

In [None]:
input_song = 'id_of_song_1 id_of_song_2 id_of_song_3 id_of_song_4 id_of_song_5 id_of_song_6 id_of_song_7 id_of_song_8 id_of_song_9 id_of_song_10 id_of_song_11 id_of_song_12 id_of_song_13 id_of_song_14 id_of_song_15 id_of_song_16 id_of_song_17 id_of_song_18 id_of_song_19 id_of_song_20'
output_song = 'id_of_song_2 id_of_song_5 id_of_song_20 id_of_song_11 id_of_song_17'

In [None]:
import random
input = []
for j in range(2):
  text = ''
  for i in range(5):
    text += "song_id_"+str(random.randint(1,20))+" "
  input.append(text[:-1])

In [None]:
print(input)

['song_id_7 song_id_12 song_id_18 song_id_5 song_id_18', 'song_id_6 song_id_19 song_id_8 song_id_15 song_id_14']


In [None]:
total_songs = []
for word in input_song.split(" "):
  total_songs.append(word)
print(total_songs)

['id_of_song_1', 'id_of_song_2', 'id_of_song_3', 'id_of_song_4', 'id_of_song_5', 'id_of_song_6', 'id_of_song_7', 'id_of_song_8', 'id_of_song_9', 'id_of_song_10', 'id_of_song_11', 'id_of_song_12', 'id_of_song_13', 'id_of_song_14', 'id_of_song_15', 'id_of_song_16', 'id_of_song_17', 'id_of_song_18', 'id_of_song_19', 'id_of_song_20']


In [None]:
# 生成輸出
with torch.no_grad():
    logits = bert_model(input_ids, attention_mask=attention_mask)
    output_ids = logits.argmax(dim=-1)[0].tolist()

# 將輸出轉換為歌曲名稱，保證包含所有歌曲
output_songs = [total_songs[i] for i in output_ids]

# 打印結果
print("Input Songs:", input_text.split())
print("Generated Songs:", output_songs)


TypeError: ignored

In [None]:
logits

tensor([[ 0.0020,  0.3746, -0.0292,  0.1410,  0.1362]])

In [None]:
outputs

tensor([[ 0.1373,  0.0085, -0.1494, -0.0316,  0.0720]],
       grad_fn=<AddmmBackward0>)

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer

# 定義自定義的數據集
class MyDataset(Dataset):
    def __init__(self, input_text, label_text, tokenizer, max_length=20):
        self.input_text = input_text
        self.label_text = label_text
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.input_text)

    def __getitem__(self, idx):
        input_ids = self.tokenizer.encode(self.input_text[idx], return_tensors='pt', padding='max_length', truncation=True, max_length=self.max_length)
        label_ids = self.tokenizer.encode(self.label_text[idx], return_tensors='pt', padding='max_length', truncation=True, max_length=self.max_length)
        return {'input_ids': input_ids, 'label_ids': label_ids}
# 初始化tokenizer
total_songs = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','A2', 'B2', 'C2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2', 'J2', 'K2', 'L2', 'M2', 'N2', 'O2', 'P2', 'Q2', 'R2', 'S2', 'T2','U2','V2','W2','X2','Y2']
# Convert song names to indices
def song_to_index(song_names):
    return [total_songs.index(song) for song in song_names]

# 設定輸入和輸出大小
input_size = 20000  # Adjust based on your vocabulary size
output_size = 5

# 初始化模型
bert_model = MyBertModel(input_size, output_size)

# 初始化tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# 定義輸入
input_texts = ['A B C D E F G H I J K L M N O P Q R S T ', 'A2 B2 C2 D2 E2 F2 G2 H2 I2 J2 K2 L2 M2 N2 O2 P2 Q2 R2 S2 T2 ']
label_texts = ['U V W X Y ', 'U2 V2 W2 X2 Y2 ']

# 創建數據集和數據載入器
dataset = MyDataset(input_text, label_text, tokenizer)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

# 訓練模型
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(bert_model.parameters(), lr=0.001)

# 訓練迴圈
epochs = 3
for epoch in range(epochs):
    for input_text, label_text in zip(input_texts, label_texts):
        input_ids = tokenizer.encode(input_text, return_tensors='pt')
        label_ids = tokenizer.encode(label_text, return_tensors='pt')

        optimizer.zero_grad()
        outputs = bert_model(input_ids, attention_mask=(input_ids != 0))

        # Assuming label_ids is your modified tensor
        loss = criterion(outputs, label_ids)

        loss.backward()
        optimizer.step()

# 測試模型
with torch.no_grad():
    for batch in dataloader:
        input_ids = batch['input_ids']
        output_ids = bert_model(input_ids, attention_mask=(input_ids != 0)).topk(output_size).indices[0].tolist()
        output_songs = [total_song[i] for i in output_ids]

        # 打印結果
        print("Input Songs:", tokenizer.decode(input_ids[0][0], skip_special_tokens=True).split())
        print("Generated Songs:", output_songs)


ValueError: ignored

In [None]:
loss = criterion(outputs, label_ids)

tensor([[0.0331, 0.4820, 0.1957, 0.4740, 0.3058]], grad_fn=<AddmmBackward0>)

In [None]:
outputs

tensor([[0.0331, 0.4820, 0.1957, 0.4740, 0.3058]], grad_fn=<AddmmBackward0>)

In [None]:
label_ids

tensor([[ 101, 1057, 1058, 1059, 1060, 1061,  102]])

In [None]:
import torch
import torch.nn as nn

# 模型的输出
outputs = torch.tensor([[0.0331, 0.4820, 0.1957, 0.4740, 0.3058]], requires_grad=True)

# 目标标签的类别索引
label_ids = torch.tensor([[101, 1057, 1058, 1059, 1060, 1061, 102]])

# 将模型输出转换为类别索引
_, predicted_labels = outputs.max(dim=1)

# 使用交叉熵损失函数
criterion = nn.CrossEntropyLoss()
loss = criterion(outputs, predicted_labels)

# 打印损失
print(loss.item())


1.439799427986145


In [None]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer, BertConfig

# 定義BERT模型
class MyBertModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(MyBertModel, self).__init__()
        self.bert = BertModel(
            BertConfig(
                vocab_size=input_size,
                hidden_size=768,  # You can adjust the hidden size as needed
                num_hidden_layers=12,  # You can adjust the number of layers as needed
                num_attention_heads=12,  # You can adjust the number of attention heads as needed
                intermediate_size=3072,  # You can adjust the intermediate size as needed
            )
        )
        self.fc = nn.Linear(768, output_size)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        logits = self.fc(outputs.pooler_output)
        return logits

# 初始化tokenizer
total_songs = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','A2', 'B2', 'C2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2', 'J2', 'K2', 'L2', 'M2', 'N2', 'O2', 'P2', 'Q2', 'R2', 'S2', 'T2','U2','V2','W2','X2','Y2']
# Convert song names to indices
def song_to_index(song_names):
    return [total_songs.index(song) for song in song_names]

# 設定輸入和輸出大小
input_size = 20000  # Adjust based on your vocabulary size
output_size = 5

# 初始化模型
bert_model = MyBertModel(input_size, output_size)

# 初始化tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# 定義輸入
input_texts = ['A B C D E F G H I J K L M N O P Q R S T ', 'A2 B2 C2 D2 E2 F2 G2 H2 I2 J2 K2 L2 M2 N2 O2 P2 Q2 R2 S2 T2 ']
label_texts = ['U V W X Y ', 'U2 V2 W2 X2 Y2 ']

# 處理成模型可接受的格式
input_ids_list = [tokenizer.encode(text, return_tensors='pt', padding='max_length', truncation=True, max_length=20) for text in input_texts]
label_ids_list = [tokenizer.encode(text, return_tensors='pt', padding='max_length', truncation=True, max_length=20) for text in label_texts]

# 生成輸出
with torch.no_grad():
    output_ids_list = [bert_model(input_ids, attention_mask=(input_ids != 0)).topk(output_size).indices[0].tolist() for input_ids in input_ids_list]

# 將輸出轉換為歌曲名稱
output_songs_list = [[total_songs[i] for i in output_ids] for output_ids in output_ids_list]

# 打印結果
for input_text, output_songs in zip(input_texts, output_songs_list):
    print("Input Songs:", input_text.split())
    print("Generated Songs:", output_songs)


IndexError: ignored

In [None]:
import torch
import torch.nn as nn
from transformers import BertModel, BertConfig
import torch.optim as optim

class SongRecommendationModel(nn.Module):
    def __init__(self, input_size, output_size, hidden_size):
        super(SongRecommendationModel, self).__init__()

        # 使用BertModel作為基礎模型
        self.bert = BertModel(BertConfig(hidden_size=hidden_size))
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input_ids):
        # 使用BertModel獲取輸入序列的表示
        outputs = self.bert(input_ids)
        pooled_output = outputs.pooler_output

        # 使用全連接層進行預測
        output = self.fc(pooled_output)
        return output.view(-1, output_size)

# 設置模型參數
input_size = 20  # 輸入序列的長度
output_size = 5  # 輸出序列的長度
hidden_size = 768  # 使用Bert模型的預設隱藏維度

# 初始化模型
model = SongRecommendationModel(input_size=input_size, output_size=output_size, hidden_size=hidden_size)

# 定義損失函數和優化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [None]:
input_data_str = [['6027767fad949f3ca5e772df04924949', '041547bddb0a3e730f32db84c65868ca',
               '041547bddb0a3e730f32db84c65868ca', '041547bddb0a3e730f32db84c65868ca',
               '8b32f88104ecf859be934d9b45f30cd1', 'e4a125e3163e4c1bd40060614c79bd53',
               '8b32f88104ecf859be934d9b45f30cd1', '5ef6718f4517d2d3c316fc45226f41dc',
               'e4a125e3163e4c1bd40060614c79bd53', '041547bddb0a3e730f32db84c65868ca',
               'e7efab54028017e35a35d1b1637e210c', '3f8e8cbe4b5d55f07ba4c7ddfab624b7',
               '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7',
               '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7',
               'a97177f0f37a2bae91d8e67831949392', '6027767fad949f3ca5e772df04924949',
               '6027767fad949f3ca5e772df04924949', '6027767fad949f3ca5e772df04924949'],
              ['883d4ffa999d2f7c8f5293d85112da49', '883d4ffa999d2f7c8f5293d85112da49',
               '883d4ffa999d2f7c8f5293d85112da49', 'ecfed150865a7129690805286222656d',
               'd36c6cf30154e18e6c972704206d6b1e', 'd36c6cf30154e18e6c972704206d6b1e',
               'c7170f4c6488a8f9013f0e4eadf9b385', 'c7170f4c6488a8f9013f0e4eadf9b385',
               '940d87a98fef6e456a3f59ecd7e88f63', '883d4ffa999d2f7c8f5293d85112da49',
               'f6407930f4a8e921df43911dad3847a3', '4917c1184063708092051859415be029',
               '3419b303ba51124a091cde496c6a0c16', 'f57c28ff61e365a82c7a00267d21c96e',
               '0d488acd5aa820a96e84f9488f03e335', '807653562fa6eb36cf75dee0279fb124',
               '33441a5f6fb494f0d0021f2585c91305', 'fb9b6b981cc1996542d5d81d47b459af',
               '65719c6edaa80d0880940c0e20c5e499', '7c4bd89cc6d7c6c91a38d58c2808b1b9']]
input_data_int = [[song_id_mapping[song_id] for song_id in row] for row in input_data_str]

# 將整數標識符轉換為PyTorch張量
input_tensor = torch.tensor(input_data_int, dtype=torch.long)

# 使用模型進行預測
with torch.no_grad():
    model.eval()
    predictions = model(input_tensor)

# 輸出預測結果
print("Predictions:")
print(predictions)


Predictions:
tensor([[ 0.1106, -0.0038, -0.1039, -0.0351,  0.1045],
        [ 0.1106, -0.0038, -0.1039, -0.0351,  0.1045]])


# Seq2Se2

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
class Seq2SeqEncoder(nn.Module):
    def __init__(self,embedding_dim,hidden_size,source_vocab_size):
        super(Seq2SeqEncoder,self).__init__()

        self.lstm_layer = nn.LSTM(input_size=embedding_dim,
                                 hidden_size=hidden_size,
                                 batch_first=True)
        self.embedding_table = torch.nn.Embedding(source_vocab_size,embedding_dim)

    def forward(self,input_ids):
        # 这里的ids是多个id，所以会是三维的
        input_sequence = self.embedding_table(input_ids) # 3d tensor batch*source_length*embedding_dim
        output_states,(final_h,final_c) = self.lstm_layer(input_sequence)

        return output_states,final_h


In [None]:
class Seq2SeqAttentionMechanism(nn.Module):
    def __init__(self):
        super(Seq2SeqAttentionMechanism,self).__init__()

    # 单步执行
    def forward(self,decoder_state_t,encoder_states):
        bs,source_length,hidden_size = encoder_states.shape

        # decoder_state是二维 batch*hidden，需要扩维
        decoder_state_t = decoder_state_t.unsqueeze(1)
        decoder_state_t =  torch.tile(decoder_state_t,(1,source_length,1))

        score = torch.sum(decoder_state_t * encoder_states,dim=-1) # bs*source_length

        attn_prob = F.softmax(score,dim=-1) # bs*source_length

        context = torch.sum(attn_prob.unsqueeze(-1)*encoder_states,1) # bs*hidden_size

        return attn_prob,context


In [None]:
class Seq2SeqDecoder(nn.Module):
    def __init__(self,embedding_dim,hidden_size,num_classes,target_vocab_size,start_id,end_id):
        super(Seq2SeqDecoder,self).__init__()

        # cell就是单步执行
        self.lstm_cell = torch.nn.LSTMCell(embedding_dim,hidden_size)
        self.proj_layer = nn.Linear(hidden_size*2,num_classes)
        self.attention_mechanism = Seq2SeqAttentionMechanism()
        self.num_classes = num_classes
        self.embedding_table = torch.nn.Embedding(target_vocab_size,embedding_dim)
        # 偏移id
        self.start_id = start_id
        self.end_id = end_id

    # 训练用
    def forward(self, shifted_target_ids, encoder_states):
        shifted_target = self.embedding_table(shifted_target_ids)

        bs, target_length, embedding_dim = shifted_target.shape
        bs, source_length, hidden_size = encoder_states.shape

        logits = torch.zeros(bs, target_length, self.num_classes)
        probs = torch.zeros(bs, target_length, source_length)

        for t in range(target_length):
            decoder_input_t = shifted_target[:, t, :]
            if t == 0:
                h_t, c_t = self.lstm_cell(decoder_input_t)
            else:
                h_t, c_t = self.lstm_cell(decoder_input_t, (h_t, c_t))

            attn_prob, context = self.attention_mechanism(h_t, encoder_states)

            decoder_output = torch.cat((context, h_t), -1)
            logits[:, t, :] = self.proj_layer(decoder_output)
            probs[:, t, :] = attn_prob

        return probs, logits

    def inference(self, encoder_states, num_samples=5, max_sequence_length=5):
      # 推理階段
      result = []

      for _ in range(num_samples):
          target_id = torch.tensor([self.start_id])  # 使用tensor包裝起始id
          h_t = None
          sample_result = []

          for _ in range(max_sequence_length):  # 控制生成的序列長度
              decoder_input_t = self.embedding_table(target_id)
              if h_t is None:
                  h_t, c_t = self.lstm_cell(decoder_input_t)
              else:
                  h_t, c_t = self.lstm_cell(decoder_input_t, (h_t, c_t))

              attn_prob, context = self.attention_mechanism(h_t, encoder_states)

              decoder_output = torch.cat((context, h_t), -1)
              logits = self.proj_layer(decoder_output)

              # 上一刻預測的，作為下一時刻的輸入
              # 基於機率選擇，而不是使用argmax
              prob_dist = F.softmax(logits, dim=-1)
              target_id = torch.multinomial(prob_dist, 1).squeeze(1)

              sample_result.append(vocabulary.index2word[target_id.item()])  # 使用item()獲取Python值

              if torch.any(target_id == self.end_id):
                  print('stop decoding')
                  break

          result.append(sample_result)

      return result


In [None]:
class Model(nn.Module):
    def __init__(self,embedding_dim,hidden_size,num_classes,
                source_vocab_size,target_vocab_size,start_id,end_id):
        super(Model,self).__init__()

        self.encoder = Seq2SeqEncoder(embedding_dim,hidden_size,source_vocab_size)

        self.decoder = Seq2SeqDecoder(embedding_dim,hidden_size,num_classes,
                                     target_vocab_size,start_id,end_id)

    def forward(self,inut_sequence_ids,shifted_target_ids):

        encoder_states,final_h = self.encoder(input_sequence_ids)

        probs,logits = self.decoder(shifted_target_ids,encoder_states)

        return probs,logits
    def ifer(self):
        pass


In [None]:
class Vocabulary:
    def __init__(self):
        self.word2index = {}
        self.index2word = {}
        self.next_index = 0

    def add_word(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.next_index
            self.index2word[self.next_index] = word
            self.next_index += 1

    def sequence_to_indices(self, sequence):
        return [self.word2index[word] for word in sequence]

    def indices_to_sequence(self, indices):
        return [self.index2word[index] for index in indices]

In [None]:
A = [['6027767fad949f3ca5e772df04924949','041547bddb0a3e730f32db84c65868ca',
  '041547bddb0a3e730f32db84c65868ca', '041547bddb0a3e730f32db84c65868ca',
  '8b32f88104ecf859be934d9b45f30cd1', 'e4a125e3163e4c1bd40060614c79bd53',
  '8b32f88104ecf859be934d9b45f30cd1', '5ef6718f4517d2d3c316fc45226f41dc',
  'e4a125e3163e4c1bd40060614c79bd53', '041547bddb0a3e730f32db84c65868ca',
  'e7efab54028017e35a35d1b1637e210c', '3f8e8cbe4b5d55f07ba4c7ddfab624b7',
  '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7',
  '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7',
  'a97177f0f37a2bae91d8e67831949392', '6027767fad949f3ca5e772df04924949',
  '6027767fad949f3ca5e772df04924949', '6027767fad949f3ca5e772df04924949'],['6027767fad949f3ca5e772df04924949','041547bddb0a3e730f32db84c65868ca',
  '041547bddb0a3e730f32db84c65868ca', '041547bddb0a3e730f32db84c65868ca',
  '8b32f88104ecf859be934d9b45f30cd1', 'e4a125e3163e4c1bd40060614c79bd53',
  '8b32f88104ecf859be934d9b45f30cd1', '5ef6718f4517d2d3c316fc45226f41dc',
  'e4a125e3163e4c1bd40060614c79bd53', '041547bddb0a3e730f32db84c65868ca',
  'e7efab54028017e35a35d1b1637e210c', '3f8e8cbe4b5d55f07ba4c7ddfab624b7',
  '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7',
  '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7',
  'a97177f0f37a2bae91d8e67831949392', '6027767fad949f3ca5e772df04924949',
  '6027767fad949f3ca5e772df04924949', '6027767fad949f3ca5e772df04924949']]

B = [['75c2aa348888f982d85e3f870e6ba5b2', '0cab8863e5440551c7b37e59635ec18e',
  '4d5aceee5c9731151ca69f0946ffa71f', '929b07d69451684f4f0f6e3bcc2a62d6',
  '12ae4e616d3e5c7bd53ec771797f596b'],['75c2aa348888f982d85e3f870e6ba5b2', '0cab8863e5440551c7b37e59635ec18e',
  '4d5aceee5c9731151ca69f0946ffa71f', '929b07d69451684f4f0f6e3bcc2a62d6',
  '12ae4e616d3e5c7bd53ec771797f596b']]

# 示例用法
vocabulary = Vocabulary()

# 建立词汇表
for word_list in A:
  for word in word_list:
    vocabulary.add_word(word)
for word_list in B:
  for word in word_list:
    vocabulary.add_word(word)

In [None]:
input_sequence_indices = []
for word_list in A:
  a = vocabulary.sequence_to_indices(word_list)
  input_sequence_indices.append(a)

target_sequence_indices = []
for word_list in B:
  b = vocabulary.sequence_to_indices(word_list)
  target_sequence_indices.append(b)

# 转换为 PyTorch tensors
input_tensor = torch.tensor(input_sequence_indices).to(torch.int32)
target_tensor = torch.tensor(target_sequence_indices).to(torch.int32)

target_ids = torch.cat((target_tensor,end_id*torch.ones(bs,1)),dim=1).to(torch.int32)

RuntimeError: ignored

In [None]:
import torch

# 定义模型和数据
source_length = 20
embedding_dim = 8
hidden_size = 16
num_classes = 10
bs = 1  # 这里设置为1，因为推断是针对单个序列的
start_id = end_id = 0
source_vocab_size = 100
target_vocab_size = 100

input_sequence_ids = torch.randint(source_vocab_size, size=(bs, source_length)).to(torch.int32)

# 创建模型
model = Model(embedding_dim, hidden_size, num_classes, source_vocab_size, target_vocab_size, start_id, end_id)

# 获取encoder_states，这将用于解码器的推断
encoder_states, final_h = model.encoder(input_sequence_ids)

# 使用推断方法生成序列
predicted_sequences  = model.decoder.inference(encoder_states)

# 打印生成的序列
(predicted_sequences)


stop decoding
stop decoding
stop decoding


[['5ef6718f4517d2d3c316fc45226f41dc',
  'e4a125e3163e4c1bd40060614c79bd53',
  '6027767fad949f3ca5e772df04924949'],
 ['75c2aa348888f982d85e3f870e6ba5b2', '6027767fad949f3ca5e772df04924949'],
 ['a97177f0f37a2bae91d8e67831949392',
  '0cab8863e5440551c7b37e59635ec18e',
  '5ef6718f4517d2d3c316fc45226f41dc',
  '6027767fad949f3ca5e772df04924949'],
 ['e4a125e3163e4c1bd40060614c79bd53',
  'e4a125e3163e4c1bd40060614c79bd53',
  '75c2aa348888f982d85e3f870e6ba5b2',
  '0cab8863e5440551c7b37e59635ec18e',
  'a97177f0f37a2bae91d8e67831949392'],
 ['041547bddb0a3e730f32db84c65868ca',
  '75c2aa348888f982d85e3f870e6ba5b2',
  'e7efab54028017e35a35d1b1637e210c',
  'e7efab54028017e35a35d1b1637e210c',
  '3f8e8cbe4b5d55f07ba4c7ddfab624b7']]

In [None]:
source_length = 20
target_length = 5
embedding_dim = 8
hidden_size = 16
num_classes = 10
bs = 2
start_id = end_id = 0
source_vocab_size = 100
target_vocab_size = 100

input_sequence_ids = torch.randint(source_vocab_size,size=(bs,source_length)).to(torch.int32)

target_ids = torch.randint(target_vocab_size,size=(bs,target_length))
target_ids = torch.cat((target_ids,end_id*torch.ones(bs,1)),dim=1).to(torch.int32)

shifted_target_ids = torch.cat((start_id*torch.ones(bs,1),target_ids[:,1:]),dim=1).to(torch.int32)

model = Model(embedding_dim,hidden_size,num_classes,source_vocab_size,target_vocab_size,start_id,end_id)
probs,logits = model(input_sequence_ids,shifted_target_ids)

In [None]:
probs

tensor([[[0.0487, 0.0471, 0.0486, 0.0494, 0.0499, 0.0486, 0.0494, 0.0497,
          0.0496, 0.0496, 0.0515, 0.0484, 0.0495, 0.0497, 0.0501, 0.0522,
          0.0517, 0.0526, 0.0512, 0.0524],
         [0.0448, 0.0450, 0.0489, 0.0531, 0.0518, 0.0479, 0.0507, 0.0502,
          0.0483, 0.0481, 0.0534, 0.0483, 0.0514, 0.0512, 0.0527, 0.0520,
          0.0508, 0.0514, 0.0496, 0.0506],
         [0.0434, 0.0438, 0.0475, 0.0533, 0.0475, 0.0468, 0.0511, 0.0494,
          0.0485, 0.0482, 0.0537, 0.0490, 0.0525, 0.0512, 0.0524, 0.0533,
          0.0491, 0.0543, 0.0510, 0.0539],
         [0.0464, 0.0461, 0.0466, 0.0499, 0.0473, 0.0480, 0.0491, 0.0507,
          0.0470, 0.0490, 0.0526, 0.0500, 0.0511, 0.0503, 0.0479, 0.0522,
          0.0528, 0.0552, 0.0530, 0.0550],
         [0.0498, 0.0482, 0.0501, 0.0529, 0.0583, 0.0520, 0.0520, 0.0508,
          0.0448, 0.0476, 0.0527, 0.0467, 0.0517, 0.0503, 0.0497, 0.0481,
          0.0538, 0.0482, 0.0467, 0.0457],
         [0.0488, 0.0469, 0.0487, 0.0505, 0.0

In [None]:
shifted_target_ids

tensor([[ 0, 29, 68, 33,  0],
        [ 0, 43, 14, 83,  0]], dtype=torch.int32)

In [None]:
# Inference
with torch.no_grad():
    # Generate predictions using the inference method
    predicted_ids = model.decoder.inference(model.encoder(input_sequence_ids)[0])

    # Print the predicted sequence
    print("Predicted Sequence:")
    for i in range(bs):
        predicted_sequence = [int(predicted_ids[t, i].item()) for t in range(predicted_ids.size(0))]
        print(predicted_sequence)

Context size: torch.Size([2, 16])
h_t_repeated size: torch.Size([2, 16])
tensor([[-0.0400,  0.1259, -0.0308,  0.1001, -0.0791,  0.0943,  0.0840, -0.0201,
         -0.0472, -0.0126, -0.0988, -0.0012,  0.1006, -0.0197, -0.0052, -0.0032,
          0.0161, -0.0260,  0.1349,  0.1316,  0.0119,  0.0291,  0.0690,  0.0301,
         -0.1018, -0.1098,  0.0258, -0.0464,  0.0350,  0.0136, -0.0012, -0.0703],
        [-0.0669,  0.1080,  0.0431,  0.0950, -0.0271,  0.1027, -0.0144, -0.0497,
          0.0032,  0.0203, -0.0224, -0.0158,  0.1251,  0.0314, -0.0392, -0.0872,
          0.0161, -0.0260,  0.1349,  0.1316,  0.0119,  0.0291,  0.0690,  0.0301,
         -0.1018, -0.1098,  0.0258, -0.0464,  0.0350,  0.0136, -0.0012, -0.0703]])


  target_id = torch.tensor(target_id, dtype=torch.long)


RuntimeError: ignored

In [None]:
model.encoder(input_sequence_ids)[0]

tensor([[[-1.4693e-01,  7.9924e-02, -8.0531e-02, -2.7915e-02,  2.7785e-02,
           1.3915e-02, -4.1421e-02,  8.9855e-02,  4.0514e-02,  7.4672e-02,
           6.3039e-02, -3.7883e-02, -5.7570e-02,  5.6767e-02,  8.1288e-02,
          -1.0501e-01],
         [-1.9930e-01,  9.2346e-02, -4.6385e-02,  1.1414e-01, -1.7856e-01,
          -7.5722e-02, -9.5991e-02, -2.4275e-02,  6.8016e-02,  5.2614e-02,
          -1.7428e-01, -2.4964e-02,  4.4947e-02,  1.1409e-01, -4.3466e-02,
          -2.2042e-01],
         [-1.9896e-01,  1.3815e-01,  4.1449e-02, -4.1453e-02,  2.3651e-03,
          -5.1864e-02, -4.8640e-02,  1.5867e-01,  1.1019e-01, -7.7386e-02,
          -2.4774e-02, -7.5502e-02, -8.1692e-02,  1.0810e-01,  2.5605e-03,
          -4.3216e-02],
         [-2.0138e-01,  2.2007e-01,  7.2002e-02, -8.4817e-02,  5.3292e-03,
          -1.4397e-01, -6.4865e-02,  1.7637e-01,  7.9066e-02,  7.1616e-02,
           3.0461e-02, -2.8392e-02, -1.2075e-01,  1.3214e-01,  3.7748e-02,
          -1.7797e-01],
    

In [None]:
source_length = 20
target_length = 5
embedding_dim = 8
hidden_size = 16
num_classes = 20
bs = 2
start_id = end_id = 0
source_vocab_size = 100
target_vocab_size = 100

input_sequence_ids = torch.randint(source_vocab_size,size=(bs,source_length)).to(torch.int64)

target_ids = torch.randint(target_vocab_size,size=(bs,target_length))
target_ids = torch.cat((target_tensor,end_id*torch.ones(bs,1)),dim=1).to(torch.int64)

shifted_target_ids = torch.cat((start_id*torch.ones(bs,1),target_ids[:,1:]),dim=1).to(torch.int64)

model = Model(embedding_dim,hidden_size,num_classes,source_vocab_size,target_vocab_size,start_id,end_id)
probs,logits = model(input_tensor,shifted_target_ids)
print(probs.shape)
print(logits.shape)

torch.Size([2, 6, 20])
torch.Size([2, 6, 20])


In [None]:


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass
    probs, logits = model(input_sequence_ids, shifted_target_ids)

    # Reshape logits for loss calculation
    logits_flat = logits.view(-1, num_classes)
    shifted_target_ids_flat = shifted_target_ids.view(-1).long()

    # Compute the loss
    loss = criterion(logits_flat, shifted_target_ids_flat)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')




Epoch [10/100], Loss: 2.9035
Epoch [20/100], Loss: 2.8059
Epoch [30/100], Loss: 2.6876
Epoch [40/100], Loss: 2.5256
Epoch [50/100], Loss: 2.2859
Epoch [60/100], Loss: 1.9511
Epoch [70/100], Loss: 1.6283
Epoch [80/100], Loss: 1.3961
Epoch [90/100], Loss: 1.2292
Epoch [100/100], Loss: 1.1005


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Attention, Concatenate

# Define the attention layer
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, query, values):
        query_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(query_with_time_axis) + self.W2(values)))
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights

# Define the seq2seq model with attention
def create_seq2seq_attention_model(input_shape, output_sequence_length):
    # Encoder
    encoder_inputs = Input(shape=(None,20))
    encoder_lstm = LSTM(128, return_sequences=True, return_state=True)
    encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)
    encoder_states = [state_h, state_c]

    # Decoder
    decoder_inputs = Input(shape=(None,))
    decoder_embedding = Embedding(output_sequence_length, 128)(decoder_inputs)
    decoder_lstm = LSTM(128, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)

    # Attention mechanism
    attention_layer = BahdanauAttention(128)
    context_vector, attention_weights = attention_layer(state_h, encoder_outputs)

    # Concatenate attention output and decoder LSTM output
    decoder_concat = Concatenate(axis=-1)([decoder_outputs, context_vector])

    # Dense layer for output
    decoder_dense = Dense(output_sequence_length, activation='softmax')
    decoder_outputs = decoder_dense(decoder_concat)

    # Model
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    return model


input_data = np.array([
    ['6027767fad949f3ca5e772df04924949', '041547bddb0a3e730f32db84c65868ca', '041547bddb0a3e730f32db84c65868ca', '041547bddb0a3e730f32db84c65868ca', '8b32f88104ecf859be934d9b45f30cd1', 'e4a125e3163e4c1bd40060614c79bd53', '8b32f88104ecf859be934d9b45f30cd1', '5ef6718f4517d2d3c316fc45226f41dc', 'e4a125e3163e4c1bd40060614c79bd53', '041547bddb0a3e730f32db84c65868ca', 'e7efab54028017e35a35d1b1637e210c', '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7', 'a97177f0f37a2bae91d8e67831949392', '6027767fad949f3ca5e772df04924949', '6027767fad949f3ca5e772df04924949', '6027767fad949f3ca5e772df04924949'],
    ['883d4ffa999d2f7c8f5293d85112da49', '883d4ffa999d2f7c8f5293d85112da49', '883d4ffa999d2f7c8f5293d85112da49', 'ecfed150865a7129690805286222656d', 'd36c6cf30154e18e6c972704206d6b1e', 'd36c6cf30154e18e6c972704206d6b1e', 'c7170f4c6488a8f9013f0e4eadf9b385', 'c7170f4c6488a8f9013f0e4eadf9b385', '940d87a98fef6e456a3f59ecd7e88f63', '883d4ffa999d2f7c8f5293d85112da49', 'f6407930f4a8e921df43911dad3847a3', '4917c1184063708092051859415be029', '3419b303ba51124a091cde496c6a0c16', 'f57c28ff61e365a82c7a00267d21c96e', '0d488acd5aa820a96e84f9488f03e335', '807653562fa6eb36cf75dee0279fb124', '33441a5f6fb494f0d0021f2585c91305', 'fb9b6b981cc1996542d5d81d47b459af', '65719c6edaa80d0880940c0e20c5e499', '7c4bd89cc6d7c6c91a38d58c2808b1b9']
])

output_data = np.array([
    ['75c2aa348888f982d85e3f870e6ba5b2', '0cab8863e5440551c7b37e59635ec18e', '4d5aceee5c9731151ca69f0946ffa71f', '929b07d69451684f4f0f6e3bcc2a62d6', '12ae4e616d3e5c7bd53ec771797f596b'],
    ['34f1a786e245f2886ab99b0062de906c', 'd8ec0f80ee6b4457f12e74aa469335d6', 'd63dbd5214a39f50100c8d59f1c24d6a', 'c1550c264fb083b3acffe619bd02d75e', '61a3b37f326394081b95196a5eb676b8']
])

# Tokenize input and output data
input_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
input_tokenizer.fit_on_texts(input_data.flatten())
input_data_seq = input_tokenizer.texts_to_sequences(input_data.flatten())
input_data_seq = np.array(input_data_seq).reshape(input_data.shape)

output_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
output_tokenizer.fit_on_texts(output_data.flatten())
output_data_seq = output_tokenizer.texts_to_sequences(output_data.flatten())
output_data_seq = np.array(output_data_seq).reshape(output_data.shape)

# Pad sequences
input_data_padded = tf.keras.preprocessing.sequence.pad_sequences(input_data_seq, padding='post')
output_data_padded = tf.keras.preprocessing.sequence.pad_sequences(output_data_seq, padding='post')



# Define model
model = create_seq2seq_attention_model(input_data_padded.shape, output_data_padded.shape[1])

# Compile model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model
model.fit([input_data_padded, output_data_padded[:, :-1]], np.expand_dims(output_data_padded[:, 1:], -1), epochs=50, batch_size=2)

# Generate predictions (same as before)
sample_input = input_data_padded[0:1]
predictions = model.predict([sample_input, np.zeros((sample_input.shape[0], output_data_padded.shape[1]))])

# Decode predictions (same as before)
decoded_predictions = []
for i in range(predictions.shape[0]):
    decoded_predictions.append([output_tokenizer.index_word[idx] for idx in np.argmax(predictions[i], axis=1)])

print('Sample Input:', input_data[0])
print('Predicted Output:', decoded_predictions[0])


ValueError: ignored

In [None]:
output_data_padded.shape

(2, 5)

In [None]:
# prompt: input: [['6027767fad949f3ca5e772df04924949' '041547bddb0a3e730f32db84c65868ca'   '041547bddb0a3e730f32db84c65868ca' '041547bddb0a3e730f32db84c65868ca'   '8b32f88104ecf859be934d9b45f30cd1' 'e4a125e3163e4c1bd40060614c79bd53'   '8b32f88104ecf859be934d9b45f30cd1' '5ef6718f4517d2d3c316fc45226f41dc'   'e4a125e3163e4c1bd40060614c79bd53' '041547bddb0a3e730f32db84c65868ca'   'e7efab54028017e35a35d1b1637e210c' '3f8e8cbe4b5d55f07ba4c7ddfab624b7'   '3f8e8cbe4b5d55f07ba4c7ddfab624b7' '3f8e8cbe4b5d55f07ba4c7ddfab624b7'   '3f8e8cbe4b5d55f07ba4c7ddfab624b7' '3f8e8cbe4b5d55f07ba4c7ddfab624b7'   'a97177f0f37a2bae91d8e67831949392' '6027767fad949f3ca5e772df04924949'   '6027767fad949f3ca5e772df04924949' '6027767fad949f3ca5e772df04924949']  ['883d4ffa999d2f7c8f5293d85112da49' '883d4ffa999d2f7c8f5293d85112da49'   '883d4ffa999d2f7c8f5293d85112da49' 'ecfed150865a7129690805286222656d'   'd36c6cf30154e18e6c972704206d6b1e' 'd36c6cf30154e18e6c972704206d6b1e'   'c7170f4c6488a8f9013f0e4eadf9b385' 'c7170f4c6488a8f9013f0e4eadf9b385'   '940d87a98fef6e456a3f59ecd7e88f63' '883d4ffa999d2f7c8f5293d85112da49'   'f6407930f4a8e921df43911dad3847a3' '4917c1184063708092051859415be029'   '3419b303ba51124a091cde496c6a0c16' 'f57c28ff61e365a82c7a00267d21c96e'   '0d488acd5aa820a96e84f9488f03e335' '807653562fa6eb36cf75dee0279fb124'   '33441a5f6fb494f0d0021f2585c91305' 'fb9b6b981cc1996542d5d81d47b459af'   '65719c6edaa80d0880940c0e20c5e499' '7c4bd89cc6d7c6c91a38d58c2808b1b9']]  output: [['75c2aa348888f982d85e3f870e6ba5b2' '0cab8863e5440551c7b37e59635ec18e'   '4d5aceee5c9731151ca69f0946ffa71f' '929b07d69451684f4f0f6e3bcc2a62d6'   '12ae4e616d3e5c7bd53ec771797f596b']  ['34f1a786e245f2886ab99b0062de906c' 'd8ec0f80ee6b4457f12e74aa469335d6'   'd63dbd5214a39f50100c8d59f1c24d6a' 'c1550c264fb083b3acffe619bd02d75e'   '61a3b37f326394081b95196a5eb676b8']]  我想要輸入20首歌的id然後輸出接下來的5首歌id，我有一批這樣的訓練集，範例如上。 給我使用attention的seq2seq模型。輸入跟輸出要符合我給的格式。且5個輸出都要不同。用我給的input做訓練，使用pytorch

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Attention, Concatenate

# Define the encoder and decoder layers
class Encoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)

    def forward(self, x):
        output, (h, c) = self.lstm(x)
        return output, h, c

class Decoder(nn.Module):
    def __init__(self, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, h, c):
        output, (h, c) = self.lstm(x, (h, c))
        output = self.linear(output)
        return output, h, c

# Define the attention layer
class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super().__init__()
        self.linear1 = nn.Linear(hidden_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
        self.linear3 = nn.Linear(hidden_dim, 1)

    def forward(self, query, key, value):
        query = self.linear1(query)
        key = self.linear2(key)
        value = self.linear3(value)
        attention = torch.bmm(query, key.transpose(1, 2))
        attention = F.softmax(attention, dim=2)
        output = torch.bmm(attention, value)
        return output

# Define the seq2seq model
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, attention):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.attention = attention

    def forward(self, x, y):
        # Encode the input sequence
        output, h, c = self.encoder(x)

        # Initialize the hidden state and cell state of the decoder
        decoder_h = h
        decoder_c = c

        # Generate the output sequence
        outputs = []
        for i in range(y.shape[1]):
            # Get the current input to the decoder
            y_i = y[:, i, :]

            # Compute the attention weights
            attention_weights = self.attention(decoder_h, output, output)

            # Compute the context vector
            context_vector = torch.bmm(attention_weights, output)

            # Concatenate the context vector and the current input
            x_i = torch.cat((context_vector, y_i), dim=1)

            # Decode the input
            output, decoder_h, decoder_c = self.decoder(x_i, decoder_h, decoder_c)

            # Store the output
            outputs.append(output)

        return outputs

# Create the model
encoder = Encoder(input_dim=20, hidden_dim=128, num_layers=2)
decoder = Decoder(hidden_dim=128, output_dim=20, num_layers=2)
attention = Attention(hidden_dim=128)
model = Seq2Seq(encoder, decoder, attention)

# Train the model
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
for epoch in range(10):
    model.train()
    total_loss = 0
    for i, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(x, y)
        loss = criterion(outputs.view(-1, 20), y.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print("Epoch {}: Loss = {}".format(epoch, total_loss / len(train_loader)))

# Evaluate the model
model.eval()
total_correct = 0
for i, (x, y) in enumerate(test_loader):
    outputs = model(x, y)
    _, predicted = torch.max(outputs.data, 1)
    total_correct += (predicted == y).sum().item()
print("Accuracy = {}".format(total_correct / len(test_loader)))

# Generate predictions
model.eval()
x = torch.tensor([['6027767fad949f3ca5e772df04924949', '041547bddb0a3e730f32db84c65868ca', '041547bddb0a3e730f32db84c65868ca', '041547bddb0a3e730f32db84c65868ca', '8b32f88104ecf859be934d9b45f30cd1', 'e4a125e3163e4c1bd40060614c79bd53', '8b32f88104ecf859be934d9b45f30cd1', '5ef6718f4517d2d3c316fc45226f41dc', 'e4a125e3163e4c1bd40060614c79bd53', '041547bddb0a3e730f32db84c65868ca', 'e7efab54028017e35a35d1b1637e210c', '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7', '3f8e8cbe4b5d55f07ba4c7ddfab624b7', 'a97177f0f37a2bae91d8e67831949392', '6027767fad949f3ca5e772df04924949', '6027767fad949f3ca5e772df04924949', '6027767fad949f3ca5e772df04924949']], dtype=torch.long)
outputs = model(x)
_, predicted = torch.max(outputs.data, 1)
print(predicted)


TypeError: ignored