## 編碼器模型範例
這個範例使用 BERT（僅含編碼器的模型）對輸入序列進行編碼，並輸出每個 token 的最後一層隱藏狀態（last hidden state）以及池化後的輸出（pooler output）。

In [None]:
from transformers import BertTokenizer, BertModel

# Load the pretrained uncased BERT tokenizer and the bare BERT encoder.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained("bert-base-uncased")

# Tokenize a single sentence into PyTorch tensors; special tokens are added
# and anything beyond 100 tokens is truncated.
text = "my bank account"
encoded_input = tokenizer(
    text,
    return_tensors="pt",
    padding=True,
    truncation=True,
    add_special_tokens=True,
    max_length=100,
)

# Forward pass; the first two entries of the output are taken by position.
output = model(**encoded_input)
last_hidden_state = output[0]
pooler_output = output[1]

# Show the architecture, then the shape of the first output entry and its
# values for the first sequence in the batch.
print(model)
print(last_hidden_state.shape)
print(last_hidden_state[0])


In [None]:
from transformers import AutoTokenizer, AutoModel

# Same encoder example, with the Auto* classes standing in for BertTokenizer
# and BertModel.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModel.from_pretrained("bert-base-uncased")

text = "my bank account"
encoded_input = tokenizer(text, max_length=100, add_special_tokens=True,
                          truncation=True, padding=True, return_tensors="pt")

output = model(**encoded_input)

# Read the fields of the returned output object by name.
last_hidden_state = output.last_hidden_state

# The pooled output is treated as optional: use None when the key is absent.
if "pooler_output" in output:
    pooler_output = output.pooler_output
else:
    pooler_output = None

print(model)
print(last_hidden_state.shape)
print(last_hidden_state[0])

# Only report the pooled output when the model produced one.
if pooler_output is not None:
    print(pooler_output.shape)
    print(pooler_output)


### 解碼器


In [None]:
from transformers import AutoTokenizer, GPT2Model
import torch

# Decoder example: GPT-2 loaded through its concrete model class. The
# tokenizer and the model are loaded from the same checkpoint name.
checkpoint = "openai-community/gpt2"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = GPT2Model.from_pretrained(checkpoint)

# Tokenize one sentence into PyTorch tensors and run a forward pass.
inputs = tokenizer("Yes, Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)

# Print the shape of the last hidden states first, then the architecture.
last_hidden_states = outputs.last_hidden_state
print(last_hidden_states.shape)
print(model)

In [None]:
from transformers import AutoTokenizer, AutoModel
import torch

# Same decoder example, with AutoModel used in place of GPT2Model.
checkpoint = "openai-community/gpt2"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

# Convert the text to PyTorch tensors, then run the forward pass.
inputs = tokenizer("Yes, Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)

# Take the last hidden states from the model output.
last_hidden_states = outputs.last_hidden_state

# Print the hidden-state shape followed by the model structure.
print(last_hidden_states.shape)
print(model)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load GPT-2 together with its language-modeling head. The bare AutoModel
# resolves to the headless GPT2Model, whose output has no `logits` field, so
# AutoModelForCausalLM is required for the `outputs.logits` access below.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Prepare the input text.
text = "The quick brown fox jumps over the lazy dog"
inputs = tokenizer(text, return_tensors="pt")

# Inference only, so run the forward pass without gradient tracking.
with torch.no_grad():
    outputs = model(**inputs)

# Logits are the scores before softmax is applied; their last dimension
# corresponds to the vocabulary.
logits = outputs.logits

# Show the logits dimensions.
# Expected shape: [batch size, sequence length, vocabulary size]
print("Logits shape:", logits.shape)


### 序列到序列

In [None]:
from transformers import MT5EncoderModel, T5Tokenizer

# Checkpoint to load; other mT5 sizes can be substituted as needed.
model_name = 'google/mt5-small'

# Load the encoder-only mT5 model and its tokenizer from that checkpoint.
model = MT5EncoderModel.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)

# Print the model structure.
print(model)

In [None]:
from transformers import AutoModel, AutoTokenizer

# Checkpoint to load; other mT5 sizes can be substituted as needed.
model_name = 'google/mt5-small'

# Load the model and tokenizer through the Auto* classes instead of naming
# the mT5 classes directly.
# NOTE(review): for an mT5 checkpoint AutoModel is expected to resolve to the
# full encoder-decoder MT5Model rather than the encoder-only MT5EncoderModel,
# so the printed structure may differ from the MT5EncoderModel cell - confirm
# which one is intended.
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Print the model structure.
print(model)
