In [24]:
!pip install -U pip transformers
# !pip install sentencepiece



In [25]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

In [26]:
# Identifier of the pretrained checkpoint that the model and tokenizer are loaded from.
checkpoint = "facebook/nllb-200-distilled-600M"

In [27]:
# Load the tokenizer and the sequence-to-sequence model from the same
# checkpoint identifier.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

In [28]:
# Build a translation pipeline around the already-loaded model and tokenizer,
# translating from "eng_Latn" (source) to "tha_Thai" (target).
translator = pipeline(
    "translation",
    model=model,
    tokenizer=tokenizer,
    src_lang="eng_Latn",
    tgt_lang="tha_Thai",
)

text_to_translate = "Work hard, play harder"

# The result is indexed as a list of dicts; the translated string sits under
# the "translation_text" key of the first entry.
translated_text = translator(text_to_translate)
print(translated_text[0]["translation_text"])

Device set to use cuda:0


ทํางานหนักๆ เล่นหนักๆ


In [29]:
# Tokenize the English sentence; return_tensors="tf" makes the tokenizer
# return TensorFlow tensors.
# NOTE(review): the install cell only installs transformers, so TensorFlow is
# presumably provided by the environment — confirm.
tokens = tokenizer(text_to_translate, return_tensors="tf")

# Token IDs of the first sequence in the batch.
print(tokens["input_ids"][0])

tf.Tensor([256047  46157  19450 248079  18379 215428      2], shape=(7,), dtype=int32)


In [30]:
# Convert the IDs back into a string, keeping special tokens in the result
# (skip_special_tokens=False).
decoded_tokens = tokenizer.decode(
    tokens["input_ids"][0],
    skip_special_tokens=False,
)

decoded_tokens

'eng_Latn Work hard, play harder</s>'

In [31]:
# Map every token ID to its string form so the pieces can be compared with the IDs.
token_list = tokenizer.convert_ids_to_tokens(tokens["input_ids"][0].numpy())

# Print the sentence, its IDs and its tokens on three separate lines.
print(text_to_translate, tokens["input_ids"][0].numpy(), token_list, sep="\n")

Work hard, play harder
[256047  46157  19450 248079  18379 215428      2]
['eng_Latn', '▁Work', '▁hard', ',', '▁play', '▁harder', '</s>']


In [32]:
# Thai sentence obtained from the translation step.
thai_translated_text = translated_text[0]["translation_text"]

# Run the same tokenizer over the Thai text.
# NOTE(review): the recorded output of this cell starts with the 'eng_Latn'
# tag, so the tokenizer is presumably still using an English source-language
# setting here — confirm whether a Thai tag is wanted instead.
thai_tokens = tokenizer(thai_translated_text, return_tensors="tf")

# Convert the token IDs into their token strings.
thai_token_list = tokenizer.convert_ids_to_tokens(thai_tokens["input_ids"][0].numpy())

# Print the Thai sentence, its IDs and its tokens on three separate lines.
print(thai_translated_text, thai_tokens["input_ids"][0].numpy(), thai_token_list, sep="\n")

ทํางานหนักๆ เล่นหนักๆ
[256047  28276  24984 211830 250375  83383   7991 211830 250375      2]
['eng_Latn', '▁ทํา', 'งาน', 'หนัก', 'ๆ', '▁เล', '่น', 'หนัก', 'ๆ', '</s>']


In [33]:
# Print the configuration object attached to the loaded model.
print(model.config)

M2M100Config {
  "activation_dropout": 0.0,
  "activation_function": "relu",
  "architectures": [
    "M2M100ForConditionalGeneration"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "dtype": "float32",
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "max_length": 200,
  "max_position_embeddings": 1024,
  "model_type": "m2m_100",
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "scale_embedding": true,
  "tokenizer_class": "NllbTokenizer",
  "transformers_version": "4.56.2",
  "use_cache": true,
  "vocab_size": 256206
}



In [34]:
# Left operand of the matrix product, stored as a list of rows (2 rows, 3 columns).
A = [
    [1, 2, 3],
    [4, 5, 6],
]

In [35]:
# Right operand of the matrix product, stored as a list of rows (3 rows, 2 columns).
B = [
    [7, 8],
    [9, 1],
    [2, 3],
]

In [36]:
# Dimensions read from the nested lists: the row count is the outer length,
# the column count is the length of the first row.
rows_A, cols_A = len(A), len(A[0])
rows_B, cols_B = len(B), len(B[0])

In [37]:
# Display the (rows, columns) pair of A.
rows_A, cols_A

(2, 3)

In [38]:
# Display the (rows, columns) pair of B.
rows_B, cols_B

(3, 2)

In [39]:
# The product A x B is only defined when A has as many columns as B has rows.
# Raise instead of printing so that execution stops here rather than carrying
# on into the multiplication cells with incompatible shapes.
if cols_A != rows_B:
    raise ValueError("ไม่สามารถคูณเมทริกซ์ได้ เพราะจำนวน Columns ของ A ไม่เท่ากับจำนวน Rows ของ B")

In [40]:
# Zero-filled result matrix with rows_A rows and cols_B columns; the
# comprehension creates a separate list object for every row.
C = [[0] * cols_B for _ in range(rows_A)]

C

[[0, 0], [0, 0]]

In [41]:
# Fill C with the product A x B: entry (i, j) is the sum over k of
# A[i][k] * B[k][j]. Each entry is assigned rather than accumulated with +=,
# so re-running this cell yields the same C instead of adding onto the values
# left by a previous run.
for i in range(rows_A):
    for j in range(cols_B):
        C[i][j] = sum(A[i][k] * B[k][j] for k in range(cols_A))

In [42]:
# Display the result matrix after the multiplication loop.
C

[[31, 19], [85, 55]]

In [43]:
import numpy as np

In [44]:
# NumPy array versions of the two nested-list matrices.
A_np, B_np = np.array(A), np.array(B)

In [45]:
# Multiply only when the column count of A_np equals the row count of B_np;
# otherwise report why the product cannot be formed.
if A_np.shape[1] == B_np.shape[0]:
    # `@` is the matrix-multiplication operator; np.matmul(A_np, B_np) is the
    # equivalent function-call form.
    C_np = A_np @ B_np
    print(C_np)
else:
    print("ไม่สามารถคูณเมทริกซ์ได้ เพราะจำนวน Columns ของ A ไม่เท่ากับจำนวน Rows ของ B")

[[31 19]
 [85 55]]
