In [45]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, get_linear_schedule_with_warmup
from sklearn.preprocessing import LabelEncoder
import os

# Set random seed for reproducibility
torch.manual_seed(1)
np.random.seed(1)

# Block 1: Create Conversation Table (100 pairs)
conversation_data = {
    'en_question': [],
    'en_answer': [],
    'az_question': [],
    'az_answer': [],
    'tr_question': [],
    'tr_answer': [],
    'ru_question': [],
    'ru_answer': []
}

# Sample question-answer pairs (25 base pairs, repeated to reach 100)
questions_en = [
    "Hello, how are you?", "How are you today?", "Are you doing well?", "What's your name?",
    "Your name is what?", "What time is it?", "Is it what time?", "How's the weather?",
    "The weather is how?", "Do you like to read?", "Like to read, do you?", "What is your favorite color?",
    "Your favorite color is what?", "Where are you from?", "Are you from where?",
    "What do you do?", "Do what, you do?", "Can you help me?", "Help me, can you?",
    "What is the date today?", "Today, what is the date?", "Do you speak English?",
    "English, do you speak?", "How old are you?", "Are you how old?"
]
answers_en = [
    "Hi, I'm doing great!", "Hi, I'm doing great!", "Hi, I'm doing great!", "My name is Alex.",
    "My name is Alex.", "It's 5:10 PM.", "It's 5:10 PM.", "It's sunny today.",
    "It's sunny today.", "Yes, I love reading!", "Yes, I love reading!", "Blue is my favorite color.",
    "Blue is my favorite color.", "I'm from Earth.", "I'm from Earth.",
    "I'm a helper AI.", "I'm a helper AI.", "Sure, I can help!", "Sure, I can help!",
    "It's July 13, 2025.", "It's July 13, 2025.", "Yes, I speak English!",
    "Yes, I speak English!", "I'm ageless!", "I'm ageless!"
]
questions_az = [
    "Salam, sən necəsən?", "Bu gün sən necəsən?", "Yaxşı sən?", "Adın nədir?",
    "Nədir adın?", "Saat neçədir?", "Neçədir saat?", "Hava necədir?",
    "Necədir hava?", "Oxumağı sevirsən?", "Sevirsən oxumağı?", "Sevimli rəngin nədir?",
    "Nədir sevimli rəngin?", "Haralısan?", "Sən haralısan?", "Nə iş görürsən?",
    "Görürsən nə iş?", "Mənə kömək edə bilərsən?", "Kömək edə bilərsən mənə?",
    "Bu gün tarix nədir?", "Tarix nədir bu gün?", "İngiliscə danışırsan?",
    "Danışırsan ingiliscə?", "Sən neçə yaşındasan?", "Neçə yaşındasan sən?"
]
answers_az = [
    "Salam, mən əla!", "Salam, mən əla!", "Salam, mən əla!", "Adım Alexdir.",
    "Adım Alexdir.", "Saat 17:10-dur.", "Saat 17:10-dur.", "Bu gün günəşlidir.",
    "Bu gün günəşlidir.", "Bəli, oxumağı sevirəm!", "Bəli, oxumağı sevirəm!",
    "Mavi sevimli rəngimdir.", "Mavi sevimli rəngimdir.", "Mən Yerdənəm.",
    "Mən Yerdənəm.", "Mən köməkçi AI-yam.", "Mən köməkçi AI-yam.",
    "Əlbəttə, kömək edə bilərəm!", "Əlbəttə, kömək edə bilərəm!",
    "13 İyul 2025-dir.", "13 İyul 2025-dir.", "Bəli, ingiliscə danışıram!",
    "Bəli, ingiliscə danışıram!", "Mən yaşsızam!", "Mən yaşsızam!"
]
questions_tr = [
    "Merhaba, nasılsın?", "Bugün nasılsın?", "İyi misin?", "Adın ne?",
    "Ne adın?", "Saat kaç?", "Kaç saat?", "Hava nasıl?",
    "Nasıl hava?", "Okumayı sever misin?", "Sever misin okumayı?", "Favori rengin ne?",
    "Rengin ne favori?", "Nerelisin?", "Nereden geliyorsun?", "Ne iş yapıyorsun?",
    "Yapıyorsun ne iş?", "Bana yardım edebilir misiniz?", "Yardım edebilir misiniz bana?",
    "Bugün tarih ne?", "Tarix ne bugün?", "İngilizce konuşuyor musunuz?",
    "Konuşuyor musunuz İngilizce?", "Kaç yaşındasın?", "Yaşın kaç?"
]
answers_tr = [
    "Merhaba, harikayım!", "Merhaba, harikayım!", "Merhaba, harikayım!", "Adım Alex.",
    "Adım Alex.", "Saat 17:10.", "Saat 17:10.", "Bugün hava güneşli.",
    "Bugün hava güneşli.", "Evet, okumayı severim!", "Evet, okumayı severim!",
    "Mavi favori rengim.", "Mavi favori rengim.", "Dünya’danım.",
    "Dünya’danım.", "Yardımcı AI’yım.", "Yardımcı AI’yım.",
    "Tabii, yardım edebilirim!", "Tabii, yardım edebilirim!",
    "13 Temmuz 2025.", "13 Temmuz 2025.", "Evet, İngilizce konuşuyorum!",
    "Evet, İngilizce konuşuyorum!", "Yaşsızım!", "Yaşsızım!"
]
questions_ru = [
    "Привет, как дела?", "Как дела сегодня?", "Ты в порядке?", "Как тебя зовут?",
    "Зовут тебя как?", "Который час?", "Час который?", "Какая погода?",
    "Погода какая?", "Любишь читать?", "Читать любишь?", "Какой твой любимый цвет?",
    "Любимый цвет твой какой?", "Откуда ты?", "Ты откуда?", "Чем занимаешься?",
    "Занимаешься чем?", "Можешь помочь мне?", "Помочь мне можешь?",
    "Какой сегодня день?", "День сегодня какой?", "Говоришь по-английски?",
    "По-английски говоришь?", "Сколько тебе лет?", "Лет тебе сколько?"
]
answers_ru = [
    "Привет, я в порядке!", "Привет, я в порядке!", "Привет, я в порядке!", "Меня зовут Алекс.",
    "Меня зовут Алекс.", "Сейчас 17:10.", "Сейчас 17:10.", "Сегодня солнечно.",
    "Сегодня солнечно.", "Да, люблю читать!", "Да, люблю читать!",
    "Синий мой любимый цвет.", "Синий мой любимый цвет.", "Я с Земли.",
    "Я с Земли.", "Я помощник ИИ.", "Я помощник ИИ.",
    "Конечно, могу помочь!", "Конечно, могу помочь!",
    "Сегодня 13 июля 2025.", "Сегодня 13 июля 2025.", "Да, говорю по-английски!",
    "Да, говорю по-английски!", "Я без возраста!", "Я без возраста!"
]

# Extend to exactly 100 pairs
for i in range(100 // len(questions_en) + 1):
    for q_en, a_en, q_az, a_az, q_tr, a_tr, q_ru, a_ru in zip(
        questions_en, answers_en, questions_az, answers_az,
        questions_tr, answers_tr, questions_ru, answers_ru
    ):
        if len(conversation_data['en_question']) < 100:
            conversation_data['en_question'].append(q_en)
            conversation_data['en_answer'].append(a_en)
            conversation_data['az_question'].append(q_az)
            conversation_data['az_answer'].append(a_az)
            conversation_data['tr_question'].append(q_tr)
            conversation_data['tr_answer'].append(a_tr)
            conversation_data['ru_question'].append(q_ru)
            conversation_data['ru_answer'].append(a_ru)

# Create DataFrame
df = pd.DataFrame(conversation_data)
assert len(df) == 100, f"Expected 100 rows, got {len(df)}"

In [48]:
# Print first 5 rows of the DataFrame to inspect
print(df.head())
# Inspect first 3 rows for English
print(df[['en_question', 'en_answer']].head(3))

# Access a specific question-answer pair in Azerbaijani
print("\nExample Azerbaijani Pair:")
print(f"Question: {df['az_question'][0]}")
print(f"Answer: {df['az_answer'][0]}")

# Check a Russian pair
print("\nExample Russian Pair:")
print(f"Question: {df['ru_question'][0]}")
print(f"Answer: {df['ru_answer'][0]}")

           en_question             en_answer          az_question  \
0  Hello, how are you?  Hi, I'm doing great!  Salam, sən necəsən?   
1   How are you today?  Hi, I'm doing great!  Bu gün sən necəsən?   
2  Are you doing well?  Hi, I'm doing great!           Yaxşı sən?   
3    What's your name?      My name is Alex.          Adın nədir?   
4   Your name is what?      My name is Alex.          Nədir adın?   

         az_answer         tr_question            tr_answer  \
0  Salam, mən əla!  Merhaba, nasılsın?  Merhaba, harikayım!   
1  Salam, mən əla!     Bugün nasılsın?  Merhaba, harikayım!   
2  Salam, mən əla!          İyi misin?  Merhaba, harikayım!   
3    Adım Alexdir.            Adın ne?           Adım Alex.   
4    Adım Alexdir.            Ne adın?           Adım Alex.   

         ru_question             ru_answer  
0  Привет, как дела?  Привет, я в порядке!  
1  Как дела сегодня?  Привет, я в порядке!  
2      Ты в порядке?  Привет, я в порядке!  
3    Как тебя зовут?     М

In [49]:
# Block 2: Create Test Set (15 pairs)
test_data = {
    'en_question': [
        "Hi, how you doing?", "Are you okay today?", "What’s your full name?",
        "Time now is what?", "How is weather today?", "Do you enjoy reading?",
        "Your favorite colour?", "Where do you come from?", "What’s your job?",
        "Can you assist me?", "What day is today?", "Speak English, do you?",
        "How many years old are you?", "What’s the current time?", "Weather like what?"
    ],
    'en_answer': [
        "Hi, I'm doing great!", "Hi, I'm doing great!", "My name is Alex.",
        "It's 5:10 PM.", "It's sunny today.", "Yes, I love reading!",
        "Blue is my favorite color.", "I'm from Earth.", "I'm a helper AI.",
        "Sure, I can help!", "It's July 13, 2025.", "Yes, I speak English!",
        "I'm ageless!", "It's 5:10 PM.", "It's sunny today."
    ],
    'az_question': [
        "Salam, sən nə edirsən?", "Bu gün sən yaxşısan?", "Tam adın nədir?",
        "İndi saat nədir?", "Bu gün hava necədir?", "Oxumaqdan zövq alırsan?",
        "Sevimli rəngin hansıdır?", "Sən haradan gəlirsən?", "Sənin işin nədir?",
        "Mənə kömək edə bilərsənmi?", "Bu gün hansı gündür?", "İngiliscə danışırsanmı?",
        "Sən neçə yaşındasan?", "İndi saat neçədir?", "Hava necədir?"
    ],
    'az_answer': [
        "Salam, mən əla!", "Salam, mən əla!", "Adım Alexdir.",
        "Saat 17:10-dur.", "Bu gün günəşlidir.", "Bəli, oxumağı sevirəm!",
        "Mavi sevimli rəngimdir.", "Mən Yerdənəm.", "Mən köməkçi AI-yam.",
        "Əlbəttə, kömək edə bilərəm!", "13 İyul 2025-dir.", "Bəli, ingiliscə danışıram!",
        "Mən yaşsızam!", "Saat 17:10-dur.", "Bu gün günəşlidir."
    ],
    'tr_question': [
        "Merhaba, ne yapıyorsun?", "Bugün iyi misiniz?", "Tam adınız nedir?",
        "Şimdi saat kaç?", "Bugün hava nasıl?", "Okumaktan hoşlanır mısınız?",
        "Favori rengin nedir?", "Nereden geliyorsun?", "İşin nedir?",
        "Bana yardım edebilir misiniz?", "Bugün hangi gün?", "İngilizce konuşur musunuz?",
        "Kaç yaşındasın?", "Şu an saat kaç?", "Hava nasıl bugün?"
    ],
    'tr_answer': [
        "Merhaba, harikayım!", "Merhaba, harikayım!", "Adım Alex.",
        "Saat 17:10.", "Bugün hava güneşli.", "Evet, okumayı severim!",
        "Mavi favori rengim.", "Dünya’danım.", "Yardımcı AI’yım.",
        "Tabii, yardım edebilirim!", "13 Temmuz 2025.", "Evet, İngilizce konuşuyorum!",
        "Yaşsızım!", "Saat 17:10.", "Bugün hava güneşli."
    ],
    'ru_question': [
        "Привет, что делаешь?", "Сегодня ты в порядке?", "Какое твоё полное имя?",
        "Сейчас время какое?", "Какая сегодня погода?", "Нравится ли читать?",
        "Какой цвет любимый?", "Откуда ты родом?", "Какая у тебя работа?",
        "Можешь ли помочь мне?", "Какой день сегодня?", "Говоришь ли по-английски?",
        "Сколько лет тебе?", "Время сейчас какое?", "Погода сегодня какая?"
    ],
    'ru_answer': [
        "Привет, я в порядке!", "Привет, я в порядке!", "Меня зовут Алекс.",
        "Сейчас 17:10.", "Сегодня солнечно.", "Да, люблю читать!",
        "Синий мой любимый цвет.", "Я с Земли.", "Я помощник ИИ.",
        "Конечно, могу помочь!", "Сегодня 13 июля 2025.", "Да, говорю по-английски!",
        "Я без возраста!", "Сейчас 17:10.", "Сегодня солнечно."
    ]
}
test_df = pd.DataFrame(test_data)

In [50]:
# Print first 3 rows of the test DataFrame
print(test_df[['en_question', 'en_answer']].head(3))

# Access a specific Turkish question-answer pair
print("\nExample Turkish Pair:")
print(f"Question: {test_df['tr_question'][0]}")
print(f"Answer: {test_df['tr_answer'][0]}")

# Inspect first 3 test pairs for Turkish
print(test_df[['tr_question', 'tr_answer']].head(3))

# Check an Azerbaijani pair
print("\nExample Azerbaijani Pair:")
print(f"Question: {test_df['az_question'][3]}")
print(f"Answer: {test_df['az_answer'][3]}")

              en_question             en_answer
0      Hi, how you doing?  Hi, I'm doing great!
1     Are you okay today?  Hi, I'm doing great!
2  What’s your full name?      My name is Alex.

Example Turkish Pair:
Question: Merhaba, ne yapıyorsun?
Answer: Merhaba, harikayım!
               tr_question            tr_answer
0  Merhaba, ne yapıyorsun?  Merhaba, harikayım!
1       Bugün iyi misiniz?  Merhaba, harikayım!
2        Tam adınız nedir?           Adım Alex.

Example Azerbaijani Pair:
Question: İndi saat nədir?
Answer: Saat 17:10-dur.


In [51]:
# Block 3: Prepare Dataset and Tokenizer
# Combine all questions and answers
all_questions = df[['en_question', 'az_question', 'tr_question', 'ru_question']].values.flatten()
all_answers = df[['en_answer', 'az_answer', 'tr_answer', 'ru_answer']].values.flatten()
test_questions = test_df[['en_question', 'az_question', 'tr_question', 'ru_question']].values.flatten()
test_answers = test_df[['en_answer', 'az_answer', 'tr_answer', 'ru_answer']].values.flatten()

# Create a unified LabelEncoder
all_unique_answers = np.unique(np.concatenate([all_answers, test_answers]))
label_encoder = LabelEncoder()
label_encoder.fit(all_unique_answers)

class QADataset(Dataset):
    def __init__(self, questions, answers, tokenizer, label_encoder, max_len=128):
        self.questions = questions
        self.answers = answers
        self.tokenizer = tokenizer
        self.label_encoder = label_encoder
        self.max_len = max_len
        self.labels = self.label_encoder.transform(answers)

    def __len__(self):
        return len(self.questions)

    def __getitem__(self, idx):
        question = str(self.questions[idx])
        encoding = self.tokenizer.encode_plus(
            question,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long)
        }

# Initialize tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

# Create datasets with unified LabelEncoder
train_dataset = QADataset(all_questions, all_answers, tokenizer, label_encoder)
test_dataset = QADataset(test_questions, test_answers, tokenizer, label_encoder)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)

In [87]:
# Define multiple sample questions
sample_questions = ["Salam", "Salam.", "Salam, necəsən?", "Salam, sən necəsən?", "Yardımcı AI’yım.", "Merhaba, harikayım!"]

# Tokenize and inspect each sample question
for idx, question in enumerate(sample_questions, 1):
    print(f"\nSample Question {idx}: {question}")
    encoding = tokenizer.encode_plus(
        question,
        add_special_tokens=True,
        max_length=128,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )
    #101: [CLS] (start of sequence).
    #27162, 10147: Tokens for "Salam."
    #102: [SEP] (end of sequence).

    #"Salam" -  Input IDs: [101, 27162, 10147, 102, 0, 0, 0, 0, 0, 0]
    #"Salam." - Input IDs: [101, 27162, 10147, 119, 102, 0, 0, 0, 0, 0]
    #"Salam, necəsən?" - Input IDs: [101, 27162, 10147, 117, 10554, 28412, 52639, 10115, 136, 102]
    #"Salam, sən necəsən?" - Input IDs: [101, 27162, 10147, 117, 187, 16931, 10554, 28412, 52639, 10115]
    print(f"Input IDs: {encoding['input_ids'].flatten()[:20]} ...")  # Show first 20 tokens

    print(f"Attention Mask: {encoding['attention_mask'].flatten()[:20]} ...")

# Check a dataset sample (unchanged from original code)
sample = train_dataset[0]
print("\nDataset Sample:")
print(f"Input IDs Shape: {sample['input_ids'].shape}")
print(f"Label: {sample['labels']}")

# Check label encoding consistency
sample_train_answer = all_answers[0]
sample_test_answer = test_answers[0]
train_label = label_encoder.transform([sample_train_answer])[0]
test_label = label_encoder.transform([sample_test_answer])[0]
print(f"Train Answer: {sample_train_answer}, Label: {train_label}")
print(f"Test Answer: {sample_test_answer}, Label: {test_label}")

# Tokenize a sample question
sample_question = test_questions[0]
encoding = tokenizer.encode_plus(sample_question, add_special_tokens=True, max_length=128, padding='max_length', truncation=True, return_tensors='pt')
print(f"\nSample Question: {sample_question}")
print(f"Input IDs (first 10): {encoding['input_ids'].flatten()[:10]}")


Sample Question 1: Salam
Input IDs: tensor([  101, 27162, 10147,   102,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0]) ...
Attention Mask: tensor([1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) ...

Sample Question 2: Salam.
Input IDs: tensor([  101, 27162, 10147,   119,   102,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0]) ...
Attention Mask: tensor([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) ...

Sample Question 3: Salam, necəsən?
Input IDs: tensor([  101, 27162, 10147,   117, 10554, 28412, 52639, 10115,   136,   102,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0]) ...
Attention Mask: tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) ...

Sample Question 4: Salam, sən necəsən?
Input IDs: tensor([  101, 27162, 10147,   117,   187, 16931, 10554, 28412, 52639, 10115,

In [55]:
# Block 4: Define Transformer Model
class QATransformer(nn.Module):
    def __init__(self, num_labels, vocab_size):
        super(QATransformer, self).__init__()
        self.embedding = nn.Embedding(vocab_size, 768)  # Embed token IDs to 768D
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=768, nhead=8, dim_feedforward=2048, dropout=0.1, batch_first=True
            ),
            num_layers=2
        )
        self.fc = nn.Linear(768, num_labels)
        self.dropout = nn.Dropout(0.1)

    def forward(self, input_ids, attention_mask):
        x = self.embedding(input_ids)  # (batch, seq_len, 768)
        x = self.transformer(x, src_key_padding_mask=~attention_mask.bool())
        x = x[:, 0, :]  # Take CLS token
        x = self.dropout(x)
        return self.fc(x)

# Initialize model
num_labels = len(label_encoder.classes_)
model = QATransformer(num_labels, tokenizer.vocab_size)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [107]:
# Create a small batch for testing the model
batch = next(iter(train_loader))
input_ids = batch['input_ids'].to(device)
attention_mask = batch['attention_mask'].to(device)

# Forward pass
model.eval()
with torch.no_grad():
    outputs = model(input_ids, attention_mask)
print("Batch Output Shape:", outputs.shape)
print("Sample Output (logits):", outputs[0].cpu().numpy())


Batch Output Shape: torch.Size([16, 48])
Sample Output (logits): [ 0.20239595 -0.42697942 -0.6546451   0.18611279 -0.8513564  -0.54569113
  0.33470756  0.44185644 -0.08457214 -0.50013006 -1.6893272  -0.53037447
  0.9064985  -1.5135612   0.8050715   0.7002033  -0.2128496  -0.5461014
 -1.3780142  -0.07173261 -0.5865791   0.24122158  0.16061197 -0.43602654
  0.2493856  -1.1426709  -1.6045959   0.47163722  0.11464106 -0.9998705
 -0.09878212 -0.3344373   0.31674254 -1.430573   -1.4499161  -0.5258833
 -0.14162497  0.64765966  0.59865427 17.516815   -2.2902617  -0.9585813
 -0.75877124  0.12267298  0.05040317  0.3329811   0.21010756 -0.3879267 ]


In [63]:
# Block 5: Load Model if Exists
model_path = 'model_multi_qa_checkpoint.pth'
if os.path.exists(model_path):
    print(f"Loading model from {model_path}")
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    print("Model loaded successfully!")
else:
    print(f"No pre-trained model found at {model_path}. Training from scratch.")

# Check if model file exists
print(f"Model exists: {os.path.exists(model_path)}")

Model exists: True


In [81]:
# Block 6: Training Loop
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
total_steps = len(train_loader) * 20  # 20 epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

model.train()
for epoch in range(10):
    total_loss = 0
    correct = 0
    total = 0
    for batch in train_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()
        total_loss += loss.item()

        # Calculate training accuracy
        _, preds = torch.max(outputs, dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    train_accuracy = correct / total
    print(f'Epoch {epoch+1}/10, Loss: {total_loss/len(train_loader):.8f}, Train Accuracy: {train_accuracy:.4f}')

Epoch 1/10, Loss: 0.00003605, Train Accuracy: 1.0000
Epoch 2/10, Loss: 0.00002033, Train Accuracy: 1.0000
Epoch 3/10, Loss: 0.00001294, Train Accuracy: 1.0000
Epoch 4/10, Loss: 0.00000932, Train Accuracy: 1.0000
Epoch 5/10, Loss: 0.00000724, Train Accuracy: 1.0000
Epoch 6/10, Loss: 0.00000619, Train Accuracy: 1.0000
Epoch 7/10, Loss: 0.00000505, Train Accuracy: 1.0000
Epoch 8/10, Loss: 0.00000470, Train Accuracy: 1.0000
Epoch 9/10, Loss: 0.00000420, Train Accuracy: 1.0000
Epoch 10/10, Loss: 0.00000370, Train Accuracy: 1.0000


In [85]:
# Simulate one training step
batch = next(iter(train_loader))
input_ids = batch['input_ids'].to(device)
attention_mask = batch['attention_mask'].to(device)
labels = batch['labels'].to(device)

model.train()
optimizer.zero_grad()
outputs = model(input_ids, attention_mask)
loss = nn.CrossEntropyLoss()(outputs, labels)
loss.backward()
optimizer.step()
print(f"Single Step Loss: {loss.item()}")

Single Step Loss: 3.0174796847859398e-06


In [84]:
# Block 7: Save Model
checkpoint = {'model_state_dict': model.state_dict()}
torch.save(checkpoint, model_path)
print(f"Model saved to {model_path}")

Model saved to model_multi_qa_checkpoint.pth


In [83]:
# Block 8: Evaluate on Test Set
model.eval()
correct = 0
total = 0
predictions = []
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids, attention_mask)
        _, preds = torch.max(outputs, dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
        predictions.extend(preds.cpu().numpy())

accuracy = correct / total
print(f'\nTest Accuracy: {accuracy:.4f}')

# Sample predictions
print("\nSample Predictions:")
for i in range(min(5, len(test_questions))):
    pred_label = label_encoder.inverse_transform([predictions[i]])[0]
    print(f"Question: {test_questions[i]}")
    print(f"Predicted Answer: {pred_label}")
    print(f"True Answer: {test_answers[i]}\n")


Test Accuracy: 0.7167

Sample Predictions:
Question: Hi, how you doing?
Predicted Answer: Hi, I'm doing great!
True Answer: Hi, I'm doing great!

Question: Salam, sən nə edirsən?
Predicted Answer: Salam, mən əla!
True Answer: Salam, mən əla!

Question: Merhaba, ne yapıyorsun?
Predicted Answer: Yardımcı AI’yım.
True Answer: Merhaba, harikayım!

Question: Привет, что делаешь?
Predicted Answer: Привет, я в порядке!
True Answer: Привет, я в порядке!

Question: Are you okay today?
Predicted Answer: Hi, I'm doing great!
True Answer: Hi, I'm doing great!



In [86]:
# Predict for one test sample
sample = test_dataset[0]
input_ids = sample['input_ids'].unsqueeze(0).to(device)
attention_mask = sample['attention_mask'].unsqueeze(0).to(device)

model.eval()
with torch.no_grad():
    output = model(input_ids, attention_mask)
    pred = torch.max(output, dim=1)[1].cpu().numpy()
pred_label = label_encoder.inverse_transform(pred)[0]
print(f"Question: {test_questions[0]}")
print(f"Predicted Answer: {pred_label}")
print(f"True Answer: {test_answers[0]}")

Question: Hi, how you doing?
Predicted Answer: Hi, I'm doing great!
True Answer: Hi, I'm doing great!


In [93]:
# Sample question (updated time: 7:05 PM, July 13, 2025)
sample_question = "Merhaba, ne yapıyorsun?"  # English question
true_answer = "Merhaba, harikayım!"  # Updated time


# Tokenize
encoding = tokenizer.encode_plus(
    sample_question,
    add_special_tokens=True,
    max_length=128,
    padding='max_length',
    truncation=True,
    return_attention_mask=True,
    return_tensors='pt'
)
input_ids = encoding['input_ids'].to(device)
attention_mask = encoding['attention_mask'].to(device)

# Forward pass
model.eval()
with torch.no_grad():
    outputs = model(input_ids, attention_mask)
    pred_label_idx = torch.max(outputs, dim=1)[1].cpu().numpy()[0]
    pred_answer = label_encoder.inverse_transform([pred_label_idx])[0]

# Print results
print(f"Question: {sample_question}")
print(f"Predicted Answer: {pred_answer}")
print(f"True Answer: {true_answer}")
print(f"Input IDs: {input_ids.flatten()}")
print(f"Output Logits: {outputs[0].cpu().numpy()}")

Question: Merhaba, ne yapıyorsun?
Predicted Answer: Yardımcı AI’yım.
True Answer: Merhaba, harikayım!
Input IDs: tensor([   101,  10734, 100025,  10113,    117,  10554,  10549,  84995,  26101,
         24883,    136,    102,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
       