In [1]:
import json
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from nltk.translate.bleu_score import sentence_bleu
import nltk

nltk.download('punkt')

import logging
logging.disable(logging.WARNING)

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
def load_coqa_data(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
    return data['data']

class CoQADataset(Dataset):
    def __init__(self, data, tokenizer, max_length=512):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        context = item['story']
        question = item['questions'][0]['input_text']
        answer = item['answers'][0]['input_text']

        # Prepare the input text for GPT-2
        input_text = f"Context: {context} Question: {question} Answer:"
        target_text = f"{answer}"

        # Tokenize the input and target texts
        inputs = self.tokenizer.encode_plus(
            input_text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        targets = self.tokenizer.encode_plus(
            target_text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'labels': targets['input_ids'].flatten(),
        }

In [5]:
data = load_coqa_data('/kaggle/input/coqa-train-v1.0.json')
train_data, test_data = train_test_split(data, test_size=0.3, random_state=42)
val_data, test_data = train_test_split(test_data, test_size=0.5, random_state=42)

In [6]:
# Initialize tokenizer and model
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

train_dataset = CoQADataset(train_data, tokenizer)
val_dataset = CoQADataset(val_data, tokenizer)
test_dataset = CoQADataset(test_data, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8)
test_loader = DataLoader(test_dataset, batch_size=8)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]



In [7]:
def train(model, train_loader, optimizer, device):
    model.train()
    total_loss = 0
    progress_bar = tqdm(train_loader, desc="Training")

    for batch in progress_bar:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Forward pass with GPT-2
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)

        # Get the loss from the output
        loss = outputs.loss
        total_loss += loss.item()

        # Backward pass and optimization step
        loss.backward()
        optimizer.step()

        # Update progress bar with current loss
        progress_bar.set_postfix({'loss': loss.item()})

    return total_loss / len(train_loader)

In [8]:
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.resize_token_embeddings(len(tokenizer))


# Set device and move model to device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Set optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]



In [9]:
def validate(model, val_loader, device):
    model.eval()
    total_loss = 0
    progress_bar = tqdm(val_loader, desc="Validating")
    with torch.no_grad():
        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # Forward pass with GPT-2
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()

            progress_bar.set_postfix({'loss': loss.item()})

    return total_loss / len(val_loader)

In [14]:
# Test function
def test(model, test_loader, tokenizer, device):
    model.eval()
    all_predictions = []
    all_answers = []
    progress_bar = tqdm(test_loader, desc="Testing")
    with torch.no_grad():
        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            # Generate answer using GPT-2 with max_new_tokens
            outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_new_tokens=50, num_beams=5, early_stopping=True)
            prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)

            # Collect the predictions
            all_predictions.append(prediction)

            # You need to decode the true answer for BLEU score calculation
            true_answer = tokenizer.decode(batch['labels'][0], skip_special_tokens=True)
            all_answers.append(true_answer)

    bleu_score = calculate_bleu(all_predictions, all_answers)
    return bleu_score


In [11]:
num_epochs = 5
best_loss = float('inf')
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    train_loss = train(model, train_loader, optimizer, device)
    val_loss = validate(model, val_loader, device)
    print(f"Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")
    if val_loss < best_loss:
        best_loss = val_loss
        torch.save(model.state_dict(), 'gpt2_qa_model.pth')
        print("Model saved!")
    else:
        print("Validation Loss Increased. Model Not Saved.")
    print("*" * 50)

Epoch 1/5


Training: 100%|██████████| 630/630 [13:33<00:00,  1.29s/it, loss=0.0819]
Validating: 100%|██████████| 135/135 [00:53<00:00,  2.52it/s, loss=0.0735]


Train Loss: 0.0996, Validation Loss: 0.0505
Model saved!
**************************************************
Epoch 2/5


Training: 100%|██████████| 630/630 [13:34<00:00,  1.29s/it, loss=0.0693]
Validating: 100%|██████████| 135/135 [00:53<00:00,  2.54it/s, loss=0.0725]


Train Loss: 0.0505, Validation Loss: 0.0499
Model saved!
**************************************************
Epoch 3/5


Training: 100%|██████████| 630/630 [13:33<00:00,  1.29s/it, loss=0.0488]
Validating: 100%|██████████| 135/135 [00:53<00:00,  2.54it/s, loss=0.07]  


Train Loss: 0.0488, Validation Loss: 0.0505
Validation Loss Increased. Model Not Saved.
**************************************************
Epoch 4/5


Training: 100%|██████████| 630/630 [13:34<00:00,  1.29s/it, loss=0.0486]
Validating: 100%|██████████| 135/135 [00:53<00:00,  2.54it/s, loss=0.0706]


Train Loss: 0.0478, Validation Loss: 0.0512
Validation Loss Increased. Model Not Saved.
**************************************************
Epoch 5/5


Training: 100%|██████████| 630/630 [13:34<00:00,  1.29s/it, loss=0.0314]
Validating: 100%|██████████| 135/135 [00:53<00:00,  2.54it/s, loss=0.0748]

Train Loss: 0.0470, Validation Loss: 0.0515
Validation Loss Increased. Model Not Saved.
**************************************************





In [15]:
# Calculate BLEU score
def calculate_bleu(predictions, references):
    bleu_scores = []
    for pred, ref in zip(predictions, references):
        bleu_scores.append(sentence_bleu([ref.split()], pred.split()))
    return sum(bleu_scores) / len(bleu_scores)


In [16]:
# Test the model
bleu_score = test(model, test_loader, tokenizer, device)
print(f"BLEU Score: {bleu_score:.4f}")


Testing: 100%|██████████| 135/135 [04:02<00:00,  1.80s/it]
Corpus/Sentence contains 0 counts of 4-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


BLEU Score: 0.1059


In [1]:
import torch
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, DistilBertForQuestionAnswering, DistilBertTokenizer, T5ForConditionalGeneration, T5Tokenizer
import gradio as gr

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def load_models():
    gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')
    gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    gpt2_model.load_state_dict(torch.load('gpt2_qa_model.pth', map_location=torch.device('cpu')))
    gpt2_model.eval()

    distilbert_model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-uncased')
    distilbert_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    distilbert_model.load_state_dict(torch.load('distilbert_qa_model.pth', map_location=torch.device('cpu')))
    distilbert_model.eval()

    t5_model = T5ForConditionalGeneration.from_pretrained('t5-small')
    t5_tokenizer = T5Tokenizer.from_pretrained('t5-small')
    t5_model.load_state_dict(torch.load('t5_qa_model.pth', map_location=torch.device('cpu')), strict=False)
    t5_model.eval()

    return (gpt2_model, gpt2_tokenizer), (distilbert_model, distilbert_tokenizer), (t5_model, t5_tokenizer)

gpt2, distilbert, t5 = load_models()

def gpt2_inference(article, question):
    input_text = f"Context: {article}\nQuestion: {question}\nProvide a clear and concise answer:"

    inputs = gpt2[1].encode(input_text, return_tensors='pt').to(device)
    
    outputs = gpt2[0].generate(
        inputs,
        max_new_tokens=50,
        do_sample=True,
        temperature=0.7,
        pad_token_id=gpt2[1].eos_token_id,
        num_return_sequences=1,
        top_k=50,  
        top_p=0.95  
    )
    
    generated_answer = gpt2[1].decode(outputs[0], skip_special_tokens=True)
    
    answer_start = generated_answer.find("Provide a clear and concise answer:") + len("Provide a clear and concise answer:")
    answer = generated_answer[answer_start:].strip()

    if "?" in answer:
        return "Answer not available or too brief."

    return answer


def distilbert_inference(article, question):
    inputs = distilbert[1].encode_plus(question, article, add_special_tokens=True, return_tensors='pt')
    with torch.no_grad():
        outputs = distilbert[0](**inputs)
    answer_start_scores, answer_end_scores = outputs.start_logits, outputs.end_logits
    answer_start = torch.argmax(answer_start_scores)
    answer_end = torch.argmax(answer_end_scores) + 1
    answer = distilbert[1].convert_tokens_to_string(distilbert[1].convert_ids_to_tokens(inputs['input_ids'][0][answer_start:answer_end]))
    return answer.strip()


def t5_inference(article, question):
    input_text = f"question: {question} context: {article}"
    inputs = t5[1].encode(input_text, return_tensors='pt')
    outputs = t5[0].generate(inputs, max_new_tokens=50, num_beams=5, early_stopping=True)
    return t5[1].decode(outputs[0], skip_special_tokens=True)


def inference(selected_model, article, question):

    # Normalize the input
    selected_model = selected_model.strip().lower()
    print(f"Normalized Model: '{selected_model}'") 

    if selected_model == "gpt-2":
        return gpt2_inference(article, question)
    elif selected_model == "distilbert":
        return distilbert_inference(article, question)
    elif selected_model == "t5":
        return t5_inference(article, question)
    else:
        return "Model not recognized."
    

iface = gr.Interface(
    fn=inference,
    inputs=[
        gr.Dropdown(choices=["GPT-2", "DistilBERT", "T5"], label="Select Model", value="GPT-2"),  # Set default value
        gr.Textbox(lines=2, placeholder="Enter the article here"),
        gr.Textbox(lines=2, placeholder="Enter your question here")
    ],
    outputs="text",
    title="Question Answering with DistilBert, T5 and GPT2 Models",
    description="Choose a model to answer questions based on the provided article."
)

iface.launch(share=True)

Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://bfd70b7a8cf395f813.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


