In [None]:
# instalation of pytorch_lightning
!pip install git+https://github.com/PyTorchLightning/pytorch-lightning.git

Collecting git+https://github.com/PyTorchLightning/pytorch-lightning.git
  Cloning https://github.com/PyTorchLightning/pytorch-lightning.git to /tmp/pip-req-build-hm65jfu0
  Running command git clone --filter=blob:none --quiet https://github.com/PyTorchLightning/pytorch-lightning.git /tmp/pip-req-build-hm65jfu0
  Resolved https://github.com/PyTorchLightning/pytorch-lightning.git to commit 98005bbed0b7ded09a4b88c6fb6f72527a451d33
  Running command git submodule update --init --recursive -q
  Encountered 22 file(s) that should have been pointers, but weren't:
        .notebooks/course_UvA-DL/01-introduction-to-pytorch.ipynb
        .notebooks/course_UvA-DL/02-activation-functions.ipynb
        .notebooks/course_UvA-DL/03-initialization-and-optimization.ipynb
        .notebooks/course_UvA-DL/04-inception-resnet-densenet.ipynb
        .notebooks/course_UvA-DL/05-transformers-and-MH-attention.ipynb
        .notebooks/course_UvA-DL/06-graph-neural-networks.ipynb
        .notebooks/course_UvA

In [None]:
# libraries
import os

import gdown
import pytorch_lightning as pl
import torch
from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [None]:
# Download the model file (the fine-tuned checkpoint, roughly 4.4 GB)

# file url
url = "https://drive.google.com/uc?id=1--UOafxtdlb4Oj7zxwuHLRLBP4IMIu6K"

# the file name
output = "ara_qa_model_epoch4.ckpt"

# Set to True to discard the local copy and fetch the checkpoint again.
force_download = False

if force_download and os.path.exists(output):
    os.remove(output)

# Reuse an existing local copy so that re-running the notebook does not
# transfer the whole checkpoint again.
if not os.path.exists(output):
    gdown.download(url, output, quiet=False)

# Fail here with a clear message rather than later, inside checkpoint loading.
if not os.path.exists(output):
    raise FileNotFoundError(f"Checkpoint download failed: {output} was not created")

Downloading...
From (original): https://drive.google.com/uc?id=1--UOafxtdlb4Oj7zxwuHLRLBP4IMIu6K
From (redirected): https://drive.google.com/uc?id=1--UOafxtdlb4Oj7zxwuHLRLBP4IMIu6K&confirm=t&uuid=1273c04d-d2f1-4f3a-8a5d-e5f68d61ac50
To: /content/ara_qa_model_epoch4.ckpt
100%|██████████| 4.41G/4.41G [01:21<00:00, 54.1MB/s]


'ara_qa_model_epoch4.ckpt'

In [None]:
# Model Class
class AraQAModel(pl.LightningModule):
    """LightningModule wrapping the AraT5v2 model used for question answering.

    The wrapped Hugging Face model is stored in ``self.model``; the training and
    validation steps log the loss returned by the model under ``train_loss`` and
    ``val_loss`` respectively.
    """

    def __init__(self):
        """Instantiate the pretrained ``UBC-NLP/AraT5v2-base-1024`` model."""
        super().__init__()
        self.model = T5ForConditionalGeneration.from_pretrained("UBC-NLP/AraT5v2-base-1024", return_dict=True)

    def forward(self, input_ids, attention_mask, labels=None):
        """Run the wrapped model and return ``(loss, logits)``.

        Args:
            input_ids: Token ids of the encoder input.
            attention_mask: Attention mask belonging to ``input_ids``.
            labels: Optional target token ids, forwarded to the model as
                ``labels``.

        Returns:
            The ``loss`` and ``logits`` attributes of the model output.
        """
        output = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        return output.loss, output.logits

    def _shared_step(self, batch, log_name):
        """Compute the loss for ``batch`` and log it under ``log_name``."""
        loss, _ = self(batch["input_ids"], batch["attention_mask"], batch["labels"])
        self.log(log_name, loss, prog_bar=True, logger=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the loss of one training batch, logged as ``train_loss``."""
        return self._shared_step(batch, "train_loss")

    def validation_step(self, batch, batch_idx):
        """Return the loss of one validation batch, logged as ``val_loss``."""
        return self._shared_step(batch, "val_loss")

    def configure_optimizers(self):
        """Build the AdamW optimizer and a plateau-based LR scheduler.

        The scheduler monitors ``val_loss`` (the name logged by
        ``validation_step``) and multiplies the learning rate by 0.1 after
        2 epochs without improvement.

        Returns:
            A one-element optimizer list and a one-element list holding the
            scheduler configuration dict.
        """
        # NOTE(review): AdamW / ReduceLROnPlateau were previously not imported
        # anywhere in the notebook; they now come from torch.optim. The training
        # notebook is not visible here - confirm it used the same AdamW
        # implementation before resuming training from this class.
        optimizer = AdamW(self.parameters(), lr=0.0001)
        scheduler = {
            'scheduler': ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, verbose=True),
            'monitor': 'val_loss'
        }
        return [optimizer], [scheduler]

In [None]:
# Fine-tuned model (restored from the downloaded checkpoint) and its tokenizer
checkpoint_path = "ara_qa_model_epoch4.ckpt"
tokenizer_name = "UBC-NLP/AraT5v2-base-1024"

trained_model = AraQAModel.load_from_checkpoint(checkpoint_path)
# The model is only used for inference in this notebook.
trained_model.freeze()

# Same pretrained identifier as the one the model class loads.
tokenizer = T5Tokenizer.from_pretrained(tokenizer_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/699 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.37k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/2.35M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/8.40M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# The Function of question answering
def answer_question(context, question):
    """Generate an answer to ``question`` from ``context`` with the loaded model.

    Uses the notebook-level ``tokenizer`` and ``trained_model``. The question is
    encoded as the first sequence and the context as the second; the pair is
    padded to 512 tokens and only the context is truncated when it is too long.

    Args:
        context: Passage the answer should be taken from.
        question: Question to answer.

    Returns:
        The decoded text of the first generated sequence, with special tokens
        removed.
    """
    encoded = tokenizer(
        question,
        context,
        max_length=512,
        padding="max_length",
        truncation="only_second",
        return_attention_mask=True,
        add_special_tokens=True,
        return_tensors="pt",
    )

    # Put the inputs on the same device as the model.
    device = trained_model.device
    ids = encoded["input_ids"].to(device)
    mask = encoded["attention_mask"].to(device)

    # Decoding settings passed to generate().
    generation_settings = {
        "max_length": 64,
        "num_beams": 2,
        "repetition_penalty": 2.5,
        "length_penalty": 1.0,
        "early_stopping": True,
    }

    trained_model.eval()
    with torch.no_grad():
        sequences = trained_model.model.generate(
            input_ids=ids,
            attention_mask=mask,
            **generation_settings,
        )

    return tokenizer.decode(sequences[0], skip_special_tokens=True)

In [None]:
# Example input: an Arabic question about the effects of climate change.
# The surrounding newlines/spaces are part of the string passed to the model.
question =  '''
ما هي بعض الآثار الناتجة عن التغير المناخي؟
 '''
# Passage (context) in which the answer to the question above is looked up.
context = '''
يعتبر المناخ من أهم العوامل التي تؤثر على الحياة على سطح الأرض،
 ويشكل التغير المناخي تحديًا كبيرًا يواجه البشرية في القرن الحادي والعشرين.
 يعود السبب الرئيسي في التغير المناخي إلى النشاطات البشرية مثل حرق الوقود الأحفوري وقطع الأشجار،
 مما يؤدي إلى زيادة تركيزات غازات الدفيئة في الغلاف الجوي.
 من أبرز آثار التغير المناخي ارتفاع درجات الحرارة وذوبان الجليد القطبي وارتفاع منسوب البحار،
 بالإضافة إلى حدوث ظواهر مناخية متطرفة مثل الأعاصير والفيضانات والجفاف.
  تتطلب مواجهة التغير المناخي جهودًا جماعية من جميع دول العالم من خلال تبني سياسات صديقة للبيئة والاعتماد على مصادر الطاقة المتجددة.

   '''

In [None]:
# Run the example: answer the question from the passage and show the result.
predicted_answer = answer_question(context, question)
print(predicted_answer)

ارتفاع درجات الحرارة وذوبان الجليد القطبي وارتفاع منسوب البحار
