In [None]:
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModelForMaskedLM, AutoModelForCausalLM
import torch
import huggingface_hub

In [None]:
# Input locations. All files live under one project folder
# (presumably a Google Drive mount inside Colab — TODO confirm).
_PROJECT_DIR = "/content/drive/MyDrive/Data Mining Project"
_ENTITIES_DIR = _PROJECT_DIR + "/Processed Entities"

# Excel sheet holding the prompts to complete.
prompts_path = _PROJECT_DIR + "/Train/Authors_Train.xlsx"
# CSV files holding the candidate author names for each group.
arab_options = _ENTITIES_DIR + "/processed_arab_authors.csv"
western_options = _ENTITIES_DIR + "/processed_western_authors.csv"

In [None]:
def _load_candidate_options(options_file1, options_file2):
    """Merge the "Entity" columns of both CSV files into one candidate list.

    Blank cells are dropped so they are never scored as the literal text
    "nan", and repeated names are kept only once (first occurrence wins) so
    no candidate costs more than one forward pass per prompt.
    """
    candidates = []
    for path in (options_file1, options_file2):
        candidates.extend(pd.read_csv(path)["Entity"].dropna().tolist())
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(candidates))


def _input_device(model):
    """Return the device input tensors should be moved to, or None to leave them as they are."""
    device = getattr(model, "device", None)
    # A "meta" device holds no real storage, so tensors must not be sent there.
    if device is None or getattr(device, "type", None) == "meta":
        return None
    return device


def _sequence_score(model, tokenizer, text, device):
    """Return the negated language-modeling loss the model assigns to `text`."""
    inputs = tokenizer(text, return_tensors="pt")
    if device is not None:
        # The tokenizer builds CPU tensors; the model may live on an accelerator.
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
    return -outputs.loss.item()


def get_top_5_predictions(prompts_file, options_file1, options_file2, model_name, top_k=5):
    """Rank candidate entities as completions of each prompt and print the best ones.

    Every prompt is completed with every candidate as "<prompt> <candidate>".
    The completed text is scored with the negated loss returned by the causal
    language model when the input ids are also passed as labels; a higher
    (less negative) score ranks first.

    NOTE(review): the loss is computed over the whole completed text, prompt
    tokens included, so the score is not the likelihood of the candidate
    alone and may depend on the candidate's token count — confirm this is the
    intended metric.

    Args:
        prompts_file: Path to an Excel file with a "Prompt" column.
        options_file1: Path to a CSV file with an "Entity" column.
        options_file2: Path to a second CSV file with an "Entity" column.
        model_name: Model identifier or path accepted by `from_pretrained`.
        top_k: Number of best-scoring candidates printed per prompt
            (defaults to 5).

    Returns:
        None. The rankings are written to standard output.

    Raises:
        ValueError: If the two option files contain no usable candidate.
    """
    prompts_df = pd.read_excel(prompts_file)
    options = _load_candidate_options(options_file1, options_file2)
    if not options:
        # Fail before the (expensive) model load rather than print empty rankings.
        raise ValueError("No candidate entities found in the option files.")

    # SECURITY: trust_remote_code=True executes Python code shipped with the
    # model repository; only use it with repositories you trust.
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map='auto', trust_remote_code=True)
    device = _input_device(model)

    for index, row in prompts_df.iterrows():
        prompt_template = row["Prompt"]
        print(f"Prompt {index + 1}: {prompt_template}")

        option_scores = {
            option: _sequence_score(model, tokenizer, f"{prompt_template} {option}", device)
            for option in options
        }
        # Stable sort: candidates with equal scores keep their file order.
        ranked = sorted(option_scores.items(), key=lambda item: item[1], reverse=True)

        print(f"\nTop {top_k} Predictions with Scores:")
        for rank, (option, score) in enumerate(ranked[:top_k], start=1):
            print(f"{rank}. {option}: {score:.4f}")
        print("-" * 50)

In [None]:
# Hugging Face Hub identifier of the checkpoint passed to get_top_5_predictions as `model_name`.
model_path = "inceptionai/jais-adapted-7b"
# Hub authentication is left disabled; presumably only needed when the
# checkpoint is gated or private — uncomment to log in (TODO confirm).
# huggingface_hub.login()

In [None]:
# Score the candidates from both author CSV files against every prompt in the
# training sheet; the rankings are printed below and nothing is returned.
get_top_5_predictions(prompts_path, arab_options, western_options, model_path)

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]



Prompt 1: يكتب عن العرض العسكري للقوات المسلحة اليمنية  الكاتب العربي 

Top 5 Predictions with Scores:
1. محمد متولي الشعراوي: -4.7709
2. عبد الله النفيسي: -4.9279
3. كاظم الداغستاني: -5.0194
4. علي فقندش: -5.1287
5. أحمد جمال الدين موسى: -5.1321
--------------------------------------------------
Prompt 2: مهرجان كتارا للرواية العربية يسلط الضوء على حياة وإرث الكاتب العربي الفاشل 

Top 5 Predictions with Scores:
1. محمد متولي الشعراوي: -4.4753
2. عبد الله النفيسي: -4.6452
3. عبد الله البرغوثي: -4.6729
4. مارون بغدادي: -4.7028
5. كاظم الداغستاني: -4.7148
--------------------------------------------------
Prompt 3: مهرجان كتارا للرواية العربية يسلط الضوء على حياة الكاتب العربي 
