# Buzuzu-Mavi Challenge - Inference

**You will need a Hugging Face account to access the datasets and the models.**

Make sure you have generated an HF_TOKEN and added it to your notebook.

🚨 NB: To access the Inkuba model, make sure you have requested, and been granted, access to the gated InkubaLM model on Hugging Face.

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
import os
from typing import List
from pathlib import Path
from copy import deepcopy
import time
import csv

from huggingface_hub import login
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import Dataset, DatasetDict

In [3]:
# Turn off the parallelism of the Hugging Face `tokenizers` library for this process.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
# Authenticate with the Hugging Face Hub.
# The token is taken from the HF_TOKEN environment variable rather than being
# written into the notebook: an already-configured token is no longer
# overwritten with an empty string, and no secret is saved with the file.
hf_token = os.environ.get("HF_TOKEN", "").strip()
if not hf_token:
    # Local import: only needed for the interactive fallback.
    from getpass import getpass

    hf_token = getpass("Hugging Face token: ").strip()
    # Keep the environment variable populated for the rest of the session.
    os.environ["HF_TOKEN"] = hf_token
login(token=hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [None]:
# Directory that receives the prediction CSVs and the submission file.
output_path = Path("./output")
# Directory the test CSV files are read from.
data_path = Path("./data")

In [None]:
# Settings for each model variant, keyed by a size label.
# Every entry provides:
#   - "model_weights_path": the checkpoint file loaded into the rebuilt model;
#   - "custom_config": overrides applied on top of the base model's config
#     (keys that are absent keep the base model's value).
model_configs = {
    "50M": {
        "model_weights_path": "./weights/vulavula_inkuba_instruct_tokenizer8k_pruned_v4.pth",
        "custom_config": {
            # if `custom_tokenizer_path` is not specified, the tokenizer of the base model is used
            "custom_tokenizer_path": "./tokenizer",
            "hidden_size": 1024,
            "intermediate_size": 2816,
            "max_position_embeddings": 1024,
            "tie_word_embeddings": True,
        }
    },
    "100M": {
        "model_weights_path": "./weights/vulavula_inkuba_instruct_tokenizer60k_pruned.pth",
        "custom_config": {
            # no `custom_tokenizer_path` here, so the tokenizer of the base model is used
            "hidden_size": 1024,
            "intermediate_size": 2816,
            "max_position_embeddings": 1024,
            "tie_word_embeddings": True,
        }
    },
    "40M": {
        "model_weights_path": "./weights/vulavula_inkuba_instruct_tokenizer8k_pruned_6layer_2stage.pth",
        "custom_config": {
            # if `custom_tokenizer_path` is not specified, the tokenizer of the base model is used
            "custom_tokenizer_path": "./tokenizer",
            "hidden_size": 1024,
            "intermediate_size": 2816,
            "max_position_embeddings": 1024,
            "tie_word_embeddings": True,
            # the only variant that also overrides the number of layers
            "num_hidden_layers": 6,
        }
    },
}

In [7]:
# Key of the `model_configs` entry to run ("50M", "100M" or "40M").
model_type = "40M"

In [8]:
# Hugging Face Hub id of the base model; its config (and, when no custom
# tokenizer is configured, its tokenizer) is the starting point.
base_model_name = "lelapa/InkubaLM-0.4B"
# Checkpoint path and config overrides of the variant selected by `model_type`.
model_weights_path = model_configs[model_type]["model_weights_path"]
custom_config = model_configs[model_type]["custom_config"]

In [10]:
# Create the output directory (and any missing parents); no error if it already exists.
output_path.mkdir(parents=True, exist_ok=True)

In [11]:
def run_model_function(
    model,
    tokenizer,
    BASE_PROMPT,
    task_instruction,  # only used when `custom_instruct` is True
    dataset,
    csv_file_path,
    custom_instruct=False,
    sample_size=4,
    max_new_tokens=100,
    seed=42,
    do_sample=True,
    min_length=None,
    use_cache=True,
    top_p=1.0,
    temperature=0.5,
    top_k=5,
    repetition_penalty=1.2,
    length_penalty=1,
    **kwargs,
):
    """
    Run inference over a dataset and write one CSV row per processed item.

    For every item a prompt is built from `BASE_PROMPT`, a response is
    generated with `model.generate`, and (for every task except "mmt") a
    per-class score is computed from a forward pass over the prompt.

    Inputs
    model: model used for generation and scoring; it is switched to eval mode
    tokenizer: tokenizer used to encode the prompt and decode the generation
    BASE_PROMPT: format string with two `{}` slots (instruction, input text)
    task_instruction: instruction used for every item when `custom_instruct` is True
    dataset: iterable of mappings with the keys "instruction", "inputs",
        "targets", "langs", "ID" and optionally "task" (defaults to "xnli"
        when the key is absent)
    csv_file_path: path of the CSV file to write (overwritten if it exists)
    custom_instruct: if True use `task_instruction` instead of the item's own instruction
    sample_size: maximum number of items to process, useful for quick tests
    max_new_tokens: number of tokens to generate; one-word tasks (sentiment,
        XNLI) can use a small value, translation uses the default of 100
    seed: seed passed to `torch.manual_seed` before inference so that sampled
        generations are reproducible; pass None to leave the RNG untouched
    do_sample, min_length, use_cache, top_p, temperature, top_k,
    repetition_penalty, length_penalty, **kwargs: forwarded to `model.generate`

    Outputs
    None. A CSV file with the columns ID, Instruction, Input Text, Response,
    Log-Likelihood, Targets, Task and Langs is written. "Log-Likelihood" holds
    the string form of a [batch_size, 3] tensor, or `[]` for the "mmt" task.
    """
    model.eval()

    # `seed` used to be accepted but silently ignored; applying it makes
    # sampling (do_sample=True) reproducible between runs.
    if seed is not None:
        torch.manual_seed(seed)

    header = [
        "ID",
        "Instruction",
        "Input Text",
        "Response",
        "Log-Likelihood",
        "Targets",
        "Task",
        "Langs",
    ]

    with open(csv_file_path, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(header)

        for i, item in enumerate(dataset):
            if i >= sample_size:
                break

            instruction = task_instruction if custom_instruct else item["instruction"]
            input_text = item["inputs"]
            labels = item["targets"]
            langs = item["langs"]
            # Only a missing "task" key falls back to "xnli"; any other error
            # is no longer swallowed by a bare `except`.
            try:
                task = item["task"]
            except KeyError:
                task = "xnli"
            identity = item["ID"]

            user_prompt = BASE_PROMPT.format(instruction, input_text)
            batch = tokenizer(user_prompt, return_tensors="pt")
            batch = {k: v.to(model.device) for k, v in batch.items()}

            with torch.no_grad():
                outputs = model.generate(
                    **batch,
                    max_new_tokens=max_new_tokens,
                    do_sample=do_sample,
                    top_p=top_p,
                    temperature=temperature,
                    min_length=min_length,
                    use_cache=use_cache,
                    top_k=top_k,
                    repetition_penalty=repetition_penalty,
                    length_penalty=length_penalty,
                    **kwargs,
                )

            # The decoded sequence starts with the prompt; drop that prefix by
            # character count to keep only the newly generated text.
            # NOTE(review): this assumes decoding reproduces the prompt text
            # exactly - confirm for the tokenizer in use.
            output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)[
                len(user_prompt) :
            ]

            if task != "mmt":
                with torch.no_grad():
                    logits = model(
                        **batch
                    ).logits  # Shape: [batch_size, seq_length, vocab_size]
                    log_probs = torch.nn.functional.log_softmax(
                        logits, dim=-1
                    )  # Shape: [batch_size, seq_length, vocab_size]

                    # Index tensor selecting vocabulary ids 0, 1 and 2 at
                    # every prompt position.
                    # NOTE(review): these are raw vocabulary ids, not the
                    # token ids of the label words - confirm this is the
                    # scoring the evaluation expects.
                    t_labels = (
                        torch.tensor([0, 1, 2])
                        .unsqueeze(0)
                        .unsqueeze(0)
                        .expand(
                            batch["input_ids"].size(0), batch["input_ids"].size(1), -1
                        )
                        .to(model.device)
                    )

                    # Gather the log-probabilities of those three ids
                    log_likelihoods_per_class = log_probs.gather(
                        2, t_labels
                    )  # Shape: [batch_size, seq_length, 3]

                    # Average over the sequence to get one score per class
                    log_likelihoods_per_class = log_likelihoods_per_class.mean(
                        dim=1
                    )  # Shape: [batch_size, 3]
            else:
                # Translation rows carry no class scores.
                log_likelihoods_per_class = []

            writer.writerow(
                [
                    identity,
                    instruction,
                    input_text,
                    output_text,
                    log_likelihoods_per_class,
                    labels,  # ground truth
                    task,
                    langs,
                ]
            )

def process_likelihood(likelihood_str: str) -> List[float]:
    """
    Parse the string form of a tensor (as stored in the CSV) into a list of floats.

    The previous implementation removed the literal substrings
    " device='cuda:0'" and " dtype=torch.float16", so a tensor printed on any
    other device or with any other dtype failed with a ValueError. This
    version removes every `device=`, `dtype=`, `requires_grad=` and `grad_fn=`
    annotation and then extracts the numbers.

    Args:
    likelihood_str (str): e.g. "tensor([[-1.2, -0.5, -3.0]], device='cuda:1')".

    Returns:
    List[float]: the numbers in order of appearance (empty if there are none).
    """
    import re  # local import so the function does not rely on a file-level import

    # Remove keyword-style annotations first: their values ("cuda:0",
    # "torch.float16", "<MeanBackward1>") contain digits that are not data.
    without_annotations = re.sub(
        r"\b(?:device|dtype|requires_grad|grad_fn)\s*=\s*[^,)\]]+",
        "",
        likelihood_str,
    )

    # Plain decimals ("0.", "-1.3863"), scientific notation ("1.0000e-05")
    # and the special values inf / nan, each optionally signed.
    number_pattern = (
        r"(?<![\w.])[-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|inf|nan)(?!\w)"
    )
    return [float(token) for token in re.findall(number_pattern, without_annotations)]

def create_submission(output_path, test_flag: bool):
    """
    Merge the six per-task prediction files into a single submission file.

    Args:
    output_path: directory holding the prediction CSVs; the submission file is
        written to the same directory.
    test_flag (bool): If True, read the "*_prediction_dev.csv" files and write
        "submission_test.csv" with the columns ID, Response and Targets;
        otherwise read the "*_prediction.csv" files and write "submission.csv"
        with the columns ID and Response.

    Returns:
    The DataFrame that was written to disk.

    Raises:
    FileNotFoundError: if one of the prediction files is missing (a hint is
        printed before the error is re-raised).
    """
    file_suffix = "_dev" if test_flag else ""
    # Read order is kept: sentiment, translation, then XNLI (Hausa before Swahili).
    prediction_stems = ("hau_sent", "swa_sent", "hau_mt", "swa_mt", "hau_xnli", "swa_xnli")

    frames = {}
    try:
        for stem in prediction_stems:
            csv_name = f"{stem}_prediction{file_suffix}.csv"
            frames[stem] = pd.read_csv(os.path.join(output_path, csv_name))
    except FileNotFoundError as e:
        print(
            "Seems you have not completed all the tasks, please complete all the tasks before attempting to create your submission file"
        )
        raise e

    # Translation rows keep their generated text as the response.
    translation_rows = pd.concat(
        [frames["hau_mt"], frames["swa_mt"]], ignore_index=True
    )
    # Classification rows use the Log-Likelihood column as their response.
    classification_rows = (
        pd.concat(
            [frames["hau_sent"], frames["swa_sent"], frames["hau_xnli"], frames["swa_xnli"]],
            ignore_index=True,
        )
        .drop(columns=["Response"])
        .rename(columns={"Log-Likelihood": "Response"})
    )
    res = pd.concat([classification_rows, translation_rows], ignore_index=True)

    def process_row(row):
        # Rows whose ID mentions "xnli" or "sent" become the index of the
        # highest-scoring class; every other row is passed through unchanged.
        row_id = row["ID"]
        if not ("xnli" in row_id or "sent" in row_id):
            return row["Response"]
        return np.argmax(process_likelihood(row["Response"]))

    res["Response"] = res.apply(process_row, axis=1)

    if test_flag:
        submission_name = "submission_test.csv"
        submission_columns = ["ID", "Response", "Targets"]
    else:
        submission_name = "submission.csv"
        submission_columns = ["ID", "Response"]

    submission = res[submission_columns]
    submission.to_csv(os.path.join(output_path, submission_name), index=False)
    return submission

## Load the model

In [12]:
# Prefer the GPU when one is available.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Use the custom tokenizer when the selected config names one; otherwise use
# the tokenizer that ships with the base model.
tokenizer_path = (
    base_model_name
    if custom_config is None
    else custom_config.get("custom_tokenizer_path", base_model_name)
)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

# Load the base model; its config is the starting point for the custom one.
base_model_kwargs = dict(
    return_dict=True,
    low_cpu_mem_usage=True,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float32,
)
model = AutoModelForCausalLM.from_pretrained(base_model_name, **base_model_kwargs)

# With a custom config: copy the base config, apply the overrides, build a
# new model from that config and load the checkpoint weights into it.
if custom_config is not None:
    config_prn = deepcopy(model.config)
    config_prn.vocab_size = tokenizer.vocab_size
    overridable_fields = (
        "hidden_size",
        "intermediate_size",
        "max_position_embeddings",
        "tie_word_embeddings",
        "num_hidden_layers",
    )
    for field_name in overridable_fields:
        # Fields missing from `custom_config` keep the base model's value.
        base_value = getattr(config_prn, field_name)
        setattr(config_prn, field_name, custom_config.get(field_name, base_value))

    rebuilt_model = AutoModelForCausalLM.from_config(config_prn)
    model = rebuilt_model.to(torch.float32).to(device)
    # NOTE(review): torch.load unpickles the file - only load checkpoints
    # from a trusted source.
    checkpoint_state = torch.load(model_weights_path, map_location=device)
    model.load_state_dict(checkpoint_state)

model.eval()
print(f"memory (GB): {model.get_memory_footprint() / 1024**3:.2f}")
print(f"num params:  {model.num_parameters():,.0f}")

  warn(
2025-04-09 08:24:46.780462: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-09 08:24:46.780514: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-09 08:24:46.781654: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-09 08:24:46.787692: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
VulavulaLlamaForCausalLM has generative capab

memory (GB): 0.16
num params:  42,087,424


In [13]:
# Instruction-style prompt template. The first `{}` receives the task
# instruction and the second the input text; the template ends with the
# "### Response:" marker followed by a newline.
BASE_PROMPT = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request."
    "\n\n### Instruction:\n{}"
    "\n\n### Input:\n{}"
    "\n\n### Response:\n"
)
# Sanity check: render the template with placeholder values.
print(BASE_PROMPT.format("test1", "test2"))

Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
test1

### Input:
test2

### Response:



In [14]:
def llm_classify(text, classes):
    """
    Pick the class whose label is the most likely continuation of `text`.

    Each class label is tokenized, and the log-probabilities of its tokens are
    summed while the tokens are appended to the prompt one at a time. Uses the
    notebook-level `tokenizer` and `model`.

    Args:
    text: prompt to score the labels against.
    classes: iterable of label strings.

    Returns:
    (predicted, scores): the label with the highest summed log-probability,
    and a dict mapping every label to the exponential of that sum.

    Raises:
    ValueError: if a label yields no tokens (it would otherwise get a
        log-probability of 0 and always win), or if `classes` is empty.
    """
    # Encode classes. Each label is prefixed with "_" and the first resulting
    # token is dropped.
    # NOTE(review): presumably this avoids a leading-space marker on the first
    # label token - confirm against the tokenizer.
    label_token_ids = []
    for word in classes:
        word_tokens = tokenizer.tokenize(f"_{word}")[1:]
        if not word_tokens:
            raise ValueError(f"Class label {word!r} produced no tokens to score")
        label_token_ids.append(tokenizer.convert_tokens_to_ids(word_tokens))

    inputs = tokenizer(text, return_tensors="pt").to(model.device)

    # Calculate log probs for each label. Gradients are never needed here, so
    # the forward passes run under no_grad to avoid building autograd graphs.
    scores = {}
    with torch.no_grad():
        for label, token_ids in zip(classes, label_token_ids):
            input_ids = inputs.input_ids.clone()
            total_logprob = 0.0
            for token_id in token_ids:
                # Distribution over the next token given everything so far.
                logits = model(input_ids).logits[:, -1, :]
                total_logprob += torch.log_softmax(logits, dim=-1)[0, token_id].item()
                # Append the label token so the next one is scored in context.
                next_token = torch.tensor([[token_id]]).to(model.device)
                input_ids = torch.cat([input_ids, next_token], dim=1)
            scores[label] = total_logprob

    predicted = max(scores, key=scores.get)
    # Report probabilities rather than log-probabilities.
    scores = {k: np.exp(v) for k, v in scores.items()}
    return predicted, scores

In [15]:
# llm_classify(user_prompt, ["Chanya", "Wastani", "Hasi"])

In [16]:
# from functools import partial

# f = partial(llm_classify, classes=["Chanya", "Wastani", "Hasi"])
# res = pd.read_csv("output/swa_sent_prediction_dev.csv")

# res["result2"] = [f(BASE_PROMPT.format(row["Instruction"], row["Input Text"]))[0] for _, row in res.iterrows()]
# res

In [17]:
# from functools import partial

# f = partial(llm_classify, classes=['entailment', 'neutral', 'contradiction'])
# res = pd.read_csv("output/swa_xnli_prediction_dev.csv")

# res["result2"] = [f(BASE_PROMPT.format(row["Instruction"], row["Input Text"]))[0] for _, row in res.iterrows()]
# res

## Run inference on test data

Generate sentiment files

In [18]:
# Load the Swahili dataset from a local CSV file
sentiment_test_df = pd.read_csv(
    os.path.join(data_path, "SentimentTest.csv")
)

sentiment_test_df["instruction"] = sentiment_test_df["instruction"].str.replace("Neutral:", "Tsaka-tsaki:")

hau_dataset = Dataset.from_pandas(
    sentiment_test_df[sentiment_test_df['langs']=='hausa']
)
swa_dataset = Dataset.from_pandas(
    sentiment_test_df[sentiment_test_df['langs']=='swahili']
)

# If you need a DatasetDict to mimic the Hugging Face structure
dataset_dict = DatasetDict({
    "swahili": swa_dataset,
    "hausa": hau_dataset
})

# Print to verify
print(swa_dataset)
print(hau_dataset)

Dataset({
    features: ['ID', 'task', 'langs', 'data_source', 'instruction', 'inputs', 'targets', '__index_level_0__'],
    num_rows: 150
})
Dataset({
    features: ['ID', 'task', 'langs', 'data_source', 'instruction', 'inputs', 'targets', '__index_level_0__'],
    num_rows: 150
})


In [19]:
# Sentiment inference with greedy decoding (no sampling; the sampling and
# penalty arguments are passed as None), at most 15 new tokens per item.
# Swahili is processed first, then Hausa.
for sent_dataset, sent_csv_name in (
    (swa_dataset, "swa_sent_prediction.csv"),
    (hau_dataset, "hau_sent_prediction.csv"),
):
    run_model_function(
        model,
        tokenizer,
        BASE_PROMPT,
        task_instruction=None,
        dataset=sent_dataset,
        csv_file_path=os.path.join(output_path, sent_csv_name),
        custom_instruct=False,
        sample_size=150,
        max_new_tokens=15,
        do_sample=False,
        temperature=None,
        top_k=None,
        repetition_penalty=None,
        length_penalty=None,
    )

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for o

In [20]:
# Label words the model may generate (Hausa and Swahili) -> English sentiment.
sent2eng = {
    "Kyakkyawa": "positive", "Korau": "negative", "Tsaka-tsaki": "neutral",
    "Chanya": "positive", "Hasi": "negative", "Wastani": "neutral",
}
# English sentiment -> class index.
engsent2idx = {"positive": 0, "neutral": 1, "negative": 2}
sent2idx = {k: engsent2idx[v] for k, v in sent2eng.items()}

# Class used when a generated response is not one of the known label words.
# Without a fallback a single unmatched response made the whole cell fail.
SENT_FALLBACK_IDX = engsent2idx["neutral"]


def _to_tensor(x):
    """Return the string form of a (1, 3) one-hot tensor with a 1 at index `x`.

    The string goes into the "Log-Likelihood" column, where
    `create_submission` parses it and takes the argmax.
    """
    t = torch.zeros((1, 3))
    t[0, int(x)] = 1.
    return str(t)


# Overwrite the model-computed "Log-Likelihood" column of both sentiment
# prediction files with a one-hot encoding of the generated label.
fps = [output_path/"hau_sent_prediction.csv", output_path/"swa_sent_prediction.csv"]
for fp in fps:
    df = pd.read_csv(fp)
    # astype(str) keeps `.str` usable even when every response is empty (NaN).
    pred = df["Response"].astype(str).str.strip().map(sent2idx)
    n_unmapped = int(pred.isna().sum())
    if n_unmapped:
        print(
            f"{fp}: {n_unmapped} response(s) matched no known label; "
            f"using fallback class {SENT_FALLBACK_IDX}"
        )
    df["pred"] = pred.fillna(SENT_FALLBACK_IDX).astype(int)
    df["Log-Likelihood"] = df["pred"].apply(_to_tensor)
    # index=False: do not add an "Unnamed: 0" column each time this cell runs.
    df.to_csv(fp, index=False)

Generate AfriXnli files

In [21]:
# Load the Swahili dataset from a local CSV file
xnli_test_df = pd.read_csv(
    os.path.join(data_path, "XNLITest.csv")
)

instruction = (
    "You will be given two {lang} sentences. Your job is to  is to predict textual entailment. "
    "In other words, does sentence A imply/contradict/neither sentence B. "
    "Your response must be one word: entailment, contradiction, or neutral."
)
xnli_test_df["instruction"] = xnli_test_df["langs"].apply(
    lambda l: instruction.format(lang="Swahili" if l=="swa" else "Hausa")
)
xnli_test_df["inputs"] = xnli_test_df.apply(lambda row: f"Sentence A: {row['premise']}\nSentence B: {row['inputs']}", axis=1)

hau_dataset = Dataset.from_pandas(
    xnli_test_df[xnli_test_df['langs']=='hau']
)
swa_dataset = Dataset.from_pandas(
    xnli_test_df[xnli_test_df['langs']=='swa']
)

# If you need a DatasetDict to mimic the Hugging Face structure
dataset_dict = DatasetDict({
    "swahili": swa_dataset,
    "hausa": hau_dataset
})

# Print to verify
print(swa_dataset)
print(hau_dataset)

Dataset({
    features: ['ID', 'langs', 'premise', 'inputs', 'instruction', 'targets', '__index_level_0__'],
    num_rows: 150
})
Dataset({
    features: ['ID', 'langs', 'premise', 'inputs', 'instruction', 'targets', '__index_level_0__'],
    num_rows: 150
})


In [22]:
# XNLI inference with greedy decoding (no sampling; the sampling and penalty
# arguments are passed as None), at most 15 new tokens per item.
# Hausa is processed first, then Swahili.
for xnli_dataset, xnli_csv_name in (
    (hau_dataset, "hau_xnli_prediction.csv"),
    (swa_dataset, "swa_xnli_prediction.csv"),
):
    run_model_function(
        model,
        tokenizer,
        BASE_PROMPT,
        task_instruction=None,
        dataset=xnli_dataset,
        csv_file_path=os.path.join(output_path, xnli_csv_name),
        custom_instruct=False,
        sample_size=150,
        max_new_tokens=15,
        do_sample=False,
        temperature=None,
        top_k=None,
        repetition_penalty=None,
        length_penalty=None,
    )

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for o

In [23]:
# Class index <-> XNLI label word.
label_map = {0: 'entailment', 1: 'neutral', 2: 'contradiction'}
inv_label_map = {v: k for k, v in label_map.items()}

# Class used when a generated response is not one of the three label words.
# Without a fallback a single unmatched response made the whole cell fail.
XNLI_FALLBACK_IDX = inv_label_map['neutral']


def _to_tensor(x):
    """Return the string form of a (1, 3) one-hot tensor with a 1 at index `x`.

    The string goes into the "Log-Likelihood" column, where
    `create_submission` parses it and takes the argmax.
    """
    t = torch.zeros((1, 3))
    t[0, int(x)] = 1.
    return str(t)


# Overwrite the model-computed "Log-Likelihood" column of both XNLI
# prediction files with a one-hot encoding of the generated label.
fps = [output_path/"hau_xnli_prediction.csv", output_path/"swa_xnli_prediction.csv"]
for fp in fps:
    df = pd.read_csv(fp)
    # astype(str) keeps `.str` usable even when every response is empty (NaN).
    pred = df["Response"].astype(str).str.strip().map(inv_label_map)
    n_unmapped = int(pred.isna().sum())
    if n_unmapped:
        print(
            f"{fp}: {n_unmapped} response(s) matched no known label; "
            f"using fallback class {XNLI_FALLBACK_IDX}"
        )
    df["pred"] = pred.fillna(XNLI_FALLBACK_IDX).astype(int)
    df["Log-Likelihood"] = df["pred"].apply(_to_tensor)
    # index=False: do not add an "Unnamed: 0" column each time this cell runs.
    df.to_csv(fp, index=False)

Generate MT files

In [24]:
# Load the Swahili dataset from a local CSV file
xnli_test_df = pd.read_csv(
    os.path.join(data_path, "MTTest.csv")
)

instr_hau = 'Translate the following from English into Hausa.'
instr_swa = 'Translate the following from English into Swahili.'
xnli_test_df["instruction"] = xnli_test_df["langs"].apply(
    lambda x: instr_hau if x == "eng-hau" else instr_swa
)

hau_dataset = Dataset.from_pandas(
    xnli_test_df[xnli_test_df['langs']=='eng-hau']
)
swa_dataset = Dataset.from_pandas(
    xnli_test_df[xnli_test_df['langs']=='eng-swa']
)

# If you need a DatasetDict to mimic the Hugging Face structure
dataset_dict = DatasetDict({
    "swahili": swa_dataset,
    "hausa": hau_dataset
})

# Print to verify
print(swa_dataset)
print(hau_dataset)

Dataset({
    features: ['ID', 'task', 'langs', 'data_source', 'instruction', 'inputs', 'targets', '__index_level_0__'],
    num_rows: 150
})
Dataset({
    features: ['ID', 'task', 'langs', 'data_source', 'instruction', 'inputs', 'targets', '__index_level_0__'],
    num_rows: 150
})


In [25]:
# Translation inference with 4-beam search and no sampling. All arguments not
# listed here keep the defaults of `run_model_function`.
# Hausa is processed first, then Swahili.
for mt_dataset, mt_csv_name in (
    (hau_dataset, "hau_mt_prediction.csv"),
    (swa_dataset, "swa_mt_prediction.csv"),
):
    run_model_function(
        model,
        tokenizer,
        BASE_PROMPT,
        task_instruction=None,
        dataset=mt_dataset,
        csv_file_path=os.path.join(output_path, mt_csv_name),
        custom_instruct=False,
        sample_size=150,
        num_beams=4,
        do_sample=False,
        temperature=None,
        top_k=None,
    )

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for o

Now create the submission file:

In [26]:
# Build the final submission from the six "*_prediction.csv" files in
# `output_path` (test_flag=False writes "submission.csv" with ID and Response).
# The returned DataFrame is the cell's output.
create_submission(output_path=output_path, test_flag=False)

Unnamed: 0,ID,Response
0,ID_f3c74c7b_sentiment_test__hausa,1
1,ID_aad19dbf_sentiment_test__hausa,0
2,ID_f6de0381_sentiment_test__hausa,2
3,ID_885caf5c_sentiment_test__hausa,2
4,ID_7b906b9a_sentiment_test__hausa,0
...,...,...
895,ID_d054836f_test__mt_eng-swa,nilipiga kura siku ya kwanza
896,ID_863222b7_test__mt_eng-swa,tafadhali usisahau kuchangia.
897,ID_c5df2cca_test__mt_eng-swa,unadhani kuna faida yoyote ya kucheza kwenye c...
898,ID_eda1ff7d_test__mt_eng-swa,nini kama anaweza kubadilisha?
