In [1]:
import pandas as pd
import glob
import os
import time
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


2025-11-03 19:43:10.724272: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-03 19:43:10.739536: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-03 19:43:10.757125: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-11-03 19:43:10.762256: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-11-03 19:43:10.775413: I tensorflow/core/platform/cpu_feature_guar

In [2]:

# âœ… Load Qwen model
model_name = "Qwen/Qwen2.5-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
llm = pipeline("text-generation", model=model, tokenizer=tokenizer)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0


In [3]:

# =============================
# LLM PROMPT FUNCTION
# =============================

def ask_llm(question, options, answer):
    prompt = """
You are an evaluator responsible for checking the quality of multiple-choice questions (MCQs).

Task: Assess the following dataset entry:

- Question: {question}
- Options: {options}
- Correct Answer: {answer}

Evaluation Criteria:
1. Is the question factually accurate and logically correct?
2. Is the labeled correct answer actually correct?
3. Does the question function well as a proper MCQ? (Clear wording, non-overlapping options, etc.)

Response Format (strictly follow this):
Score: 1 or 0
Mistake: (brief explanation or "no mistake")
Improvement: (suggest correction or "no correction needed")

Few-Shot Examples:

Example 1:
Question: Where did Boli Khela originate?
Options: A) India, B) Bangladesh, C) Nepal, D) Sri Lanka
Correct Answer: B) Bangladesh
Expected Output:
Score: 1
Mistake: no mistake
Improvement: no correction needed

Example 2:
Question: What is the primary objective in Boli Khela?
Options: A) Running, B) Wrestling, C) Throwing a ball, D) Kicking a ball
Correct Answer: B) Wrestling
Expected Output:
Score: 1
Mistake: no mistake
Improvement: no correction needed

Example 3:
Question: When is Boli Khela traditionally played?
Options: A) Winter, B) Summer, C) During festivals, D) Spring
Correct Answer: C) During festivals
Expected Output:
Score: 1
Mistake: no mistake
Improvement: no correction needed
"""


    out = llm(prompt, max_length=350, do_sample=False)
    return out[0]["generated_text"].strip()


In [4]:
base_path = "/DATA/rohan_kirti/niladri/pks/"  # main folder containing all countries
output_file = "/DATA/rohan_kirti/niladri/pks/final_llm_results.csv"
# =============================
# CONFIGURATION
# =============================



# âœ… Regional language column mapping (update if needed)
COL_QUESTION = "Question"
COL_OPTIONS  = "Options"
COL_ANSWER   = "Answer"


In [5]:
# âœ… If output file doesnâ€™t exist â†’ write header
import csv
if not os.path.exists(output_file):
    with open(output_file, "w", newline="", encoding="utf-8-sig") as f:
        writer = csv.writer(f)
        writer.writerow(["country", "file", "sheet",
                         "question", "option", "answer", "llm_output"])

# âœ… Load already processed rows (to skip on restart)
processed = set()
df_existing = pd.read_csv(output_file, encoding="utf-8-sig")
for i, row in df_existing.iterrows():
    processed.add((row["file"], row["sheet"], row["question"]))


In [6]:

# =============================
# MAIN PROCESS
# =============================
i=1
excel_files = glob.glob(os.path.join(base_path, "**/*.xlsx"), recursive=True)

for file in excel_files:
    if i>11:
        break
    i+=1
    country = os.path.basename(os.path.dirname(file))
    xls = pd.ExcelFile(file)
    print(f"\nðŸ“Œ Processing: {file}")

    for sheet in xls.sheet_names:
        df = pd.read_excel(file, sheet_name=sheet)

        if COL_QUESTION not in df.columns:
            continue

        df = df[[COL_QUESTION, COL_OPTIONS, COL_ANSWER]].dropna(how="all")

        for idx, row in df.iterrows():
            key = (os.path.basename(file), sheet, row[COL_QUESTION])

            if key in processed:
                continue

            q, option, ans = row[COL_QUESTION], row[COL_OPTIONS], row[COL_ANSWER]

            try:
                llm_response = ask_llm(q, option, ans)
            except Exception as e:
                llm_response = f"ERROR: {e}"

            # âœ… WRITE IMMEDIATELY to CSV
            with open(output_file, "a", newline="", encoding="utf-8-sig") as f:
                writer = csv.writer(f)
                writer.writerow([country, os.path.basename(file), sheet,
                                 q, option, ans, llm_response])

            processed.add(key)

            print(f"âœ… Saved â†’ {country} | {sheet} | Row {idx}")
            # time.sleep(0.3)  # prevent overload

print("\nâœ… COMPLETE!")
print("ðŸ“„ Live-updating CSV:", output_file)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Both `max_new_tokens` (=256) and `max_length`(=350) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



ðŸ“Œ Processing: /DATA/rohan_kirti/niladri/pks/Bangladesh_HBQ.xlsx


: 

In [None]:
# =============================
# SAVE RESULTS
# =============================

df_out = pd.DataFrame(processed_data)
df_out.to_csv(output_file, index=False, encoding="utf-8-sig")

print("\nâœ… DONE! Results saved to:", output_file)
print(f"âœ… Total responses generated: {len(df_out)}")
