In [None]:
!wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-fp16.gguf

In [None]:
from langchain.chains import LLMChain
from langchain import PromptTemplate
from langchain.llms import LlamaCpp
from datasets import load_dataset
from json_fix import save_processed_data, load_json_safely
import re

llm = LlamaCpp(
    model_path="Phi-3-mini-4k-instruct-fp16.gguf",
    n_gpu_layers=35,                # Tối ưu cho RTX 4060 8GB (thử nghiệm 32-38)
    max_tokens=2048,
    n_ctx=2048,
    n_batch=2048,                   # Tối đa hoá batch size để GPU xử lý song song
    n_threads=8,                  # Số physical cores (6P + 8E)
    n_threads_batch=10,             # Tập trung vào 10 threads cho E-cores
    seed=42,
    verbose=False,
    device="cuda",
    flash_attn=True,                # Bắt buộc dùng Flash Attention
    tensor_split="",
    main_gpu=0,
    mmap=True,                      # Tăng tốc đọc model
    mul_mat_q=True,                 # Tối ưu phép nhân ma trận
    offload_kqv=False,              # Tắt offload để giảm latency
    low_vram=False,                 # Tắt chế độ tiết kiệm VRAM
    stop=["</answer>"],
    temperature=0.3,
    top_k=20,                       # Giới hạn không gian tìm kiếm
    repeat_penalty=1.05
)

template = """
<|user|>
You are a math expert. Please answer the following question by providing a detailed, step-by-step explanation, as if you were explaining to a 6-year-old. All answer must be in Vietnamese. Use clear, simple Vietnamese and appropriate mathematical notation.

A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
The assistant first thinks about the reasoning process in the mind and then provides the user
with the answer. The reasoning process and answer are enclosed within <think> </think> and
<answer> </answer> tags, respectively, i.e., <think> reasoning process here </think>
<answer> answer here </answer>. User: {input}.
<|assistant|>
"""

prompt_template = PromptTemplate(input_variables=["input"], template=template)

llm_chain = LLMChain(llm=llm, prompt=prompt_template)

data = load_dataset("5CD-AI/Vietnamese-395k-meta-math-MetaMathQA-gg-translated")
data = data["train"].shuffle(seed=42).select(range(150))
data = data

eval_template = """
<|user|>
As a math teacher, verify if the *student's reasoning* and *answer* (in <answer> tags) match the *correct answer*. Respond only Yes or No.

*Question:* {question}

*Student's reasoning and answer:*
{student_reasoning}

*Correct answer:* {correct_answer}
<|assistant|>
"""

eval_prompt = PromptTemplate(
    input_variables=["question", "student_reasoning", "correct_answer"],
    template=eval_template
)

eval_chain = LLMChain(llm=llm, prompt=eval_prompt)

def evaluate_reasoning(example):
    try:
        # Fix 1: Extract text from reasoning response
        student_reasoning_text = example["reasoning"]["text"] if isinstance(example["reasoning"], dict) else str(example["reasoning"])

        # Fix 2: Handle potential API errors
        eval_response = eval_chain.invoke({
            "question": example["query_vi"],
            "student_reasoning": student_reasoning_text,
            "correct_answer": example["response_vi"]
        })

        # Fix 3: Check for English response
        judgment = eval_response["text"].strip().lower()
        example["is_correct"] = "Có" if "yes" in judgment else "Không"

    except Exception as e:
        print(f"Error processing example: {e}")
        example["is_correct"] = "Không"  # Default to incorrect on error

    return example

# Fix 4: Properly extract reasoning text in initial processing
def reasoning(example):
    try:
        response = llm_chain.invoke({"input": example["query_vi"]})
        example["reasoning"] = response["text"]  # Store just the text
    except Exception as e:
        print(f"Error generating reasoning: {e}")
        example["reasoning"] = ""
    return example

data = data.map(reasoning)
data = data.map(evaluate_reasoning)

                n_threads_batch was transferred to model_kwargs.
                Please confirm that n_threads_batch is what you intended.
                device was transferred to model_kwargs.
                Please confirm that device is what you intended.
                flash_attn was transferred to model_kwargs.
                Please confirm that flash_attn is what you intended.
                tensor_split was transferred to model_kwargs.
                Please confirm that tensor_split is what you intended.
                main_gpu was transferred to model_kwargs.
                Please confirm that main_gpu is what you intended.
                mmap was transferred to model_kwargs.
                Please confirm that mmap is what you intended.
                mul_mat_q was transferred to model_kwargs.
                Please confirm that mul_mat_q is what you intended.
                offload_kqv was transferred to model_kwargs.
                Please confirm that offload_kqv 

Error processing example: Requested tokens (2170) exceed context window of 2048


Map:   6%|▌         | 9/150 [26:22<6:29:07, 165.59s/ examples]

Error processing example: Requested tokens (3394) exceed context window of 2048


Map:  19%|█▉        | 29/150 [1:28:39<9:18:08, 276.76s/ examples]

Error processing example: Requested tokens (2208) exceed context window of 2048


Map:  25%|██▍       | 37/150 [1:56:38<6:31:13, 207.73s/ examples]

Error processing example: Requested tokens (2557) exceed context window of 2048
Error processing example: Requested tokens (2301) exceed context window of 2048


Map:  27%|██▋       | 41/150 [2:10:30<5:46:56, 190.98s/ examples]


KeyboardInterrupt: 

In [8]:
from json_fix import save_processed_data, load_json_safely

processed_data = data.to_list()
success = save_processed_data(processed_data)
try:
    data = load_json_safely("processed_data.json")
except json.JSONDecodeError as e:
    print(f"Error reading JSON: {str(e)}")

Dataset saved successfully with 150 items


In [21]:
from json_fix import save_processed_data, load_json_safely
import json
from tqdm import tqdm

def extract_answer(reasoning):
    """Extract answer from reasoning text between <answer> tags"""
    try:
        if isinstance(reasoning, dict) and "text" in reasoning:
            reasoning = reasoning["text"]

        start_idx = reasoning.lower().find("<answer>")
        if start_idx == -1:
            return ""
        answer = reasoning[start_idx + len("<answer>"):].strip()
        return answer
    except (AttributeError, TypeError):
        return ""

def evaluate_reasoning(item):
    """Evaluate reasoning for a single item"""
    try:
        # Extract text from reasoning response
        student_reasoning_text = item["reasoning"]["text"] if isinstance(item["reasoning"], dict) else str(item["reasoning"])

        # Handle potential API errors
        eval_response = eval_chain.invoke({
            "question": item["query_vi"],
            "student_reasoning": student_reasoning_text,
            "correct_answer": item["response_vi"]
        })

        # Check for English response
        judgment = eval_response["text"].strip().lower()
        item["is_correct"] = "Có" if "yes" in judgment else "Không"

    except Exception as e:
        print(f"Error processing item: {e}")
        item["is_correct"] = "Không"  # Default to incorrect on error

    return item

def process_dataset(data_list):
    """Process a list of data items"""
    try:
        # First save the processed data
        success = save_processed_data(data_list)
        if not success:
            raise Exception("Failed to save processed data")

        # Load the saved data
        loaded_data = load_json_safely("processed_data.json")
        if not isinstance(loaded_data, dict) or "data" not in loaded_data:
            raise ValueError("Invalid data format after loading")

        # Process each item in the data
        processed_items = []
        for item in tqdm(loaded_data["data"]):
            # Extract answer
            item["extracted_answer"] = extract_answer(item["reasoning"])
            # Evaluate reasoning
            processed_item = evaluate_reasoning(item)
            processed_items.append(processed_item)

        return processed_items

    except Exception as e:
        print(f"Error in data processing pipeline: {e}")
        return None

# Usage example:
# processed_data = data.to_list()  # If working with pandas DataFrame
# Or if data is already a list:
# processed_data = data
results = process_dataset(processed_data)

Dataset saved successfully with 150 items


  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [03:06<?, ?it/s]


KeyboardInterrupt: 