In [3]:
# !pip install transformers
!pip install -U bitsandbytes



In [None]:
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from tqdm import tqdm  # Import tqdm for progress 

# Fail fast, with an actionable message, when bitsandbytes cannot be imported.
# The original exception is chained explicitly so the traceback still shows the
# underlying reason the import failed.
try:
    import bitsandbytes as bnb
except ImportError as import_error:
    raise ImportError(
        "The `bitsandbytes` library is required for 4-bit quantization. "
        "Please install it using: `pip install -U bitsandbytes`"
    ) from import_error

# Questions to solve: the pre-processed test CSV, located relative to the notebook.
df = pd.read_csv("../../Dataset/MATH/Pre_processed_test/filtered_test_serial.csv")

# Hugging Face Hub identifier shared by the model and its tokenizer.
model_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"

# Prefer CUDA when torch can see a GPU; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# bitsandbytes settings: NF4 4-bit weights with double quantization,
# computing in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the quantized model first (device placement is delegated to
# `device_map="auto"`), then the matching tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Function to get the model's solution for a batch of questions
def get_model_solution_batch(questions, batch_size=32):
    """Generate one model solution per question, processing the questions in batches.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        questions: Sequence of question strings.
        batch_size: Number of questions passed to ``model.generate`` per call.

    Returns:
        list[str]: One decoded completion per question, in input order. Only the
        newly generated tokens are decoded, so the system/user prompt is not
        echoed back inside the solution text.
    """
    system_prompt = "Please reason step by step, and put your final answer within \\boxed{}. The answer can be integer or fraction or decimal, anything is possible"
    model_solutions = []

    # A decoder-only model continues from the last position of each row, so the
    # pad tokens of a batch must sit on the left. The tokenizer's previous
    # setting is restored afterwards so other users of it are unaffected.
    previous_padding_side = tokenizer.padding_side
    tokenizer.padding_side = "left"
    try:
        for start in tqdm(range(0, len(questions), batch_size), desc="Processing batches"):
            batch_questions = questions[start:start + batch_size]

            # One (system, user) conversation per question.
            messages_batch = [
                [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": question},
                ]
                for question in batch_questions
            ]

            # Render the chat template to text, then tokenize the whole batch.
            texts = [
                tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
                for messages in messages_batch
            ]
            # Send inputs to wherever the model actually lives; with
            # `device_map="auto"` that need not match a separately chosen device.
            model_inputs = tokenizer(
                texts, return_tensors="pt", padding=True, truncation=True
            ).to(model.device)
            prompt_length = model_inputs["input_ids"].shape[1]

            with torch.no_grad():
                generated_ids = model.generate(
                    **model_inputs,
                    max_new_tokens=512
                )

            # `generate` returns prompt + continuation; keep the continuation only.
            completion_ids = generated_ids[:, prompt_length:]
            responses = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)
            model_solutions.extend(responses)
    finally:
        tokenizer.padding_side = previous_padding_side

    return model_solutions

# Solve every question in the frame and store the answers as a new column.
batch_size = 32
all_questions = df["question"].tolist()
df["model_solution"] = get_model_solution_batch(all_questions, batch_size=batch_size)

# Write the questions together with the model's answers to disk.
output_answer_csv = "math_m1_answers.csv"
df.to_csv(output_answer_csv, index=False)
print(f"Model answers saved to: {output_answer_csv}")

Using device: mps


ImportError: Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`

In [None]:
# !pip3 install torch torchvision torchaudio

In [None]:
# ! pip install 'accelerate>=0.26.0'

In [None]:
# import torch
# print(torch.backends.mps.is_available())  # Should return True
# print(torch.backends.mps.is_built())      # Should return True

In [None]:
# import accelerate
# print(accelerate.__version__)

In [1]:
import os
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from tqdm import tqdm

# Restrict this process to one GPU. The variable only takes effect if it is set
# before CUDA is first initialised, so it precedes the availability check.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Change "0" if needed
device = "cuda" if torch.cuda.is_available() else "cpu"

# Input dataset, final output, and incremental checkpoint locations.
dataset_path = "../../Dataset/MATH/Pre_processed_test/filtered_test_serial.csv"
output_csv = "math_m1_answers.csv"
checkpoint_csv = "checkpoint_math_answers.csv"

# Kaggle equivalents of the three paths above:
# dataset_path = "/kaggle/input/math-final/filtered_test_serial.csv"
# output_csv = "/kaggle/working/math_m1_answers.csv"
# checkpoint_csv = "/kaggle/working/checkpoint_math_answers.csv"

df = pd.read_csv(dataset_path)

# Resume support: rows whose "Serial Number" already appears in the checkpoint
# are dropped from the work list; otherwise start from an empty checkpoint frame.
if os.path.exists(checkpoint_csv):
    print(f"Resuming from checkpoint: {checkpoint_csv}")
    df_checkpoint = pd.read_csv(checkpoint_csv)
    df = df[~df["Serial Number"].isin(df_checkpoint["Serial Number"])]
else:
    df_checkpoint = pd.DataFrame(columns=df.columns.tolist() + ["model_solution"])

# The repo name indicates a pre-quantized bitsandbytes 4-bit checkpoint, so no
# BitsAndBytesConfig is passed here.
# NOTE(review): unlike "Qwen/Qwen2.5-Math-1.5B-Instruct" used earlier in this
# notebook, this id has no "Instruct" suffix - confirm that the chat-template
# prompting below is appropriate for it.
MODEL_ID = "unsloth/Qwen2.5-Math-1.5B-bnb-4bit"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Place the weights at load time. Transformers models have no `to_device()`
# method, and moving a bitsandbytes-quantized model after loading is not
# reliably supported, so `device_map` is used instead.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map=device)

# Function to get model solutions in batch
def get_model_solution_batch(questions, serial_numbers, batch_size=16, checkpoint_interval=50):
    """Generate one model solution per question, checkpointing progress to disk.

    Uses the module-level ``tokenizer``, ``model`` and ``checkpoint_csv``.

    Args:
        questions: Sequence of question strings.
        serial_numbers: Identifiers aligned one-to-one with ``questions``; they
            are written to the checkpoint so a later run can skip finished rows.
        batch_size: Number of questions passed to ``model.generate`` per call.
        checkpoint_interval: Buffered results are appended to ``checkpoint_csv``
            after every this-many batches (and once more when the loop ends).

    Returns:
        list: One entry per question, in input order. Each entry is the decoded
        completion (prompt tokens excluded), or ``None`` when the question's
        batch was skipped after a CUDA out-of-memory error. Skipped questions
        are not written to the checkpoint, so a resumed run retries them.
    """
    checkpoint_columns = ["Serial Number", "question", "model_solution"]
    model_solutions = []
    checkpoint_data = []

    def _flush_checkpoint():
        """Append buffered rows to the checkpoint file and clear the buffer."""
        if not checkpoint_data:
            return
        checkpoint_df = pd.DataFrame(checkpoint_data, columns=checkpoint_columns)
        # Write the header only when the file is being created.
        checkpoint_df.to_csv(checkpoint_csv, mode='a', index=False, header=not os.path.exists(checkpoint_csv))
        checkpoint_data.clear()

    # A decoder-only model continues from the last position of each row, so the
    # pad tokens of a batch must sit on the left. The previous setting is
    # restored on exit.
    previous_padding_side = tokenizer.padding_side
    tokenizer.padding_side = "left"
    try:
        batch_starts = range(0, len(questions), batch_size)
        for batch_index, start in enumerate(tqdm(batch_starts, desc="Processing batches")):
            batch_questions = questions[start:start + batch_size]
            batch_serials = serial_numbers[start:start + batch_size]

            # One (system, user) conversation per question.
            messages_batch = [
                [
                    {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
                    {"role": "user", "content": question}
                ]
                for question in batch_questions
            ]

            # Render the chat template to text, then tokenize the whole batch.
            texts = [
                tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
                for messages in messages_batch
            ]
            model_inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)

            # Move inputs to the same device as the model.
            model_inputs = {key: value.to(model.device) for key, value in model_inputs.items()}
            prompt_length = model_inputs["input_ids"].shape[1]

            try:
                with torch.no_grad():
                    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
                # `generate` returns prompt + continuation; decode the continuation only.
                responses = tokenizer.batch_decode(
                    generated_ids[:, prompt_length:], skip_special_tokens=True
                )
            except torch.cuda.OutOfMemoryError:
                print("CUDA Out of Memory! Skipping batch.")
                torch.cuda.empty_cache()
                # Keep the result list aligned with `questions`.
                model_solutions.extend([None] * len(batch_questions))
                continue

            model_solutions.extend(responses)
            for serial, question, response in zip(batch_serials, batch_questions, responses):
                checkpoint_data.append([serial, question, response])

            # Flush after every `checkpoint_interval` completed batches.
            if (batch_index + 1) % checkpoint_interval == 0:
                _flush_checkpoint()

        # Persist whatever was produced since the last periodic flush.
        _flush_checkpoint()
    finally:
        tokenizer.padding_side = previous_padding_side

    return model_solutions

# Run inference on the rows that are still unprocessed.
batch_size = 8  # Reduce batch size for stability
# `df` may be a filtered slice of the loaded frame (when resuming), so build a
# new frame with `assign` instead of writing a column into the slice in place.
df = df.assign(
    model_solution=get_model_solution_batch(
        df["question"].tolist(), df["Serial Number"].tolist(), batch_size=batch_size
    )
)

# Save final results. When resuming, `df` only holds the rows processed in this
# run, so the rows loaded from the checkpoint are prepended to keep the output
# complete. Columns present in only one of the two frames are filled with NaN.
if df_checkpoint.empty:
    final_df = df
else:
    final_df = pd.concat([df_checkpoint, df], ignore_index=True)
final_df.to_csv(output_csv, index=False)
print(f"Model answers saved to: {output_csv}")

  from .autonotebook import tqdm as notebook_tqdm


ImportError: Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`