In [1]:
!pip install transformers peft torch accelerate bitsandbytes


Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.me

In [8]:
import gdown
import os

os.makedirs("model_files", exist_ok=True)
os.chdir("model_files")

print("Downloading your trained model files from Google Drive...")

folder_url = "https://drive.google.com/drive/folders/1AKzzA2WObNmontbDVaAY8ZPNF7g669Mr"

gdown.download_folder(folder_url, quiet=False, use_cookies=False)

Downloading your trained model files from Google Drive...


Retrieving folder contents


Processing file 1ufkk-Pok1Jj27YxjWSmD92pXrC5DKv1q adapter_config.json
Processing file 1TyESkqVSppq_w1MROrBxQvXghQSNtWJ1 adapter_model.safetensors
Processing file 1CvMpQvyBcuzhD7uD-stCtNxMZuo1ufzp sample_training_data.json
Processing file 1CxtjWrFArCUY2-Jnn_QinsX_tOYoPyAD special_tokens_map.json
Processing file 1SEt3zkMfSpB969JwEbJhTmefGRNp5Iij tokenizer_config.json
Processing file 16l-plbzWhdXhJSPB1eSvnD7YS-vsSkrZ tokenizer.json


Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=1ufkk-Pok1Jj27YxjWSmD92pXrC5DKv1q
To: /content/model_files/model_files/QA_finetuning_test/adapter_config.json
100%|██████████| 765/765 [00:00<00:00, 2.02MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1TyESkqVSppq_w1MROrBxQvXghQSNtWJ1
From (redirected): https://drive.google.com/uc?id=1TyESkqVSppq_w1MROrBxQvXghQSNtWJ1&confirm=t&uuid=f4618106-7e5a-4a76-9eab-91a86e72c49e
To: /content/model_files/model_files/QA_finetuning_test/adapter_model.safetensors
100%|██████████| 446M/446M [00:42<00:00, 10.6MB/s]
Downloading...
From: https://drive.google.com/uc?id=1CvMpQvyBcuzhD7uD-stCtNxMZuo1ufzp
To: /content/model_files/model_files/QA_finetuning_test/sample_training_data.json
100%|██████████| 10.4k/10.4k [00:00<00:00, 4.70MB/s]
Downloading...
From: https://drive.google.com/uc?id=1CxtjWrFArCUY2-Jnn_QinsX_tOYoPyAD
To: /conten

['/content/model_files/model_files/QA_finetuning_test/adapter_config.json',
 '/content/model_files/model_files/QA_finetuning_test/adapter_model.safetensors',
 '/content/model_files/model_files/QA_finetuning_test/sample_training_data.json',
 '/content/model_files/model_files/QA_finetuning_test/special_tokens_map.json',
 '/content/model_files/model_files/QA_finetuning_test/tokenizer_config.json',
 '/content/model_files/model_files/QA_finetuning_test/tokenizer.json']

In [3]:
print("\nDownloaded files:")
for root, dirs, files in os.walk("."):
    for file in files:
        filepath = os.path.join(root, file)
        size = os.path.getsize(filepath) / (1024*1024)  # Size in MB
        print(f"  {filepath}: {size:.1f} MB")


Downloaded files:
  ./QA_finetuning_test/sample_training_data.json: 0.0 MB
  ./QA_finetuning_test/tokenizer_config.json: 0.0 MB
  ./QA_finetuning_test/tokenizer.json: 6.8 MB
  ./QA_finetuning_test/adapter_model.safetensors: 425.0 MB
  ./QA_finetuning_test/adapter_config.json: 0.0 MB
  ./QA_finetuning_test/special_tokens_map.json: 0.0 MB


In [4]:
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

CUDA available: True
GPU device: Tesla T4


In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import json
import random

def load_model_with_fallback():
    """Load your fine-tuned model with fallback options"""
    base_model_name = "microsoft/Phi-4-reasoning-plus"
    adapter_path = "./QA_finetuning_test/"

    print(f"Loading from adapter path: {adapter_path}")

    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)

    print("Loading base model...")
    # Load base model without device_map="auto" initially
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        trust_remote_code=True
    )

    # Move base model to GPU explicitly
    if torch.cuda.is_available():
        base_model = base_model.to("cuda")
        print("Base model moved to GPU.")


    try:
        # Method 1: Standard loading
        print("  Trying standard PEFT loading...")
        model = PeftModel.from_pretrained(base_model, adapter_path)
        print("Standard loading successful!")
        return model, tokenizer

    except Exception as e1:
        print(f"  Standard loading failed: {str(e1)[:100]}...")

        try:
            # Method 2: Load with specific config
            print("  Trying with explicit config...")
            peft_config = PeftConfig.from_pretrained(adapter_path)
            model = PeftModel.from_pretrained(
                base_model,
                adapter_path,
                config=peft_config,
                is_trainable=False
            )
            print("Config-based loading successful!")
            return model, tokenizer

        except Exception as e2:
            print(f"  Config loading failed: {str(e2)[:100]}...")

            try:
                # Method 3: Force loading with adapter_name
                print("  Trying with default adapter name...")
                model = PeftModel.from_pretrained(
                    base_model,
                    adapter_path,
                    adapter_name="default"
                )
                print("Named adapter loading successful!")
                return model, tokenizer

            except Exception as e3:
                print(f"  Named loading failed: {str(e3)[:100]}...")

                # Method 4: Just use base model (no fine-tuning)
                print("  Using base model without fine-tuning...")
                print("  This will test the base Phi-4 model, not your trained version")
                return base_model, tokenizer

def inspect_adapter_files():
    adapter_path = "./QA_finetuning_test/"

    print("\n Inspecting adapter files:")

    try:
        with open(f"{adapter_path}/adapter_config.json", 'r') as f:
            config = json.load(f)
        print(f" Adapter config loaded:")
        for key, value in config.items():
            print(f"  {key}: {value}")
    except Exception as e:
        print(f" Error reading adapter config: {e}")

    import os
    print(f"\n📁 Files in adapter directory:")
    for file in os.listdir(adapter_path):
        size = os.path.getsize(f"{adapter_path}/{file}") / (1024*1024)
        print(f"  {file}: {size:.1f} MB")

inspect_adapter_files()

print("\n Loading your fine-tuned Phi-4 model...")
try:
    model, tokenizer = load_model_with_fallback()
    model_loaded = True
    print(" Model loading completed!")
except Exception as e:
    print(f"All loading methods failed: {e}")
    model_loaded = False


 Inspecting adapter files:
 Error reading adapter config: [Errno 2] No such file or directory: './QA_finetuning_test//adapter_config.json'

📁 Files in adapter directory:


FileNotFoundError: [Errno 2] No such file or directory: './QA_finetuning_test/'

In [6]:

def load_questions_from_sample_data():
    sample_file = "./QA_finetuning_test/sample_training_data.json"

    print(f" Loading questions from {sample_file}...")

    with open(sample_file, 'r') as f:
        data = json.load(f)

    print(f"Found {len(data)} conversations in sample data")

    questions = []

    for i, conversation in enumerate(data):
        messages = conversation['messages']

        for j, message in enumerate(messages):
            if (message['role'] == 'user' and
                len(message['content']) > 100 and
                'Consider the following constraints' in message['content']):

                original_answer = None
                if j + 1 < len(messages) and messages[j + 1]['role'] == 'assistant':
                    original_answer = messages[j + 1]['content']

                questions.append({
                    'conversation_id': i,
                    'question': message['content'],
                    'original_answer': original_answer
                })
                break

    print(f" Extracted {len(questions)} questions from sample training data")
    return questions

if model_loaded:
    test_questions = load_questions_from_sample_data()

def generate_answer(model, tokenizer, question):
    """Generate answer for a given question"""
    prompt = f"""<|im_start|>system
You are an expert in symbolic computation and polynomial decomposition.
Your task is to help rewrite a target polynomial into the required form based on given inequality premises.
<|im_end|>
<|im_start|>user
{question}
<|im_end|>
<|im_start|>assistant
"""

    inputs = tokenizer(prompt, return_tensors="pt")
    if torch.cuda.is_available():
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,  # Reduced for faster generation
            temperature=0.3,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            repetition_penalty=1.1
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    answer = response.split("<|im_start|>assistant")[-1].strip()
    return answer

if not model_loaded:
    print("Cannot run tests - model failed to load")
else:
    results = []

    num_tests = min(2, len(test_questions))
    test_subset = test_questions[:num_tests]

    print(f"\n Testing model with {num_tests} questions...")
    print("=" * 80)

    for i, q_data in enumerate(test_subset, 1):
        question = q_data['question']
        original_answer = q_data['original_answer']

        print(f"\n QUESTION {i}:")
        print(f"{question[:150]}...")

        print(f"\n GENERATING ANSWER...")

        try:
            generated_answer = generate_answer(model, tokenizer, question)

            print(f"GENERATED: {generated_answer}")
            print(f" ORIGINAL:  {original_answer[:150]}..." if original_answer else "N/A")
            print("-" * 80)

            results.append({
                "question_id": i,
                "question": question,
                "generated_answer": generated_answer,
                "original_answer": original_answer,
                "status": "success"
            })

        except Exception as e:
            print(f"Generation error: {str(e)}")
            print("-" * 80)

            results.append({
                "question_id": i,
                "question": question,
                "generated_answer": f"Error: {str(e)}",
                "original_answer": original_answer,
                "status": "error"
            })

    if results:
        with open('/content/adapter_test_results.json', 'w') as f:
            json.dump(results, f, indent=2)

        successful_results = [r for r in results if r['status'] == 'success']
        print(f"Success rate: {len(successful_results)}/{len(results)}")

        from google.colab import files
        files.download('/content/adapter_test_results.json')

 Loading questions from ./QA_finetuning_test/sample_training_data.json...


JSONDecodeError: Expecting property name enclosed in double quotes: line 101 column 1 (char 10383)

In [7]:
import os


# seeing contents of json file

sample_file = "./QA_finetuning_test/sample_training_data.json"

if os.path.exists(sample_file):
    print(f"Content of {sample_file}:")
    with open(sample_file, 'r') as f:
        print(f.read())
else:
    print(f"File not found: {sample_file}")

Content of ./QA_finetuning_test/sample_training_data.json:
[
  {
    "messages": [
      {
        "role": "system",
        "content": "You are an expert in symbolic computation and polynomial decomposition.\nYour task is to help rewrite a target polynomial into the required form based on given inequality premises."
      },
      {
        "role": "user",
        "content": "Consider the following constraints:\ng_1 = (z - 2) >= 0\ng_2 = (y + 2*z - 3) >= 0\ng_3 = (2*y + z) >= 0\ng_4 = (y^2*z^2 - 2*y*z - 2) >= 0\ng_5 = (z + 2) >= 0\n\nConsider the target polynomial:\nf = y*z^3 + 6*y*z^2 + 12*y*z + 11*y + 2*z^4 + 9*z^3 + 6*z^2 - 14*z - 33\n\nOur objective is to rewrite f in the following form:\nf = C_1 * h_1 * s_1 + C_2 * h_2 * s_2 + ... + C_m * h_m * s_m\nwhere:\nC_i is a positive constant,\nh_i is the product of a subset of g_j, and\ns_i is a square of a polynomial.\n\nWe plan to work it out in the following steps:\n1. Decompose the terms and introduce new terms if necessary.\n2. Rear

In [1]:
def generate_answer(model, tokenizer, question):
    """Generate answer for a given question"""
    prompt = f"""<|im_start|>system
You are an expert in symbolic computation and polynomial decomposition.
Your task is to help rewrite a target polynomial into the required form based on given inequality premises.
<|im_end|>
<|im_start|>user
{question}
<|im_end|>
<|im_start|>assistant
"""

    inputs = tokenizer(prompt, return_tensors="pt")
    if torch.cuda.is_available():
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.3,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            repetition_penalty=1.1
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    answer = response.split("<|im_start|>assistant")[-1].strip()
    return answer

if 'model' in locals() and 'tokenizer' in locals() and model_loaded:
    # example question for model
    question = "What is the simplified form of the expression (x+y)^2?"

    print(f"Asking the model: {question}")

    try:
        generated_answer = generate_answer(model, tokenizer, question)
        print(f"\nModel's Answer:\n{generated_answer}")

    except Exception as e:
        print(f"Error during generation: {str(e)}")

else:
    print("Model was not loaded successfully. Cannot generate an answer.")

Model was not loaded successfully. Cannot generate an answer.
