**Install Libraries**



In [None]:
!pip install transformers peft torch accelerate bitsandbytes


In [26]:
!pip install ipywidgets

Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m69.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jedi
Successfully installed jedi-0.19.2


**Download Google drive folder with trained model data**

In [18]:
import gdown
import os

os.makedirs("model_files", exist_ok=True)
os.chdir("model_files")

print("Downloading your trained model files from Google Drive...")

folder_url = "https://drive.google.com/drive/folders/1AKzzA2WObNmontbDVaAY8ZPNF7g669Mr"

gdown.download_folder(folder_url, quiet=False, use_cookies=False)

Downloading your trained model files from Google Drive...


Retrieving folder contents


Processing file 1ufkk-Pok1Jj27YxjWSmD92pXrC5DKv1q adapter_config.json
Processing file 1TyESkqVSppq_w1MROrBxQvXghQSNtWJ1 adapter_model.safetensors
Processing file 1Tdxku1odefxYlIzr_NBf0IImK26iSV_C sample_training_data.json
Processing file 1CxtjWrFArCUY2-Jnn_QinsX_tOYoPyAD special_tokens_map.json
Processing file 1SEt3zkMfSpB969JwEbJhTmefGRNp5Iij tokenizer_config.json
Processing file 16l-plbzWhdXhJSPB1eSvnD7YS-vsSkrZ tokenizer.json


Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=1ufkk-Pok1Jj27YxjWSmD92pXrC5DKv1q
To: /content/model_files/model_files/QA_finetuning_test/adapter_config.json
100%|██████████| 765/765 [00:00<00:00, 2.98MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1TyESkqVSppq_w1MROrBxQvXghQSNtWJ1
From (redirected): https://drive.google.com/uc?id=1TyESkqVSppq_w1MROrBxQvXghQSNtWJ1&confirm=t&uuid=2fd11ce8-1410-47f6-bf9c-4fc3ed9700ad
To: /content/model_files/model_files/QA_finetuning_test/adapter_model.safetensors
100%|██████████| 446M/446M [00:08<00:00, 54.9MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Tdxku1odefxYlIzr_NBf0IImK26iSV_C
To: /content/model_files/model_files/QA_finetuning_test/sample_training_data.json
100%|██████████| 20.9k/20.9k [00:00<00:00, 32.7MB/s]
Downloading...
From: https://drive.google.com/uc?id=1CxtjWrFArCUY2-Jnn_QinsX_tOYoPyAD
To: /conten

['/content/model_files/model_files/QA_finetuning_test/adapter_config.json',
 '/content/model_files/model_files/QA_finetuning_test/adapter_model.safetensors',
 '/content/model_files/model_files/QA_finetuning_test/sample_training_data.json',
 '/content/model_files/model_files/QA_finetuning_test/special_tokens_map.json',
 '/content/model_files/model_files/QA_finetuning_test/tokenizer_config.json',
 '/content/model_files/model_files/QA_finetuning_test/tokenizer.json']

**Check downloaded files**

In [19]:
print("\nDownloaded files:")
for root, dirs, files in os.walk("."):
    for file in files:
        filepath = os.path.join(root, file)
        size = os.path.getsize(filepath) / (1024*1024)  # Size in MB
        print(f"  {filepath}: {size:.1f} MB")


Downloaded files:
  ./QA_finetuning_test/adapter_model.safetensors: 425.0 MB
  ./QA_finetuning_test/special_tokens_map.json: 0.0 MB
  ./QA_finetuning_test/sample_training_data.json: 0.0 MB
  ./QA_finetuning_test/adapter_config.json: 0.0 MB
  ./QA_finetuning_test/tokenizer.json: 6.8 MB
  ./QA_finetuning_test/tokenizer_config.json: 0.0 MB


**Change runtime type if needed** \\
Loading the model will need higher RAM

In [4]:
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

CUDA available: True
GPU device: NVIDIA A100-SXM4-40GB


**Load trained Phi4 model** \\
trained on 34 files from Bohan's SOS dataset

**Test with sample conversations from dataset** \\
Resulting metric is a success rate of the answers \\
Results saved to `adapter_test_results.json`

In [22]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig

def load_questions_from_sample_data():
    sample_file = "./QA_finetuning_test/sample_training_data.json"

    print(f" Loading questions from {sample_file}...")

    with open(sample_file, 'r') as f:
        data = json.load(f)

    print(f"Found {len(data)} conversations in sample data")

    questions = []

    for i, conversation in enumerate(data):
        messages = conversation['messages']

        for j, message in enumerate(messages):
            if (message['role'] == 'user' and
                len(message['content']) > 100 and
                'Consider the following constraints' in message['content']):

                original_answer = None
                if j + 1 < len(messages) and messages[j + 1]['role'] == 'assistant':
                    original_answer = messages[j + 1]['content']

                questions.append({
                    'conversation_id': i,
                    'question': message['content'],
                    'original_answer': original_answer
                })
                break

    print(f" Extracted {len(questions)} questions from sample training data")
    return questions

if 'model_loaded' not in globals():
    model_loaded = False
    print("model_loaded variable not found, assuming model is not loaded.")

if model_loaded:
    try:
        test_questions = load_questions_from_sample_data()
    except Exception as e:
        print(f"Error loading questions: {e}")
        test_questions = []
else:
    print("Model not loaded, skipping loading test questions.")
    test_questions = []


def generate_answer(model, tokenizer, question):
    prompt = f"""<|im_start|>system
You are an expert in symbolic computation and polynomial decomposition.
Your task is to help rewrite a target polynomial into the required form based on given inequality premises.
<|im_end|>
<|im_start|>user
{question}
<|im_end|>
<|im_start|>assistant
"""

    inputs = tokenizer(prompt, return_tensors="pt")
    if torch.cuda.is_available():
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.3,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id, # Added pad_token_id
            eos_token_id=tokenizer.eos_token_id, # Added eos_token_id
            repetition_penalty=1.1 # Added repetition_penalty
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=False)
    if "<|im_start|>assistant" in response:
        answer = response.split("<|im_start|>assistant")[-1].strip()
    else:
        answer = response.strip()

    if answer.endswith("<|im_end|>"):
        answer = answer[:-len("<|im_end|>")].strip()

    return answer

if not model_loaded:
    print("Cannot run tests - model failed to load")
else:
    results = []

    num_tests = min(10, len(test_questions))
    test_subset = test_questions[:num_tests]

    print(f"\n Testing model with {num_tests} questions...")
    print("=" * 80)

    for i, q_data in enumerate(test_subset, 1):
        question = q_data['question']
        original_answer = q_data['original_answer']

        print(f"\n QUESTION {i}:")
        print(f"{question[:150]}...")

        print(f"\n GENERATING ANSWER...")

        try:
            generated_answer = generate_answer(model, tokenizer, question)

            print(f"GENERATED: {generated_answer}")
            print(f" ORIGINAL:  {original_answer[:150]}..." if original_answer else "N/A")
            print("-" * 80)

            results.append({
                "question_id": i,
                "question": question,
                "generated_answer": generated_answer,
                "original_answer": original_answer,
                "status": "success"
            })

        except Exception as e:
            print(f"Generation error: {str(e)}")
            print("-" * 80)

            results.append({
                "question_id": i,
                "question": question,
                "generated_answer": f"Error: {str(e)}",
                "original_answer": original_answer,
                "status": "error"
            })

    if results:
        with open('/content/adapter_test_results.json', 'w') as f:
            json.dump(results, f, indent=2)

        successful_results = [r for r in results if r['status'] == 'success']
        print(f"Success rate: {len(successful_results)}/{len(results)}")

        from google.colab import files
        try:
            files.download('/content/adapter_test_results.json')
        except Exception as e:
            print(f"Error downloading file: {e}")

 Loading questions from ./QA_finetuning_test/sample_training_data.json...
Found 5 conversations in sample data
 Extracted 5 questions from sample training data

 Testing model with 5 questions...

 QUESTION 1:
Consider the following constraints:
g_1 = (z - 2) >= 0
g_2 = (y + 2*z - 3) >= 0
g_3 = (2*y + z) >= 0
g_4 = (y^2*z^2 - 2*y*z - 2) >= 0
g_5 = (z + 2) >=...

 GENERATING ANSWER...
GENERATED: y*z^3 + 6*y*z^2 + 12*y*z + 8*y + 3*y + 2*z^4 + 9*z^3 + 6*z^2 - 20*z + 6*z - 24 - 9
 ORIGINAL:  y*z^3 + 6*y*z^2 + 12*y*z + 8*y + 3*y + 2*z^4 + 9*z^3 + 6*z^2 - 20*z + 6*z - 24 - 9...
--------------------------------------------------------------------------------

 QUESTION 2:
Consider the following constraints:
g_1 = (x*z + 2) >= 0
g_2 = (z + 1) >= 0
g_3 = (z*(x - 3)) >= 0

Consider the target polynomial:
f = 8*x^6*z^3 + 8*...

 GENERATING ANSWER...
GENERATED: 8*x^6*z^3 + 8*x^6*z^2 - 8*x^5*z^4 - 8*x^5*z^3 - 48*x^5*z^2 - 48*x^5*z + 2*x^4*z^5 + 2*x^4*z^4 + 48*x^4*z^3 + 48*x^4*z^2 + 72*x^4*z + 72*x^

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>