### Evaluation

In [None]:
from transformers import BitsAndBytesConfig
# --- Model, data and output configuration ---

# Base model that is passed to `from_pretrained`
model_name = "mistralai/Mistral-7B-Instruct-v0.3"

# Input data and output locations (relative paths)
training_data = "../data/combinations.jsonl"
checkpoint_dir = "../output/checkpoints"
model_output_dir = "../output/final_adapter"

# 4-bit NF4 quantization with double quantization and float16 compute dtype
quantization_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

In [None]:
# Shell out to nvidia-smi to report the GPU, driver version and current memory usage
! nvidia-smi

Sun Jul 27 15:53:50 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 576.40                 Driver Version: 576.40         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 5070 Ti   WDDM  |   00000000:01:00.0 Off |                  N/A |
|  0%   42C    P8             19W /  300W |    6658MiB /  16303MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
from datasets import load_dataset, Dataset, DatasetDict


# Load the dataset and split it into train, validation and test sets.
# Every shuffle/split is seeded: `train_test_split` shuffles by default, so
# without a seed the held-out rows would differ on each kernel restart and
# evaluation numbers would not be comparable between runs.
SPLIT_SEED = 42

dataset = load_dataset("json", data_files=training_data, split='train')
shuffled_dataset = dataset.shuffle(seed=SPLIT_SEED)

# Hold out 30% of the rows for validation + test; 70% stay in train.
train_temp_split = shuffled_dataset.train_test_split(test_size=0.3, seed=SPLIT_SEED)
temp_dataset = train_temp_split['test']

# One third of the hold-out (10% overall) becomes the test set; the remaining
# two thirds (20% overall) become the validation set.
validation_test_split = temp_dataset.train_test_split(test_size=1/3, seed=SPLIT_SEED)

split_datasets = DatasetDict({
    'train': train_temp_split['train'],
    'validation': validation_test_split['train'],
    'test': validation_test_split['test']
})

In [None]:
import json
import re
import numpy as np
import torch
from jsonschema import validate, ValidationError
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Function to extract the first JSON object embedded in a string
def extract_json_from_string(input_string):
    """Return the first complete JSON object found in ``input_string``.

    The text is scanned for ``{`` characters and each candidate is handed to
    ``json.JSONDecoder.raw_decode``, which parses exactly one JSON value and
    ignores whatever follows it. Unlike a non-greedy ``\\{.*?\\}`` regex, this
    handles nested objects and braces inside string values.

    Args:
        input_string: Text that may contain a JSON object surrounded by
            arbitrary other text.

    Returns:
        The parsed object (a ``dict``), or ``None`` when the text contains no
        ``{`` at all or no candidate decodes successfully. A diagnostic
        message is printed in both failure cases.
    """
    decoder = json.JSONDecoder()
    last_error = None

    start = input_string.find("{")
    while start != -1:
        try:
            parsed, _ = decoder.raw_decode(input_string, start)
            return parsed
        except json.JSONDecodeError as e:
            last_error = e
            # Resume after the point where decoding failed, so that braces
            # nested inside a broken/truncated object are not returned as if
            # they were the top-level object.
            start = input_string.find("{", max(start + 1, e.pos))

    if last_error is None:
        print("No JSON object found in the string.")
    else:
        print(f"Error decoding JSON: {last_error}")
        print(input_string)
    return None


# Validate a parsed JSON object against a JSON schema
def is_valid_json_schema(json_generated, schema):
    """Check whether ``json_generated`` conforms to ``schema``.

    Args:
        json_generated: Already-parsed JSON instance to validate.
        schema: Already-parsed JSON schema to validate against.

    Returns:
        ``True`` when validation succeeds; ``False`` when the instance
        violates the schema or when the schema itself is malformed.
    """
    # Imported locally because SchemaError is not among the notebook-level imports.
    from jsonschema.exceptions import SchemaError

    try:
        validate(instance=json_generated, schema=schema)
        return True
    except (ValidationError, SchemaError):
        # `validate` checks the schema before the instance, so a malformed
        # schema surfaces as SchemaError rather than ValidationError.
        return False
    
# Score how many reference fields are reproduced with identical values
def compare_json_field_values(json_ref, json_gen):
    """Return the fraction of fields whose values match between two objects.

    A field counts as a match when its key is present in both objects and the
    two values compare equal. The count is divided by the size of the larger
    object, so extra or missing fields in ``json_gen`` lower the score.

    Args:
        json_ref: Reference (gold) object.
        json_gen: Generated object to score.

    Returns:
        Matching fields divided by ``max(len(json_ref), len(json_gen))``,
        or ``0`` when both objects are empty.
    """
    denominator = max(len(json_ref), len(json_gen))

    equal_count = sum(
        1
        for field in json_ref
        if field in json_gen and json_ref[field] == json_gen[field]
    )

    if denominator > 0:
        return equal_count / denominator
    return 0


def _mean_percentage(values):
    """Return the mean of ``values`` as a percentage, or NaN (without a NumPy warning) when empty."""
    if len(values) == 0:
        return np.float64(np.nan)
    return np.mean(values) * 100


def test_model(adapter_dir, test_dataset, max_samples=None):
    """Generate a completion for each test row and score the JSON it contains.

    For every evaluated row the model is prompted with the first message's
    content, and the first JSON object in the completion is scored three ways:
    whether it parses at all, whether it validates against the schema embedded
    in the prompt, and how many of its field values equal those of the
    reference answer in the second message.

    Args:
        adapter_dir: Directory the PEFT adapter and tokenizer are loaded from.
        test_dataset: Iterable of rows where ``row['messages'][0]['content']``
            is the prompt and ``row['messages'][1]['content']`` is the
            reference answer.
        max_samples: Optional cap on the number of rows evaluated. ``None``
            (the default) evaluates every row; pass a small number for a quick
            smoke test.

    Returns:
        dict with ``valid_json``, ``valid_json_schema`` and
        ``comparison_results``, each a percentage. A metric is NaN when no row
        contributed to it (for example, no schema could be extracted).
    """
    torch.cuda.empty_cache()

    valid_json = []
    valid_json_schema = []
    comparison_results = []

    base_model = AutoModelForCausalLM.from_pretrained(model_name,
                                                    quantization_config=quantization_config,
                                                    device_map="auto")

    # Load from the directory the caller asked for, not the notebook-level global.
    model = PeftModel.from_pretrained(base_model, adapter_dir)
    tokenizer = AutoTokenizer.from_pretrained(adapter_dir)

    for i, row in enumerate(test_dataset):

        if max_samples is not None and i >= max_samples:
            break

        user_content = row['messages'][0]['content']
        # NOTE(review): the <INST> tags presumably mirror the format used when
        # the adapter was trained - confirm; the stock Mistral template differs.
        # The literal <s> may also duplicate a BOS token added by the tokenizer.
        prompt = f"<s><INST>{user_content}</INST>"
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=1000,
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens. Decoding the whole sequence
        # would put the prompt (and its schema) in front of the answer, so the
        # "first JSON object" would be the schema rather than the model output.
        prompt_length = inputs["input_ids"].shape[1]
        completion = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

        # All three values are recomputed for every row, so nothing is left
        # unbound or carried over from the previous iteration.
        generated_json = extract_json_from_string(completion.strip())
        gold_json = extract_json_from_string(row['messages'][1]['content'])
        schema = extract_json_from_string(user_content)

        valid_json.append(generated_json is not None)

        # Schema conformance is only scored when a schema could be extracted.
        if schema is not None:
            valid_json_schema.append(
                generated_json is not None and is_valid_json_schema(generated_json, schema)
            )

        # Field-level agreement is only scored when a reference answer exists.
        if gold_json is not None:
            if generated_json is not None:
                comparison_score = compare_json_field_values(gold_json, generated_json)
            else:
                comparison_score = 0.0
            comparison_results.append(comparison_score)

    result = {
        "valid_json": _mean_percentage(valid_json),
        "valid_json_schema": _mean_percentage(valid_json_schema),
        "comparison_results": _mean_percentage(comparison_results)
    }

    return result
    

In [None]:
# Evaluate the adapter stored in model_output_dir on the test split; the returned metrics dict is the cell output
test_model(model_output_dir, split_datasets["test"])

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Error decoding JSON: Expecting ',' delimiter: line 1 column 422 (char 421)
You are a Json test data generator generating valid json data based on a json schema. Given the below schema,

    ### Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "ma

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


{'valid_json': np.float64(0.0),
 'valid_json_schema': np.float64(nan),
 'comparison_results': np.float64(0.0)}

In [None]:
import re

# A string with a JSON object embedded inside, spanning multiple lines.
input_string = """
You are a Json test data generator generating valid json data based on a json schema. Given the below schema,

    ### Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "Name Middle", "nullable": true, "minLength": 1, "maxLength": 25}, "name_prefix": {"type": "string", "description": "Name Prefix", "nullable": true, "minLength": 1, "maxLength": 10}, "name_suffix": {"type": "string", "description": "Name Suffix", "nullable": true, "minLength": 1, "maxLength": 10}, "identification_code_qualifier": {"type": "string", "description": "Identification Code Qualifier (e.g., MI = Member ID, XX = National Provider Identifier, SV = Service Provider Number, FI = Federal Tax ID, PI = Payer Identifier, ZZ = Mutually Defined)", "enum": ["MI", "XX", "SV", "FI", "PI", "ZZ"], "nullable": true}, "identification_code": {"type": "string", "description": "Identification Code", "nullable": true, "minLength": 2, "maxLength": 80}, "nm1_segment": {"type": "string", "description": "NM1 Segment format of the data (e.g., NM1*IL*1*DOE*JOHN****MI*123456789~)", "nullable": true, "maxLength": 255}}, "required": ["entity_identifier_code", "entity_type_qualifier", "name_last_or_organizationName", "nm1_segment"]}

    Create a JSON object that contains all properties
    </INST> {"entity_identifier_code": "1P", "entity_type_qualifier": "2", "name_last_or_organizationName": "Kim", "name_first": "Nathan", "name_middle": "B", "name_prefix": "Mr.", "name_suffix": "MD", "identification_code_qualifier": "SV", "identification_code": "JdTcDdWYsQdZjPkYbGdPzRsMwGj", "nm1_segment": "NM1*1P*2*Kim*Nathan*B*Mr.*MD*SV*JdTcDdWYsQdZjPkYbGdPzRsMwGj~"}
"""

# Locate the first JSON object with json.JSONDecoder.raw_decode instead of a regex.
# The standard `re` module has no recursive-pattern support, so a `(?R)` pattern
# raises re.error, and a non-recursive `\{.*?\}` stops at the first closing brace
# of a nested object and returns a truncated, unparsable fragment.
import json

decoder = json.JSONDecoder()
extracted_json_string = None

search_from = input_string.find("{")
while search_from != -1:
    try:
        # raw_decode parses one JSON value starting at `search_from` and
        # returns the index just past it, ignoring any trailing text.
        _, end = decoder.raw_decode(input_string, search_from)
    except json.JSONDecodeError as err:
        # Skip past the failure point before looking for the next candidate.
        search_from = input_string.find("{", max(search_from + 1, err.pos))
    else:
        extracted_json_string = input_string[search_from:end]
        break

if extracted_json_string is not None:
    print("--- Match Found ---")
    print(extracted_json_string)
else:
    print("No match found.")



--- Match Found ---
{"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}
