### Evaluation

In [1]:
import logging
from transformers import BitsAndBytesConfig
#Model configs

logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

model_name = "mistralai/Mistral-7B-Instruct-v0.3"
training_data = "../data/combinations.jsonl"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_quant_type="nf4"
)

In [2]:
! nvidia-smi

Tue Aug  5 22:00:43 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 576.40                 Driver Version: 576.40         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 5070 Ti   WDDM  |   00000000:01:00.0 Off |                  N/A |
|  0%   42C    P8             19W /  300W |    7443MiB /  16303MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
from datasets import load_dataset, Dataset, DatasetDict


# load the dataset and split it into train, validation and test sets
dataset = load_dataset("json", data_files=training_data, split='train')
shuffled_dataset = dataset.shuffle(seed=42)
train_temp_split = shuffled_dataset.train_test_split(test_size=0.3) #30% for validation and test
temp_dataset = train_temp_split['test']
validation_test_split = temp_dataset.train_test_split(test_size=1/3)# 10% for validation and 20% for test
split_datasets = DatasetDict({
    'train': train_temp_split['train'],
    'validation': validation_test_split['train'],
    'test': validation_test_split['test']
})

INFO - PyTorch version 2.7.0+cu128 available.


Generating train split: 0 examples [00:00, ? examples/s]

In [4]:
import os
import mlflow

def get_last_run_id():
    runs = mlflow.search_runs(order_by=["start_time desc"], max_results=1)
    if not runs.empty:
        return runs.iloc[0].run_id
    else:
        logger.warning("No runs found in MLflow.")
        return None

def get_run_artifact_uri(last_run_id):
    if last_run_id:
        run_info = mlflow.get_run(last_run_id)
        artifact_path = run_info.info.artifact_uri
        if artifact_path.startswith("file://"):
            artifact_path = artifact_path.replace("file://", "")
        elif artifact_path.startswith("file:"):
            artifact_path = artifact_path.replace("file:", "")
        if "/training/.." in artifact_path:
            artifact_path = artifact_path.replace("/training/..", "")
        artifact_path = artifact_path+ "/adapters"
        folders = [f for f in os.listdir(artifact_path) if os.path.isdir(os.path.join(artifact_path, f))]
        logger.info(f"Models available: {', '.join(folders)}")
        if folders:
            last_updated_folder = max(folders, key=lambda f: os.path.getmtime(os.path.join(artifact_path, f)))
            logger.info(f"Last updated model: {last_updated_folder}")
            return os.path.join(artifact_path, last_updated_folder)
        else:
            logger.warning("No folders found in the artifact URI.")
        return None
    else:
        logger.warning("No runs found in MLflow.")
        return None
    
mlflow.end_run()

mlflow_uri = "file:../mlruns"
mlflow_experiment_name = "Synthetic-data-generator-fine-tuning"

mlflow.set_tracking_uri(mlflow_uri)
mlflow.set_experiment(mlflow_experiment_name)

run_id = get_last_run_id()
mlflow.start_run(run_id=run_id)
adapter_dir = get_run_artifact_uri(run_id)

print(f"Last run ID: {run_id}")
print(f"Artifact URI: {adapter_dir}")

INFO - Models available: epoch-5
INFO - Last updated model: epoch-5


Last run ID: 219f4ed29a504b2cb00eea34fdf06c57
Artifact URI: c:\Users\Admin\Documents\Projects\Repositories\synthetic-data-generator\mlruns/686303321124169868/219f4ed29a504b2cb00eea34fdf06c57/artifacts/adapters\epoch-5


In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(model_name, 
                                                    quantization_config=quantization_config , 
                                                    device_map="auto")
    
model = PeftModel.from_pretrained(base_model, adapter_dir)
tokenizer = AutoTokenizer.from_pretrained(adapter_dir)

INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
import json
import re
import numpy as np
import torch
from jsonschema import validate, ValidationError
from transformers import StoppingCriteria, StoppingCriteriaList


# function to extact JSON from a string
def find_and_parse_json(text: str) -> dict:

    try:
        # Find the first opening curly brace
        first_brace_index = text.find('{')
        if first_brace_index == -1:
            return None # No JSON object found

        # Start searching from the first brace
        brace_level = 1
        for i, char in enumerate(text[first_brace_index + 1:]):
            if char == '{':
                brace_level += 1
            elif char == '}':
                brace_level -= 1
            
            if brace_level == 0:
                # We found the matching closing brace
                last_brace_index = first_brace_index + i + 1
                json_string = text[first_brace_index : last_brace_index + 1]
                
                # Now, use the built-in json library to parse it
                return json.loads(json_string)
        
        return None # No complete JSON object found
    except (json.JSONDecodeError, IndexError):
        # Handle cases where the substring is not valid JSON or string is malformed
        return None


# Validate a JSON string against a schema
def is_valid_json_schema(json_generated, schema):
    try:
        validate(instance=json_generated, schema=schema)
        return True
    except (json.JSONDecodeError, ValidationError):
        return False
    
# domainn specific checks
def domain_specific_checks(json_ref, json_gen, domain_field, pattern):
    score = 0
    if domain_field in json_ref.keys():
        # Check if NM1 segment is present in both JSONs
        logger.debug(f"{json_ref[domain_field]}  {json_gen.get(domain_field)}")
        if domain_field in json_gen.keys():
            #NM1*PR*2*Palmer*Darlene*M*Mr.*DVM*PI*SqLFzolMNdaVXE~
            # pattern = r'^NM1(?:\*[^*]*){9}~$'
            value = json_gen[domain_field]
            if not bool(re.match(pattern, value)):
                score += 2.5
                logger.debug(f"{domain_field} {value} does not match the expected pattern, reducing score by 2.5")
            # check if all values in the json_gen are present in the value except for the nm1_segment
            for key in json_gen.keys():
                if key != domain_field and json_gen[key] not in value:
                    score += 2.5
                    logger.debug(f"Value {json_gen[key]} not found in {domain_field} {value}, reducing score by 2.5")
                    break
        else:
            score += 5
            logger.debug(f"{domain_field} not found in json_gen, reducing score by 5")

    return score
    

# Compare two JSON strings at field level and match field values
def compare_json_field_values(json_ref, json_gen):
   
    if not isinstance(json_ref, dict) or not isinstance(json_gen, dict):
        return 0.0
    
    score = 10
    # json_gen has all fields of json_ref, if not reduce the score proportional to no of fields missing
    logger.debug(f"Starting score: {score}")
    if set(json_gen.keys()).issubset(set(json_ref.keys())):
        for key in json_ref.keys():
            if json_gen.get(key) is None:
                reduce_score = 3 / len(json_ref.keys())
                logger.debug(f"Key {key} is missing in json_gen, reducing score by {reduce_score}")
                score -= reduce_score
    else:
        reduce_score = (len(json_ref.keys()) - len(json_gen.keys())) * (3 / len(json_ref.keys()))
        logger.debug(f"Keys in json_ref not present in json_gen, reducing score by {reduce_score}")
        score -= reduce_score

    # Compare values of each field if they dont match reduce the score proportional to no of fields not matching in a scale of  0 to 5
    count_mismatch = 0
    for key in json_ref.keys():
        if key in json_gen:
            if json_ref[key] != json_gen[key]:
                count_mismatch += 1
        else:
            count_mismatch += 1
    count_mismatch/= len(json_ref.keys())
    logger.debug(f"Count of mismatched fields: {count_mismatch}, reducing score by {count_mismatch * 2}")    
    score -= (count_mismatch * 2)

    # domainn specific checks
    score -= domain_specific_checks(json_ref, json_gen, "nm1_segment", r'^NM1(?:\*[^*]*){9}~$')
    score -= domain_specific_checks(json_ref, json_gen, "dtp_segment", r'^DTP(?:\*[^*]*){3}~$')
    score -= domain_specific_checks(json_ref, json_gen, "ref_segment", r'^REF(?:\*[^*]*){2}~$')

    # If the score is less than 0, set it to 0
    if score < 0.0:
        score = 0.0

    logger.debug(f"Final score: {score}")

    return score

def get_result(valid_json, valid_json_schema, comparison_results):
    return json.dumps({
        "valid_json": np.mean(valid_json) * 100,
        "valid_json_schema": np.mean(valid_json_schema) * 100,
        "domain_field_match": np.mean(comparison_results) * 10
    })


def test_model(model, tokenizer, test_dataset):

    torch.cuda.empty_cache()

    valid_json = []
    valid_json_schema = []
    comparison_results = []

    stop_words = ["[INST]"]
    stop_word_ids = [tokenizer(stop_word, add_special_tokens=False).input_ids[0] for stop_word in stop_words]
    stop_word_ids.append(tokenizer.eos_token_id)  # Add EOS token ID to the stop words
    print(f"Tokens to stop on: {stop_words}")
    print(f"Corresponding Token IDs: {stop_word_ids}")
        
    for i, row in enumerate(test_dataset):

        logger.info(f"Processing row {i+1}/{len(test_dataset)}")

        prompt = f"<s>[INST] {row['messages'][0]['content']} [/INST]"
        # print(f"Prompt: \n {prompt} \n ---------------------------------")
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        
        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=1000,
                temperature=0.7,
                do_sample=True,                
                top_p=0.9,
                eos_token_id=stop_word_ids
            )

        generated_text = tokenizer.decode(output[0], skip_special_tokens=False)

        logger.info(f"Generated text: \n {generated_text}")
        
        if("[/INST]" in generated_text):
            generated_json = find_and_parse_json(generated_text.split("[/INST]")[-1].strip())
            gold_json = find_and_parse_json(row['messages'][1]['content'])
            schema = find_and_parse_json(generated_text.split("[/INST]")[-2].strip())
            # print(f"Generated JSON: {generated_json} \n Goled JSON: {gold_json} \n Schema: {schema}")
        
        # is Json valid?
        if generated_json is not None:
            valid_json.append(True)
        else:
            valid_json.append(False)

        # is JSON valid against schema?
        if schema is not None:
            if generated_json is not None and is_valid_json_schema(generated_json, schema):
                valid_json_schema.append(True)
            else:
                valid_json_schema.append(False)
        
        # compare JSON field values
        if gold_json is not None:
            if generated_json is not None:
                comparison_score = compare_json_field_values(gold_json, generated_json)
            else:
                comparison_score = 0.0
            comparison_results.append(comparison_score)

        logger.info(get_result(valid_json, valid_json_schema, comparison_results))
        print("------------------------------------------------------------")

    
    return get_result(valid_json, valid_json_schema, comparison_results)
    

In [7]:
logger.setLevel(logging.DEBUG) 
result = test_model(model, tokenizer, split_datasets["test"])

result = json.loads(result)

epoch = re.search(r'(\d+)$', adapter_dir).group(1)
mlflow.log_metric("syntactic-accuracy", result['valid_json'], step=int(epoch))
mlflow.log_metric("schema-accuracy", result['valid_json_schema'], step=int(epoch))
mlflow.log_metric("domain-field-match", result['domain_field_match'], step=int(epoch))

mlflow.end_run()
logger.info(f"Final Result: {result}")
logger.info("Evaluation completed and results logged to MLflow.")

Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Tokens to stop on: ['[INST]']
Corresponding Token IDs: [3, 2]
Processing row 1/30


DEBUG - Starting score: 10
DEBUG - Keys in json_ref not present in json_gen, reducing score by 0.3333333333333333
DEBUG - Count of mismatched fields: 0.6666666666666666, reducing score by 1.3333333333333333
DEBUG - NM1*1P*2*Goodman*John**Mr.*DDS*SV*XDaTpoLYBLDOsEpcqiJvcaInHTmWafWfZzhwphvoAmaVwLJHuJcpp~  NM1*1P*2*Lawrence**W*Mx.*DVM*SV*~
DEBUG - Final score: 8.333333333333332
INFO - {"valid_json": 100.0, "valid_json_schema": 100.0, "domain_field_match": 83.33333333333331}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a Json test data generator generating valid json data based on a json schema. 
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "N

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - DTP*154*D8*20020406-20020411  DTP*050*D8*19950822-19950827
DEBUG - dtp_segment DTP*050*D8*19950822-19950827 does not match the expected pattern, reducing score by 2.5
DEBUG - Final score: 5.5
INFO - {"valid_json": 100.0, "valid_json_schema": 100.0, "domain_field_match": 69.16666666666666}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a structured data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 DTP Segment", "description": "Schema for X12 DTP segment, which contains date/time period information.", "type": "object", "properties": {"dtp_segment": {"type": "string", "description": "Date/Time Period (e.g., DTP*291*D8*20220101-20220131~)", "minLength": 1, "maxLength": 30}}, "required": ["dtp_segment"]}
Create a JSON object that contains all properties [/INST] {"dtp_segment": "DTP*050*D8*19950822-19950827"}</s>
------------------------------------------------------------
Processing row 3/30


DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - REF*PI*eQStMljcNyrSIBBUonJw~  REF*PR*cjY~
DEBUG - Final score: 8.0
INFO - {"valid_json": 100.0, "valid_json_schema": 100.0, "domain_field_match": 72.77777777777777}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a structured data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 REF Segment", "description": "Schema for X12 REF segment, which contains reference identification information.", "type": "object", "properties": {"reference_identification_qualifier": {"type": "string", "description": "Reference Identification Qualifier (e.g., ZZ = Mutually Defined, MI = Member ID, XX = National Provider Identifier, SV = Service Provider Number, FI = Federal Tax ID, PI = Payer Identifier, PR = Payer Name, 1P = Provider Name)", "enum": ["ZZ", "MI", "XX", "SV", "FI", "PI", "PR", "1P"]}, "reference_identification": {"type": "string", "description": "Reference Identification (e.g., Member ID, National Provider Identifier)", "minLength": 1, "maxLength": 20}, "ref_segment": {"type": "string", "description": "Ref segment in the format REF*XX*123456789~", "minLength": 1, "maxLen

DEBUG - Starting score: 10
DEBUG - Keys in json_ref not present in json_gen, reducing score by -0.375
DEBUG - Count of mismatched fields: 0.875, reducing score by 1.75
DEBUG - NM1*PR*2*Wilson*Jacqueline***PhD*PI*QsuFNASzfRyJAiiNSmIVyDOgccfXSdgnSSqDPWLbQvqijjANlxnqZoFmQXMXjGklouvzGojUW~  NM1*IL*1*Hall*Amanda**Mrs.*MD*PI*BgGqKTNWXkqoRYCWZGXWKNmLKwGvJJTvMWvJwLKv~
DEBUG - Final score: 8.625
INFO - {"valid_json": 100.0, "valid_json_schema": 100.0, "domain_field_match": 76.14583333333333}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "Name Mid

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - REF*SV*tIZPudoXwLwNkTE~  REF*FI*QwZJZrKWdXrKqoWgKm~
DEBUG - Final score: 8.0
INFO - {"valid_json": 100.0, "valid_json_schema": 100.0, "domain_field_match": 76.91666666666666}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 REF Segment", "description": "Schema for X12 REF segment, which contains reference identification information.", "type": "object", "properties": {"reference_identification_qualifier": {"type": "string", "description": "Reference Identification Qualifier (e.g., ZZ = Mutually Defined, MI = Member ID, XX = National Provider Identifier, SV = Service Provider Number, FI = Federal Tax ID, PI = Payer Identifier, PR = Payer Name, 1P = Provider Name)", "enum": ["ZZ", "MI", "XX", "SV", "FI", "PI", "PR", "1P"]}, "reference_identification": {"type": "string", "description": "Reference Identification (e.g., Member ID, National Provider Identifier)", "minLength": 1, "maxLength": 20}, "ref_segment": {"type": "string", "description": "Ref segment in the format REF*XX*123456789~", "minLength": 1, "maxLength"

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - REF*XX*DXVwqiSUGC~  REF*PI*dLWuZY~
DEBUG - Final score: 8.0
INFO - {"valid_json": 100.0, "valid_json_schema": 100.0, "domain_field_match": 77.43055555555554}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 REF Segment", "description": "Schema for X12 REF segment, which contains reference identification information.", "type": "object", "properties": {"reference_identification_qualifier": {"type": "string", "description": "Reference Identification Qualifier (e.g., ZZ = Mutually Defined, MI = Member ID, XX = National Provider Identifier, SV = Service Provider Number, FI = Federal Tax ID, PI = Payer Identifier, PR = Payer Name, 1P = Provider Name)", "enum": ["ZZ", "MI", "XX", "SV", "FI", "PI", "PR", "1P"]}, "reference_identification": {"type": "string", "description": "Reference Identification (e.g., Member ID, National Provider Identifier)", "minLength": 1, "maxLength": 20}, "ref_segment": {"type": "string", "description": "Ref segment in the format REF*XX*123456789~", "minLength": 1, "maxLength"

INFO - {"valid_json": 85.71428571428571, "valid_json_schema": 100.0, "domain_field_match": 66.3690476190476}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Give a sample for the X12 NM1 Segment schema [/INST] {"entity_identifier_code": "IL", "entity_type_qualifier": "1", "name_last_or_organizationName": "Powell", "name_first": "William", "name_middle": "B", "name_prefix": "Mr.", "name_suffix": "Jr.", "identification_code_qualifier": "MI", "identification_code": "XgHkDKoGKhKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxqKoTfYgCqKcNqUJxq

DEBUG - Starting score: 10
DEBUG - Keys in json_ref not present in json_gen, reducing score by 0.0
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - NM1*03*1*Wilkins*Jeremy*E*Dr.*DVM**nCfKvTZKhZWlIcIdXhMOFFLKNnldmbMDpaWsVTxlSnVZDnVckToJimRYhsqhCSxbiegQZszOFhIaMuDK~  NM1*IL*2*Hall*Anthony**Mr.*DDS*MI*YDXDGJXZOdYxJYyVXywZKRUJVyJKbJgJCXyJgXyYxZyZy~
DEBUG - Final score: 8.0
INFO - {"valid_json": 87.5, "valid_json_schema": 100.0, "domain_field_match": 68.07291666666666}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "Name Mid

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - DTP*156*RD8*19850729-19850803  DTP*155*RD8*19780913-19780918
DEBUG - dtp_segment DTP*155*RD8*19780913-19780918 does not match the expected pattern, reducing score by 2.5
DEBUG - Final score: 5.5
INFO - {"valid_json": 88.88888888888889, "valid_json_schema": 100.0, "domain_field_match": 66.62037037037037}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a structured data generator that creates valid JSON data based on a schema.
Give a sample for the X12 DTP Segment schema and include all properties [/INST] {"dtp_segment": "DTP*155*RD8*19780913-19780918"}

The X12 DTP Segment schema is: "DTP*155*RD8*19780913-19780918". The DTP segment is a data segment that identifies the date and period of a recurring payment or service. "DTP*" is the segment identifier, "155" is the recurring payment or service identifier, "RD8" is the type of recurring payment or service, "19780913-19780918" is the date and period of the recurring payment or service.</s>
------------------------------------------------------------
Processing row 10/30


DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - NM1*IL*1*Lewis*Rebecca**Miss**PI*CifaexfRtcNPukxTGmSpcEEIvPBWJglCjd~  NM1*PR*2*Lawrence*Katherine**Mr.**SV*JxMtZrQKrJDKBKjKXyVnPnwUoIxwKxGtNwP~
DEBUG - Final score: 8.0
INFO - {"valid_json": 90.0, "valid_json_schema": 100.0, "domain_field_match": 67.95833333333333}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a Json test data generator generating valid json data based on a json schema. 
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "N

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 0.0, reducing score by 0.0
DEBUG - NM1*PR*1*Chen*Kevin*M**MD*ZZ*BsWYJmZhWUOwCvZJqhvNvdHhYydDMjrQxgXtLTnHnCIzRlOaCqtcuMUbvAfOWCFuhmXJwKEygC~  NM1*PR*1*Chen*Kevin*M**MD*ZZ*BsWYJmZhWUOwCvZJqhvNvdHhYydDMjrQxgXtLTnHnCIzRlOaCqtcuMUbvAfOWCFuhmXJwKEygC~
DEBUG - Final score: 10.0
INFO - {"valid_json": 90.9090909090909, "valid_json_schema": 100.0, "domain_field_match": 70.87121212121212}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "Name Mid

DEBUG - Starting score: 10
DEBUG - Keys in json_ref not present in json_gen, reducing score by 0.0
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - NM1*1P*1*Taylor**U*Miss*III*MI*UqTADOZNHctkqiCjSPPItSEosohoFl~  NM1*03*2*Lee*Katherine**Mrs.*DDS*SV*zrVmYCgVZJyxZTZkLBwQwZzRgDxhZTZkLBwZzRgDxh~
DEBUG - Final score: 8.0
INFO - {"valid_json": 91.66666666666666, "valid_json_schema": 100.0, "domain_field_match": 71.63194444444444}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "Name Mid

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 0.0, reducing score by 0.0
DEBUG - NM1*IL*1*Calderon*Jennifer*B*Mr.*DDS*MI*~  NM1*IL*1*Calderon*Jennifer*B*Mr.*DDS*MI*~
DEBUG - Final score: 10.0
INFO - {"valid_json": 92.3076923076923, "valid_json_schema": 100.0, "domain_field_match": 73.81410256410257}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a structured data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "Name

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - DTP*152*D8*20030116-20030121  DTP*151*D8*19981219-19981224
DEBUG - dtp_segment DTP*151*D8*19981219-19981224 does not match the expected pattern, reducing score by 2.5
DEBUG - Final score: 5.5
INFO - {"valid_json": 92.85714285714286, "valid_json_schema": 100.0, "domain_field_match": 72.47023809523809}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a structured data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 DTP Segment", "description": "Schema for X12 DTP segment, which contains date/time period information.", "type": "object", "properties": {"dtp_segment": {"type": "string", "description": "Date/Time Period (e.g., DTP*291*D8*20220101-20220131~)", "minLength": 1, "maxLength": 30}}, "required": ["dtp_segment"]}
Generate a JSON object that includes all properties [/INST] {"dtp_segment": "DTP*151*D8*19981219-19981224"}</s>
------------------------------------------------------------
Processing row 15/30


DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 0.5555555555555556, reducing score by 1.1111111111111112
DEBUG - NM1*IL*1*Garcia*Michelle*H*Mr.**PI*SNnSFWsmNGXFLVWSJrCSAAlJslZECkxKjeVBFrxykZCzhbXZCuTyCROakCGCzbYczYGlRy~  NM1*IL*1*Garcia*Jacob*T*Mr.**XX*OlHqzDWfMkUHKHmKGpZvYrDZbvqZcKBg~
DEBUG - Final score: 8.88888888888889
INFO - {"valid_json": 93.33333333333333, "valid_json_schema": 100.0, "domain_field_match": 73.56481481481481}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a structured data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "Name

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - DTP*154*RD8*20120110-20120115  DTP*348*RD8*19960713-19960718
DEBUG - dtp_segment DTP*348*RD8*19960713-19960718 does not match the expected pattern, reducing score by 2.5
DEBUG - Final score: 5.5
INFO - {"valid_json": 93.75, "valid_json_schema": 100.0, "domain_field_match": 72.40451388888889}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 DTP Segment", "description": "Schema for X12 DTP segment, which contains date/time period information.", "type": "object", "properties": {"dtp_segment": {"type": "string", "description": "Date/Time Period (e.g., DTP*291*D8*20220101-20220131~)", "minLength": 1, "maxLength": 30}}, "required": ["dtp_segment"]}
Generate a JSON object that includes all properties [/INST] {"dtp_segment": "DTP*348*RD8*19960713-19960718"}</s>
------------------------------------------------------------
Processing row 17/30


DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 0.8, reducing score by 1.6
DEBUG - NM1*03*1*Morales*Nathan*U*Mr.*Jr.*SV*kCwMVihe~  NM1*PR*1*Nguyen*Rachel*C*Mx.*PhD*SV*NXkDVZkLmKY~
DEBUG - Final score: 8.4
INFO - {"valid_json": 94.11764705882352, "valid_json_schema": 100.0, "domain_field_match": 73.08660130718954}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "Name Mid

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - REF*ZZ*veEIQGXNulbQQYJhKwRB~  REF*PI*hGsXtY~
DEBUG - Final score: 8.0
INFO - {"valid_json": 94.44444444444444, "valid_json_schema": 100.0, "domain_field_match": 73.47067901234567}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a Json test data generator generating valid json data based on a json schema. 
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 REF Segment", "description": "Schema for X12 REF segment, which contains reference identification information.", "type": "object", "properties": {"reference_identification_qualifier": {"type": "string", "description": "Reference Identification Qualifier (e.g., ZZ = Mutually Defined, MI = Member ID, XX = National Provider Identifier, SV = Service Provider Number, FI = Federal Tax ID, PI = Payer Identifier, PR = Payer Name, 1P = Provider Name)", "enum": ["ZZ", "MI", "XX", "SV", "FI", "PI", "PR", "1P"]}, "reference_identification": {"type": "string", "description": "Reference Identification (e.g., Member ID, National Provider Identifier)", "minLength": 1, "maxLength": 20}, "ref_segment": {"type": "string", "description": "Ref segment in the format REF*XX*123456789~", "minLength": 1, "max

DEBUG - Starting score: 10
DEBUG - Keys in json_ref not present in json_gen, reducing score by 0.0
DEBUG - Count of mismatched fields: 0.8888888888888888, reducing score by 1.7777777777777777
DEBUG - NM1*IL*2*Burnett*Charles**Mrs.*PhD*SV*CboddODkrEtSVJSRc~  NM1*IL*1*James*William*N*Mx.*DDS*XX*~
DEBUG - Final score: 8.222222222222221
INFO - {"valid_json": 94.73684210526315, "valid_json_schema": 100.0, "domain_field_match": 73.9312865497076}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a structured data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "Name

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 0.0, reducing score by 0.0
DEBUG - NM1*03*2*Hampton***Mx.***~  NM1*03*2*Hampton***Mx.***~
DEBUG - Final score: 10.0
INFO - {"valid_json": 95.0, "valid_json_schema": 100.0, "domain_field_match": 75.23472222222222}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "Name Mid

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - REF*FI*LTbLVeagPfPbiHw~  REF*ZZ*Ja~
DEBUG - Final score: 8.0
INFO - {"valid_json": 95.23809523809523, "valid_json_schema": 100.0, "domain_field_match": 75.4616402116402}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Generate a sample json for the X12 REF Segment schema with all properties [/INST] {"reference_identification_qualifier": "ZZ", "reference_identification": "Ja", "ref_segment": "REF*ZZ*Ja~"}

{"reference_identification_qualifier": "MI", "reference_identification": "fNJVzY", "ref_segment": "REF*MI*fNJVzY~"}

{"reference_identification_qualifier": "XX", "reference_identification": "KHvTp", "ref_segment": "REF*XX*KHvTp~"}

{"reference_identification_qualifier": "FI", "reference_identification": "yYZ", "ref_segment": "REF*FI*yYZ~"}

{"reference_identification_qualifier": "RD", "reference_identification": "YX", "ref_segment": "REF*RD*YX~"}

{"reference_identification_qualifier": "XX", "reference_identification": "xY", "ref_segment": "REF*XX*xY~"}

{"reference_identification_qualifier": "MI", "reference_identification": "vY", "ref_segment": "REF*MI*vY~"}

{"reference_identification_q

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - DTP*151*RD8*19700905-19700910  DTP*151*RD8*20190704-20190709
DEBUG - dtp_segment DTP*151*RD8*20190704-20190709 does not match the expected pattern, reducing score by 2.5
DEBUG - Final score: 5.5
INFO - {"valid_json": 95.45454545454545, "valid_json_schema": 100.0, "domain_field_match": 74.53156565656565}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a structured data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 DTP Segment", "description": "Schema for X12 DTP segment, which contains date/time period information.", "type": "object", "properties": {"dtp_segment": {"type": "string", "description": "Date/Time Period (e.g., DTP*291*D8*20220101-20220131~)", "minLength": 1, "maxLength": 30}}, "required": ["dtp_segment"]}
Create a JSON object that contains all properties [/INST] {"dtp_segment": "DTP*151*RD8*20190704-20190709"}</s>
------------------------------------------------------------
Processing row 23/30


DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - REF*MI*ZNGQGuafNwyBzy~  REF*PI*b~
DEBUG - Final score: 8.0
INFO - {"valid_json": 95.65217391304348, "valid_json_schema": 100.0, "domain_field_match": 74.76932367149759}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a structured data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 REF Segment", "description": "Schema for X12 REF segment, which contains reference identification information.", "type": "object", "properties": {"reference_identification_qualifier": {"type": "string", "description": "Reference Identification Qualifier (e.g., ZZ = Mutually Defined, MI = Member ID, XX = National Provider Identifier, SV = Service Provider Number, FI = Federal Tax ID, PI = Payer Identifier, PR = Payer Name, 1P = Provider Name)", "enum": ["ZZ", "MI", "XX", "SV", "FI", "PI", "PR", "1P"]}, "reference_identification": {"type": "string", "description": "Reference Identification (e.g., Member ID, National Provider Identifier)", "minLength": 1, "maxLength": 20}, "ref_segment": {"type": "string", "description": "Ref segment in the format REF*XX*123456789~", "minLength": 1, "maxLen

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - DTP*349*RD8*20130601-20130606  DTP*152*RD8*19850207-19850212
DEBUG - dtp_segment DTP*152*RD8*19850207-19850212 does not match the expected pattern, reducing score by 2.5
DEBUG - Final score: 5.5
INFO - {"valid_json": 95.83333333333334, "valid_json_schema": 100.0, "domain_field_match": 73.94560185185185}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a Json test data generator generating valid json data based on a json schema. 
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 DTP Segment", "description": "Schema for X12 DTP segment, which contains date/time period information.", "type": "object", "properties": {"dtp_segment": {"type": "string", "description": "Date/Time Period (e.g., DTP*291*D8*20220101-20220131~)", "minLength": 1, "maxLength": 30}}, "required": ["dtp_segment"]}
Produce a JSON object that adheres to the schema with all properties [/INST] {"dtp_segment": "DTP*152*RD8*19850207-19850212"}</s>
------------------------------------------------------------
Processing row 25/30


DEBUG - Starting score: 10
DEBUG - Keys in json_ref not present in json_gen, reducing score by -1.2857142857142856
DEBUG - Count of mismatched fields: 0.8571428571428571, reducing score by 1.7142857142857142
DEBUG - NM1*1P*2*Jones**R**DDS*MI*~  NM1*PR*1*Garcia*Matthew*L*Mr.*Jr.*MI*RqCkzIbUUlxTUQGzYVJbxYGKtGxoIbIyKmJIYCJLmJLbDG~
DEBUG - Final score: 9.571428571428571
INFO - {"valid_json": 96.0, "valid_json_schema": 100.0, "domain_field_match": 74.8163492063492}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a Json test data generator generating valid json data based on a json schema. 
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "N

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 0.9, reducing score by 1.8
DEBUG - NM1*IL*2*James*Teresa*R*Mrs.*MD*FI*lpuvhZFBxLimIZClFCIADQuDiSkHHXbMqUenJcFXnUKokO~  NM1*1P*2*Jones*Rachel*C*Mx.*DDS*XX*YCkLgTmQoXkJJGQYWnCzZXpDgDGgTgkjLnVbCkZDqvJVgLNZCmXg~
DEBUG - Final score: 8.2
INFO - {"valid_json": 96.15384615384616, "valid_json_schema": 100.0, "domain_field_match": 75.09264346764346}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "Name Mid

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - REF*ZZ*Phxbspf~  REF*MI*YbxkLVNfYkD~
DEBUG - Final score: 8.0
INFO - {"valid_json": 96.29629629629629, "valid_json_schema": 100.0, "domain_field_match": 75.2743974132863}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a structured data generator that creates valid JSON data based on a schema.
Generate a sample json for the X12 REF Segment schema with all properties [/INST] {"reference_identification_qualifier": "MI", "reference_identification": "YbxkLVNfYkD", "ref_segment": "REF*MI*YbxkLVNfYkD~"}</s>
------------------------------------------------------------
Processing row 28/30


DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 1.0, reducing score by 2.0
DEBUG - REF*1P*NHvUoMeJnAClrLsyn~  REF*MI*fkCp~
DEBUG - Final score: 8.0
INFO - {"valid_json": 96.42857142857143, "valid_json_schema": 100.0, "domain_field_match": 75.44316893424036}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a sample data generator that creates valid JSON data based on a schema.
Generate a sample json for the X12 REF Segment schema with all properties [/INST] {"reference_identification_qualifier": "MI", "reference_identification": "fkCp", "ref_segment": "REF*MI*fkCp~"}

{"reference_identification_qualifier": "ZZ", "reference_identification": "YUYL", "ref_segment": "REF*ZZ*YUYL~"}

{"reference_identification_qualifier": "ZZ", "reference_identification": "uQJw", "ref_segment": "REF*ZZ*uQJw~"}

{"reference_identification_qualifier": "ZZ", "reference_identification": "jh", "ref_segment": "REF*ZZ*jh~"}

{"reference_identification_qualifier": "ZZ", "reference_identification": "d", "ref_segment": "REF*ZZ*d~"}

{"reference_identification_qualifier": "ZZ", "reference_identification": "kC", "ref_segment": "REF*ZZ*kC~"}

{"reference_identification_qualifier": "ZZ", "reference_identification": "lY", "ref_segment": "REF*ZZ*lY~"}

{"reference_identification_qualifi

DEBUG - Starting score: 10
DEBUG - Keys in json_ref not present in json_gen, reducing score by 0.0
DEBUG - Count of mismatched fields: 0.8888888888888888, reducing score by 1.7777777777777777
DEBUG - NM1*IL*1*Williams*Blake*X*Mx.**MI*SwBJJVWPMcmoClcKAbZqlSXumleRKrFMgQClsMOrsvbpdqnnjLJRjHGlLs~  NM1*03*1*Crawford*Ashley**Mr.*DDS*XX*DzYfqQgmHqjQYoYvjqvZqwKZJYbBw~
DEBUG - Final score: 8.222222222222221
INFO - {"valid_json": 96.55172413793103, "valid_json_schema": 100.0, "domain_field_match": 75.67692939244664}
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


Generated text: 
 <s><s>[INST] You are a structured data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 NM1 Segment", "description": "Schema for X12 NM1 segment, which contains subscriber's or organization name information.", "type": "object", "properties": {"entity_identifier_code": {"type": "string", "description": "Entity Identifier Code (e.g., IL for Insured or 03 for dependent or PR for Payer or 1P for Provider)", "enum": ["IL", "03", "PR", "1P"]}, "entity_type_qualifier": {"type": "string", "description": "Entity Type Qualifier (1 = Person, 2 = Non-Person Entity)", "enum": ["1", "2"]}, "name_last_or_organizationName": {"type": "string", "description": "Name Last or Organization Name", "minLength": 1, "maxLength": 60}, "name_first": {"type": "string", "description": "Name First", "nullable": true, "minLength": 1, "maxLength": 35}, "name_middle": {"type": "string", "description": "Name

DEBUG - Starting score: 10
DEBUG - Count of mismatched fields: 0.6666666666666666, reducing score by 1.3333333333333333
DEBUG - REF*MI*XLaeMgWDE~  REF*MI*DgQPbB~
DEBUG - Final score: 8.666666666666666
INFO - {"valid_json": 96.66666666666667, "valid_json_schema": 100.0, "domain_field_match": 76.04325396825396}
INFO - Final Result: {'valid_json': 96.66666666666667, 'valid_json_schema': 100.0, 'domain_field_match': 76.04325396825396}
INFO - Evaluation completed and results logged to MLflow.


Generated text: 
 <s><s>[INST] You are a structured data generator that creates valid JSON data based on a schema.
Given the Schema: {"$schema": "http://json-schema.org/draft-07/schema#", "title": "X12 REF Segment", "description": "Schema for X12 REF segment, which contains reference identification information.", "type": "object", "properties": {"reference_identification_qualifier": {"type": "string", "description": "Reference Identification Qualifier (e.g., ZZ = Mutually Defined, MI = Member ID, XX = National Provider Identifier, SV = Service Provider Number, FI = Federal Tax ID, PI = Payer Identifier, PR = Payer Name, 1P = Provider Name)", "enum": ["ZZ", "MI", "XX", "SV", "FI", "PI", "PR", "1P"]}, "reference_identification": {"type": "string", "description": "Reference Identification (e.g., Member ID, National Provider Identifier)", "minLength": 1, "maxLength": 20}, "ref_segment": {"type": "string", "description": "Ref segment in the format REF*XX*123456789~", "minLength": 1, "maxLen