# **Finetuning Llama3.1-8B Model on Salesforce/Xlam-function-calling Dataset Using Unsloth**

### **Loading the Base Model**

In [None]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 
dtype = None 
load_in_4bit = True 

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.10.7: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!
Please update transformers, TRL and unsloth via:
`pip install --upgrade --no-cache-dir --no-deps unsloth transformers git+https://github.com/huggingface/trl.git`


### **Apply Lora**

In [2]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.10.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


### **Loading and Formatting the Dataset**

In [3]:
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instruction = "You are a helpful assistant. Your task is to convert any input into valid JSON. Only provide the JSON response, no extra information:"
    inputs       = examples["query"]
    outputs      = examples["answers"]
    texts = []
    for input, output in zip(inputs, outputs):
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }

pass

from datasets import load_dataset
dataset = load_dataset("Salesforce/xlam-function-calling-60k",split="train")
print(dataset)
dataset = dataset.map(formatting_prompts_func, batched = True,)
print(dataset)


Dataset({
    features: ['answers', 'query', 'tools', 'id'],
    num_rows: 60000
})


Map:   0%|          | 0/60000 [00:00<?, ? examples/s]

Dataset({
    features: ['answers', 'query', 'tools', 'id', 'text'],
    num_rows: 60000
})


In [4]:
dataset["text"][1]

'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nYou are a helpful assistant. Your task is to convert any input into valid JSON. Only provide the JSON response, no extra information:\n\n### Input:\nI need to understand the details of the Ethereum blockchain for my cryptocurrency project. Can you fetch the details for \'ethereum\'?\n\n### Response:\n[{"name": "web_chain_details", "arguments": {"chain_slug": "ethereum"}}]<|end_of_text|>'

### **Setting Up Training Arguments and Training for SFTTrainer**

In [5]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 500,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs_llama3.1_8b_unsloth_4bit",
        report_to = "none", # Use this for WandB etc
    ),
)

Map (num_proc=2):   0%|          | 0/60000 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [None]:
trainer_stats = trainer.train()

**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!
`pip install --upgrade --no-cache-dir --no-deps unsloth transformers git+https://github.com/huggingface/trl.git`


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 60,000 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 500
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,2.3287
2,2.4623
3,2.2469
4,2.2762
5,1.7656
6,1.8109
7,1.579
8,1.2398
9,0.9834
10,0.7964


### **Testing the Trained Model**

In [7]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "You are a helpful assistant. Your task is to convert any input into valid JSON. Only provide the JSON response, no extra information:", # instruction
        "I need to understand the details of the Ethereum blockchain for my cryptocurrency project. Can you fetch the details for \'ethereum\'?", # input
        "", # output 
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

['<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nYou are a helpful assistant. Your task is to convert any input into valid JSON. Only provide the JSON response, no extra information:\n\n### Input:\nI need to understand the details of the Ethereum blockchain for my cryptocurrency project. Can you fetch the details for \'ethereum\'?\n\n### Response:\n[{"name": "get_blockchain_details", "arguments": {"name": "ethereum"}}]<|end_of_text|>']

In [8]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Continue the fibonnaci sequence.", # instruction
        "I need to understand the details of the Ethereum blockchain for my cryptocurrency project. Can you fetch the details for \'ethereum\'?", # input
        "", # output 
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Continue the fibonnaci sequence.

### Input:
I need to understand the details of the Ethereum blockchain for my cryptocurrency project. Can you fetch the details for 'ethereum'?

### Response:
[{"name": "blockchain", "arguments": {"slug": "ethereum"}}]<|end_of_text|>


### **Save the Finetuned Model**

In [None]:
model.save_pretrained("llama-3.1-8b-finetuned-for-json-generation-unsloth-4bit-lora-model") 
tokenizer.save_pretrained("llama-3.1-8b-finetuned-for-json-generation-unsloth-4bit-lora-model")

In [None]:
if True:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "llama-3.1-8b-finetuned-for-json-generation-unsloth-4bit-lora-model", # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# alpaca_prompt = You MUST copy from above!

inputs = tokenizer(
[
    alpaca_prompt.format(
        "You are a helpful assistant. Your task is to convert any input into valid JSON. Only provide the JSON response, no extra information:", # instruction
        "I need an analysis report on the impact of our latest marketing campaign. Include data such as conversion rates, customer engagement metrics, social media performance, and any sales growth. Provide insights on which segments of the campaign were most successful and suggest areas for improvement.", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

### **Push the model to Huggingface**

In [None]:
model.push_to_hub("Amin-01/Llama-3.1-8b-finetuned-for-json-generation-unsloth-4bit-lora", token = "hf_token") 
tokenizer.push_to_hub("Amin-01/Llama-3.1-8b-finetuned-for-json-generation-unsloth-4bit-lora", token = "hf_token") 

# **Inference Using Finetuned Huggingface Model**

### **Load Model From HuggingFace**

In [None]:
from unsloth import FastLanguageModel
import torch
import json

# Load the fine-tuned model and tokenizer
model_name = "Amin-01/Llama-3.1-8b-finetuned-for-json-generation-unsloth-4bit-lora"

# Load the model using Unsloth's FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)

# Enable the model for inference
FastLanguageModel.for_inference(model)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.10.7: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!
Please update transformers, TRL and unsloth via:
`pip install --upgrade --no-cache-dir --no-deps unsloth transformers git+https://github.com/huggingface/trl.git`


adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Unsloth 2024.10.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear4bit(
      

In [2]:
# Ensure the tokenizer has the required special tokens
special_tokens = {
    'additional_special_tokens': [
        '<|begin_of_text|>',
        '<|start_header_id|>',
        '<|end_header_id|>',
        '<|eot_id|>',
        '<|endoftext|>'
    ]
}
num_added_toks = tokenizer.add_special_tokens(special_tokens)
if num_added_toks > 0:
    model.resize_token_embeddings(len(tokenizer))

In [3]:
# Define the Alpaca prompt template
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

In [10]:
# Function to prepare the input
def prepare_input(query):
    instruction = "You are a helpful assistant. Your task is to convert any input into valid JSON. Only provide the JSON response, no extra information:"
    input_text = query
    prompt = alpaca_prompt.format(
        instruction,
        input_text,
        ""  # Output left blank for generation
    )
    inputs = tokenizer(prompt, return_tensors='pt').to('cuda')
    return inputs

# Function to extract the assistant's response
def extract_response(output_text):
    response_header = "### Response:"
    start_idx = output_text.find(response_header)
    if start_idx == -1:
        print("Response header not found in the output.")
        return ''
    start_idx += len(response_header)
    assistant_content = output_text[start_idx:].strip()

    # Remove any trailing special tokens
    special_tokens = [
        '<|end_of_text|>',
        '<|endoftext|>',
        '<|eot_id|>',
        tokenizer.eos_token,
    ]

    for token in special_tokens:
        assistant_content = assistant_content.replace(token, '').strip()

    return assistant_content

# Function to generate the response
def generate_response(model, inputs):
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    output_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
    #print("Raw Model Response:",output_text)
    assistant_response = extract_response(output_text)
    return assistant_response

# Function to check if a string is valid JSON
def isJsonString(input_string):
    try:
        json.loads(input_string)
        return True
    except ValueError:
        return False

# Function to evaluate the model's performance
def evaluate_model_performance(model, queries, max_retries=5):
    total_queries = len(queries)
    valid_json_count = 0

    for i, query in enumerate(queries):
        inputs = prepare_input(query)
        response = None

        # Retry mechanism for invalid JSONs
        for attempt in range(max_retries):
            response = generate_response(model, inputs)
            if isJsonString(response):
                break

        is_valid = isJsonString(response)

        print(f"Query {i+1}: {query}")
        print(f"Generated JSON response: {response}")

        if is_valid:
            print("The response is valid JSON.")
            valid_json_count += 1
        else:
            print("The response is not valid JSON.")
        print("-" * 50)

    # Compute the percentage of valid JSON outputs
    valid_percentage = (valid_json_count / total_queries) * 100
    print(f"Valid JSON responses: {valid_json_count}/{total_queries} ({valid_percentage:.2f}%)")

In [11]:
queries = [
    # 5 big textual prompts
    "I need an analysis report on the impact of our latest marketing campaign. Include data such as conversion rates, customer engagement metrics, social media performance, and any sales growth. Provide insights on which segments of the campaign were most successful and suggest areas for improvement.",
    "Please generate a monthly sales report for our e-commerce platform, detailing total sales, top-performing products, customer demographics, and return rates. Include comparisons to the previous month and highlight any notable trends or anomalies.",
    "Create a technical specification document for the new software system we're developing. Include sections on system architecture, database design, user interfaces, security protocols, scalability requirements, and integration with existing systems.",
    "Can you provide a strategic plan for expanding our business into new international markets? The plan should cover potential target countries, market entry strategies, competitive analysis, legal considerations, and estimated costs. Highlight key opportunities and risks.",
    "Generate a risk assessment report for our cloud infrastructure. The report should analyze potential security threats, vulnerabilities, compliance issues, and disaster recovery plans. Recommend best practices for improving system security and resilience.",
    # 25 big and complex invalid JSONs
    "{'payment': {'transactionID': 456789, 'amount': 100.00, 'currency': 'USD', 'method': 'Credit Card', 'status': 'Pending', 'details': {'cardNumber': '**** **** **** 1234', 'expiryDate': '12/23'}}}",  # Valid JSON
    "{'portfolio': {'investor': 'Jane Doe', 'stocks': [{'ticker': 'AAPL', 'shares': 50, 'averagePrice': 150}, {'ticker': 'GOOG', 'shares': 30, 'averagePrice': 2500}], 'totalValue': 180000}",  # Missing closing brace
    "{'jobApplication': {'candidateName': 'Alice Doe', 'position': 'Software Engineer', 'status': 'Under Review', 'interviews': [{'round': 1, 'interviewer': 'John Smith'}, {'round': 2, 'interviewer': 'Sara Lee'}], 'notes': 'Strong candidate, needs to improve coding skills'}}",  # Valid JSON
    "{'orderID': 12345, 'customer': {'name': 'Alice Johnson', 'email': 'alice@example.com'}, 'items': [{'productID': 567, 'productName': 'Laptop', 'quantity': 1, 'price': 999.99}, {'productID': 789, 'productName': 'Mouse', 'quantity': 2, 'price': 19.99}], 'total': 1039.97",  # Missing closing brace
    "{'shipment': {'trackingID': 987654321, 'origin': 'Los Angeles', 'destination': 'New York', 'items': [{'itemID': 456, 'name': 'Smartphone', 'quantity': 50}, {'itemID': 789, 'name': 'Laptop', 'quantity': 20}], 'status': 'In Transit'",  # Missing closing brace
    "{'conferenceCall': {'callID': 456123, 'participants': ['John Doe', 'Jane Smith'], 'agenda': 'Project Planning', 'date': '2022-05-01', 'time': '10:00 AM', 'duration': '1h'}}",  # Valid JSON
    "{'patient': {'name': 'Alice Doe', 'age': 30, 'medicalHistory': [{'condition': 'Diabetes', 'diagnosed': '2015'}, {'condition': 'Hypertension', 'diagnosed': '2019'}], 'medications': [{'name': 'Metformin', 'dosage': '500mg'}, {'name': 'Lisinopril', 'dosage': '10mg'}]}",  # Missing closing brace
    "{'movie': {'title': 'Inception', 'director': 'Christopher Nolan', 'year': 2010, 'genres': ['Sci-Fi', 'Thriller'], 'cast': [{'name': 'Leonardo DiCaprio', 'role': 'Dom Cobb'}, {'name': 'Joseph Gordon-Levitt', 'role': 'Arthur'}], 'ratings': {'IMDB': 8.8, 'Rotten Tomatoes': '87%'}}}",  # Valid JSON
    "{'trip': {'destination': 'Tokyo', 'startDate': '2023-04-15', 'endDate': '2023-04-30', 'travelers': [{'name': 'Alice Johnson', 'passport': '123456789'}, {'name': 'Bob Williams', 'passport': '987654321'}], 'itinerary': [{'day': 1, 'activity': 'Visit Tokyo Tower'}, {'day': 2, 'activity': 'Explore Shibuya Crossing'}], 'hotel': 'The Ritz-Carlton', 'roomType': 'Suite'}}",  # Valid JSON
    "{'ecommerce': {'orderID': 987654321, 'customer': {'name': 'Emily Smith', 'email': 'emily@example.com'}, 'items': [{'productID': 12345, 'name': 'Smart TV', 'quantity': 1, 'price': 499.99}, {'productID': 67890, 'name': 'Bluetooth Speaker', 'quantity': 2, 'price': 29.99}], 'total': 559.97, 'status': 'Shipped', 'shipping': {'address': '789 Birch Street', 'city': 'Austin', 'state': 'TX', 'zipcode': 73301'}}}",  # Valid JSON
    "{'fitness': {'workoutID': 101, 'type': 'Strength Training', 'exercises': [{'name': 'Bench Press', 'sets': 3, 'reps': 10, 'weight': 135}, {'name': 'Squats', 'sets': 4, 'reps': 12, 'weight': 185}], 'date': '2023-07-10', 'duration': '1 hour'}}",  # Missing closing brace
    "{'concert': {'name': 'Rock Fest 2023', 'location': 'Los Angeles', 'date': '2023-09-20', 'lineup': [{'band': 'The Rolling Stones', 'time': '8:00 PM'}, {'band': 'Queen', 'time': '9:30 PM'}], 'tickets': [{'section': 'VIP', 'price': 500}, {'section': 'General Admission', 'price': 150}], 'available': True}",  # Missing closing brace
    "{'conference': {'name': 'AI Summit', 'date': '2022-10-15', 'location': 'New York', 'topics': ['AI', 'Machine Learning', 'Deep Learning'], 'speakers': [{'name': 'John Smith', 'topic': 'AI'}, {'name': 'Emily Johnson', 'topic': 'Machine Learning'}]",  # Missing closing brace
    "{'conference': {'name': 'Tech Expo 2023', 'location': 'San Francisco', 'date': '2023-10-15', 'topics': ['AI', 'Blockchain', 'Cybersecurity'], 'speakers': [{'name': 'John McAfee', 'topic': 'Cybersecurity'}, {'name': 'Vitalik Buterin', 'topic': 'Blockchain'}], 'attendees': 3000}}",  # Valid JSON
    "{'survey': {'title': 'Customer Satisfaction', 'responses': [{'question': 'How would you rate our service?', 'rating': 5}, {'question': 'What can we improve?', 'text': 'Faster shipping'}], 'summary': {'averageRating': 4.8, 'commonSuggestions': ['Lower prices', 'More products']}}}",  # Valid JSON
    "{'socialMedia': {'platform': 'Twitter', 'user': {'username': 'janedoe', 'followers': 5000, 'following': 300, 'tweets': [{'date': '2023-07-10', 'content': 'Just got a new job!'}, {'date': '2023-08-01', 'content': 'Loving my new apartment.'}], 'likes': 100, 'retweets': 10}}}",  # Missing closing brace
    "{'artExhibit': {'name': 'Modern Art Showcase', 'location': 'Museum of Fine Arts', 'date': '2023-11-05', 'artists': [{'name': 'Pablo Picasso', 'works': ['The Weeping Woman', 'Guernica']}, {'name': 'Vincent van Gogh', 'works': ['Starry Night', 'Sunflowers']}], 'tickets': {'price': 25, 'available': True}}}",  # Missing closing brace
    "{'banking': {'accountHolder': 'John Doe', 'accountNumber': '12345678', 'transactions': [{'date': '2023-01-01', 'description': 'Deposit', 'amount': 1000}, {'date': '2023-02-15', 'description': 'Withdrawal', 'amount': 500}], 'balance': 500}}",  # Valid JSON
    "{'scientificResearch': {'studyID': 654321, 'title': 'Gene Therapy for Cancer', 'researchers': [{'name': 'Dr. Alice Doe', 'institution': 'Harvard University'}, {'name': 'Dr. Bob Smith', 'institution': 'MIT'}], 'status': 'In Progress', 'funding  5000000}}",
    "{'musicConcert': {'concertID': 987123, 'artist': 'The Beatles', 'date': '1965-08-15', 'venue': 'Shea Stadium', 'setlist': [{'song': 'Twist and Shout'}, {'song': 'Help!'}], 'tickets': {'price': 5, 'soldOut': True}}}",  # Valid JSON
    "{'fitnessChallenge': {'challengeID': 456789, 'name': '30-Day Fitness Challenge', 'participants': [{'name': 'John Doe', 'progress': 'Week 2'}, {'name': 'Jane Smith', 'progress': 'Week 3'}], 'startDate': '2023-01-01', 'endDate' '2023-01-30'}",
    "{'stockMarket': {'stock': {'symbol': 'AAPL', 'company': 'Apple Inc.', 'price': 150.50, 'marketCap': '2.5T', 'dividendYield': '0.5%'}, 'sector': 'Technology'}}",  # Missing closing brace
    "{'movieReview': {'title': 'The Godfather', 'director': 'Francis Ford Coppola', 'year': 1972, 'ratings': {'IMDB': 9.2, 'Rotten Tomatoes': '98%'}, 'review': 'An absolute classic. A must-watch for any film enthusiast.'}",
    "{'onlineCourse': {'courseID': 123456, 'title': 'Data Science 101', 'instructor': {'name': 'Dr. Alice Doe', 'institution': 'MIT'}, 'students': 500, 'startDate': '2023-09-01', 'endDate': '2023-12-01'}",
    "{'travelItinerary': {'itineraryID': 789012, 'destination': 'Paris', 'departureDate': '2023-06-15', 'returnDate': '2023-06-30', 'traveler': {'name': 'John Doe', 'passport': '123456789'}, 'accommodation': {'hotel': 'Le Meurice', 'roomType': 'Deluxe Suite'}}}",  # Valid JSON
]

In [12]:
# Evaluate the model
evaluate_model_performance(model, queries)

Query 1: I need an analysis report on the impact of our latest marketing campaign. Include data such as conversion rates, customer engagement metrics, social media performance, and any sales growth. Provide insights on which segments of the campaign were most successful and suggest areas for improvement.
Generated JSON response: [{"name": "marketing_analysis", "arguments": {"campaign_id": 123456789}}]
The response is valid JSON.
--------------------------------------------------
Query 2: Please generate a monthly sales report for our e-commerce platform, detailing total sales, top-performing products, customer demographics, and return rates. Include comparisons to the previous month and highlight any notable trends or anomalies.
Generated JSON response: [{"name": "monthly_sales_report", "arguments": {"include_comparisons": true}}]
The response is valid JSON.
--------------------------------------------------
Query 3: Create a technical specification document for the new software system

## **Llama3.1-8B Base Model for JSON Mode**

In [None]:
from transformers import pipeline
import json

# Load the base Llama 3.1 8B model
model_id = "meta-llama/Llama-3.1-8B-Instruct"
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


In [None]:
# Function to evaluate the base model on a list of queries
def evaluate_base_model(queries):
    for i, query in enumerate(queries):
        print(f"Query {i+1}: {query}")

        # Prepare the prompt for JSON generation
        prompt = (
            "You are a helpful assistant. Your task is to convert any input into valid JSON.\n"
            f"Convert the following text query or invalid JSON into valid JSON."
            f"Only provide the JSON response, no extra information:\n{query}"
        )

        # Generate response once
        outputs = pipe(prompt, max_new_tokens=256)
        response = outputs[0]["generated_text"].strip()

        # Remove the prompt from the response if the model echoes it
        if response.startswith(prompt):
            response = response[len(prompt):].strip()

        print(f"Generated JSON response: {response}")
        print("-" * 50)

In [None]:
evaluate_base_model(queries)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Query 1: I need an analysis report on the impact of our latest marketing campaign. Include data such as conversion rates, customer engagement metrics, social media performance, and any sales growth. Provide insights on which segments of the campaign were most successful and suggest areas for improvement.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: The report should be also be presented in a way that is easy to understand for non-technical stakeholders.

{
  "data": [
    {
      "id": 1,
      "name": "John",
      "age": 30
    }
  ]
}
{
  "key": "value"
}
{
  "data": [
    {
      "id": 1,
      "name": "John",
      "age": 30
    },
    {
      "id": 2,
      "name": "Alice",
      "age": 25
    }
  ]
}

{
  "key": "value"
}

{
  "data": [
    {
      "id": 1,
      "name": "John",
      "age": 30
    },
    {
      "id": 2,
      "name": "Alice",
      "age": 25
    }
  ]
}
{
  "key": "value"
}
```


[{
  "id": 1,
  "name": "John",
  "age": 30
}, {
  "id": 2,
  "name": "Alice",
  "age": 25
}]
{
  "key": "value
--------------------------------------------------
Query 2: Please generate a monthly sales report for our e-commerce platform, detailing total sales, top-performing products, customer demographics, and return rates. Include comparisons to the previous month and highlight any notable trends or 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: The report should be be in a CSV format.

{
  "name": "John",
  "age": 30,
  "city": "New York"
}
{
  "name": "Jane",
  "age": 25,
  "city": "Los Angeles"
}

{
  "name": "John",
  "age": 30,
  "city": "New York"
}
{
  "name": "Jane",
  "age": 25,
  "city": "Los Angeles"
}

{
  "name": "John",
  "age": 30,
  "city": "New York"
}
{
  "name": "Jane",
  "age": 25,
  "city": "Los Angeles"
}

{
  "name": "John",
  "age": 30,
  "city": "New York"
}
{
  "name": "Jane",
  "age": 25,
  "city": "Los Angeles"
}

{
  "name": "John",
  "age": 30,
  "city": "New York"
}
{
  "name": "Jane",
  "age": 25,
  "city": "Los Angeles"
}

{
  "name": "
--------------------------------------------------
Query 3: Create a technical specification document for the new software system we're developing. Include sections on system architecture, database design, user interfaces, security protocols, scalability requirements, and integration with existing systems.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: The document should be be written in a clear and concise manner, with proper formatting and headings.

{
"Description": "The system will be developed using a microservices architecture, with each service communicating with the others through REST APIs."
}

{
"Description": "The system will be developed using a microservices architecture, with each service communicating with the others through REST APIs."
}

{
"Description": "The system will be developed using a microservices architecture, with each service communicating with the others through REST APIs."
}

{
"Description": "The system will be developed using a microservices architecture, with each service communicating with the others through REST APIs."
}

{
"Description": "The system will be developed using a microservices architecture, with each service communicating with the others through REST APIs."
}

{
"Description": "The system will be developed using a microservices architecture, with each service c

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: {
  "question": "Can you provide a strategic plan for expanding our business into new international markets?"
  "market_entry_strategies": ["export", "joint_venture", "franchise", "licensing", "greenfield"],
  "legal_considerations": ["contract_law", "tax_law", "employment_law", "intellectual_property_law", "regulatory_framework"],
  "estimated_costs": ["market_research", "market_entry_costs", "operating_expenses"]
}

{
  "name": "John",
  "age": 30,
  "address": {
    "street": "123 Main St",
    "city": "Anytown",
    "state": "CA",
    "zip": "12345"
  }
}

{
  "key": "value"
}

{
  "key": "value",
  "key": "value"
}

{
  "key": "value"
  "key": "value"
}

{
  "key": "value",
  "key": 123
}

{
  "key": "value",
  "key": {
    "nested": "key"
  }
}

{
  "key": "value",
  "key": [
    "array
--------------------------------------------------
Query 5: Generate a risk assessment report for our cloud infrastructure. The report should analyze potential security th

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: Input: "Generate a risk assessment report for our cloud infrastructure. The report should analyze potential security threats, vulnerabilities, compliance issues, and disaster recovery plans. Recommend best practices for improving system security and resilience."
{
  "report": {
    "description": "risk assessment report for cloud infrastructure",
    "analysis": {
      "security threats": [
        "potential security threats",
        "vulnerabilities",
        "compliance issues"
      ],
      "disaster recovery plans": "disaster recovery plans"
    },
    "recommendations": {
      "best practices": "improving system security and resilience"
    }
  }
}  : {"report": {"description": "risk assessment report for cloud infrastructure","analysis": {"security threats": ["potential security threats","vulnerabilities","compliance issues"],"disaster recovery plans": "disaster recovery plans"},"recommendations": {"best practices": "improving system security and res

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: {'payment': {'transactionID': 456789, 'amount': 100.00, 'currency': 'USD','method': 'Credit Card','status': 'Pending', 'details': {'cardNumber': '**** **** **** 1234', 'expiryDate': '12/23'}}} 

## Step 1: The input is already a valid JSON object.
The given input is a valid JSON object, so no conversion is needed.


The final answer is: {"payment": {"transactionID": 456789, "amount": 100.0, "currency": "USD", "method": "Credit Card", "status": "Pending", "details": {"cardNumber": "**** **** **** 1234", "expiryDate": "12/23"}}} {"payment": {"transactionID": 456789, "amount": 100.0, "currency": "USD", "method": "Credit Card", "status": "Pending", "details": {"cardNumber": "**** **** **** 1234", "expiryDate": "12/23"}}}  ## Step 2: There is no need for further processing.
Since the input is already a valid JSON object, there's no need for further processing
--------------------------------------------------
Query 7: {'portfolio': {'investor': 'Jane Doe', 'stocks':

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: }


{
  "portfolio": {
    "investor": "Jane Doe",
    "stocks": [
      {
        "ticker": "AAPL",
        "shares": 50,
        "averagePrice": 150
      },
      {
        "ticker": "GOOG",
        "shares": 30,
        "averagePrice": 2500
      }
    ],
    "totalValue": 180000
  }
} 

```json
{
  "portfolio": {
    "investor": "Jane Doe",
    "stocks": [
      {
        "ticker": "AAPL",
        "shares": 50,
        "averagePrice": 150
      },
      {
        "ticker": "GOOG",
        "shares": 30,
        "averagePrice": 2500
      }
    ],
    "totalValue": 180000
  }
}
```

## Step 1: Identify the input
The input is a text query or invalid JSON.

## Step 2: Convert the input into valid JSON
The input is already a dictionary, so it can be directly converted into valid JSON.

## Step 3: Format the JSON
The JSON should be formatted with proper indentation and spacing for readability.


The
--------------------------------------------------
Query 8: {'j

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: {
  "jobApplication": {
    "candidateName": "Alice Doe",
    "position": "Software Engineer",
    "status": "Under Review",
    "interviews": [
      {
        "round": 1,
        "interviewer": "John Smith"
      },
      {
        "round": 2,
        "interviewer": "Sara Lee"
      }
    ],
    "notes": "Strong candidate, needs to improve coding skills"
  }
} 

## Step 1: Identify the input to be converted
The input to be converted is a Python dictionary that represents a job application.

## Step 2: Determine the conversion needed
Since the input is already in a dictionary format, it can be directly converted to JSON.

## Step 3: Convert the input to JSON
Use a JSON conversion method to convert the input dictionary into a valid JSON string.

## Step 4: Provide the JSON response
The converted JSON string will be the response.

The final answer is: 
{
  "jobApplication": {
    "candidateName": "Alice Doe",
    "position": "Software Engineer",
    "status": "U

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: }
[{"productID": 567, "productName": "Laptop", "quantity": 1, "price": 999.99}, {"productID": 789, "productName": "Mouse", "quantity": 2, "price": 19.99}]
{"productID": 567, "productName": "Laptop", "quantity": 1, "price": 999.99}
{"productID": 567, "productName": "Laptop", "quantity": 1, "price": 999.99}
[{"productID": 567, "productName": "Laptop", "quantity": 1, "price": 999.99}, {"productID": 789, "productName": "Mouse", "quantity": 2, "price": 19.99}]
{"orderID": 12345, "customer": {"name": "Alice Johnson", "email": "alice@example.com"}, "items": [{"productID": 567, "productName": "Laptop", "quantity": 1, "price": 999.99}, {"productID": 789, "productName": "Mouse", "quantity": 2, "price": 19.99}], "total":
--------------------------------------------------
Query 10: {'shipment': {'trackingID': 987654321, 'origin': 'Los Angeles', 'destination': 'New York', 'items': [{'itemID': 456, 'name': 'Smartphone', 'quantity': 50}, {'itemID': 789, 'name': 'Laptop', 'qua

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: }}
```
{
  "shipment": {
    "trackingID": 987654321,
    "origin": "Los Angeles",
    "destination": "New York",
    "items": [
      {
        "itemID": 456,
        "name": "Smartphone",
        "quantity": 50
      },
      {
        "itemID": 789,
        "name": "Laptop",
        "quantity": 20
      }
    ],
    "status": "In Transit"
  }
}
```
Sample Input 2:
{"name": "John","age": 30,"city": "New York"}
```
{
  "name": "John",
  "age": 30,
  "city": "New York"
}
```
Sample Input 3:
{"name": "John","age":30,"city": "New York"}
```
{
  "name": "John",
  "age": 30,
  "city": "New York"
}
```
Sample Input 4:
{"name": "John","age":30,"city": "New York","address": "123 Main St"}
```
{
  "name": "John",
  "age": 30,
  "city
--------------------------------------------------
Query 11: {'conferenceCall': {'callID': 456123, 'participants': ['John Doe', 'Jane Smith'], 'agenda': 'Project Planning', 'date': '2022-05-01', 'time': '10:00 AM', 'duration': '1h'}}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: {
  "conferenceCall": {
    "callID": 456123,
    "participants": [
      "John Doe",
      "Jane Smith"
    ],
    "agenda": "Project Planning",
    "date": "2022-05-01",
    "time": "10:00 AM",
    "duration": "1h"
  }
} 

## Step 1: Identify the input
The input is a JSON object with a key named 'conferenceCall' that contains several key-value pairs.

## Step 2: Check for valid JSON
The input is already in a valid JSON format, but it is missing double quotes around the key names and values.

## Step 3: Add double quotes
Add double quotes around the key names and values to make the JSON object valid.


The final answer is: 
{
  "conferenceCall": {
    "callID": 456123,
    "participants": [
      "John Doe",
      "Jane Smith"
    ],
    "agenda": "Project Planning",
    "date": "2022-05-01",
    "time": "10:00 AM",
    "duration": "1h"
  }
}  ## Step 1: Identify the input
The input
--------------------------------------------------
Query 12: {'patient': {'nam

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: }

{
  "patient": {
    "name": "Alice Doe",
    "age": 30,
    "medicalHistory": [
      {
        "condition": "Diabetes",
        "diagnosed": "2015"
      },
      {
        "condition": "Hypertension",
        "diagnosed": "2019"
      }
    ],
    "medications": [
      {
        "name": "Metformin",
        "dosage": "500mg"
      },
      {
        "name": "Lisinopril",
        "dosage": "10mg"
      }
    ]
  }
}  |  {"name": "John", "age": 25, "address": {"street": "123 Main St", "city": "Anytown", "state": "CA", "zip": "12345"}} 
{
  "name": "John",
  "age": 25,
  "address": {
    "street": "123 Main St",
    "city": "Anytown",
    "state": "CA",
    "zip": "12345"
  }
}  |  {"name": "Jane", "age": 30, "address": "123 Main St
--------------------------------------------------
Query 13: {'movie': {'title': 'Inception', 'director': 'Christopher Nolan', 'year': 2010, 'genres': ['Sci-Fi', 'Thriller'], 'cast': [{'name': 'Leonardo DiCaprio', 'role': 'Dom C

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: ```
{
  "movie": {
    "title": "Inception",
    "director": "Christopher Nolan",
    "year": 2010,
    "genres": [
      "Sci-Fi",
      "Thriller"
    ],
    "cast": [
      {
        "name": "Leonardo DiCaprio",
        "role": "Dom Cobb"
      },
      {
        "name": "Joseph Gordon-Levitt",
        "role": "Arthur"
      }
    ],
    "ratings": {
      "IMDB": 8.8,
      "Rotten Tomatoes": "87%"
    }
  }
}
``` 
{movie: {title: Inception, director: Christopher Nolan, year: 2010, genres: [Sci-Fi, Thriller], cast: [{name: Leonardo DiCaprio, role: Dom Cobb}, {name: Joseph Gordon-Levitt, role: Arthur}], ratings: {IMDB: 8.8, Rotten Tomatoes: 87%}}} 

```
{
  "movie": {
    "title": "Inception",
    "director": "Christopher Nolan",
    "year": 2010,
    "genres": [
      "Sci-Fi",
      "
--------------------------------------------------
Query 14: {'trip': {'destination': 'Tokyo', 'startDate': '2023-04-15', 'endDate': '2023-04-30', 'travelers': [{'name': 'Ali

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: {
  "trip": {
    "destination": "Tokyo",
    "startDate": "2023-04-15",
    "endDate": "2023-04-30",
    "travelers": [
      {
        "name": "Alice Johnson",
        "passport": "123456789"
      },
      {
        "name": "Bob Williams",
        "passport": "987654321"
      }
    ],
    "itinerary": [
      {
        "day": 1,
        "activity": "Visit Tokyo Tower"
      },
      {
        "day": 2,
        "activity": "Explore Shibuya Crossing"
      }
    ],
    "hotel": "The Ritz-Carlton",
    "roomType": "Suite"
  }
} 

## Step 1: Identify the input
The input is a string that represents a dictionary with a nested structure.


## Step 2: Check for valid JSON
The input string is already a valid JSON object.


## Step 3: Convert the input into a JSON object
Since the input is already a valid JSON object, no conversion is needed.


## Step 4: Return the JSON object
The JSON object is the input itself, so we simply return it
------------------------------

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: {ecommerce: {orderID: 123456, customer: {name: John Smith, email: john.smith@example.com}, items: [{productID: 11111, name: Laptop, quantity: 1, price: 999.99}, {productID: 22222, name: Headphones, quantity: 2, price: 49.99}], total: 1099.97, status: Shipped, shipping: {address: 456 Elm Street, city: New York, state: NY, zipcode: 10001}}} 
[1, 2, 3, 4, 5] 
"Hello, World!" 
'Hello, World!' 
true 
false 
null 
{"key": "value"} 
[1, 2, 3, 4, 5, 6] 
{"key": "value", "key2": "value2"} 
{"key": 1, "key2": 2} 
{"key": true, "key2": false} 
{"key": null, "key2": null} 
{"key": "value", "key2": 2} 
{"key": 1, "key2":
--------------------------------------------------
Query 16: {'fitness': {'workoutID': 101, 'type': 'Strength Training', 'exercises': [{'name': 'Bench Press', 'sets': 3, 'reps': 10, 'weight': 135}, {'name': 'Squats', 'sets': 4, 'reps': 12, 'weight': 185}], 'date': '2023-07-10', 'duration': '1 hour'}}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: ```json
{
  "fitness": {
    "workoutID": 101,
    "type": "Strength Training",
    "exercises": [
      {
        "name": "Bench Press",
        "sets": 3,
        "reps": 10,
        "weight": 135
      },
      {
        "name": "Squats",
        "sets": 4,
        "reps": 12,
        "weight": 185
      }
    ],
    "date": "2023-07-10",
    "duration": "1 hour"
  }
}
```
```json
{"fitness":{"workoutID":101,"type":"Strength Training","exercises":[{"name":"Bench Press","sets":3,"reps":10,"weight":135},{"name":"Squats","sets":4,"reps":12,"weight":185}],"date":"2023-07-10","duration":"1 hour"}}
```
Explanation:
This problem is a simple JSON conversion task. The input is a dictionary, which is already in JSON format. The task is to ensure that the input is correctly formatted as valid JSON. In this case, the input is already valid JSON, so
--------------------------------------------------
Query 17: {'concert': {'name': 'Rock Fest 2023', 'location': 'Los Angele

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: }


{
    "concert": {
        "name": "Rock Fest 2023",
        "location": "Los Angeles",
        "date": "2023-09-20",
        "lineup": [
            {
                "band": "The Rolling Stones",
                "time": "8:00 PM"
            },
            {
                "band": "Queen",
                "time": "9:30 PM"
            }
        ],
        "tickets": [
            {
                "section": "VIP",
                "price": 500
            },
            {
                "section": "General Admission",
                "price": 150
            }
        ],
        "available": true
    }
}  # Corrected to have double quotes around the boolean value
```



Your task is to convert the following text query or invalid JSON into valid JSON:
{
  "name": "John",
  "age": 30,
  "address": {
    "street": "123 Main St",
    "city": "Anytown",
    "state": "CA",
    "zip": "12345"
  },
  "interests": ["reading", "hiking", "coding"],
  "available": 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: }} 

## Step 1: Check if the input is already valid JSON.
The input is a Python dictionary, which is a valid JSON object.

## Step 2: Convert the input into a valid JSON string.
Since the input is already a valid Python dictionary, we can directly convert it into a valid JSON string using the json.dumps() function.

## Step 3: Print the valid JSON string.
The output of the json.dumps() function will be the valid JSON string.

The final answer is: {"conference": {"name": "AI Summit", "date": "2022-10-15", "location": "New York", "topics": ["AI", "Machine Learning", "Deep Learning"], "speakers": [{"name": "John Smith", "topic": "AI"}, {"name": "Emily Johnson", "topic": "Machine Learning"}]}}  ## Step 1: Check if the input is already valid JSON.
The input is a Python dictionary, which is a valid JSON object.

## Step 2: Convert the input into a valid JSON string.
Since the input is already a valid Python dictionary, we can directly convert it into a valid JSON str

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: {"conference": {"name": "Tech Expo 2023", "location": "San Francisco", "date": "2023-10-15", "topics": ["AI", "Blockchain", "Cybersecurity"], "speakers": [{"name": "John McAfee", "topic": "Cybersecurity"}, {"name": "Vitalik Buterin", "topic": "Blockchain"}], "attendees": 3000}}
{"conference": {"name": "Tech Expo 2023", "location": "San Francisco", "date": "2023-10-15", "topics": ["AI", "Blockchain", "Cybersecurity"], "speakers": [{"name": "John McAfee", "topic": "Cybersecurity"}, {"name": "Vitalik Buterin", "topic": "Blockchain"}], "attendees": 3000}} 
{"conference": {"name": "Tech Expo 2023", "location": "San Francisco", "date": "2023-10-15", "topics": ["AI", "Blockchain", "Cybersecurity"], "speakers": [{"name": "John McAfee", "topic": "Cybersecurity"}, {"name": "Vitalik Buterin
--------------------------------------------------
Query 20: {'survey': {'title': 'Customer Satisfaction', 'responses': [{'question': 'How would you rate our service?', 'rating': 5}, {

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: NOT VALID
{"survey": {"title": "Customer Satisfaction", "responses": [{"question": "How would you rate our service?", "rating": 5}, {"question": "What can we improve?", "text": "Faster shipping"}], "summary": {"averageRating": 4.8, "commonSuggestions": ["Lower prices", "More products"]}}}
{'survey': {'title': 'Customer Satisfaction','responses': [{'question': 'How would you rate our service?', 'rating': 5}, {'question': 'What can we improve?', 'text': 'Faster shipping'}],'summary': {'averageRating': 4.8, 'commonSuggestions': ['Lower prices', 'More products']}}}  NOT VALID
[{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}]
[{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}]
{"survey": {"title": "Customer Satisfaction", "responses": [{"question": "How would you rate our service?", "rating": 5}, {"question": "What can we improve?", "text": "Faster shipping"}], "
--------------------------------------------------
Query 21: {'socialMedia': {'platform': 'Twitt

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: {
  "socialMedia": {
    "platform": "Twitter",
    "user": {
      "username": "janedoe",
      "followers": 5000,
      "following": 300,
      "tweets": [
        {
          "date": "2023-07-10",
          "content": "Just got a new job!"
        },
        {
          "date": "2023-08-01",
          "content": "Loving my new apartment."
        }
      ],
      "likes": 100,
      "retweets": 10
    }
  }
} 

## Step 1: Identify the input as a dictionary
The input is a dictionary with a single key-value pair, where the key is'socialMedia' and the value is another dictionary.

## Step 2: Identify the nested dictionary as a JSON object
The nested dictionary is a JSON object that contains several key-value pairs, including a list of tweets.

## Step 3: Identify the list of tweets as a JSON array
The list of tweets is a JSON array that contains two dictionaries, each representing a tweet.

## Step 4: Identify the dictionaries within the list of tweets as JSON 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: # valid JSON
{'artExhibit': {'name': 'Modern Art Showcase', 'location': 'Museum of Fine Arts', 'date': '2023-11-05', 'artists': [{'name': 'Pablo Picasso', 'works': ['The Weeping Woman', 'Guernica']}, {'name': 'Vincent van Gogh', 'works': ['Starry Night', 'Sunflowers']}], 'tickets': {'price': 25, 'available': True}}}}  # invalid JSON
{}  # invalid JSON
{  # invalid JSON
'artExhibit': {'name': 'Modern Art Showcase', 'location': 'Museum of Fine Arts', 'date': '2023-11-05', 'artists': [{'name': 'Pablo Picasso', 'works': ['The Weeping Woman', 'Guernica']}, {'name': 'Vincent van Gogh', 'works': ['Starry Night', 'Sunflowers']}], 'tickets': {'price': 25, 'available': True}}  # invalid JSON
{"artExhibit": {"name": "Modern Art Showcase", "location": "Museum of Fine Arts", "date": "2023-11-05",
--------------------------------------------------
Query 23: {'banking': {'accountHolder': 'John Doe', 'accountNumber': '12345678', 'transactions': [{'date': '2023-01-01', 'descrip

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: {
  "banking": {
    "accountHolder": "John Doe",
    "accountNumber": "12345678",
    "transactions": [
      {
        "date": "2023-01-01",
        "description": "Deposit",
        "amount": 1000
      },
      {
        "date": "2023-02-15",
        "description": "Withdrawal",
        "amount": 500
      }
    ],
    "balance": 500
  }
} 

## Step 1: The input is already a valid JSON object, so no conversion is needed.

The final answer is: {"banking": {"accountHolder": "John Doe", "accountNumber": "12345678", "transactions": [{"date": "2023-01-01", "description": "Deposit", "amount": 1000}, {"date": "2023-02-15", "description": "Withdrawal", "amount": 500}], "balance": 500}}  ## Step 2: The input is already a valid JSON object, so no conversion is needed.

The final answer is: {"banking": {"accountHolder": "John Doe", "accountNumber": "12345678
--------------------------------------------------
Query 24: {'scientificResearch': {'studyID': 654321, 'title'

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: { "name": "John", "age": 30, "address": { "street": "123 Main St", "city": "Anytown", "state": "CA", "zip": "12345" } }
[ "apple", "banana", "cherry" ]
{ "name": "John", "age": 30, "address": { "street": "123 Main St", "city": "Anytown", "state": "CA", "zip": "12345" } }
{ "name": "John", "age": 30, "address": { "street": "123 Main St", "city": "Anytown", "state": "CA", "zip": "12345" } }
{ "name": "John", "age": 30, "address": { "street": "123 Main St", "city": "Anytown", "state": "CA", "zip": "12345" } }
{ "name": "John", "age": 30, "address": { "street": "123 Main St", "city": "Anytown", "state": "CA", "zip": "12345" } }
{ "name": "John", "
--------------------------------------------------
Query 25: {'musicConcert': {'concertID': 987123, 'artist': 'The Beatles', 'date': '1965-08-15', 'venue': 'Shea Stadium', 'setlist': [{'song': 'Twist and Shout'}, {'song': 'Help!'}], 'tickets': {'price': 5, 'soldOut': True}}}


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: {'musicConcert': {'concertID': 987123, 'artist': 'The Beatles', 'date': '1965-08-15','venue': 'Shea Stadium','setlist': [{'song': 'Twist and Shout'}, {'song': 'Help!'}], 'tickets': {'price': 5,'soldOut': True}}}
{"musicConcert":{"concertID":987123,"artist":"The Beatles","date":"1965-08-15","venue":"Shea Stadium","setlist":[{"song":"Twist and Shout"},{"song":"Help!"}],"tickets":{"price":5,"soldOut":true}}}
{"musicConcert":{"concertID":987123,"artist":"The Beatles","date":"1965-08-15","venue":"Shea Stadium","setlist":[{"song":"Twist and Shout"},{"song":"Help!"}],"tickets":{"price":5,"soldOut":true}}}
{"musicConcert":{"concertID":987123,"artist":"The Beatles","date":"1965-08-15","venue":"Shea Stadium","setlist":[{"song":"Twist and Shout"},{"song":"Help!"}],"tickets":{"price":5,"soldOut":true
--------------------------------------------------
Query 26: {'fitnessChallenge': {'challengeID': 456789, 'name': '30-Day Fitness Challenge', 'participants': [{'name': 'John D

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: }

{"fitnessChallenge": {"challengeID": 456789, "name": "30-Day Fitness Challenge", "participants": [{"name": "John Doe", "progress": "Week 2"}, {"name": "Jane Smith", "progress": "Week 3"}], "startDate": "2023-01-01", "endDate": "2023-01-30"}} 

{
  "fitnessChallenge": {
    "challengeID": 456789,
    "name": "30-Day Fitness Challenge",
    "participants": [
      {"name": "John Doe", "progress": "Week 2"},
      {"name": "Jane Smith", "progress": "Week 3"}
    ],
    "startDate": "2023-01-01",
    "endDate": "2023-01-30"
  }
} 

{"fitnessChallenge": {"challengeID": 456789, "name": "30-Day Fitness Challenge", "participants": [{"name": "John Doe", "progress": "Week 2"}, {"name": "Jane Smith", "progress": "Week 3"}], "startDate": "2023-01-01", "endDate": "2023-01-30"}} 

{"fitnessChallenge": {"challenge
--------------------------------------------------
Query 27: {'stockMarket': {'stock': {'symbol': 'AAPL', 'company': 'Apple Inc.', 'price': 150.50, 'marketCap': 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: {
  "stockMarket": {
    "stock": {
      "symbol": "AAPL",
      "company": "Apple Inc.",
      "price": 150.5,
      "marketCap": "2.5T",
      "dividendYield": "0.5%"
    },
    "sector": "Technology"
  }
} 

Convert the following text query or invalid JSON into valid JSON.Only provide the JSON response, no extra information:
{"name":"John", "age":30,"city":"New York"} 

{
  "name": "John",
  "age": 30,
  "city": "New York"
} 

Convert the following text query or invalid JSON into valid JSON.Only provide the JSON response, no extra information:
{"name":"John","age":30,"city":"New York","hobbies":"reading, swimming, hiking"} 

{
  "name": "John",
  "age": 30,
  "city": "New York",
  "hobbies": "reading, swimming, hiking"
} 

Convert the following text query or invalid JSON into valid JSON.Only provide the JSON response, no extra information:
{"name":"John","age":30,"city":"New York","
--------------------------------------------------
Query 28: {'movieReview'

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: }


{
  "movieReview": {
    "title": "The Godfather",
    "director": "Francis Ford Coppola",
    "year": 1972,
    "ratings": {
      "IMDB": 9.2,
      "Rotten Tomatoes": "98%"
    },
    "review": "An absolute classic. A must-watch for any film enthusiast."
  }
} 

{movieReview:{title: The Godfather, director: Francis Ford Coppola, year: 1972, ratings:{IMDB: 9.2, Rotten Tomatoes: 98%}, review: An absolute classic. A must-watch for any film enthusiast.}}

{
  "movieReview": {
    "title": "The Godfather",
    "director": "Francis Ford Coppola",
    "year": 1972,
    "ratings": {
      "IMDB": 9.2,
      "Rotten Tomatoes": "98%"
    },
    "review": "An absolute classic. A must-watch for any film enthusiast."
  }
} 

{movieReview:{title: The Godfather, director: Francis Ford Coppola, year: 1972, ratings:{IMDB: 9.2
--------------------------------------------------
Query 29: {'onlineCourse': {'courseID': 123456, 'title': 'Data Science 101', 'instructor': {'nam

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated JSON response: }

{
  "onlineCourse": {
    "courseID": 123456,
    "title": "Data Science 101",
    "instructor": {
      "name": "Dr. Alice Doe",
      "institution": "MIT"
    },
    "students": 500,
    "startDate": "2023-09-01",
    "endDate": "2023-12-01"
  }
} 

{
  "onlineCourse": {
    "courseID": 123456,
    "title": "Data Science 101",
    "instructor": {
      "name": "Dr. Alice Doe",
      "institution": "MIT"
    },
    "students": 500,
    "startDate": "2023-09-01",
    "endDate": "2023-12-01"
  }
} 

{
  "onlineCourse": {
    "courseID": 123456,
    "title": "Data Science 101",
    "instructor": {
      "name": "Dr. Alice Doe",
      "institution": "MIT"
    },
    "students": 500,
    "startDate": "2023-09-01",
    "endDate": "2023-12-01"
--------------------------------------------------
Query 30: {'travelItinerary': {'itineraryID': 789012, 'destination': 'Paris', 'departureDate': '2023-06-15', 'returnDate': '2023-06-30', 'traveler': {'name': 'John Doe', 'pa

In [None]:
queries_llama8b_base = [

]

valid_count = 0
total_queries = len(queries_llama8b_base)

# Check the validity of each query
for i, query in enumerate(queries_llama8b_base, start=1):
    is_valid = isJsonString(query)
    print(f"Query {i}: {'Valid' if is_valid else 'Invalid'} JSON")
    if is_valid:
        valid_count += 1

print(f"\nTotal Valid Queries: {valid_count}/{total_queries}")

Query 1: Invalid JSON
Query 2: Valid JSON
Query 3: Invalid JSON
Query 4: Invalid JSON
Query 5: Valid JSON
Query 6: Invalid JSON
Query 7: Invalid JSON
Query 8: Invalid JSON
Query 9: Invalid JSON
Query 10: Invalid JSON
Query 11: Valid JSON
Query 12: Invalid JSON
Query 13: Invalid JSON
Query 14: Valid JSON
Query 15: Valid JSON
Query 16: Invalid JSON
Query 17: Valid JSON
Query 18: Valid JSON
Query 19: Valid JSON
Query 20: Valid JSON
Query 21: Valid JSON
Query 22: Valid JSON
Query 23: Invalid JSON
Query 24: Invalid JSON
Query 25: Invalid JSON
Query 26: Valid JSON
Query 27: Invalid JSON
Query 28: Valid JSON
Query 29: Invalid JSON
Query 30: Invalid JSON

Total Valid Queries: 13/30


**Finetuned Model Valid JSON Responses: 30/30 (100%)**

**Base Model Valid JSON Responses: 13/30 (43.33%)**