# Logging, Tracking, and Debugging Prompts using Comet




In [1]:
! pip install comet-llm --quiet

In [2]:

import os
import IPython
import json
import pandas as pd
import numpy as np
import comet_llm
import urllib


The function below helps to generate the final results from the model after calling the OpenAI API:

In [3]:
# def get_completion(messages, model="gpt-3.5-turbo", temperature=0, max_tokens=300):
#     response = openai.ChatCompletion.create(
#         model=model,
#         messages=messages,
#         temperature=temperature,
#         max_tokens=max_tokens,
#     )
#     return response.choices[0].message["content"]

### Load the Data

The code below loads both the few-shot demonstrations and the validation dataset used for testing the model.

In [3]:
# API configuration


# print markdown
def print_markdown(text):
    """Prints text as markdown"""
    IPython.display.display(IPython.display.Markdown(text))

# load validation data from GitHub
f = urllib.request.urlopen("https://raw.githubusercontent.com/comet-ml/comet-llmops/main/data/article-tags.json")
val_data = json.load(f)

# load few shot data from GitHub
f = urllib.request.urlopen("https://raw.githubusercontent.com/comet-ml/comet-llmops/main/data/few_shot.json")
few_shot_data = json.load(f)

The following is a helper function to obtain the final predictions from the model given a prompt template (e.g., zero-shot or few-shot) and the provided input data.

In [13]:

# Load model directly
# Load model directly
# Load model directly
# Load model directly
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [5]:
def get_predictions(prompt_template, inputs):

    responses = []

    # for i in range(len(inputs)):
    #     messages = messages = [
    #         {
    #             "role": "system",
    #             "content": prompt_template.format(input=inputs[i])
    #         }
    #     ]
    #     response = get_completion(messages)
    #     responses.append(response)
    for i in range(len(inputs)):
        # Replace a placeholder in prompt_template with the actual input
        formatted_prompt = prompt_template.format(input=inputs[i])

        input_ids = tokenizer(formatted_prompt, return_tensors="pt").input_ids
        outputs = model.generate(input_ids,max_new_tokens=1200)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        responses.append(response)

    return responses


### Few-Shot

First, we define a few-shot template which will leverage the few-shot demonstration data loaded previously.

In [6]:
# function to define the few-shot template
def get_few_shot_template(few_shot_prefix, few_shot_suffix, few_shot_examples):
    return few_shot_prefix + "\n\n" + "\n".join([ "Abstract: "+ ex["abstract"] + "\n" + "Tags: " + str(ex["tags"]) + "\n" for ex in few_shot_examples]) + "\n\n" + few_shot_suffix

# function to sample few shot data
def random_sample_data (data, n):
    return np.random.choice(few_shot_data, n, replace=False)


# the few-shot prefix and suffix
few_shot_prefix = """Your task is to extract model names from machine learning paper abstracts. Your response is an an array of the model names in the format [\"model_name\"]. If you don't find model names in the abstract or you are not sure, return [\"NA\"]"""
few_shot_suffix = """Abstract: {input}\nTags:"""

# load 3 samples from few shot data
few_shot_template = get_few_shot_template(few_shot_prefix, few_shot_suffix, random_sample_data(few_shot_data, 3))

In [7]:
few_shot_template

'Your task is to extract model names from machine learning paper abstracts. Your response is an an array of the model names in the format ["model_name"]. If you don\'t find model names in the abstract or you are not sure, return ["NA"]\n\nAbstract: Generative Pre-trained Transformer models, known as GPT or OPT, set themselves apart through breakthrough performance across complex language modelling tasks, but also by their extremely high computational and storage costs. Specifically, due to their massive size, even inference for large, highly-accurate GPT models may require multiple performant GPUs, which limits the usability of such models. While there is emerging work on relieving this pressure via model compression, the applicability and performance of existing compression techniques is limited by the scale and complexity of GPT models. In this paper, we address this challenge, and propose GPTQ, a new one-shot weight quantization method based on approximate second-order information, 

### Zero-Shot Template

The code below defines the zero-shot template. Note that we use the same instruction from the few-shot prompt template. But in this case, we don't use the demonstrations.

In [8]:
zero_shot_template = """
Your task is extract model names from machine learning paper abstracts. Your response is an an array of the model names in the format [\"model_name\"]. If you don't find model names in the abstract or you are not sure, return [\"NA\"]

Abstract: {input}
Tags:
"""

### Get Predictions

We then generated all the predictions using the validation data as inputs:

In [14]:
# get the predictions

abstracts = [val_data[i]["abstract"] for i in range(len(val_data))]
few_shot_predictions = get_predictions(few_shot_template, abstracts)
zero_shot_predictions = get_predictions(zero_shot_template, abstracts)
expected_tags = [str(val_data[i]["tags"]) for i in range(len(val_data))]

Token indices sequence length is longer than the specified maximum sequence length for this model (1179 > 512). Running this sequence through the model will result in indexing errors


In [15]:
print("Few shot predictions")
print(few_shot_predictions)
print("\n\nZero shot predictions")
print(zero_shot_predictions)
print("\n\nExpected tags")
print(expected_tags)

Few shot predictions
["['WizardLM']", "['FLAN-T5']", "['Language Models']", "['PAXQA']", "['ChatGPT']", "['ViT']", "['SAM', 'Inpaint Anything']", "['Anything-3D']", "['LLM', 'GPT-4']", "['tool_learning', 'tool_oriented']"]


Zero shot predictions
['["WizardLM", "Evol-Instruct", "LLaMA", "OpenAI ChatGPT"]', '["FLAN-T5", "AMR2.0", "AMR3.0", "BioAMR82.3", "AMR2.0", "AMR3.0", "AMR82.3", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR8.0", "AMR

### Log Prompt Results

Finally, we log the prompt + results to Comet. Note that we are logging both the few-shot and zero-shot results, together with all the metadata and tags.

In [17]:
# log the predictions in Comet along with the ground truth for comparison

# set up comet
COMET_API_KEY = ""
COMET_WORKSPACE = ""

# initialize comet
comet_llm.init(COMET_API_KEY, COMET_WORKSPACE, project="ml-paper-tagger-prompts")

# log the predictions
for i in range(len(expected_tags)):
    # log the few-shot predictions
    comet_llm.log_prompt(
        prompt=few_shot_template.format(input=abstracts[i]),
        prompt_template=few_shot_template,
        output=few_shot_predictions[i],
        tags = ["flan-t5-large", "few-shot"],
        metadata = {
            "expected_tags": expected_tags[i],
            "abstract": abstracts[i],
        }
    )

    # log the zero-shot predictions
    comet_llm.log_prompt(
        prompt=zero_shot_template.format(input=abstracts[i]),
        prompt_template=zero_shot_template,
        output=zero_shot_predictions[i],
        tags = ["flan-t5-large", "zero-shot"],
        metadata = {
            "expected_tags": expected_tags[i],
            "abstract": abstracts[i],
        }
    )

[1;38;5;39mCOMET INFO:[0m Valid Comet API Key saved in /root/.comet.config (set COMET_CONFIG to change where it is saved).
