In [50]:
import os

from groq import Groq

# Read the API key from the environment so the secret never ends up in the
# notebook source or in version control. Indexing os.environ (rather than
# .get) raises KeyError naming the missing variable if it is not set.
client = Groq(
    api_key=os.environ["GROQ_API_KEY"],
)

# Tag name -> plain-English description of when the tag should be true.
labels = dict(
    anger="This is true if the user displays a clear anger emotion",
    data="This indicates if the abstract introduces a new dataset",
    synthetic="This indicates if the paper is working with synthetic data",
)

# One "- name: description" bullet per tag, newline-separated, ready to be
# embedded in a prompt.
_bullets = (f"- {tag}: {meaning}" for tag, meaning in labels.items())
label_descr = "\n".join(_bullets)
del _bullets  # keep the notebook namespace limited to the two original names

# Send a single user message containing the label descriptions and the text
# to tag, then print the raw text of the first returned choice.
chat_completion = client.chat.completions.create(
    model="mixtral-8x7b-32768",
    messages=[
        dict(
            role="user",
            content=(
                f"Here are my label descriptions {label_descr}. Respond in json. "
                "Generate the boolean tags for this text: 'Our paper introduces a new dataset'"
            ),
        )
    ],
)

print(chat_completion.choices[0].message.content)

{
"anger": false,
"data": true,
"synthetic": false
}


In [85]:
from hashlib import md5

In [103]:
import json

class GroqTagger:
    """Assign boolean tags to a text by prompting a chat-completion model.

    Each keyword argument collected in ``tags`` maps a tag name to a
    natural-language description of when that tag applies. Calling the
    instance with a text sends one chat-completion request through the
    stored client and returns the reply parsed as JSON.
    """

    def __init__(self, client, model="mixtral-8x7b-32768", **tags):
        """Store the client, the model name and the tag descriptions.

        Args:
            client: Object exposing ``chat.completions.create(messages=..., model=...)``.
            model: Model identifier forwarded with every request.
            **tags: Tag name -> description of the tag.

        TODO:
        - add some sort of sqlite cache
        - name of each table/file should be {model}-{prompt_hash}
        - each key should be the text that goes in
        """
        self.client = client
        self.model = model
        self.tags = tags

    def _hash(self, thing):
        """Return the hex MD5 digest of a string or of a dict's items.

        A dict is serialised as the concatenation of ``"{key}:{value}"`` for
        each item in insertion order, so the same tags passed in a different
        order produce a different digest.

        Raises:
            TypeError: If ``thing`` is neither a ``str`` nor a ``dict``.
        """
        if isinstance(thing, str):
            string = thing
        elif isinstance(thing, dict):
            # Iterate the argument itself, not the builtin ``dict`` type.
            string = "".join(f"{k}:{v}" for k, v in thing.items())
        else:
            raise TypeError(
                f"_hash expects a str or dict, got {type(thing).__name__}"
            )
        return md5(string.encode("utf-8")).hexdigest()

    @property
    def cache_name(self):
        """Identifier of the form ``{model}-{hash of the tag descriptions}``."""
        return f"{self.model}-{self._hash(self.tags)}"

    def __call__(self, text: str):
        """Tag ``text`` and return the model's reply parsed as JSON.

        Args:
            text: The text to generate tags for.

        Returns:
            Whatever ``json.loads`` produces from the first choice's message
            content once newlines have been stripped.

        Raises:
            json.JSONDecodeError: If the reply is not valid JSON.
        """
        # Use the client given to this instance rather than whatever global
        # named ``client`` happens to exist in the notebook.
        chat_completion = self.client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": self.prompt(text),
                }
            ],
            model=self.model,
        )
        reply = chat_completion.choices[0].message.content
        return json.loads(reply.replace("\n", ""))

    def prompt(self, text: str):
        """Build the full prompt: instructions, tag bullet list, quoted text."""
        label_descr = "\n".join(f"- {name}: {desc}" for name, desc in self.tags.items())
        foreword = f"You are an expert tagging system tasked with assinging tags to text. Here are my label descriptions:\n{label_descr}\nRespond in json. Generate the boolean tags for this text:\n"
        return f"{foreword}'{text}'"

# Build a tagger with the default model; each entry of `labels` is passed as
# a keyword argument and becomes one tag name -> description pair.
tagger = GroqTagger(client, **labels)

In [104]:
# Render the prompt for an empty text to inspect the template on its own.
print(tagger.prompt(""))

You are an expert tagging system tasked with assinging tags to text. Here are my label descriptions:
- anger: This is true if the user displays a clear anger emotion
- data: This indicates if the abstract introduces a new dataset
- synthetic: This indicates if the paper is working with synthetic data
Respond in json. Generate the boolean tags for this text:
''


In [105]:
# Run the tagger on a sample sentence; the cell displays the parsed JSON reply.
tagger("this paper will talk about a new model trained a new synthetic dataset")

{'anger': False, 'data': True, 'synthetic': True}