# API endpoint prompt format testing
Experimenting with different prompt formats, mostly for Mistral models. By using Together via the `/inference` endpoint, we can bypass their prompt formatting for instruct/chat models and have more control.

This means we can still control and instruct these models, but we can also partially pre-fill their responses to guide the output.

Alternatively, just use the base models and ignore the instruction tuning completely. There are still a lot of non-instruct-style things that can be done with the instruction-tuned models, but their logit distributions are altered from the base, and it's not yet clear in what ways this is better or worse for extracting the best reasoning and knowledge from them if you don't need to have structured conversations. This is still a research question.

In [12]:
import json
import os
import sys
from typing import List, Dict, Any

In [13]:
# Add the current working directory (not the parent) to the import path so
# the local module imported further down can be found. The membership check
# keeps re-running this cell from appending duplicate entries.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

In [14]:
# Presumably loads variables from a local .env file into the process
# environment, so that the API key read later via os.getenv is available.
from dotenv import load_dotenv
load_dotenv()

True

In [15]:
from llm_utils.endpoint_utils import rest_api_request

In [42]:
def create_json_data_together_base(
    model: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    repetition_penalty: float = 1.0,
    top_p: float = 0.7,
    **kwargs,
) -> Dict[str, Any]:
    """Assemble the JSON body shared by the request builders in this file.

    The model name and sampling settings come first, in a fixed order. Any
    extra keyword arguments (for example ``prompt`` or ``messages``) are
    appended after them as additional top-level keys.

    Returns:
        A plain dict ready to be sent as the JSON payload of a request.
    """
    payload: Dict[str, Any] = dict(
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        top_p=top_p,
    )
    # Extra fields go last so the key order matches a literal with **kwargs.
    payload.update(kwargs)
    return payload
def create_json_data_together_chat(model: str, messages: List[Dict[str, Any]], max_tokens: int = 1024, temperature: float = 0.7, repetition_penalty: float = 1.0, top_p: float = 0.7, **kwargs) -> Dict[str, Any]:
    """Create the JSON body for a chat-style request.

    Args:
        model: Model identifier to send under the ``"model"`` key.
        messages: Conversation so far, forwarded unchanged under the
            ``"messages"`` key. Chat endpoints expect a list of message
            objects such as ``{"role": "user", "content": "..."}``, not bare
            strings, which is why the annotation is a list of dicts.
        max_tokens: Forwarded as ``"max_tokens"``.
        temperature: Forwarded as ``"temperature"``.
        repetition_penalty: Forwarded as ``"repetition_penalty"``.
        top_p: Forwarded as ``"top_p"``.
        **kwargs: Extra top-level fields appended after ``"messages"``.

    Returns:
        The payload dict built by ``create_json_data_together_base``.
    """
    return create_json_data_together_base(
        model,
        max_tokens,
        temperature,
        repetition_penalty,
        top_p,
        messages=messages,
        **kwargs,
    )

def create_json_data_together_inference(
    model: str,
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    repetition_penalty: float = 1.0,
    top_p: float = 0.7,
    **kwargs,
) -> Dict[str, Any]:
    """Build the JSON body for a raw-prompt (completion-style) request.

    The already-formatted ``prompt`` string is placed under the ``"prompt"``
    key, after the model and sampling settings and before any extra
    keyword arguments.
    """
    return create_json_data_together_base(
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        top_p=top_p,
        prompt=prompt,
        **kwargs,
    )


In [43]:
# Mistral instruct template: the user prompt goes between the [INST] tags.
# The single trailing space after [/INST] is part of the template value.
fmt_prompt_mistral_instruct = "[INST] {prompt} [/INST] "

In [46]:
# Example user question, then the same question wrapped in the instruct template.
prompt = "What's the Roland 808?"
prompt_str = fmt_prompt_mistral_instruct.format(prompt=prompt)

In [47]:
# Display the formatted prompt; the repr makes the trailing space visible.
prompt_str

"[INST] What's the Roland 808? [/INST] "

In [64]:
# Together's `/inference` endpoint, used here to send a hand-formatted prompt.
BASE_URL = "https://api.together.xyz/inference"
# Read from the environment; os.getenv returns None if the variable is unset.
API_KEY = os.getenv("TOGETHER_API_KEY")
# Instruction-tuned model; the commented-out line below is the base model alternative.
MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# MODEL = "mistralai/Mixtral-8x7B-v0.1"

In [65]:
# Preview the request payload without sending it.
create_json_data_together_inference(MODEL, prompt_str)

{'model': 'mistralai/Mixtral-8x7B-Instruct-v0.1',
 'max_tokens': 1024,
 'temperature': 0.7,
 'repetition_penalty': 1.0,
 'top_p': 0.7,
 'prompt': "[INST] What's the Roland 808? [/INST] "}

In [66]:
# Send the formatted prompt to the endpoint and keep the response object.
response = rest_api_request(
    BASE_URL,
    create_json_data_together_inference(MODEL, prompt_str),
    API_KEY,
)

In [67]:
# Raw-body alternative for debugging: response.content.decode("utf-8")
json_data = response.json()
# Pretty-print the full JSON response with keys sorted.
print(json.dumps(json_data, indent=4, sort_keys=True))
# Then print only the generated text of the first choice.
print('\n\n' + json_data['output']['choices'][0]['text'])

{
    "args": {
        "max_tokens": 1024,
        "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "prompt": "[INST] What's the Roland 808? [/INST] ",
        "repetition_penalty": 1,
        "temperature": 0.7,
        "top_p": 0.7
    },
    "id": "8403c28c4f847756-LHR",
    "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "model_owner": "",
    "num_returns": 1,
    "output": {
        "choices": [
            {
                "text": "\tThe Roland TR-808 Rhythm Composer, commonly known as the Roland 808, is a programmable drum machine manufactured by the Roland Corporation between 1980 and 1984. It was one of the first drum machines to allow users to program their own rhythms, and it quickly became a popular tool for musicians and producers in a variety of genres, including hip hop, electronic music, and pop.\n\nThe 808 features a simple step-sequencer interface and a range of built-in percussion sounds, including kick drum, snare drum, hi-hat, and various other elec