# Models used to generate responses:

* GPT 3.5 Turbo
* GPT 4 Turbo
* Starling 7B
* Qwen 72B
* Qwen 32B
* Command R 35B
* Command R+ 104B

In [None]:
! pip install -q openai datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.3/268.3 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from datasets import load_dataset

# Load the "train" split of the multilingual prompt sample; every later cell
# adds model responses to this dataset object.
prompts_dataset = load_dataset("lightblue/multilingual_prompts_100_sample", split="train")

Downloading readme:   0%|          | 0.00/3.04k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/19.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3055 [00:00<?, ? examples/s]

In [None]:
from openai import AzureOpenAI
from google.colab import userdata

# Azure OpenAI client used by get_openai_response. The API key and endpoint
# are looked up through google.colab.userdata rather than hard-coded here.
client = AzureOpenAI(
    api_key=userdata.get("AZURE_OPENAI_API_KEY"),
    api_version="2024-02-01",
    azure_endpoint=userdata.get("AZURE_OPENAI_ENDPOINT"),
)

def get_openai_response(input_text, model_name):
    """Request a single-turn chat completion through the Azure OpenAI client.

    ``input_text`` is sent as the only message (role ``"user"``) with
    ``temperature=0`` and ``max_tokens=2048``.

    Args:
        input_text: Prompt text used as the user message content.
        model_name: Value passed as ``model`` to the chat-completions call.

    Returns:
        A ``(output_text, finish_reason)`` tuple taken from the first choice
        of the response, or ``(None, None)`` if the request or the response
        parsing raised an ``Exception``.
    """
    import logging

    try:
        response = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": input_text}],
            temperature=0,
            max_tokens=2048,
        )
        first_choice = response.choices[0]
        return first_choice.message.content, first_choice.finish_reason
    except Exception as exc:
        # Best-effort by design: a failed request is reported as (None, None)
        # so the caller can store the gap and retry later. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate, and
        # the warning keeps failures from disappearing silently.
        logging.getLogger(__name__).warning(
            "Chat completion failed for model %s: %r", model_name, exc
        )
        return None, None

In [None]:
from openai import OpenAI

# OpenAI-compatible client aimed at a server on localhost:8000 (presumably the
# vLLM api_server launched further down in this notebook - confirm the port).
# "EMPTY" is a placeholder key, not a real credential.
vllm_client = OpenAI(
    api_key="EMPTY",
    base_url="http://localhost:8000/v1",
)

def get_vllm_response(prompt, model_name):
    """Request a single-turn chat completion through ``vllm_client``.

    ``prompt`` is sent as the only message (role ``"user"``) with
    ``temperature=0`` and ``max_tokens=2048``.

    Args:
        prompt: Prompt text used as the user message content.
        model_name: Value passed as ``model`` to the chat-completions call.

    Returns:
        A ``(output_text, finish_reason)`` tuple taken from the first choice
        of the response, or ``(None, None)`` if the request or the response
        parsing raised an ``Exception``.
    """
    import logging

    try:
        response = vllm_client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model_name,
            temperature=0,
            max_tokens=2048,
        )
        first_choice = response.choices[0]
        return first_choice.message.content, first_choice.finish_reason
    except Exception as exc:
        # Best-effort by design: a failed request is reported as (None, None)
        # so the caller can store the gap and retry later. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate, and
        # the warning keeps failures from disappearing silently.
        logging.getLogger(__name__).warning(
            "Chat completion failed for model %s: %r", model_name, exc
        )
        return None, None

In [None]:
import time
import random

def prompt_llm(row, model_name):
    """Ensure ``row["llm_responses"]`` holds a response for ``model_name``.

    The prompt is the content of the first message in ``row["conversation"]``.
    If a response with non-null content is already stored for ``model_name``
    it is reused and no request is made, so re-running only fills in the
    entries that are missing or previously failed.

    Args:
        row: Mapping with a ``"conversation"`` list of message dicts and,
            optionally, an ``"llm_responses"`` mapping of model name to
            ``{"content": ..., "finish_reason": ...}``.
        model_name: Model to query. Names containing ``"gpt"`` go through
            ``get_openai_response``; all others through ``get_vllm_response``.

    Returns:
        ``{"llm_responses": <updated mapping>}``. The mapping is the same
        object stored on ``row``.
    """
    input_prompt = row["conversation"][0]["content"]

    # Treat a missing column and a null column the same way: start empty.
    responses = row.get("llm_responses")
    if responses is None:
        responses = {}
        row["llm_responses"] = responses

    # An entry may exist but be null, or hold null content after a failed
    # request; only a real, non-null response short-circuits the call.
    cached = responses.get(model_name)
    if cached is not None and cached["content"] is not None:
        return {"llm_responses": responses}

    get_response_fn = get_openai_response if "gpt" in model_name else get_vllm_response
    output_text, finish_reason = get_response_fn(input_prompt, model_name)

    responses[model_name] = {
        "content": output_text,
        "finish_reason": finish_reason,
    }
    return {"llm_responses": responses}

In [None]:
# Add a "peter-gpt-35-turbo" entry to every row's llm_responses (num_proc=8).
# The name contains "gpt", so prompt_llm sends it through get_openai_response.
prompts_dataset = prompts_dataset.map(
    lambda x: prompt_llm(x, "peter-gpt-35-turbo"),
    num_proc=8)



Map (num_proc=8):   0%|          | 0/3055 [00:00<?, ? examples/s]

In [None]:
# Add a "gpt-4-0125-preview" entry to every row's llm_responses (num_proc=8).
# The name contains "gpt", so prompt_llm sends it through get_openai_response.
prompts_dataset = prompts_dataset.map(
    lambda x: prompt_llm(x, "gpt-4-0125-preview"),
    num_proc=8)

Map (num_proc=8):   0%|          | 0/3055 [00:00<?, ? examples/s]

In [None]:
# Display the first row to spot-check the dataset after the map calls.
prompts_dataset[0]

{'conversation_id': 'e5bde988646d470282f07a22242609be',
 'model': 'vicuna-13b',
 'conversation': [{'content': "Die volgende is veelvuldige keuse vrae oor Natuurwetenskap. Gee asseblief net die korrekte opsie, sonder enige ander besonderhede of verduidelikings.\n\n\n'n Veldkrag...\n(A) is altyd 'n aantrekkingskrag.\n(B) kom voor alleenlik tussen 'n magnetise en elektriese lading.\n(C) Iei tot 'n aksie op 'n afstand tussen twee liggame.\n(D) is die enigste tipe krag wat die spoed van 'n voorwerp kan verander.\nAntwoord:",
   'role': 'user'},
  {'content': "(D) is die enigste soort kracht wat die snelheid van 'n voorwerp kan verander.",
   'role': 'assistant'}],
 'turn': 1,
 'language': 'Afrikaans',
 'openai_moderation': [{'categories': {'harassment': False,
    'harassment/threatening': False,
    'hate': False,
    'hate/threatening': False,
    'self-harm': False,
    'self-harm/instructions': False,
    'self-harm/intent': False,
    'sexual': False,
    'sexual/minors': False,
    'v

In [None]:
# Upload the dataset with the responses added so far. This targets the same
# repository name the prompts were loaded from.
prompts_dataset.push_to_hub("lightblue/multilingual_prompts_100_sample")

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/4 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/3.04k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/lightblue/multilingual_prompts_100_sample/commit/47b1e2d88531137579213e626b6abd39422fd93b', commit_message='Upload dataset', commit_description='', oid='47b1e2d88531137579213e626b6abd39422fd93b', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
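# Uncomment the launch command for the model you want to serve.
# I manually killed the vllm process once the responses had been generated;
# there is almost certainly a better way to do this.
# NOTE: every command below redirects its log to Starling-LM-7B-beta.out,
# so change the file name if you want a separate log per model.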

! pip install -q vllm
! nohup python -m vllm.entrypoints.openai.api_server --model Nexusflow/Starling-LM-7B-beta  > Starling-LM-7B-beta.out 2>&1 &
# ! nohup python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen1.5-32B-Chat  > Starling-LM-7B-beta.out 2>&1 &
# ! nohup python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen1.5-72B-Chat  > Starling-LM-7B-beta.out 2>&1 &
# ! nohup python -m vllm.entrypoints.openai.api_server --model CohereForAI/c4ai-command-r-plus  > Starling-LM-7B-beta.out 2>&1 &
# ! nohup python -m vllm.entrypoints.openai.api_server --model CohereForAI/c4ai-command-r-v01  > Starling-LM-7B-beta.out 2>&1 &

In [None]:
# Again, uncomment as appropriate for the corresponding vllm command

# The model name has no "gpt" in it, so prompt_llm sends these requests
# through get_vllm_response. Rows that already hold a non-null response for
# this model are returned unchanged, so re-running only retries the gaps.
prompts_dataset = prompts_dataset.map(
    lambda x: prompt_llm(x, "Nexusflow/Starling-LM-7B-beta"),
    num_proc=256)

# prompts_dataset = prompts_dataset.map(
#     lambda x: prompt_llm(x, "Qwen/Qwen1.5-32B-Chat"),
#     num_proc=256)

# prompts_dataset = prompts_dataset.map(
#     lambda x: prompt_llm(x, "Qwen/Qwen1.5-72B-Chat"),
#     num_proc=256)

# prompts_dataset = prompts_dataset.map(
#     lambda x: prompt_llm(x, "CohereForAI/c4ai-command-r-plus"),
#     num_proc=256)

# prompts_dataset = prompts_dataset.map(
#     lambda x: prompt_llm(x, "CohereForAI/c4ai-command-r-v01"),
#     num_proc=256)

Map (num_proc=256):   0%|          | 0/3055 [00:00<?, ? examples/s]

In [None]:
# Upload again after adding the vLLM-served model's responses. This targets
# the same repository name the prompts were loaded from.
prompts_dataset.push_to_hub("lightblue/multilingual_prompts_100_sample")

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/4 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/lightblue/multilingual_prompts_100_sample/commit/edeb415180944275b490a4fe9ff800ed1548494c', commit_message='Upload dataset', commit_description='', oid='edeb415180944275b490a4fe9ff800ed1548494c', pr_url=None, pr_revision=None, pr_num=None)