Define a dictionary containing prompts for different types of queries

In [None]:
# User-prompt templates keyed by prompt id. Each template is rendered with
# str.format, so "{question}" is the placeholder callers must supply.
PROMPTS = {
    "math": """Please answer the following mathematics question. If you don't know the answer, respond "I don't know." \n Question: {question}"""
}

Define system prompts associated with different prompt types

In [None]:
# System messages keyed by prompt id; the keys mirror those used in PROMPTS.
SYSTEM_PROMPTS = {
    "math": "You are a helpful assistant who solves math problems for users."
}

Define a function that generates the messages for the AI chat system

In [None]:
def generate_messages(prompt_id, system_prompt_id=None, prompt_variables=None):
    """Build the system/user message pair for a chat-completion request.

    Args:
        prompt_id: Key into ``PROMPTS`` selecting the user-prompt template.
        system_prompt_id: Key into ``SYSTEM_PROMPTS``. When ``None``, the
            system prompt stored under ``prompt_id`` is used instead.
        prompt_variables: Mapping substituted into the template with
            ``str.format``. ``None`` is treated as an empty mapping.

    Returns:
        A two-item list: the system message followed by the user message,
        each a dict with ``role`` and ``content`` keys.

    Raises:
        KeyError: If an id is unknown, or the template needs a variable
            that ``prompt_variables`` does not provide.
    """
    # A None default avoids one mutable dict being shared across calls and
    # lets callers pass None explicitly without ``format(**None)`` failing.
    variables = {} if prompt_variables is None else prompt_variables
    user_prompt = PROMPTS[prompt_id].format(**variables)

    system_key = prompt_id if system_prompt_id is None else system_prompt_id
    system_prompt = SYSTEM_PROMPTS[system_key]

    # Return system and user messages in a list format
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

Next, write a function to interact with the GPT-4 API.

In [None]:
from openai import OpenAI
import comet_llm
# Module-level clients used by the chat helper below. Both key strings are
# placeholder literals, not real credentials.
openai_client = OpenAI(api_key="YOUR-API-KEY")
comet_llm.init(api_key="YOUR-COMET-API-KEY")
def get_completion(
    prompt_id,
    system_prompt_id=None,
    prompt_variables=None,
    model="gpt-4-1106-preview",
    temperature=0,
    max_tokens=2000,
):
    """Run a chat completion for a registered prompt and log it to Comet.

    Args:
        prompt_id: Key into ``PROMPTS`` selecting the user-prompt template.
        system_prompt_id: Optional key into ``SYSTEM_PROMPTS``; passed
            through to ``generate_messages``.
        prompt_variables: Mapping substituted into the prompt template.
            ``None`` is treated as "no variables".
        model: Model name sent to the chat-completions endpoint.
        temperature: Sampling temperature for the request.
        max_tokens: Upper bound on generated tokens.

    Returns:
        The text content of the first choice in the response.
    """
    # Normalise here so the call works even when no variables are given.
    variables = {} if prompt_variables is None else prompt_variables

    # Generate messages using the provided inputs
    messages = generate_messages(prompt_id, system_prompt_id, variables)

    # Call the OpenAI chat-completions endpoint with the generated messages.
    response = openai_client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    completion = response.choices[0].message.content

    # Log the prompt, completion, and related metadata.
    # messages[1] is the user message built by generate_messages.
    comet_llm.log_prompt(
        prompt=messages[1]["content"],
        prompt_template=PROMPTS[prompt_id],
        prompt_template_variables=prompt_variables,
        metadata={
            "usage.prompt_tokens": response.usage.prompt_tokens,
            "usage.completion_tokens": response.usage.completion_tokens,
            "usage.total_tokens": response.usage.total_tokens,
            "system_fingerprint": response.system_fingerprint,
        },
        output=completion,
    )

    return completion

In [None]:
# Variables for the "math" prompt; the key matches its {question} placeholder.
question = {
 "question": "What three-digit palindromes are also perfect squares?"
}
get_completion(prompt_id="math", prompt_variables=question)

# In-Context Learning

In [None]:
import os
from openai import OpenAI
from abc import ABC, abstractmethod

# Shared OpenAI client used by get_completion below; the key is a placeholder literal.
client = OpenAI(api_key="YOUR-API-KEY")

def get_completion(
    prompt,
    model="gpt-3.5-turbo-instruct",
    temperature=0,
    max_tokens=2000,
    return_full=False,
    **kwargs
):
    """Request a text completion for ``prompt`` through the module-level client.

    Args:
        prompt: Prompt text sent to the completions endpoint.
        model: Model name for the request.
        temperature: Sampling temperature.
        max_tokens: Upper bound on generated tokens.
        return_full: When true, return the whole response object rather
            than only the generated text.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``client.completions.create``.

    Returns:
        The response object if ``return_full`` is true, otherwise the text
        of the first choice.
    """
    completion = client.completions.create(
        prompt=prompt,
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
        **kwargs
    )
    return completion if return_full else completion.choices[0].text

In [None]:
class Node(ABC):
    """Abstract base for one processing step; subclasses implement ``forward``."""

    @abstractmethod
    def forward(self):
        """Execute this step. The base implementation does nothing."""

class Pipeline(ABC):
    """Abstract base for a runnable pipeline; subclasses implement ``run``."""

    @abstractmethod
    def run(self):
        """Execute the pipeline. The base implementation does nothing."""

In [None]:
class PromptNode(Node):
    """Node that renders a prompt template and sends it to a generate function."""

    def __init__(self, prompt_template, generate=get_completion):
        """Store the template and the callable used to produce completions."""
        self.prompt = None  # most recently rendered prompt; set by forward()
        self.generate = generate
        self.prompt_template = prompt_template

    def forward(self, **kwargs):
        """Fill the template with ``kwargs`` and return the generator's output."""
        rendered = self.prompt_template.format(**kwargs)
        self.prompt = rendered
        return self.generate(rendered)

In [None]:
# Zero-shot translation template: no worked examples, only the {prompt} to translate.
translate_tigrinya = """Translate the following into Tigrinya:
{prompt} => """

In [None]:
class TigrinyaTranslatePipeline(Pipeline):
    """Single-node pipeline built on the ``translate_tigrinya`` template."""

    def __init__(self):
        """Create the one prompt node this pipeline runs."""
        self.p1 = PromptNode(translate_tigrinya)

    def run(self, **kwargs):
        """Pass ``kwargs`` to the prompt node and return its completion."""
        translation = self.p1.forward(**kwargs)
        return translation

In [None]:
# Baseline run: translate using the template that contains no examples.
no_icl = TigrinyaTranslatePipeline()
no_icl.run(prompt="It was the best of times, it was the worst of times.")

In [None]:
# Few-shot variant of the translation template: three worked
# "English => Tigrinya" pairs precede the final {prompt} slot.
translate_tigrinya_icl = """Translate the following into Tigrinya:
It was the age of wisdom, it was the age of foolishness. => ዘመነ ጥበብ እዩ ነይሩ፣ ዘመን ዕሽነት እዩ ነይሩ።

Translate the following into Tigrinya:
To be, or not to be, that is the question. => ምዃንን ዘይምህላውን ንሱ እዩ እቲ ሕቶ።

Translate the following into Tigrinya:
What happiness was ours that day, what joy, what rest, what hope, what gratitude, what bliss! => ኣብታ መዓልቲ እቲኣ ከመይ ዝበለ ሓጐስ እዩ ነይሩ፣ ከመይ ዝበለ ሓጐስ፣ ከመይ ዝበለ ዕረፍቲ፣ ከመይ ዝበለ ተስፋ፣ ከመይ ዝበለ ምስጋና፣ ከመይ ዝበለ ዕግበት!

Translate the following into Tigrinya:
{prompt} => """

class TigrinyaTranslatePipeline(Pipeline):
    """Translation pipeline that can use either the plain or the few-shot template."""

    def __init__(self, icl=None):
        """Pick the template: the few-shot one only when ``icl`` equals ``'icl'``."""
        template = translate_tigrinya_icl if icl == 'icl' else translate_tigrinya
        self.p1 = PromptNode(prompt_template=template)

    def run(self, **kwargs):
        """Pass ``kwargs`` to the prompt node and return its completion."""
        translation = self.p1.forward(**kwargs)
        return translation

# Same sentence as the baseline run, this time with the few-shot template selected.
icl = TigrinyaTranslatePipeline(icl="icl")
icl.run(prompt="It was the best of times, it was the worst of times.")

## Intermediary Computation

In [None]:
# Prompt template for equation solving; {prompt} receives the equation text.
# The template ends right after "RESPONSE:" so extra text can be appended to it.
math_template = """INSTRUCTION:
Solve the following equation: {prompt}

RESPONSE:
"""

class EquationPipeline(Pipeline):
    """Single-node pipeline that asks the model to solve an equation.

    With ``with_zero_shot_cot`` enabled, the phrase
    "Let's think step by step. " is appended to ``math_template`` so the
    completion starts from that cue.
    """

    def __init__(self, with_zero_shot_cot=False):
        """Build the prompt node.

        Args:
            with_zero_shot_cot: When truthy, append the step-by-step cue to
                the template.
        """
        template = math_template
        if with_zero_shot_cot:
            template += "Let's think step by step. "
        self.p1 = PromptNode(prompt_template=template)

    def run(self, **kwargs):
        """Render the template with ``kwargs`` and return the completion."""
        return self.p1.forward(**kwargs)
        
# Compare the plain prompt against the zero-shot chain-of-thought prompt
# on the same equation.
equation = "6^8 * 2 / 3 + 7 - 1 ="
raw_pipeline = EquationPipeline()
cot_pipeline = EquationPipeline(with_zero_shot_cot=True)
print("Without Chain of Thought Prompting")
print(raw_pipeline.run(prompt=equation))
print("With Chain of Thought Prompting")
print(cot_pipeline.run(prompt=equation))

### 4.1 YouTube Retriever

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_search import YoutubeSearch

class YouTubeRetriever(Node):
    """Node that turns a question into summarised YouTube transcripts.

    ``forward`` asks the model for a search query, searches YouTube, and for
    each hit fetches, truncates and summarises the transcript, returning all
    summaries concatenated into one context string.
    """

    def __init__(self, generate=get_completion):
        """Store the callable used for every model call made by this node."""
        self.generate = generate

    def _fetch_transcripts(self, query):
        """Search YouTube for ``query`` and return each hit's URL suffix.

        Only the part of ``url_suffix`` before the first ``&`` is kept.
        Despite its name, this helper returns URL suffixes, not transcripts.
        """
        results = YoutubeSearch(query, max_results=10).to_dict()
        return [x['url_suffix'].split('&')[0] for x in results]

    def _parse_transcript(self, transcript, video_id):
        """Join the ``text`` fields of one video's transcript entries.

        Args:
            transcript: Value returned by ``get_transcripts``; its first
                item is indexed by ``video_id`` to get the entry list.
            video_id: Id of the video whose entries should be joined.

        Returns:
            All entry texts, each followed by a single space.
        """
        entries = transcript[0][video_id]
        return "".join(f"{obj['text']} " for obj in entries)

    def _summarize_transcript(self, transcript):
        """Ask the model to condense ``transcript`` and return the stripped summary."""
        summary = self.generate(prompt=f"""INSTRUCTION: \nBelow is a transcript generated from a YouTube video. Condense and summarize it.\n\n"{transcript}"\nRESPONSE:\n""").strip()
        return summary

    def forward(self, question):
        """Build a context string of video summaries relevant to ``question``.

        Videos whose transcripts are disabled still contribute a snippet
        with title and channel, but with an empty summary.

        Returns:
            The concatenation of one "title by channel / summary" snippet
            per search result.
        """
        # Imported locally: the except clause below needs this name and the
        # file-level import only brings in YouTubeTranscriptApi.
        from youtube_transcript_api import TranscriptsDisabled

        # NOTE(review): youtube_query_template is not defined in the visible
        # part of this file; it must exist at module level and contain a
        # {prompt} placeholder — confirm.
        # Generate search term + strip leading/trailing newlines and quotation marks
        youtube_query = self.generate(prompt=youtube_query_template.format(prompt=question)).strip().strip("\"")
        results = YoutubeSearch(youtube_query, max_results=10).to_dict()

        snippets = []
        for x in results:
            video_id = x['id']
            transcript = ""
            try:
                raw = YouTubeTranscriptApi.get_transcripts(video_ids=[video_id])
                # Cap the text sent for summarisation at 2000 characters.
                truncated = self._parse_transcript(raw, video_id)[:2000]
                transcript = self._summarize_transcript(truncated)
            except TranscriptsDisabled:
                print(f"Transcripts disabled for {x['title']}")

            snippets.append(f"{x['title']} by {x['channel']}\n\n{transcript}\n\n")
        return "".join(snippets)

In [None]:
class QAWithYoutubePipeline(Pipeline):
    """Answer a question using summaries produced by ``YouTubeRetriever``."""

    def __init__(self):
        """Create the retriever and the question-answering prompt node."""
        qa_template = """#INSTRUCTION:
Below, you have summaries from several YouTube videos:

{context}Use the above summaries to answer this question: {question}
#RESPONSE:
"""
        self.context = ""
        self.retriever = YouTubeRetriever()
        self.qa = PromptNode(prompt_template=qa_template)

    def run(self, question):
        """Retrieve summaries for ``question``, keep them on ``self.context``, and answer."""
        summaries = self.retriever.forward(question=question)
        self.context = summaries
        return self.qa.forward(context=summaries, question=question)

In [None]:
# Baseline: ask the model directly, without the pipeline (i.e. no additional context given)
get_completion(prompt="What is Mixtral 8x-7b?")

In [None]:
# Same question, this time answered through the YouTube-backed pipeline
pipe = QAWithYoutubePipeline()
pipe.run("What is Mixtral 8x-7b?")

# Agentic Techniques

In [None]:
from deep_translator import GoogleTranslator

def translate(text, lang):
    """Translate ``text`` into ``lang`` with GoogleTranslator, auto-detecting the source."""
    translator = GoogleTranslator(source='auto', target=lang)
    return translator.translate(text, dest=lang)

In [None]:
class TranslateNode(Node):
    """Node that extracts a span of text with the model and swaps in its translation."""

    def __init__(self, generate_fn=get_completion):
        """Store the extraction prompt and the callable used to query the model."""
        self.preprocessing = """#INSTRUCTION:
        
From the following text, extract the sequences that are written in {lang}:

"{text}"

#RESPONSE:
"""
        self.generate = generate_fn

    def forward(self, text, generate=get_completion):
        """Return ``text`` with the extracted span replaced by its translation.

        Args:
            text: Input text to process.
            generate: Unused; kept so existing callers that pass it keep
                working. The callable given to ``__init__`` is what runs.

        Returns:
            The rewritten text, or ``text`` unchanged when the model
            extracts nothing or the translator returns nothing.
        """
        raw = self.generate(self.preprocessing.format(text=text, lang="en"))
        # Remove surrounding whitespace and wrapping quotation marks. A blind
        # [1:-1] slice would eat real characters whenever the model omits the
        # quotes or adds a leading space/newline.
        extracted_text = raw.strip().strip('"')
        if not extracted_text:
            # str.replace("", x) would insert x between every character.
            return text

        translated_text = translate(extracted_text, "en")
        if not translated_text:
            return text
        return text.replace(extracted_text, translated_text)

In [None]:
# Registry of tools the agent can pick from, keyed by tool name. Per entry:
#   description - text shown to the model when it decides which tool to use
#   node        - class instantiated (with no arguments) to run the tool
#   transform_q - when True, the tool's output replaces the agent's question
tools = {
    "translate": {
        "description": """translate(text, lang) -> This function takes input text and translates it to the "lang" language.""",
        "node": TranslateNode,
        "transform_q": True
    },
    "YouTubeResearch": {
        "description": """YouTubeResearch(question) -> This function takes a question and uses YouTube to generate research around the question topic. Before using, you should translate any non-English questions into English.""",
        "node": QAWithYoutubePipeline,
        "transform_q": False
    }
}

# Prompt asking whether any tool is needed; {tools} receives the tool
# descriptions and {question} the current question. Expects "yes" or "no".
tools_context = """#INSTRUCTION: You are a helpful assistant who is capable of running Python functions. You answer questions, but you only respond in English. You have the following functions available to you as tools:
{tools}
Do you need a tool to answer the following question in English?
"{question}"
Respond "yes" or "no"
#RESPONSE: """

# Follow-up prompt asking which tool to use; {tool_names} lists the options.
which_tool = """#INSTRUCTION: Which tool do you need? You can respond with {tool_names}
#RESPONSE: """

# Final prompt requesting the answer to {question}.
final_q = """#INSTRUCTION: Write a response that accurately answers the following question in English:
"{question}"
#RESPONSE: """

# Experiment Management for LLM Inference

In [None]:
import copy
class QAAgent(Pipeline):
    """Question-answering agent that lets the model pick tools before answering.

    On each step the model is asked whether it needs a tool and, if so,
    which one. The chosen tool runs on the current question, is removed
    from the available set, and the loop repeats until the model declines
    or no tools remain. A final prompt then produces the answer.
    """

    def __init__(self, tools=tools, generate_fn=get_completion):
        """Set up the agent.

        Args:
            tools: Tool registry; each entry provides ``description``,
                ``node`` and ``transform_q``.
            generate_fn: Callable used for every model call.
        """
        self.generate = generate_fn
        self.translate = TranslateNode()
        self.youtube = QAWithYoutubePipeline()
        self.context = ""
        self.tools = tools
        # Working copy: entries are deleted as tools get used during a run.
        self.available_tools = copy.deepcopy(tools)

    def _intermediary_step(self):
        """Ask the model whether, and which, tool to use next.

        Resets ``self.context`` and refills it with the prompts and model
        replies of this step.

        Returns:
            The name of the selected tool, or ``None`` when the model does
            not ask for one or names none of the available tools.
        """
        selected_tool = None
        self.context = ""  # Clear context

        # Describe every still-available tool before asking the model once.
        formatted_tools = "".join(
            f"{spec['description']}\n" for spec in self.available_tools.values()
        )
        need_tool_input = tools_context.format(tools=formatted_tools, question=self.question)
        need_tool = self.generate(need_tool_input)
        self.context += need_tool_input + "\n\n" + need_tool + "\n\n"

        if "yes" in need_tool.lower():
            tool_names = " or ".join(self.available_tools)
            which_tool_input = which_tool.format(tool_names=tool_names)
            next_tool = self.generate(self.context + which_tool_input)
            self.context += which_tool_input + "\n\n" + next_tool + "\n\n"
            # The first available tool whose name appears in the reply wins.
            for name in self.available_tools:
                if name in next_tool:
                    selected_tool = name
                    break
        return selected_tool

    def run(self, question):
        """Answer ``question``, running tools first when the model asks for them.

        Args:
            question: The user's question.

        Returns:
            The model's final answer.
        """
        self.question = question
        self.context = ""

        try:
            while self.available_tools:
                selected_tool = self._intermediary_step()
                if selected_tool is None:
                    break

                nxt = self.tools[selected_tool]['node']()
                # Nodes expose forward(); pipelines expose run().
                if hasattr(nxt, 'forward'):
                    output = nxt.forward(self.question)
                else:
                    output = nxt.run(self.question)

                if self.tools[selected_tool]['transform_q']:
                    self.question = output

                # NOTE(review): the next _intermediary_step() resets
                # self.context, so this output only reaches the final prompt
                # when no further step runs — confirm this is intended.
                self.context += output
                del self.available_tools[selected_tool]

            self.context += final_q.format(question=self.question)
            return self.generate(self.context)
        finally:
            # Restore the full tool set so the agent can be reused, even
            # after a failed run.
            self.available_tools = copy.deepcopy(self.tools)

In [None]:
# Ask a Spanish-language question to exercise the agent's tool selection.
agent = QAAgent()
agent.run("¿Qué es este proyecto LLMLingua del que todo el mundo habla?")

In [None]:
# Code-generation prompt without tests: {name} and {description} describe
# the function the model should write.
code_gen_template = """#INSTRUCTION:
Write a Python function named {name} that {description}. Make sure to include all necessary imports.

#RESPONSE
"""

# Same prompt, but it also shows the model the unit tests ({tests}) that the
# generated function will be evaluated with.
code_gen_template_w_tests = """#INSTRUCTION:
Write a Python function named {name} that {description}. Make sure to include all necessary imports.

The function {name} will be evaluated with the following unit tests:
{tests}

#RESPONSE
"""

In [None]:
class TestGenerateImage(unittest.TestCase):
    """Checks that ``generate_image`` honours the requested dimensions."""

    def test_valid_input(self):
        """A '200x300' request must yield an image whose size is (200, 300)."""
        expected_size = (200, 300)
        dimensions = "{}x{}".format(*expected_size)
        image = generate_image(dimensions)
        self.assertEqual(image.size, expected_size)

In [None]:
# Code-generation tasks. Every entry is passed as format() kwargs to the
# prompt templates, so each one needs the same keys:
#   name        - signature shown to the model
#   description - what the function should do
#   tests       - unit-test source substituted for {tests} in the template
#   tests_class - unittest.TestCase class used to evaluate the generated code
TESTS = [
    {
        "name": "generate_image(dimensions)",
        "description": "takes a string containing the dimensions of an image, like '200x300', and generates an image of those dimensions using 3 random colors, before finally returning the image object.",
        "tests": image_tests,
        "tests_class": TestGenerateImage
    },
    {
        "name": "evaluate_expression(expression)",
        "description": "takes a string containing a mathematical equation, parses the equation, and returns its evaluated result.",
        "tests": math_tests,
        "tests_class": TestEvaluateExpression
    },
    {
        "name": "merge_k_lists(lists)",
        "description": "takes an array of k linkedlists lists, each sorted in ascending order, and merges all the linked-lists into one sorted linkedlist, returning the final sorted linked-list.",
        # Key must be "tests" (not "test") to match the {tests} placeholder
        # and the metadata lookup in the pipeline.
        "tests": merge_k_tests,
        "tests_class": TestMergeKLists
    }
]

In [None]:
class PromptWithMKwargsNode(Node):
    """Prompt node that forwards model keyword arguments and returns the full response."""

    def __init__(self, prompt_template, generate=get_completion):
        """Store the template and the callable used to query the model."""
        self.prompt_template = prompt_template
        self.generate = generate
        self.prompt = None         # last rendered prompt
        self.prompt_kwargs = None  # kwargs used for the last render

    def forward(self, model_kwargs=None, prompt_kwargs=None):
        """Render the prompt and return the model's full response.

        Args:
            model_kwargs: Optional keyword arguments forwarded to the
                generate callable (for example ``temperature``).
            prompt_kwargs: Optional mapping substituted into the template.
                When ``None`` the template is sent as-is.

        Returns:
            Whatever the generate callable returns when called with
            ``return_full=True``.
        """
        self.prompt_kwargs = prompt_kwargs
        if prompt_kwargs is not None:
            self.prompt = self.prompt_template.format(**prompt_kwargs)
        else:
            self.prompt = self.prompt_template

        # Generation must happen on both branches above; nested under the
        # else, the node returned None whenever prompt_kwargs were supplied.
        return self.generate(self.prompt, return_full=True, **(model_kwargs or {}))


class ExecNode(Node):
    """Node that compiles and executes a code string, recording whether it succeeded."""

    def __init__(self):
        self.success = True
        self.message = None  # exception raised by the last failing call, if any

    def forward(self, code):
        """Execute ``code`` and report success.

        Args:
            code: Python source to run.

        Returns:
            ``True`` if the code compiled and ran without raising,
            ``False`` otherwise; the exception is kept on ``self.message``.
        """
        print(code)
        # Reset per call so an earlier failure does not stick to later runs.
        self.success = True
        self.message = None
        try:
            # compile() sits inside the try so syntax errors are reported the
            # same way as runtime errors.
            compiled = compile(code, 'test', 'exec')
            # SECURITY: exec() runs arbitrary code with this process's
            # privileges; only pass code you are prepared to execute.
            exec(compiled)
        except Exception as e:
            self.success = False
            self.message = e
        return self.success

class EvaluateNode(Node):
    """Node that executes generated code and then runs a unittest case against it."""

    def __init__(self, test_case):
        """Store the ``unittest.TestCase`` class used for evaluation."""
        self.test_case = test_case
        self.success = False
        self.message = None  # exception from compiling/executing the code
        self.results = None  # result object from the last test run

    def forward(self, code):
        """Execute ``code`` and run the configured test case.

        Args:
            code: Python source expected to define what the tests call.

        Returns:
            ``True`` only when the code executed and every test passed.
        """
        # Clear leftovers from a previous call so the attributes always
        # describe this evaluation.
        self.success = False
        self.message = None
        self.results = None

        try:
            compiled = compile(code, 'test', 'exec')
            # SECURITY: exec() runs arbitrary code with this process's
            # privileges. Names are written into the module globals so the
            # test case can call what the code defines.
            exec(compiled, None, globals())
        except Exception as e:
            self.message = e
            return False

        test_suite = unittest.defaultTestLoader.loadTestsFromTestCase(self.test_case)
        self.results = unittest.TextTestRunner().run(test_suite)
        self.success = self.results.wasSuccessful()
        return self.success


class CodeGenPipeline(Pipeline):
    """Generate code from a prompt, evaluate it with unit tests, and log the run to Comet."""

    def __init__(self, prompt_template, test_case, workspace="ckaiser", project_name="llmops-test"):
        """Set up the prompt and evaluation nodes.

        Args:
            prompt_template: Template used to request the code.
            test_case: ``unittest.TestCase`` class that evaluates the code.
            workspace: Comet workspace the experiment is logged to.
            project_name: Comet project the experiment is logged to.
        """
        self.p1 = PromptWithMKwargsNode(prompt_template=prompt_template)
        self.eval = EvaluateNode(test_case=test_case)
        self.workspace = workspace
        self.project_name = project_name
        self.code = None
        self.model_output = None
        self.success = False

    def run(self, model_kwargs=None, prompt_kwargs=None):
        """Run one generate-and-evaluate cycle and log it as a Comet experiment.

        Args:
            model_kwargs: Optional keyword arguments for the model call;
                also logged as experiment parameters.
            prompt_kwargs: Optional mapping used to fill the template; its
                ``name``, ``description`` and ``tests`` entries are logged
                when present.

        Returns:
            ``True`` when the generated code passed the test case.
        """
        # Normalise the optional arguments so the unpacking and lookups
        # below cannot fail on None.
        model_kwargs = {} if model_kwargs is None else model_kwargs
        task = prompt_kwargs or {}

        # Initialize the Comet experiment.
        # NOTE(review): comet_ml is not imported in the visible part of this
        # file — confirm it is imported elsewhere.
        experiment = comet_ml.Experiment(workspace=self.workspace, project_name=self.project_name)
        try:
            experiment.add_tag("code-gen")

            # Run pipeline
            self.model_output = self.p1.forward(model_kwargs=model_kwargs, prompt_kwargs=prompt_kwargs)
            self.code = self.model_output.choices[0].text
            self.success = self.eval.forward(self.code)

            # Log metrics, parameters, and extra data to Comet
            usage = self.model_output.usage
            metrics = {
                "success": self.success,
                "token_usage": usage.total_tokens,
            }
            params = {
                "with_tests": self.p1.prompt_template == code_gen_template_w_tests,
                **model_kwargs,
            }
            metadata = {
                "name": task.get("name"),
                "description": task.get("description"),
                "tests": task.get("tests"),
                "prompt": self.p1.prompt,
                "prompt_template": self.p1.prompt_template,
                "usage.prompt_tokens": usage.prompt_tokens,
                "usage.completion_tokens": usage.completion_tokens,
                "usage.total_tokens": usage.total_tokens,
            }

            experiment.log_metrics(metrics)
            experiment.log_parameters(params)
            experiment.log_others(metadata)
        finally:
            # Close the experiment even on failure; run() is called many
            # times in a sweep and each call opens a new experiment.
            experiment.end()
        return self.success

In [None]:
# Sweep every task over both prompt templates and four temperatures,
# logging one experiment per combination.
for test in TESTS:
    for template in [code_gen_template, code_gen_template_w_tests]:
        for temperature in [0.0, 0.5, 1.0, 1.5]:
            model_kwargs = {"temperature": temperature}
            pipeline = CodeGenPipeline(prompt_template=template, test_case=test['tests_class'])
            success = pipeline.run(model_kwargs=model_kwargs, prompt_kwargs=test)