In [1]:
import os
import getpass

# Fail fast if any required OpenAI setting is missing from the environment.
# Variables are checked in the original order, so the first missing one is
# the one reported.
for _required_var in (
    "OPENAI_API_TYPE",
    "OPENAI_API_VERSION",
    "OPENAI_API_BASE",
    "OPENAI_API_KEY",
):
    assert os.environ.get(_required_var) is not None, f"Please set your {_required_var} environment variable"

# The tags file must already exist on disk before the notebook can index it.
assert os.path.isfile('./repo/langchain/libs/langchain/tags'), "Please run `zsh download_example_rpo.sh` first"

# os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization
# os.environ['FAISS_NO_AVX2'] = '1'

In [2]:
from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.embeddings.base import Embeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader
from langchain.docstore.document import Document

In [3]:
def read_tags_file(file_path):
    """Parse a tab-separated tags file into a list of tag records.

    Lines that start with ``!`` (metadata) are skipped, as are lines with
    fewer than four tab-separated fields. For every remaining line the first
    three fields are kept verbatim.

    Args:
        file_path: Path of the tags file. It is opened in text mode with
            undecodable bytes ignored.

    Returns:
        A list of dicts with the keys ``tag_name``, ``file_name`` and
        ``pattern``, in the order the lines appear in the file.
    """
    records = []
    with open(file_path, 'r', errors='ignore') as handle:
        for raw_line in handle:
            # Metadata lines carry no tag information.
            if raw_line.startswith('!'):
                continue
            fields = raw_line.split('\t')
            # A usable entry has at least four fields; only the first three are read.
            if len(fields) < 4:
                continue
            name, path, pattern = fields[:3]
            records.append({"tag_name": name, "file_name": path, "pattern": pattern})
    return records

# Parse the tags file of the example repository.
tags = read_tags_file('./repo/langchain/libs/langchain/tags')

# One Document per tag: the embedded text is "<file_name> | <tag_name> " and
# the whole tag record is attached as metadata.
documents = [
    Document(page_content=f"{entry['file_name']} | {entry['tag_name']} ", metadata=entry)
    for entry in tags
]


In [4]:
# HuggingFaceEmbeddings can be used as the embedding model; it runs faster, which suits this POC.
# Author's note: its performance is similar to OpenAIEmbeddings.

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Alternative embedding models (uncomment one to replace the assignment above):
# embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl", model_kwargs={"device": "cpu"})
# embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", chunk_size = 1)

# https://openai.com/blog/introducing-text-and-code-embeddings
# embeddings = OpenAIEmbeddings(model="code-search-ada-code-001", chunk_size = 1)

In [5]:
# Load the persisted FAISS index when it is really on disk; otherwise build it
# from `documents` and persist it for the next run.
#
# The check looks for the index files themselves rather than only the folder:
# an empty or partially written ./db/faiss directory previously sent execution
# into load_local, which then failed instead of rebuilding the index.
#
# NOTE(review): the cached index is reused regardless of which `embeddings`
# model or tags file produced it. Delete ./db/faiss after changing either one.
_faiss_dir = "./db/faiss"
_index_name = "poc"
# save_local writes "<index_name>.faiss" and "<index_name>.pkl" into the folder.
_index_files = [os.path.join(_faiss_dir, f"{_index_name}{ext}") for ext in (".faiss", ".pkl")]

if all(os.path.isfile(index_file) for index_file in _index_files):
    db = FAISS.load_local(folder_path=_faiss_dir, embeddings=embeddings, index_name=_index_name)
else:
    db = FAISS.from_documents(documents, embeddings)
    db.save_local(folder_path=_faiss_dir, index_name=_index_name)

In [6]:
# Sample question used to try out retrieval: fetch the 5 most similar tag documents from the index.
query = "How can i add a Custom Prompt Template in this repository?"
docs = db.similarity_search(query, k=5)

In [7]:
# Bare expression: as the last expression of the cell it displays the retrieved documents.
docs

[Document(page_content='langchain/prompts/prompt.py | from_template ', metadata={'tag_name': 'from_template', 'file_name': 'langchain/prompts/prompt.py', 'pattern': '/^    def from_template($/;"'}),
 Document(page_content='langchain/prompts/prompt.py | PromptTemplate ', metadata={'tag_name': 'PromptTemplate', 'file_name': 'langchain/prompts/prompt.py', 'pattern': '/^class PromptTemplate(StringPromptTemplate):$/;"'}),
 Document(page_content='langchain/schema/prompt_template.py | _prompt_type ', metadata={'tag_name': '_prompt_type', 'file_name': 'langchain/schema/prompt_template.py', 'pattern': '/^    def _prompt_type(self) -> str:$/;"'}),
 Document(page_content='langchain/schema/prompt_template.py | BasePromptTemplate ', metadata={'tag_name': 'BasePromptTemplate', 'file_name': 'langchain/schema/prompt_template.py', 'pattern': '/^class BasePromptTemplate(RunnableSerializable[Dict, PromptValue], ABC):$/;"'}),
 Document(page_content='langchain/prompts/prompt.py | template_format ', metadat

In [8]:

import inspect
import importlib.util
import os

# Directory that the file paths recorded in the tags file are relative to.
root = "repo/langchain/libs/langchain"


def get_source_code(function_name, function_path):
    """Return the complete source text of the file ``function_path`` under ``root``.

    Despite the name, the whole file is returned, not a single definition.

    The file is read as text instead of being imported. Importing executed the
    target module's top-level code just to obtain its text, which ran
    arbitrary code from the repository and failed for files that use relative
    imports (they were loaded under a bare tag name, outside their package).

    Args:
        function_name: Tag name associated with the file. Kept so existing
            callers keep working; it is not needed to read the file.
        function_path: Path of the source file, joined onto ``root``.

    Returns:
        The file contents as a string.

    Raises:
        OSError: If the file cannot be opened.
        SyntaxError: If the file declares an invalid source encoding.
    """
    # Function-scope import keeps this cell self-contained.
    import tokenize

    # tokenize.open decodes the file the way Python itself would (BOM or
    # coding cookie, UTF-8 by default).
    with tokenize.open(os.path.join(root, function_path)) as source_file:
        return source_file.read()



In [9]:
# Prompt skeleton for the chat model. It has two str.format placeholders:
# {code_file_text} for the retrieved source code and {user_prompt} for the
# user's question.
# NOTE(review): the prompt text contains typos ("hupful", "fuilfill",
# "releative"). It is runtime text sent to the model, so it is left unchanged
# here; correct it deliberately if desired.
template = """You are a hupful bot that fuilfill the human' program task:

The following is releative code:
{code_file_text}

User: {user_prompt}
Ai:
"""

In [10]:
from typing import List
def create_code_file_text(docs : List[Document]):
    """Render the retrieved documents as one text block for the prompt.

    Each document contributes a section with a ``File i/N`` header, its file
    path, its tag name and the text returned by ``get_source_code`` for that
    tag and file, followed by a blank line.

    Args:
        docs: Documents whose metadata holds ``file_name`` and ``tag_name``.

    Returns:
        The concatenated sections, or an empty string when ``docs`` is empty.
    """
    sections = []
    for position, doc in enumerate(docs, start=1):
        meta = doc.metadata
        # Adjacent f-strings are evaluated left to right, matching the
        # original line-by-line concatenation order.
        sections.append(
            f'==== File {position}/{len(docs)} ====\n'
            f'File path: {meta["file_name"]}\n'
            f'Tag name: {meta["tag_name"]}\n'
            f'Code: {get_source_code(meta["tag_name"], meta["file_name"])}\n'
            "\n"
        )
    return "".join(sections)
    

In [11]:
# Build the prompt context for the documents retrieved by the sample query.
# NOTE(review): `code_text` is not referenced by any later cell visible here.
code_text = create_code_file_text(docs)

In [12]:
from langchain.chat_models import AzureChatOpenAI
from langchain.schema import HumanMessage, AIMessage


# Chat model client used to answer the questions below.
# NOTE(review): "gpt35-chat" presumably has to match a deployment name in your
# Azure OpenAI resource — confirm before running.
llm = AzureChatOpenAI(
    deployment_name="gpt35-chat",
    temperature=0.5,
)

In [13]:
def ask(user_prompt: str, k: int = 4) -> AIMessage:
    """Answer ``user_prompt`` with the chat model, using retrieved code as context.

    The ``k`` documents most similar to the question are looked up in ``db``,
    rendered with ``create_code_file_text`` and inserted into ``template``
    together with the question. The result is sent to ``llm`` as a single
    human message.

    Args:
        user_prompt: The user's question.
        k: Number of similar documents to retrieve. Defaults to 4.

    Returns:
        The value returned by ``llm`` for the assembled message.
    """
    # Search with the question that was actually asked. The search previously
    # used the notebook-global `query`, so every call got the same documents
    # no matter what was asked.
    docs = db.similarity_search(user_prompt, k=k)
    full_prompt = template.format(
        code_file_text=create_code_file_text(docs),
        user_prompt=user_prompt,
    )

    # Call the chat model with the assembled prompt.
    return llm([HumanMessage(content=full_prompt)])

In [14]:
# Ask the model a question and show the question/answer pair.
user_question = "What is Langchain design for?"
result = ask(user_question).content

# A single print with a newline separator emits the same four lines.
print(
    "=" * 20,
    f"👩‍💻 : {user_question}",
    "=" * 20,
    f"🤖 : {result}",
    sep="\n",
)

👩‍💻 : What is Langchain design for?
🤖 : LangChain is designed to provide a platform for building and deploying natural language processing (NLP) models and applications. It aims to make NLP more accessible and user-friendly for developers and businesses.


In [15]:
# Ask the model a question and show the question/answer pair.
user_question = "How can i add a Custom Prompt Template in this repository? also add the unit-test. Give me an example"
result = ask(user_question).content

# A single print with a newline separator emits the same four lines.
print(
    "=" * 20,
    f"👩‍💻 : {user_question}",
    "=" * 20,
    f"🤖 : {result}",
    sep="\n",
)

👩‍💻 : How can i add a Custom Prompt Template in this repository? also add the unit-test. Give me an example
🤖 : To add a custom prompt template, you can create a new class that inherits from `BasePromptTemplate` and implement the `format_prompt` method to return the desired prompt. Here's an example:

```
from langchain.schema.prompt_template import BasePromptTemplate

class MyPromptTemplate(BasePromptTemplate):
    def format_prompt(self, **kwargs):
        return f"My custom prompt with {kwargs['variable']}"

# Unit test
def test_my_prompt_template():
    prompt = MyPromptTemplate(input_variables=["variable"])
    output = prompt.format_prompt(variable="test")
    assert output == "My custom prompt with test"
```

You can then use this new prompt template in your LangChain project by importing it and instantiating it with the desired input variables.


In [16]:
# Ask the model a question and show the question/answer pair.
user_question = "What is the high-level system architecture of this project? Give me an example"
result = ask(user_question).content

# A single print with a newline separator emits the same four lines.
print(
    "=" * 20,
    f"👩‍💻 : {user_question}",
    "=" * 20,
    f"🤖 : {result}",
    sep="\n",
)

👩‍💻 : What is the high-level system architecture of this project? Give me an example
🤖 : The high-level system architecture of this project consists of various components such as prompts, models, and output parsers. A prompt is a template that accepts a set of parameters from the user that can be used to generate a prompt for a language model. The model is then used to generate an output based on the prompt. Finally, the output parser is used to parse the output and return a structured response. 

For example, let's say we have a prompt that asks for a summary of a news article. The user provides the article title and content as parameters to the prompt. The model then generates a summary of the article based on the provided parameters. Finally, the output parser is used to extract the summary and return it as a structured response.


## Proof that GPT-3.5 does not know LangChain

In [17]:
# Ask the same model directly, with no retrieved code in the prompt.
print(llm([HumanMessage(content="What is langchain")]).content)

As an AI language model, I do not have any information about a term or concept called "langchain." Could you please provide more context or details about what you are referring to?
