In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
ctags_path = './repo/langchain/libs/langchain/tags'
assert os.path.isfile(ctags_path), "Please run `zsh download_example_rpo.sh` first"

In [3]:
from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.embeddings.base import Embeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader
from langchain.docstore.document import Document

In [4]:
def read_tags_file(file_path):
    with open(file_path, 'r', errors='ignore') as file:
        lines = file.readlines()

    tags = []
    for line in lines:
        if line.startswith('!'):  # Skip metadata lines
            continue
        parts = line.split('\t')
        if len(parts) >= 4:
            tag_name = parts[0]
            file_name = parts[1]
            pattern = parts[2]
            tags.append(dict(tag_name=tag_name, file_name=file_name, pattern=pattern))

    return tags

# Use the function

ctags_root_path = os.path.dirname(ctags_path)
tags = read_tags_file(ctags_path)

documents = []

for tag in tags:
    documents.append(Document(page_content=f"{tag['file_name']} | {tag['tag_name']} ", metadata=tag))


In [5]:
# You can use HuggingFaceEmbeddings as embedding model, this will runnuing faster in POC
# The performance is similar to OpenAIEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl", model_kwargs={"device": "cpu"})
# embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", chunk_size = 1)

# https://openai.com/blog/introducing-text-and-code-embeddings
# embeddings = OpenAIEmbeddings(model="code-search-ada-code-001", chunk_size = 1)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
if os.path.isdir("./db/faiss"):
    db = FAISS.load_local(folder_path="./db/faiss", embeddings=embeddings, index_name="poc")
else:
    db = FAISS.from_documents(documents, embeddings)
    db.save_local(folder_path="./db/faiss", index_name="poc")

In [7]:
query = "How can i add a Custom Prompt Template in this repository?"
docs = db.similarity_search(query, k=5)

In [8]:
docs

[Document(page_content='langchain/prompts/prompt.py | from_template ', metadata={'tag_name': 'from_template', 'file_name': 'langchain/prompts/prompt.py', 'pattern': '/^    def from_template(cls, template: str, **kwargs: Any) -> PromptTemplate:$/;"'}),
 Document(page_content='langchain/prompts/prompt.py | PromptTemplate ', metadata={'tag_name': 'PromptTemplate', 'file_name': 'langchain/prompts/prompt.py', 'pattern': '/^class PromptTemplate(StringPromptTemplate):$/;"'}),
 Document(page_content='langchain/schema/prompt_template.py | _prompt_type ', metadata={'tag_name': '_prompt_type', 'file_name': 'langchain/schema/prompt_template.py', 'pattern': '/^    def _prompt_type(self) -> str:$/;"'}),
 Document(page_content='langchain/schema/prompt_template.py | BasePromptTemplate ', metadata={'tag_name': 'BasePromptTemplate', 'file_name': 'langchain/schema/prompt_template.py', 'pattern': '/^class BasePromptTemplate(Serializable, ABC):$/;"'}),
 Document(page_content='langchain/prompts/prompt.py | 

In [9]:

import inspect
import importlib.util
import os

def get_source_code(function_name, function_path, ctags_root_path=ctags_root_path):
    spec=importlib.util.spec_from_file_location(function_name, os.path.join(ctags_root_path, function_path))
    foo = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(foo)
    return inspect.getsource(foo)



In [10]:
template = """You are a hupful bot that fuilfill the human' program task:

The following is releative code:
{code_file_text}

User: {user_prompt}
Ai:
"""

In [11]:
from typing import List
def create_code_file_text(docs : List[Document]):
    code_file_text = ""
    for i, doc in enumerate(docs):
        code_file_text += f'==== File {i+1}/{len(docs)} ====\n'
        code_file_text += f'File path: {doc.metadata["file_name"]}\n'
        code_file_text += f'Tag name: {doc.metadata["tag_name"]}\n'
        code_string = get_source_code(doc.metadata["tag_name"], doc.metadata["file_name"])
        code_file_text += f'Code: {code_string}\n'
        code_file_text += "\n"
    return code_file_text
    

In [12]:
code_text = create_code_file_text(docs=docs[:2])
print(code_text)

==== File 1/2 ====
File path: langchain/prompts/prompt.py
Tag name: from_template
Code: """Prompt schema definition."""
from __future__ import annotations

from pathlib import Path
from string import Formatter
from typing import Any, Dict, List, Union

from pydantic import root_validator

from langchain.prompts.base import (
    DEFAULT_FORMATTER_MAPPING,
    StringPromptTemplate,
    _get_jinja2_variables_from_template,
    check_valid_template,
)


class PromptTemplate(StringPromptTemplate):
    """Schema to represent a prompt for an LLM.

    Example:
        .. code-block:: python

            from langchain import PromptTemplate
            prompt = PromptTemplate(input_variables=["foo"], template="Say {foo}")
    """

    @property
    def lc_attributes(self) -> Dict[str, Any]:
        return {
            "template_format": self.template_format,
        }

    input_variables: List[str]
    """A list of the names of the variables the prompt template expects."""

    template: st

In [13]:
from langchain.chat_models import AzureChatOpenAI
from langchain.schema import HumanMessage, AIMessage


llm = AzureChatOpenAI(
    azure_deployment=os.environ.get("DEPLOYMENT_NAME"),
    temperature=0.5,
)



In [14]:
def ask(user_prompt: str) -> AIMessage:
    # find docs similar to user_prompt
    docs = db.similarity_search(query, k=4)
    user_prompt = template.format(code_file_text=create_code_file_text(docs), user_prompt=user_prompt)

    # call openai api here
    message = HumanMessage(content=user_prompt)
    return llm([message])

In [15]:
user_question = "What is Langchain design for?"
result = ask(user_question).content

print("="*20)
print(f"👩‍💻 : {user_question}")
print("="*20)
print(f"🤖 : {result}")

👩‍💻 : What is Langchain design for?
🤖 : Langchain is designed to be a language model development framework. It provides tools and libraries for creating and training language models, as well as generating prompts and processing outputs. It aims to simplify the process of building and deploying language models for various natural language processing tasks.


In [16]:
user_question = "How can i add a Custom Prompt Template in this repository? also add the unit-test. Give me an example"
result = ask(user_question).content

print("="*20)
print(f"👩‍💻 : {user_question}")
print("="*20)
print(f"🤖 : {result}")

👩‍💻 : How can i add a Custom Prompt Template in this repository? also add the unit-test. Give me an example
🤖 : To add a custom prompt template to this repository, you can follow these steps:

1. Create a new Python file in the appropriate directory, such as `langchain/prompts`.
2. Define your custom prompt template class by inheriting from `BasePromptTemplate`.
3. Implement the required methods and properties of the `BasePromptTemplate` class, such as `format_prompt` and `_prompt_type`.
4. Add any additional methods or properties specific to your custom prompt template.
5. Write unit tests for your custom prompt template to ensure its functionality.
6. Save the file and commit it to the repository.

Here's an example of a custom prompt template class and its corresponding unit test:

```python
# File: langchain/prompts/custom_prompt.py

from langchain.schema.prompt_template import BasePromptTemplate

class CustomPromptTemplate(BasePromptTemplate):
    def format_prompt(self, **kwargs)

In [17]:
user_question = "What is the high-level system architecture of this project? Give me an example"
result = ask(user_question).content

print("="*20)
print(f"👩‍💻 : {user_question}")
print("="*20)
print(f"🤖 : {result}")

👩‍💻 : What is the high-level system architecture of this project? Give me an example
🤖 : The high-level system architecture of this project consists of several components. Here is an example:

1. File 1/4: `langchain/prompts/prompt.py`
   - Contains the definition of the `PromptTemplate` class, which represents a prompt for an LLM (Language Model).
   - It includes properties such as `input_variables`, `template`, `template_format`, and `validate_template`.
   - It also has methods for formatting the prompt and validating the template.

2. File 2/4: `langchain/prompts/prompt.py`
   - Contains the same definition of the `PromptTemplate` class as in File 1/4.
   - This duplication might be an error or a versioning issue.

3. File 3/4: `langchain/schema/prompt_template.py`
   - Defines the `BasePromptTemplate` class, which is the base class for all prompt templates.
   - It includes properties such as `input_variables`, `output_parser`, and `partial_variables`.
   - It also has methods fo

## Proof of GPT-3.5 not knowing the langchain

In [18]:
print(llm([HumanMessage(content="What is langchain")]).content)

Langchain is a decentralized blockchain platform that aims to provide a solution for language-related challenges in various industries. It aims to bridge the language gap by offering language services such as translation, interpretation, and proofreading through its network of language professionals. The platform utilizes smart contracts and blockchain technology to ensure secure and transparent transactions between clients and language service providers. Langchain also aims to create a global community of language professionals and facilitate the exchange of knowledge and expertise in different languages.
