# Part A: Build a code understanding model. Upload your own custom code files to the model and ask questions based on the code file as context.

Reference used: https://python.langchain.com/docs/use_cases/code_understanding/

In [None]:
!pip -q install --upgrade --quiet langchain-openai tiktoken langchain-chroma langchain GitPython

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m867.6/867.6 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.8/302.8 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.9/312.9 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m525.5/525.5 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━

In [None]:
from git import Repo
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_text_splitters import Language

In [None]:
# Clone the LangChain repository once and reuse the checkout on later runs.
import os

# NOTE(review): the path is under /content/drive, which presumably requires Google Drive
# to be mounted before this cell runs -- confirm.
repo_path = "/content/drive/MyDrive/DATA255/HW12/1"
if os.path.isdir(os.path.join(repo_path, ".git")):
    # Cloning into a directory that already holds a checkout raises, so re-running
    # the cell opens the existing working tree instead.
    repo = Repo(repo_path)
else:
    repo = Repo.clone_from("https://github.com/langchain-ai/langchain", to_path=repo_path)

In [None]:
# Load the .py files found under the langchain_core package of the cloned repository.
core_package_dir = repo_path + "/libs/core/langchain_core"
python_parser = LanguageParser(language=Language.PYTHON, parser_threshold=500)

loader = GenericLoader.from_filesystem(
    core_package_dir,
    glob="**/*",
    suffixes=[".py"],
    exclude=["**/non-utf8-encoding.py"],
    parser=python_parser,
)
documents = loader.load()
len(documents)

333

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks with the splitter configured for Language.PYTHON.
splitter_options = {"chunk_size": 2000, "chunk_overlap": 200}
python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, **splitter_options
)
texts = python_splitter.split_documents(documents)
len(texts)

994

In [None]:
import os
from getpass import getpass

from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# OPENAI_API_KEY is used throughout the notebook but is not assigned in any cell.
# Keep a value that already exists in the kernel; otherwise read the environment and,
# as a last resort, prompt for it so the key never has to be hardcoded.
OPENAI_API_KEY = (
    globals().get("OPENAI_API_KEY")
    or os.environ.get("OPENAI_API_KEY")
    or getpass("OpenAI API key: ")
)

# Give the code chunks their own collection rather than Chroma's default one, which the
# Part B vector store also uses when no collection_name is passed.
db = Chroma.from_documents(
    texts,
    OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY),
    collection_name="langchain_core_code",
)
retriever = db.as_retriever(
    search_type="mmr",  # Also test "similarity"
    search_kwargs={"k": 8},
)

In [None]:
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-3.5-turbo")

# Stage 1: a prompt that asks the LLM to turn the conversation into a search query,
# combined with the retriever into a history-aware retriever.
query_messages = [
    ("placeholder", "{chat_history}"),
    ("user", "{input}"),
    (
        "user",
        "Given the above conversation, generate a search query to look up to get information relevant to the conversation",
    ),
]
prompt = ChatPromptTemplate.from_messages(query_messages)
retriever_chain = create_history_aware_retriever(llm, retriever, prompt)

# Stage 2: `prompt` is rebound to the answering template, which receives the
# retrieved documents through the {context} variable.
answer_messages = [
    (
        "system",
        "Answer the user's questions based on the below context:\n\n{context}",
    ),
    ("placeholder", "{chat_history}"),
    ("user", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(answer_messages)
document_chain = create_stuff_documents_chain(llm, prompt)

# Retrieval followed by answering, exposed as a single chain.
qa = create_retrieval_chain(retriever_chain, document_chain)

In [None]:
# Ask one question; the "answer" entry of the chain result is the cell's output.
question = "What is a RunnableBinding?"
payload = {"input": question}
result = qa.invoke(payload)
result["answer"]

'A `RunnableBinding` is a type of object that binds a `Runnable` with its parameters. It is used to create a binding between a `Runnable` object and the parameters that it requires to run. In the context provided, a `RunnableBinding` is a subclass of `Runnable` that specifically focuses on binding a `Runnable` with its parameters. It provides methods for backward propagation and updating the parameters using an optimizer.'

In [None]:
questions = [
    "What classes are derived from the Runnable class?",
    "What one improvement do you propose in code in relation to the class hierarchy for the Runnable class?",
]

# Each question is sent on its own; no chat history is passed between invocations.
for question in questions:
    result = qa.invoke({"input": question})
    answer = result["answer"]
    print(f"-> **Question**: {question} \n")
    print(f"**Answer**: {answer} \n")

-> **Question**: What classes are derived from the Runnable class? 

**Answer**: The classes derived from the `Runnable` class include:

1. `RunnableLambda`
2. `RunnableParallel`
3. `RunnableGenerator`
4. `DynamicRunnable`

These classes inherit from the `Runnable` class and provide different functionalities for running tasks in a structured manner. 

-> **Question**: What one improvement do you propose in code in relation to the class hierarchy for the Runnable class? 

**Answer**: One improvement that can be proposed in the code in relation to the class hierarchy for the `Runnable` class is to introduce a base abstract class that defines the common behavior and structure for all `Runnable` subclasses. This base class can contain the shared methods and attributes that are common to all `Runnable` classes. 

By introducing a base abstract class for the `Runnable` hierarchy, you can achieve the following benefits:

1. **Code Reusability**: The common methods and attributes can be define

# Part B: Write a chatbot prompt to iteratively create a sequence of chats on one particular custom data source.

In [None]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Fetch the LangSmith overview page and split it (chunk_size=500, no overlap).
loader = WebBaseLoader("https://docs.smith.langchain.com/overview")
data = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

# Use a dedicated collection and stable ids: without them every run of this cell adds
# the same chunks again to the default collection, and retrieval returns duplicates.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY),
    collection_name="langsmith_docs",
    ids=[f"langsmith-overview-{i}" for i in range(len(all_splits))],
)

# k is the number of chunks to retrieve. It has to go through search_kwargs;
# a bare k= keyword on as_retriever does not configure the search.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

docs = retriever.invoke("how can langsmith help with testing?")

docs

[Document(page_content='Skip to main contentLangSmith API DocsSearchGo to AppQuick StartUser GuideTracingEvaluationProduction Monitoring & AutomationsPrompt HubProxyPricingSelf-HostingCookbookQuick StartOn this pageGetting started with LangSmithIntroduction‚ÄãLangSmith is a platform for building production-grade LLM applications. It allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence. Use of LangChain is not necessary - LangSmith works on its own!Install', metadata={'description': 'Introduction', 'language': 'en', 'source': 'https://docs.smith.langchain.com/overview', 'title': 'Getting started with LangSmith | \uf8ffü¶úÔ∏è\uf8ffüõ†Ô∏è LangSmith'}),
 Document(page_content='Skip to main contentLangSmith API DocsSearchGo to AppQuick StartUser GuideTracingEvaluationProduction Monitoring & AutomationsPrompt HubProxyPricingSelf-HostingCookbookQuick StartOn this pageGetting started with LangSmithIntroduction‚ÄãLangSmith is a platform for bu

In [None]:
import datetime

from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory, ChatMessageHistory
# ChatOpenAI is taken from langchain_openai, the package the Part A cells already use;
# the langchain.chat_models import path is deprecated.
from langchain_openai import ChatOpenAI

chat_history = ChatMessageHistory()

# Add user message to history
# NOTE(review): chat_history is not handed to any memory or chain in the cells below.
chat_history.add_user_message("how can langsmith help with testing?")

# Depending on the date, select the appropriate LLM model:
# after 2024-06-12 use "gpt-3.5-turbo", otherwise the "gpt-3.5-turbo-0301" snapshot.
current_date = datetime.datetime.now().date()
target_date = datetime.date(2024, 6, 12)
llm_model = "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"

In [None]:
# Initialize the conversation model and memory buffer.
# llm_model (selected in the previous cell) is passed explicitly so the date-based
# model choice actually takes effect instead of falling back to the library default.
llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model=llm_model, temperature=0.0)
memory = ConversationBufferMemory()
conversation = ConversationChain(
    llm=llm,
    memory=memory,
    verbose=True,
)

In [None]:
# First turn: introduce a name. With verbose=True the chain also prints the formatted prompt.
conversation.predict(input="Hi, my name is Xin")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Hi, my name is Xin
AI:[0m

[1m> Finished chain.[0m


"Hello Xin! It's nice to meet you. How can I assist you today?"

In [None]:
# Second turn: the buffer memory replays the earlier exchange inside the prompt.
conversation.predict(input="What is 1+1?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi, my name is Xin
AI: Hello Xin! It's nice to meet you. How can I assist you today?
Human: What is 1+1?
AI:[0m

[1m> Finished chain.[0m


'1+1 equals 2. Is there anything else you would like to know?'

In [None]:
# Third turn. NOTE(review): this chain is built from only an LLM and a buffer memory; the
# retriever created earlier is not attached, so the reply is not drawn from the loaded page.
conversation.predict(input="how can langsmith help with testing?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi, my name is Xin
AI: Hello Xin! It's nice to meet you. How can I assist you today?
Human: What is 1+1?
AI: 1+1 equals 2. Is there anything else you would like to know?
Human: how can langsmith help with testing?
AI:[0m

[1m> Finished chain.[0m


'Langsmith is a powerful tool that can assist with testing by providing automated testing capabilities, such as test case generation, execution, and result analysis. It can help streamline the testing process, identify bugs and issues more efficiently, and improve overall software quality. Would you like more information on how Langsmith can specifically help with testing in your project?'

In [None]:
# Show the transcript accumulated by the buffer memory as one string.
print(memory.buffer)

Human: Hi, my name is Xin
AI: Hello Xin! It's nice to meet you. How can I assist you today?
Human: What is 1+1?
AI: 1+1 equals 2. Is there anything else you would like to know?
Human: how can langsmith help with testing?
AI: Langsmith is a powerful tool that can assist with testing by providing automated testing capabilities, such as test case generation, execution, and result analysis. It can help streamline the testing process, identify bugs and issues more efficiently, and improve overall software quality. Would you like more information on how Langsmith can specifically help with testing in your project?


In [None]:
# The same transcript as the variables dict the memory exposes (under the "history" key).
memory.load_memory_variables({})

{'history': "Human: Hi, my name is Xin\nAI: Hello Xin! It's nice to meet you. How can I assist you today?\nHuman: What is 1+1?\nAI: 1+1 equals 2. Is there anything else you would like to know?\nHuman: how can langsmith help with testing?\nAI: Langsmith is a powerful tool that can assist with testing by providing automated testing capabilities, such as test case generation, execution, and result analysis. It can help streamline the testing process, identify bugs and issues more efficiently, and improve overall software quality. Would you like more information on how Langsmith can specifically help with testing in your project?"}

In [None]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.memory import ConversationSummaryMemory, ChatMessageHistory
from langchain_openai import OpenAI

In [None]:
from langchain_openai import OpenAI
from langchain.chains import ConversationChain

# Two separate OpenAI instances: the temperature-0 one answers, while a second one created
# without a temperature argument writes the running summary kept by ConversationSummaryMemory.
llm = OpenAI(api_key=OPENAI_API_KEY, temperature=0)
summary_memory = ConversationSummaryMemory(llm=OpenAI(api_key=OPENAI_API_KEY))
conversation_with_summary = ConversationChain(
    llm=llm,
    memory=summary_memory,
    verbose=True,
)
conversation_with_summary.predict(input="Hi, what's up?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Hi, what's up?
AI:[0m

[1m> Finished chain.[0m


' Hello! I am currently running on a server in a data center in California. The temperature in the room is 72 degrees Fahrenheit and the humidity is at 45%. My processors are running at 80% capacity and I am currently processing data for various clients. Is there something specific you would like to know?'

In [None]:
# Second turn: the prompt now carries a generated summary of the first exchange instead of its transcript.
conversation_with_summary.predict(input="how can langsmith help with testing?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

The human greets the AI and asks about its current status. The AI responds with details about its location, conditions, and tasks. The AI also asks if there is something specific the human would like to know.
Human: how can langsmith help with testing?
AI:[0m

[1m> Finished chain.[0m


' Langsmith is currently located in a testing facility in Silicon Valley. Its current conditions are optimal for testing, with controlled temperature and humidity levels. As an AI, Langsmith is able to run multiple tests simultaneously and provide accurate and efficient results. Is there a specific aspect of testing you would like more information on?'