In [None]:
!pip install virtualenv
!virtualenv langchain-env
!source langchain-env/bin/activate

In [None]:
!pip install langchain
!pip install langchain-pinecone
!pip install sentence_transformers
!pip install tiktoken
!pip install GitPython
!pip install python-dotenv
!pip install langchain_community

In [None]:
## Load environment variables (OPENAI_API_KEY, PINECONE_API_KEY)

In [None]:
import os

import dotenv

# Pull variables from a local .env file (if one exists) into the process environment.
dotenv.load_dotenv()

# Fail fast with an actionable message: the later cells talk to OpenAI and
# Pinecone, and a missing key would otherwise surface only as an opaque
# client error much further down the notebook.
missing_keys = [
    key for key in ("OPENAI_API_KEY", "PINECONE_API_KEY") if not os.environ.get(key)
]
if missing_keys:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_keys)
        + ". Define them in a .env file or export them before starting the kernel."
    )

In [None]:
## Load a repository

In [None]:
from git import Repo
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_text_splitters import Language

In [None]:
# Clone the repo (only when no local checkout exists yet, so the notebook
# survives "Restart & Run All" on a fresh machine without re-cloning on every run).
import os

repo_path = "./langchain-2/"
if not os.path.isdir(repo_path):
    # First run: fetch the sources that the loader cell reads from `repo_path`.
    Repo.clone_from("https://github.com/langchain-ai/langchain", to_path=repo_path)


In [None]:
# Gather the .py sources under langchain_core and parse them with LanguageParser.
source_root = repo_path + "/libs/core/langchain_core"
python_parser = LanguageParser(language="python", parser_threshold=500)

loader = GenericLoader.from_filesystem(
    source_root,
    glob="**/*",
    suffixes=[".py"],
    # Skip the deliberately non-UTF-8 fixture file by name.
    exclude=["**/non-utf8-encoding.py"],
    parser=python_parser,
)
documents = loader.load()

# Show how many documents were produced.
len(documents)

In [None]:
!pip install langchain_openai


In [None]:
## Index
from langchain_openai import OpenAIEmbeddings
# Import the splitter from `langchain_text_splitters` (the package this notebook
# already imports `Language` from) rather than the legacy `langchain.text_splitter`
# re-export, so both names come from the same place.
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# Python-aware recursive splitter: chunks of 2000 characters with a 200 overlap.
python_splitter = RecursiveCharacterTextSplitter.from_language(
    Language.PYTHON,
    chunk_size=2000,
    chunk_overlap=200,
)

# Break the loaded documents into chunks for embedding.
texts = python_splitter.split_documents(documents)

# Show how many chunks were produced.
len(texts)

In [None]:
# Embedding model used to vectorize the code chunks before they are stored in the vector index.
# Built with library defaults; presumably picks up OPENAI_API_KEY from the environment — TODO confirm.
embeddings = OpenAIEmbeddings()

In [None]:
# Vector storage: embed the chunks and push them into the Pinecone index.
# NOTE(review): this looks like it uploads the chunks on every execution —
# confirm whether re-running the cell duplicates vectors in the index.
from langchain_pinecone import PineconeVectorStore

index_name = "langchain-test-index"
db = PineconeVectorStore.from_documents(
    documents=texts,
    embedding=embeddings,
    index_name=index_name,
)

In [None]:
# Retriever over the vector store using "mmr" search with k=8.
# ("similarity" is another search_type worth experimenting with.)
mmr_search_options = {"k": 8}
retriever = db.as_retriever(search_type="mmr", search_kwargs=mmr_search_options)

In [None]:
from langchain_openai.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

In [None]:
# Chat model that generates the answers; temperature=0 requests the least random sampling.
llm = ChatOpenAI(temperature=0)

In [None]:
# Conversational QA chain: combines the chat model with the code retriever.
qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)

In [None]:
# Running conversation memory: (question, answer) tuples are appended after each
# turn and passed back to the chain with the next question.
chat_history = []

In [None]:
# Ask one question, passing the conversation so far along with it.
question = "How can I load a source code as documents, for a QA over code, splitting the code in classes and functions?"
chain_inputs = {"question": question, "chat_history": chat_history}
result = qa.invoke(chain_inputs)

# Record the turn so follow-up questions see it, then show the answer.
answer = result["answer"]
chat_history.append((question, answer))
print(answer)