In [None]:
# Create a dedicated virtualenv (langchain-env-java) for this notebook.
!pip install virtualenv
!virtualenv langchain-env-java
# NOTE(review): each `!` command runs in its own short-lived subshell, so this
# activation presumably does not carry over to the kernel or to later cells —
# confirm which interpreter the kernel is really using before relying on it.
!source langchain-env-java/bin/activate

In [None]:
!pip install langchain
!pip install langchain-pinecone
!pip install sentence_transformers
!pip install tiktoken
!pip install GitPython
!pip install python-dotenv
!pip install langchain_community
!pip install langchain_openai

In [None]:
import dotenv

# Load environment settings via python-dotenv before any client is created.
# NOTE(review): presumably this is where the OpenAI / Pinecone API keys come
# from (a local .env file) — confirm, and keep that file out of version control.
dotenv.load_dotenv()

In [None]:
from git import Repo
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_text_splitters import Language

In [None]:
# Clone the repo, or reuse an existing checkout so this cell can be re-run
# (Repo.clone_from fails when the target directory already holds a clone).
import os

repo_path = "./large-projects-with-custom-gradle-plugins/"
repo_url = "https://github.com/teenageorge/large-projects-with-custom-gradle-plugins"

if os.path.isdir(os.path.join(repo_path, ".git")):
    # A previous run already cloned the project: open it instead of cloning again.
    repo = Repo(repo_path)
else:
    repo = Repo.clone_from(repo_url, to_path=repo_path)

In [None]:
# Load Kotlin sources (.kt) and Gradle Kotlin scripts (.kts) from the
# consumer-service module using LanguageParser.

# repo_path may already end with a separator; strip it so the joined path
# never contains a doubled "/".
consumer_service_path = repo_path.rstrip("/") + "/consumer-service/"

loader = GenericLoader.from_filesystem(
    consumer_service_path,
    glob="**/*",
    suffixes=[".kt", ".kts"],
    # NOTE(review): with parser_threshold=500, presumably only files longer than
    # 500 lines are split by the Kotlin segmenter and shorter files are loaded
    # whole — confirm against the LanguageParser documentation.
    parser=LanguageParser(language="kotlin", parser_threshold=500),
)
documents = loader.load()

# Number of loaded documents (displayed as the cell output).
len(documents)

In [None]:
## Index
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import Language, RecursiveCharacterTextSplitter

# Embedding model used when the chunks are written to the vector store.
# NOTE(review): disallowed_special=() presumably turns off the tokenizer's
# special-token check so source code containing such tokens does not raise — confirm.
embeddings = OpenAIEmbeddings(disallowed_special=())

# Split on Kotlin-aware separators into chunks of at most 2000 characters,
# with 200 characters of overlap between neighbouring chunks.
kotlin_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.KOTLIN, chunk_size=2000, chunk_overlap=200
)
texts = kotlin_splitter.split_documents(documents)

# Keep the chunk count as the LAST expression so the notebook displays it.
len(texts)

In [None]:
# Vector storage: embed the chunks and write them into the Pinecone index.
from langchain_pinecone import PineconeVectorStore

index_name = "consumer-service-index"
db = PineconeVectorStore.from_documents(
    documents=texts,
    embedding=embeddings,
    index_name=index_name,
)

In [None]:
# Retrieve 8 chunks per query using MMR search
# (you can also experiment with "similarity").
retriever = db.as_retriever(search_type="mmr", search_kwargs=dict(k=8))

In [None]:
# setup agent
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai.chat_models import ChatOpenAI

# Chat model shared by the query-rewriting step and the answering step.
llm = ChatOpenAI(model="gpt-4o", temperature=0)

# Prompt that turns the conversation so far into a standalone search query.
search_query_messages = [
    ("placeholder", "{chat_history}"),
    ("user", "{input}"),
    (
        "user",
        "Given the above conversation, generate a search query to look up to get information relevant to the conversation",
    ),
]
search_query_prompt = ChatPromptTemplate.from_messages(search_query_messages)

# Retriever that first asks the LLM for a search query, then looks it up.
retriever_chain = create_history_aware_retriever(llm, retriever, search_query_prompt)

# Prompt for the answering step: retrieved documents fill {context}.
answer_messages = [
    ("system", "Answer the user's questions based on the below context:\n\n{context}"),
    ("placeholder", "{chat_history}"),
    ("user", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(answer_messages)

# Stuff the retrieved documents into the prompt and let the LLM answer.
document_chain = create_stuff_documents_chain(llm, prompt)

# Full pipeline: history-aware retrieval followed by answer generation.
qa = create_retrieval_chain(retriever_chain, document_chain)

# Conversation memory as (role, content) pairs; both prompts read it through
# their "{chat_history}" placeholder.
chat_history = []

question = "Explain functions in ConsumerController.kt. If it has references to other functions in me.teenageorge package, extend the explanation of the functions."

# Pass the history explicitly — without it the chain never sees earlier turns.
result = qa.invoke({"input": question, "chat_history": chat_history})

# Record BOTH sides of the exchange so a follow-up question can be resolved
# against what was asked as well as what was answered.
chat_history.extend([("human", question), ("ai", result["answer"])])

print(result["answer"])