In [None]:
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_text_splitters import Language, RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_chroma import Chroma

import os
from git import Repo

In [None]:
# Local checkout of the LangChain repository that serves as the corpus to index.
repo_path = "../test_repo"
if not os.path.exists(repo_path):
  # Only files on disk are read from this checkout, so a shallow clone
  # (depth=1) avoids downloading the full commit history of a large repository.
  # NOTE: `repo` is only bound when the clone actually happens; on later runs
  # the existing directory is reused as-is.
  repo = Repo.clone_from(
    "https://github.com/langchain-ai/langchain",
    to_path=repo_path,
    depth=1,
  )

In [None]:
# Build a filesystem loader rooted at the langchain_core package of the
# checkout: match everything recursively, keep only ".py" files, skip the
# non-UTF-8 fixture file, and parse the files with the Python LanguageParser.
loader = GenericLoader.from_filesystem(
  f"{repo_path}/libs/core/langchain_core",
  parser=LanguageParser(language=Language.PYTHON, parser_threshold=500),
  glob="**/*",
  suffixes=[".py"],
  exclude=["**/non-utf-8-encoding.py"],
)

# Load the documents and display how many were produced.
documents = loader.load()
len(documents)

In [None]:
# Text splitter configured for Python source: chunk_size=2000 with
# chunk_overlap=200 between consecutive chunks.
python_splitter = RecursiveCharacterTextSplitter.from_language(
  Language.PYTHON,
  chunk_overlap=200,
  chunk_size=2000,
)

# Split the loaded documents and display the resulting number of chunks.
texts = python_splitter.split_documents(documents)
len(texts)

In [None]:
# Chat model served through Ollama, with temperature set to 0.
llm = ChatOllama(temperature=0, model="gpt-oss:20b")

# Embedding model (also served through Ollama) used by the vector store.
embeddings_model = OllamaEmbeddings(model="nomic-embed-text:v1.5")

# Chroma vector store persisted under ./chroma_langchain_db.
vector_store = Chroma(
  persist_directory="./chroma_langchain_db",
  embedding_function=embeddings_model,
)

In [None]:
# The Chroma store is persisted on disk, so unconditionally adding the chunks
# would embed and insert all of them again on every run of this cell, filling
# the collection with duplicates. Index only when the collection is still
# empty; delete the persist directory to force a rebuild (e.g. after the
# repository checkout or the splitter settings change).
if vector_store.get(limit=1)["ids"]:
  # Already indexed: reuse the stored ids (include=[] fetches ids only).
  document_ids = vector_store.get(include=[])["ids"]
else:
  document_ids = vector_store.add_documents(documents=texts)

In [None]:
# Retriever over the vector store, using the "mmr" search type with k=3.
retriever = vector_store.as_retriever(search_kwargs=dict(k=3), search_type="mmr")

In [None]:
def ask(question: str) -> tuple[str, list]:
  """Answer a code-review question using chunks retrieved from the vector store.

  Relies on the notebook-level `retriever` and `llm` objects.

  Args:
    question: The user's question. It is used both as the retrieval query
      and as the user message sent to the model.

  Returns:
    A tuple `(answer, docs)`: `answer` is the `content` of the model's reply
    and `docs` is whatever the retriever returned for the question (the same
    documents whose text was placed in the system prompt).
  """
  docs = retriever.invoke(question)

  # Separate chunks with a blank line. Joining with "" glues the last line of
  # one code chunk onto the first line of the next, which corrupts the context
  # shown to the model.
  context = "\n\n".join(doc.page_content for doc in docs)

  ai_msg = llm.invoke([
    {
      "role": "system",
      "content": f"Você é um revisor de código experiente. Forneça informações detalhadas sobre a revisão do código e sugestões de melhorias baseado no contexto fornecido abaixo: \n\n {context}"
    },
    {
      "role": "user",
      "content": question
    },
  ])

  return ai_msg.content, docs

In [None]:
# Example question (in Portuguese) sent through the retrieval + chat pipeline.
user_question = (
  "Você pode revisar e sugerir melhorias para o código de RunnableBinding?"
)
answer, docs = ask(question=user_question)

In [None]:
# Print the answer returned by ask() for the question above.
print(answer)

In [None]:
# Print the first of the retrieved documents returned by ask(), to inspect
# what context was used. Raises IndexError if no documents were retrieved.
print(docs[0])