https://lightning.ai/lightning-ai/studios/chat-with-your-code-using-rag


In [None]:
# Install the packages this notebook imports in the cells below.
# Core LlamaIndex package (imported below as `llama_index.core`).
!pip3 install llama_index
# GitHub reader integration (imported below as `llama_index.readers.github`).
!pip3 install llama-index-readers-github
# LangChain embedding adapter (imported below as `llama_index.embeddings.langchain`).
!pip3 install llama-index-embeddings-langchain
# Ollama LLM integration (imported below as `llama_index.llms.ollama`).
!pip3 install llama-index-llms-ollama
# NOTE(review): a later cell imports `langchain.embeddings.HuggingFaceEmbeddings`,
# but `langchain` itself is not installed here — confirm it is preinstalled or
# pulled in transitively by one of the packages above.

In [None]:
# The Jupyter kernel already runs an asyncio event loop, so library code that
# calls `loop.run_until_complete(...)` from inside a cell would otherwise fail
# with a "loop is already running" error. nest_asyncio patches asyncio so that
# such nested event-loop calls are allowed.
# NOTE(review): the original comment named DiscordReader, which this notebook
# never uses; presumably the reader that needs this here is the
# GithubRepositoryReader used below — confirm.
import nest_asyncio

nest_asyncio.apply()

In [None]:
import os

# GitHub personal access token passed to the GitHub client below.
# It is read from the GITHUB_TOKEN environment variable so that a real token
# never has to be pasted into (and accidentally committed with) the notebook.
# When the variable is unset or empty, the original placeholder string is kept,
# so replacing the placeholder by hand still works as before.
GITHUB_ACCESS_TOKEN = os.environ.get("GITHUB_TOKEN") or "GITHUB_API_TOKEN"

In [None]:
from llama_index.readers.github import GithubRepositoryReader, GithubClient

def initialize_github_client(github_token):
    """Create a ``GithubClient`` from the given access token.

    Args:
        github_token: Token handed to ``GithubClient`` as its first argument.

    Returns:
        The newly constructed ``GithubClient`` instance.
    """
    client = GithubClient(github_token)
    return client

# Build the client once; the repository reader below reuses it.
github_client = initialize_github_client(GITHUB_ACCESS_TOKEN)

# Restrict loading to notebook files: only paths ending in .ipynb are included.
notebook_only_filter = (
    [".ipynb"],
    GithubRepositoryReader.FilterType.INCLUDE,
)

# Reader for the sergiopaniego/RAG_local_tutorial repository.
loader = GithubRepositoryReader(
    github_client,
    owner="sergiopaniego",
    repo="RAG_local_tutorial",
    filter_file_extensions=notebook_only_filter,
    verbose=False,
    concurrent_requests=5,
)

# Load the matching files from the "main" branch; `docs` feeds the index later.
docs = loader.load_data(branch="main")

In [None]:
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain.embeddings import HuggingFaceEmbeddings

# Name of the HuggingFace embedding model to load.
embedding_model_name = "BAAI/bge-base-en-v1.5"

# Load the model through LangChain, then wrap it in the LlamaIndex adapter so
# it can be assigned as the embedding model in a later cell.
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
embed_model = LangchainEmbedding(embeddings)

In [None]:
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex

# ====== Build the vector index from the loaded documents ======
# Register the embedding model on the global Settings object first, so it is
# already in place when the index is created on the next line.
Settings.embed_model = embed_model
index = VectorStoreIndex.from_documents(documents=docs)

In [None]:
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings

# ====== Configure the LLM ======
# Local "llama3" model served through Ollama, with a 500-second request timeout.
llm = Ollama(
    model="llama3",
    request_timeout=500.0,
)
Settings.llm = llm  # make it the LLM configured on the global Settings

# ====== Query engine over the index created earlier ======
# Streaming responses, retrieving the 4 most similar chunks per query.
query_engine_options = {"streaming": True, "similarity_top_k": 4}
query_engine = index.as_query_engine(**query_engine_options)

In [None]:
from llama_index.core.prompts.base import PromptTemplate


# Question-answering prompt template text.
# `{context_str}` and `{query_str}` are placeholders left in the string to be
# filled in later; the text ends with "Answer: " so the model's completion is
# the answer itself.
# The instruction sentence previously read "incase case you don't know", a typo
# that was sent verbatim to the LLM; it now reads "in case you don't know".
qa_prompt_tmpl_str = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information above I want you to think step by step "
    "to answer the query in a crisp manner, in case you don't know the "
    "answer say 'I don't know!'.\n"
    "Query: {query_str}\n"
    "Answer: "
)

# Wrap the prompt text in a PromptTemplate and register it on the query engine
# under the "response_synthesizer:text_qa_template" key.
qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
prompt_overrides = {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
query_engine.update_prompts(prompt_overrides)

# Run one example question against the indexed repository and print the answer.
question = "What is this repository about?"
response = query_engine.query(question)
print(response)