In [2]:
import os
import re
import textwrap

from dotenv import load_dotenv
from llama_hub.github_repo import GithubRepositoryReader, GithubClient
from llama_index import download_loader
from llama_index import VectorStoreIndex
from llama_index import get_response_synthesizer
from llama_index.postprocessor import SimilarityPostprocessor
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.response_synthesizers import ResponseMode
from llama_index.retrievers import VectorIndexRetriever
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores import DeepLakeVectorStore



In [None]:
# Load environment variables (python-dotenv) before reading any of them.
load_dotenv()

# Credentials and dataset location; each value is None when its variable is unset.
openai_api_key = os.environ.get("OPENAI_API_KEY")
active_loop_token = os.environ.get("ACTIVELOOP_TOKEN")
dataset_path = os.environ.get("DATASET_PATH")
github_token = os.environ.get("GITHUB_TOKEN")


In [4]:
# GitHub client constructed with the token read from GITHUB_TOKEN.
github_client = GithubClient(github_token)
# NOTE(review): the value returned here is not bound to a name; it is only
# echoed as the cell output. The GithubRepositoryReader used in later cells is
# the one imported from llama_hub in the import cell — confirm this download
# is still required.
download_loader("GithubRepositoryReader")

custom_module.GithubRepositoryReader

In [5]:
# Repository to index. Any URL under the repository (e.g. a /tree/... page) works,
# because only the first two path segments are used.
GITHUB_REPO_URL = "https://github.com/srishtysuman2919/vector-voyage/tree/main/1_LangChainAndVectorDatabases"


def parse_github_url(url):
    """Extract the ``(owner, repo)`` pair from a GitHub URL.

    Args:
        url: A string starting with ``https://github.com/<owner>/<repo>``;
            anything after the repository segment is ignored.

    Returns:
        A 2-tuple ``(owner, repo)`` of strings.

    Raises:
        ValueError: If ``url`` does not start with a GitHub owner/repo path.
    """
    match = re.match(r"https://github\.com/([^/]+)/([^/]+)", url)
    if match is None:
        # Fail with a readable message instead of AttributeError on None.groups().
        raise ValueError(f"Not a GitHub repository URL: {url!r}")
    return match.groups()


owner, repo = parse_github_url(GITHUB_REPO_URL)
print(owner, repo)

srishtysuman2919 vector-voyage


In [6]:
# Reader for the repository identified by `owner`/`repo`, using the GitHub
# client created earlier. The extension list is passed with FilterType.INCLUDE.
loader = GithubRepositoryReader(
    github_client,
    owner=owner,
    repo=repo,
    filter_file_extensions=(
        [".py", ".js", ".ts", ".md"],
        GithubRepositoryReader.FilterType.INCLUDE,
    ),
    verbose=False,
    concurrent_requests=5,
)

# Load documents from the "main" branch.
docs = loader.load_data(branch="main")

# Print each document's metadata, one per line.
for doc in docs:
    print(doc.metadata)

{'file_path': '1_LangChainAndVectorDatabases/1_FromZeroToHero/l1_the_llms.py', 'file_name': 'l1_the_llms.py', 'url': 'https://github.com/srishtysuman2919/vector-voyage/blob/main/1_LangChainAndVectorDatabases/1_FromZeroToHero/l1_the_llms.py'}
{'file_path': '1_LangChainAndVectorDatabases/1_FromZeroToHero/l2_the_chains.py', 'file_name': 'l2_the_chains.py', 'url': 'https://github.com/srishtysuman2919/vector-voyage/blob/main/1_LangChainAndVectorDatabases/1_FromZeroToHero/l2_the_chains.py'}
{'file_path': '1_LangChainAndVectorDatabases/1_FromZeroToHero/l3_the_memory.py', 'file_name': 'l3_the_memory.py', 'url': 'https://github.com/srishtysuman2919/vector-voyage/blob/main/1_LangChainAndVectorDatabases/1_FromZeroToHero/l3_the_memory.py'}
{'file_path': '1_LangChainAndVectorDatabases/1_FromZeroToHero/l4_deeplake_vector_store.py', 'file_name': 'l4_deeplake_vector_store.py', 'url': 'https://github.com/srishtysuman2919/vector-voyage/blob/main/1_LangChainAndVectorDatabases/1_FromZeroToHero/l4_deeplake

In [33]:
# Deep Lake vector store at the path read from DATASET_PATH.
# NOTE(review): overwrite=True presumably replaces any dataset already at that
# path on every run — confirm before pointing this at shared data.
vector_store = DeepLakeVectorStore(
    dataset_path=dataset_path, overwrite=True, exec_option="python"
)



In [34]:
# Storage context that uses the Deep Lake vector store created above.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Build the index from the loaded repository documents with that storage context.
index = VectorStoreIndex.from_documents(docs, storage_context=storage_context)
# Query engine with the index's default settings; later cells replace it.
query_engine = index.as_query_engine()

Dataset(path='https:/app.activeloop.ai/srishtysuman2919/llama_index', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype       shape      dtype  compression
  -------    -------     -------    -------  ------- 
 embedding  embedding  (100, 1536)  float32   None   
    id        text      (100, 1)      str     None   
 metadata     json      (100, 1)      str     None   
   text       text      (100, 1)      str     None   




In [39]:
# Run a question through the current query engine.
answer = query_engine.query("What this repo is about?")
# Bare last expression: the notebook displays the repr of the returned object.
answer

Response(response='The repo is about a project called "Chat with Github Repo".', source_nodes=[NodeWithScore(node=TextNode(id_='01e53c4c-85dc-4e43-8d33-31194e5e8eb4', embedding=None, metadata={'file_path': '1_LangChainAndVectorDatabases/6_GivingMemoeryToLLMs/l3_chat_with_github.py', 'file_name': 'l3_chat_with_github.py', 'url': 'https://github.com/srishtysuman2919/vector-voyage/blob/main/1_LangChainAndVectorDatabases/6_GivingMemoeryToLLMs/l3_chat_with_github.py'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0ef2717c4fd9d18f33c176b34240ff4bf0663cfa', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '1_LangChainAndVectorDatabases/6_GivingMemoeryToLLMs/l3_chat_with_github.py', 'file_name': 'l3_chat_with_github.py', 'url': 'https://github.com/srishtysuman2919/vector-voyage/blob/main/1_LangChainAndVectorDatabases/6_GivingMemoeryToLLMs/l3_chat_with_github.py'}, hash='e55550d4e55c2da10f233e

Configuring the Retriever

In [40]:
# Retriever over the index built above; similarity_top_k=4 asks for the four
# most similar nodes per query. (VectorIndexRetriever is imported in the
# notebook's top import cell.)
retriever = VectorIndexRetriever(index=index, similarity_top_k=4)

Customizing the Query Engine

In [42]:
# Response synthesizer with library defaults, passed explicitly to the engine.
response_synthesizer = get_response_synthesizer()

# Query engine that combines the retriever, the synthesizer, and a similarity
# post-processor with a 0.7 cutoff.
# No response_mode is passed: from_args only uses it to build a synthesizer
# when none is supplied, so it has no effect alongside response_synthesizer.
# (All names used here are imported in the notebook's top import cell.)
query_engine = RetrieverQueryEngine.from_args(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
)

Getting the Response Synthesizer

In [43]:
# Response synthesizer with library defaults.
# NOTE(review): this repeats the assignment already made in the previous code
# cell — confirm whether both cells are needed.
response_synthesizer = get_response_synthesizer()

In [44]:
# Query engine that combines the retriever, the explicit response synthesizer,
# and a similarity post-processor with a 0.7 cutoff.
# No response_mode is passed: from_args only uses it to build a synthesizer
# when none is supplied, so it has no effect alongside response_synthesizer.
query_engine = RetrieverQueryEngine.from_args(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
)

In [46]:
# Rebuild the engine, letting from_args create the synthesizer in COMPACT mode.
# No node_postprocessors are passed here. (ResponseMode is imported in the
# notebook's top import cell.)
query_engine = RetrieverQueryEngine.from_args(
    retriever,
    response_mode=ResponseMode.COMPACT,
)


In [47]:
# Rebuild the engine once more, this time with the TREE_SUMMARIZE response mode.
# No node_postprocessors are passed here.
query_engine = RetrieverQueryEngine.from_args(
    retriever,
    response_mode=ResponseMode.TREE_SUMMARIZE,
)
