In [2]:
import os
from dotenv import load_dotenv

# Load the variables declared in the local `api.env` file into the process
# environment, so no credential is hard-coded in the notebook.
load_dotenv(dotenv_path='api.env')

# Subscript access (not `.get`) is deliberate: a missing variable raises
# KeyError in this cell instead of handing `None` to the clients built later.
HUGGINGFACE_API_KEY = os.environ['HUGGINGFACE_API_KEY']
github_token = os.environ['github_token']

In [3]:
from indox import IndoxRetrievalAugmentation
# Pipeline object that the later cells call into (connect_to_vectorstore,
# store_in_vectorstore, QuestionAnswer).
# NOTE(review): the variable has the same name as the `indox` package. The
# later `from indox.<submodule> import ...` statements still work because
# import statements resolve modules through the import system, not through
# this variable, but the shared name is easy to misread.
indox = IndoxRetrievalAugmentation()

[32mINFO[0m: [1mIndoxRetrievalAugmentation initialized[0m

            ██  ███    ██  ██████   ██████  ██       ██
            ██  ████   ██  ██   ██ ██    ██   ██  ██
            ██  ██ ██  ██  ██   ██ ██    ██     ██
            ██  ██  ██ ██  ██   ██ ██    ██   ██   ██
            ██  ██  █████  ██████   ██████  ██       ██
            


In [4]:
from indox.llms import HuggingFaceModel
from indox.embeddings import HuggingFaceEmbedding
# Language model that is later passed as `llm` to the question-answering step.
mistral_qa = HuggingFaceModel(
    api_key=HUGGINGFACE_API_KEY,
    model="mistralai/Mistral-7B-Instruct-v0.2",
)

# Embedding model that is later handed to the vector store as its
# `embedding_function`.
embed = HuggingFaceEmbedding(
    api_key=HUGGINGFACE_API_KEY,
    model="multi-qa-mpnet-base-cos-v1",
)

[32mINFO[0m: [1mInitializing HuggingFaceModel with model: mistralai/Mistral-7B-Instruct-v0.2[0m
[32mINFO[0m: [1mHuggingFaceModel initialized successfully[0m
[32mINFO[0m: [1mInitialized HuggingFaceEmbedding with model: multi-qa-mpnet-base-cos-v1[0m


In [5]:
from indox.data_connector import GithubClient, GithubRepositoryReader

# GitHub client authenticated with the token read from the environment.
github_client = GithubClient(github_token=github_token)

# Both filters below use the same INCLUDE mode.
# NOTE(review): presumably INCLUDE keeps only the listed directory / extension
# (i.e. Markdown files under docs/) — confirm against the reader's docs.
include_filter = GithubRepositoryReader.FilterType.INCLUDE

repo_reader = GithubRepositoryReader(
    github_client=github_client,
    owner="osllmai",
    repo="indoxjudge",
    filter_directories=(["docs"], include_filter),
    filter_file_extensions=([".md"], include_filter),
)

# Fetch the matching files from the `main` branch.
documents = repo_reader.load_content(branch="main")

In [6]:
# Guard against an empty load result: without it the next line fails with a
# bare "list index out of range" that says nothing about the likely cause.
# The exception type is kept as IndexError, which is what plain indexing raises.
if len(documents) == 0:
    raise IndexError(
        "No documents were loaded; check the GitHub token, branch and "
        "directory/extension filters passed to GithubRepositoryReader."
    )

# Only the first loaded document is carried through the rest of this notebook;
# any other files returned by the reader are not chunked or indexed.
content = documents[0]


In [7]:
# Echo the selected document as the cell output to sanity-check what was
# loaded before it is split and indexed.
content

'# Branch Naming and Pull Request Guidelines for the Team\n\n### Note 1: Branch Naming\n\nPay attention to the type of task assigned to you. Is it a feature, a bug, or a refactor?\n\n- If it\'s a bug: The branch name should start with the word "issue".\n- If it\'s a feature: The branch name should start with the word "feature".\n- If it\'s a refactor: The branch name should start with the word "refactor".\n- If it\'s for documentation : The branch name should start with the word "docs".\n### Note 2: Creating a Pull Request\n\nFor every branch you create, you need to make a pull request at the end of development. However, there are some rules:\n\n1. Ensure your code adheres to a set of technical guidelines before creating the pull request. This includes following coding standards and running all necessary tests.\n2. Write detailed descriptions for the pull request. This should include  an explanation of the issue solved and what you did.\n3. Limit your changes to no more than 10 files t

In [8]:
from indox.splitter import semantic_text_splitter
# Split the selected document into chunks; these are what gets stored in the
# vector store below.
# NOTE(review): 500 is presumably the maximum chunk size (tokens?) — confirm
# against the semantic_text_splitter signature and consider naming it.
content_chunks = semantic_text_splitter(content,500)

In [9]:
from indox.vector_stores import Chroma
# Chroma collection named "sample" that embeds text with the model configured
# earlier in the notebook.
db = Chroma(
    collection_name="sample",
    embedding_function=embed,
)

# Attach the store to the pipeline. This call is the cell's last expression,
# so its return value is echoed as the cell output.
indox.connect_to_vectorstore(vectorstore_database=db)

[32mINFO[0m: [1mConnection to the vector store database established successfully[0m


<indox.vector_stores.chroma.Chroma at 0x1cdd7612110>

In [10]:
# Embed the chunks and add them to the connected vector store. This call is
# the cell's last expression, so its return value is echoed as the output.
indox.store_in_vectorstore(docs=content_chunks)

[32mINFO[0m: [1mStoring documents in the vector store[0m
[32mINFO[0m: [1mEmbedding documents[0m
[32mINFO[0m: [1mStarting to fetch embeddings for texts using model: SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)[0m
[32mINFO[0m: [1mDocument added successfully to the vector store.[0m
[32mINFO[0m: [1mDocuments stored successfully[0m


<indox.vector_stores.chroma.Chroma at 0x1cdd7612110>

In [14]:
# Question to ask against the indexed documentation.
query = "What are the guidelines for creating a pull request?"

# Question-answering helper wired to the Chroma store and the Mistral model.
# NOTE(review): top_k=1 presumably limits retrieval to the single
# best-matching chunk — confirm against the QuestionAnswer documentation.
retriever = indox.QuestionAnswer(
    vector_database=db,
    llm=mistral_qa,
    top_k=1,
)

In [15]:
# Run the query; the cell's log output shows context being retrieved from the
# vector database and a request being sent to the Hugging Face API.
answer = retriever.invoke(query)
# Keep the retrieved context for inspection. It is read after invoke()
# because it is presumably populated by that call — TODO confirm.
context = retriever.context

[32mINFO[0m: [1mRetrieving context and scores from the vector database[0m
[32mINFO[0m: [1mEmbedding documents[0m
[32mINFO[0m: [1mStarting to fetch embeddings for texts using model: SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)[0m
[32mINFO[0m: [1mGenerating answer without document relevancy filter[0m
[32mINFO[0m: [1mAnswering question[0m
[32mINFO[0m: [1mSending request to Hugging Face API[0m
[32mINFO[0m: [1mReceived successful response from Hugging Face API[0m
[32mINFO[0m: [1mQuery answered successfully[0m


In [16]:
# Show the generated answer as the cell output.
answer

'The guidelines for creating a pull request include ensuring your code adheres to technical guidelines, writing detailed descriptions, limiting changes to no more than 10 files, getting at least one review and approval, encouraging the whole team to review the code, holding meetings for disagreements, and cleaning up by deleting the branch after merging.'