In [1]:
import os
from dotenv import load_dotenv

# Load the variables defined in the local `api.env` file so the secrets stay out of the notebook.
load_dotenv('api.env')

# Subscript lookups (not .get) so a missing variable stops the notebook here with a KeyError.
HUGGINGFACE_API_KEY = os.environ['HUGGINGFACE_API_KEY']
github_token = os.environ['github_token']

In [2]:
from indox import IndoxRetrievalAugmentation
# Pipeline object reused by later cells to attach the vector store, index the chunks
# and build the question-answering retriever.
indox = IndoxRetrievalAugmentation()

INFO: IndoxRetrievalAugmentation initialized

            ██  ███    ██  ██████   ██████  ██       ██
            ██  ████   ██  ██   ██ ██    ██   ██  ██
            ██  ██ ██  ██  ██   ██ ██    ██     ██
            ██  ██  ██ ██  ██   ██ ██    ██   ██   ██
            ██  ██  █████  ██████   ██████  ██       ██
            


In [3]:
from indox.llms import HuggingFaceModel
from indox.embeddings import HuggingFaceEmbedding
# Generation model: Mistral-7B-Instruct, authenticated with the Hugging Face key.
mistral_qa = HuggingFaceModel(
    api_key=HUGGINGFACE_API_KEY,
    model="mistralai/Mistral-7B-Instruct-v0.2",
)

# Embedding model handed to the vector store further down.
embed = HuggingFaceEmbedding(
    api_key=HUGGINGFACE_API_KEY,
    model="multi-qa-mpnet-base-cos-v1",
)

INFO: Initializing HuggingFaceModel with model: mistralai/Mistral-7B-Instruct-v0.2
INFO: HuggingFaceModel initialized successfully
INFO: Initialized HuggingFaceEmbedding with model: multi-qa-mpnet-base-cos-v1


In [13]:
from indox.data_connector import GithubClient, GithubRepositoryReader

# GitHub API client authenticated with the token read from the environment in the first cell.
github_client = GithubClient(github_token=github_token)

# Reader for the osllmai/indoxjudge repository, restricted to Markdown files.
# NOTE(review): the directory filter names only "docs", yet the recorded run of this cell
# also lists root-level files (README.md, Branch_and_PR_Guidelines.md) — confirm how
# GithubRepositoryReader applies `filter_directories`.
repo_reader = GithubRepositoryReader(
    github_client=github_client,
    owner="osllmai",
    repo="indoxjudge",
    filter_directories=(["docs"], GithubRepositoryReader.FilterType.INCLUDE),
    filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
)

documents = repo_reader.load_data(branch="main")

# Fail with an actionable message instead of a bare "list index out of range" when
# nothing was loaded (e.g. invalid token, or filters that match no file).
# The exception type is kept as IndexError, which is what `documents[0]` would raise.
if not documents:
    raise IndexError(
        "No documents were loaded from osllmai/indoxjudge (branch 'main'); "
        "check the GitHub token and the directory/extension filters."
    )

# Only the first loaded document is carried forward; the later cells chunk and index
# `doc.content` and nothing else.
doc = documents[0]


Processing file: Branch_and_PR_Guidelines.md
Processing file: README.md
Processing file: docs/metrics/AnswerRelevancy.md
Processing file: docs/metrics/BLEU.md
Processing file: docs/metrics/Bertscore.md
Processing file: docs/metrics/Bias.md
Processing file: docs/metrics/ContextualRelevancy.md
Processing file: docs/metrics/Fairness.md
Processing file: docs/metrics/Faithfulness.md
Processing file: docs/metrics/GEval.md
Processing file: docs/metrics/Gruen.md
Processing file: docs/metrics/Hallucination.md
Processing file: docs/metrics/Harmfulness.md
Processing file: docs/metrics/KnowledgeRetention.md
Processing file: docs/metrics/METEOR.md
Processing file: docs/metrics/MachineEthics.md
Processing file: docs/metrics/Misinformation.md
Processing file: docs/metrics/Privacy.md
Processing file: docs/metrics/ROUGE.md
Processing file: docs/metrics/Stereotype and Bias.md
Processing file: docs/metrics/Toxicity.md
Processing file: docs/piplines/CustomEvaluator.md
Processing file: docs/piplines/LLMCom

In [16]:
# Content of the first loaded document (`doc = documents[0]`); this is the only input
# passed to the splitter in the next cell.
content = doc.content


In [18]:
from indox.splitter import semantic_text_splitter
# Split the document content into chunks before embedding.
# NOTE(review): 500 is presumably the maximum chunk size in tokens — confirm against
# the indox.splitter documentation.
content_chunks = semantic_text_splitter(content, 500)

In [19]:
from indox.vector_stores import Chroma
# Chroma collection "sample", using `embed` as its embedding function.
db = Chroma(
    collection_name="sample",
    embedding_function=embed,
)

# Attach the store to the Indox pipeline. Left as the last expression so the notebook
# displays the returned object.
indox.connect_to_vectorstore(vectorstore_database=db)

INFO: Connection to the vector store database established successfully


<indox.vector_stores.chroma.Chroma at 0x29184b57d60>

In [20]:
# Embed the chunks and add them to the connected vector store.
# Left as the last expression so the notebook displays the returned object.
indox.store_in_vectorstore(docs=content_chunks)

INFO: Storing documents in the vector store
INFO: Embedding documents
INFO: Starting to fetch embeddings for texts using model: SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
INFO: Document added successfully to the vector store.
INFO: Documents stored successfully


<indox.vector_stores.chroma.Chroma at 0x29184b57d60>

In [23]:
query = "What are the guidelines for creating a pull request?"

# Question-answering pipeline: looks up context in `db` and answers with `mistral_qa`.
# NOTE(review): top_k=2 presumably limits retrieval to the two best-matching chunks — confirm.
retriever = indox.QuestionAnswer(
    vector_database=db,
    llm=mistral_qa,
    top_k=2,
)

In [24]:
# Run the query through the retriever; the recorded log shows context retrieval from the
# vector database followed by a request to the Hugging Face API.
answer = retriever.invoke(query)
# Context exposed by the retriever after the call above, kept for inspection.
# NOTE(review): presumably the retrieved chunks used to build the answer — confirm.
context = retriever.context

INFO: Retrieving context and scores from the vector database
INFO: Embedding documents
INFO: Starting to fetch embeddings for texts using model: SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
INFO: Generating answer without document relevancy filter
INFO: Answering question
INFO: Sending request to Hugging Face API
INFO: Received successful response from Hugging Face API
INFO: Query answered successfully


In [25]:
# Bare expression so the notebook displays the generated answer.
answer

"The guidelines for creating a pull request include ensuring the code adheres to technical guidelines and passing all necessary tests before creating the pull request. Write detailed descriptions for the pull request, including an explanation of the issue solved and what was done. Limit changes to no more than 10 files, and if there are more, split them into multiple branches and pull requests. At least one review and approval are required before merging, and it's best if the whole team reviews the code."