In [1]:
import os
from dotenv import load_dotenv

# Read the key=value pairs from api.env into the process environment.
load_dotenv(dotenv_path='api.env')

# Subscript lookups (rather than .get) so a missing secret raises KeyError
# in this cell instead of surfacing later as an authentication failure.
HUGGINGFACE_API_KEY = os.environ['HUGGINGFACE_API_KEY']
discord_token = os.environ['discord_token']

In [2]:
from indox import IndoxRetrievalAugmentation
# Main pipeline object; later cells call connect_to_vectorstore and
# store_in_vectorstore on it.
# NOTE(review): the variable shares its name with the `indox` package. The
# `from indox... import ...` statements in later cells still work because import
# statements resolve the module by name rather than through this variable, but
# a distinct name would be less confusing.
indox = IndoxRetrievalAugmentation()

[32mINFO[0m: [1mIndoxRetrievalAugmentation initialized[0m

            ██  ███    ██  ██████   ██████  ██       ██
            ██  ████   ██  ██   ██ ██    ██   ██  ██
            ██  ██ ██  ██  ██   ██ ██    ██     ██
            ██  ██  ██ ██  ██   ██ ██    ██   ██   ██
            ██  ██  █████  ██████   ██████  ██       ██
            


In [3]:
from indox.llms import HuggingFaceModel
from indox.embeddings import HuggingFaceEmbedding
# LLM handle (named for question answering; not used by the cells shown here).
mistral_qa = HuggingFaceModel(
    api_key=HUGGINGFACE_API_KEY,
    model="mistralai/Mistral-7B-Instruct-v0.2",
)
# Embedding model; handed to the Chroma store as its embedding_function.
embed = HuggingFaceEmbedding(
    api_key=HUGGINGFACE_API_KEY,
    model="multi-qa-mpnet-base-cos-v1",
)

[32mINFO[0m: [1mInitializing HuggingFaceModel with model: mistralai/Mistral-7B-Instruct-v0.2[0m
[32mINFO[0m: [1mHuggingFaceModel initialized successfully[0m
[32mINFO[0m: [1mInitialized HuggingFaceEmbedding with model: multi-qa-mpnet-base-cos-v1[0m


In [5]:
from indox.data_connector import DiscordChannelReader
import nest_asyncio

# Jupyter already runs an asyncio event loop; nest_asyncio patches asyncio so
# that another loop can be run from inside this cell (presumably what the
# reader's Discord client needs — confirm against the indox source).
nest_asyncio.apply()
reader = DiscordChannelReader(bot_token=discord_token)

# Channel(s) to read and the `num_messages` value passed to load_data, named
# here so they are easy to find and change.
channel_ids = [1275046109722447915]
num_messages = 50
documents = reader.load_data(channel_ids=channel_ids, num_messages=num_messages)

# Fail with an actionable message instead of a bare IndexError when nothing
# came back from Discord.
if len(documents) == 0:
    raise RuntimeError(
        f"No documents were loaded from Discord channel(s) {channel_ids}; "
        "check the bot token, the bot's access to the channel, and that the "
        "channel contains messages."
    )
doc = documents[0]

[2024-08-20 17:17:32] [INFO    ] discord.client: logging in using static token
[2024-08-20 17:17:32] [INFO    ] discord.client: logging in using static token
[2024-08-20 17:17:36] [INFO    ] discord.gateway: Shard ID None has connected to Gateway (Session ID: 142adc57bee93f8db26062af0704b5b8).
[2024-08-20 17:17:36] [INFO    ] discord.gateway: Shard ID None has connected to Gateway (Session ID: 142adc57bee93f8db26062af0704b5b8).


In [8]:
# Pull the `content` attribute off the first loaded document; this is the
# value that gets split into chunks in the next cell.
content = doc.content


In [9]:
from indox.splitter import semantic_text_splitter
# Split the Discord content into chunks that are later stored in the vector store.
# NOTE(review): 500 is passed positionally; presumably a maximum chunk size —
# confirm its meaning and unit against the indox splitter documentation.
content_chunks = semantic_text_splitter(content,500)

In [10]:
from indox.vector_stores import Chroma
# Chroma collection "sample", using the `embed` model as its embedding function.
db = Chroma(
    collection_name="sample",
    embedding_function=embed,
)
# Attach the store to the pipeline. Left as the cell's last expression, so its
# return value is displayed as the cell output.
indox.connect_to_vectorstore(vectorstore_database=db)

[32mINFO[0m: [1mConnection to the vector store database established successfully[0m


<indox.vector_stores.chroma.Chroma at 0x20492683010>

In [11]:
# Store the chunks in the connected vector store. Left as the cell's last
# expression, so its return value is displayed as the cell output.
indox.store_in_vectorstore(docs=content_chunks)

[32mINFO[0m: [1mStoring documents in the vector store[0m
[32mINFO[0m: [1mEmbedding documents[0m
[32mINFO[0m: [1mStarting to fetch embeddings for texts using model: SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)[0m
[32mINFO[0m: [1mDocument added successfully to the vector store.[0m
[32mINFO[0m: [1mDocuments stored successfully[0m


<indox.vector_stores.chroma.Chroma at 0x20492683010>