## Installations


In [1]:
# !pip install langchain-groq
# %pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai langchain-chroma bs4
# !pip install shutup
# !pip install sentence-transformers==2.7.0

## Imports

In [2]:

import os
import shutup
import torch
from typing import List, Dict, Tuple
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from torch import Tensor
from langchain_groq import ChatGroq
from torch.utils.data import Dataset, DataLoader
from sentence_transformers.readers import InputExample
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer
from sentence_transformers.losses import CoSENTLoss, MatryoshkaLoss
shutup.please()

In [3]:
# Load data
loader = TextLoader("essay.txt")
docs = loader.load()

In [4]:

# Split text into chunks 
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)

## GROQ

In [5]:
groq_key = 'yourgroq-key'

In [6]:
matryoshka_dim = 64

# Define the embedding model
#using mixbread's embedding and in binary mode with truncated (matryoshka embeddings)

embeddings = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1", 
                                   model_kwargs = {'truncate_dim':matryoshka_dim},
                                   encode_kwargs = {'precision': 'binary'})


In [7]:
# FAISS the vector store 
vector = FAISS.from_documents(documents, embeddings)

# Define a retriever interface
retriever = vector.as_retriever()

# Define LLM
model = ChatGroq(temperature=0, groq_api_key=groq_key, model_name="mixtral-8x7b-32768")

# Define prompt template
prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}""")

# Create a retrieval chain to answer questions
document_chain = create_stuff_documents_chain(model, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)


## retrieved responses

In [8]:
%%time
response = retrieval_chain.invoke({"input": "where did fifa happened ?"})
print(response["answer"])

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


The 2022 FIFA World Cup was held in Qatar, making it the first World Cup to be held in the Arab world and Muslim world, and the second held entirely in Asia after the 2002 tournament in South Korea and Japan.
CPU times: user 150 ms, sys: 67.4 ms, total: 217 ms
Wall time: 1.02 s
