In [None]:
!pip install -q langchain langchain_core langchain_community sentence_transformers faiss-cpu unstructured chromadb Cython tiktoken unstructured[local-inference] langchain_groq

In [None]:
import getpass
import os

# Only prompt when the environment does not already carry a Groq key; the value
# is read with getpass (not input) and stored in the process environment, where
# the later ChatGroq cell reads it back.
if os.environ.get("GROQ_API_KEY") is None:
    os.environ["GROQ_API_KEY"] = getpass.getpass(prompt="Provide your GROQ API TOKEN")

In [None]:
# Document Loader
# TextLoader is provided by langchain_community (installed in the first cell);
# the old `langchain.document_loaders` path is a deprecated alias.
from langchain_community.document_loaders import TextLoader

# NOTE(review): this path looks like a Colab Google Drive mount. No mount call is
# visible in this notebook, so confirm Drive is mounted before running this cell.
loader = TextLoader('/content/drive/MyDrive/About India, cricket.txt')
documents = loader.load()

In [None]:
# Display everything returned by loader.load() as the cell output.
documents

In [None]:
import textwrap

def wrap_text_preserve_newlines(text, width=110):
    """Wrap ``text`` to ``width`` columns without merging its existing lines.

    The input is cut at every newline, each piece is wrapped independently
    with ``textwrap.fill``, and the pieces are joined back with newlines, so
    every line break present in ``text`` is still a line break in the result.

    Args:
        text: The string to wrap.
        width: Maximum line width passed to ``textwrap.fill`` (default 110).

    Returns:
        The wrapped text as a single string.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )

In [None]:
# Pretty-print the string form of the first loaded document, wrapped for reading.
first_document_text = str(documents[0])
print(wrap_text_preserve_newlines(first_document_text))

In [None]:
# Text Splitter
# CharacterTextSplitter is published in the langchain_text_splitters package;
# the old `langchain.text_splitter` path is a deprecated alias.
from langchain_text_splitters import CharacterTextSplitter

# Target chunks of 1000 characters with no overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

In [None]:
# Number of chunks the splitter produced.
len(docs)

In [None]:
# Inspect the second chunk (index 1) to sanity-check the split.
docs[1]

In [None]:
# Inspect the third chunk (index 2) to sanity-check the split.
docs[2]

In [None]:
# HuggingFaceEmbeddings is provided by langchain_community (installed in the
# first cell); the old `langchain.embeddings` path is a deprecated alias.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Name the model explicitly instead of relying on the implicit default. This is
# the model the class uses when none is given, so the vectors are unchanged.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [None]:
# FAISS is provided by langchain_community (installed in the first cell); the
# old `langchain.vectorstores` path is a deprecated alias.
from langchain_community.vectorstores import FAISS

# Embed every chunk in `docs` and build an in-memory FAISS index over them.
db = FAISS.from_documents(docs, embeddings)

In [None]:
query = "explain about indian premier league"
docs = db.similarity_search(query)

In [None]:
print(wrap_text_preserve_newlines(str(docs[0].page_content)))

# Create the LLM

In [None]:
import langchain_groq
from langchain_groq import ChatGroq

# Chat model served by Groq, authenticated with the key stored in the
# GROQ_API_KEY environment variable by the earlier prompt cell.
# NOTE(review): confirm that this model id is still offered by Groq.
GROQ_LLM = ChatGroq(
    model="gemma2-9b-it",
    api_key=os.environ.get("GROQ_API_KEY"),
)

In [None]:
from langchain.chains.question_answering import load_qa_chain

# Question-answering chain over the Groq model, using the "stuff" chain type.
# NOTE(review): load_qa_chain is flagged as deprecated in recent LangChain
# releases; plan a migration (e.g. create_stuff_documents_chain) when upgrading.
chain = load_qa_chain(llm=GROQ_LLM, chain_type="stuff")

In [None]:
query = "HOW IS INDIAN ECONOMY"
# Search hits get their own name so the split chunks in `docs` are not overwritten.
economy_hits = db.similarity_search(query)
# Chain.run is deprecated; invoke() takes the same inputs as a dict and returns a
# dict whose "output_text" entry is the answer string that run() used to return.
chain.invoke({"input_documents": economy_hits, "question": query})["output_text"]

In [None]:
# Spelling of "geography" corrected: the query text is embedded for the
# similarity search, so a misspelling can degrade retrieval.
query = "explain about indian geography?"
# Search hits get their own name so the split chunks in `docs` are not overwritten.
geography_hits = db.similarity_search(query)
# Chain.run is deprecated; invoke() takes the same inputs as a dict and returns a
# dict whose "output_text" entry is the answer string that run() used to return.
chain.invoke({"input_documents": geography_hits, "question": query})["output_text"]