## Text Loader

In [21]:
from langchain_community.document_loaders import TextLoader

In [22]:
loader = TextLoader("../data/blog1.txt", encoding = "utf-8")
loader

<langchain_community.document_loaders.text.TextLoader at 0x200e41ec230>

In [23]:
document = loader.load()

In [24]:
document

[Document(page_content='Bagh Chal: A native board game on the brink of extinction\nStalls adorned with lustrous trinkets, intriguing masks, sacred tokens, singing bowls, intricate miniature statues, and colourful beads are scattered in the Valley’s three Durbar Squares. Their owners eye every passerby with expectations and longing, hoping that an inquisitive look from a passerby will lead to a profitable sale. However, among the vibrant fragments of our history, a square-shaped board, either metallic or wooden, often goes unnoticed by the locals.\n\n“No Nepali buys the Bagh Chal board. Only foreigners buy it. It’s not a popular item among foreigners too though. I used to sell hardly one-two boards a week before the pandemic,” says Ram Maharjan, one of the many stallowners in Basantapur Dabali, where about half of the stalls flaunt this age-old board game.\n\nThe Bagh Chal board has 5x5 grids which are interconnected creating 25 intersection points where the game pieces are placed. The 

## Text Splitter

In [29]:
from langchain.text_splitter import CharacterTextSplitter


In [35]:
splitter = CharacterTextSplitter(chunk_size = 1000, chunk_overlap = 20)

In [44]:
texts = splitter.split_documents(document)
len(texts)

11

In [42]:
print(texts[0].page_content)

Bagh Chal: A native board game on the brink of extinction
Stalls adorned with lustrous trinkets, intriguing masks, sacred tokens, singing bowls, intricate miniature statues, and colourful beads are scattered in the Valley’s three Durbar Squares. Their owners eye every passerby with expectations and longing, hoping that an inquisitive look from a passerby will lead to a profitable sale. However, among the vibrant fragments of our history, a square-shaped board, either metallic or wooden, often goes unnoticed by the locals.

“No Nepali buys the Bagh Chal board. Only foreigners buy it. It’s not a popular item among foreigners too though. I used to sell hardly one-two boards a week before the pandemic,” says Ram Maharjan, one of the many stallowners in Basantapur Dabali, where about half of the stalls flaunt this age-old board game.


In [43]:
print(texts[1].page_content)

The Bagh Chal board has 5x5 grids which are interconnected creating 25 intersection points where the game pieces are placed. The game pieces consist of four tiger pieces and 20 goat pieces which can move along the lines from one point to another. One player controls four tigers while the other player controls 20 goats. The objective for the player who plays the tiger is to capture four-five goats by jumping over the goats while the objective for the player who controls the goats is to gridlock the tigers so that they do not have any legal moves left.


## Embeddings

In [50]:
from langchain_community.embeddings import HuggingFaceEmbeddings

In [55]:
def download_embedding_model():
    embeddings = HuggingFaceEmbeddings(
        model_name = "sentence-transformers/paraphrase-MiniLM-L6-v2",
    )
    return embeddings

## downloading embedding from hugging face

In [57]:
embedding = download_embedding_model()
# this embedding model has 384 dimensions

  from tqdm.autonotebook import tqdm, trange


In [58]:
embedding

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
), model_name='sentence-transformers/paraphrase-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

## PineCone

In [60]:
from langchain_pinecone import PineconeVectorStore

In [61]:
import os
from dotenv import load_dotenv

load_dotenv()

os.environ['PINECONE_API_KEY'] = os.environ['PINECONE_API_KEY']

In [63]:
index_name="bloganalyzer"

In [66]:
vectorstore_from_docs = PineconeVectorStore.from_documents(
    texts,
    index_name=index_name,
    embedding=embedding
)

## VectorQA chain | Retrieval QA chain

In [84]:
prompt_template="""
give answer based on the context:
{context}

dont make up your own answer if the answer is not in the context just answer I dont know.

question: {question}

answer:
"""

In [85]:
from langchain.llms import CTransformers

model_path = "D:\Generative AI\Gen AI Language\local LLM\llama2-7b-q2chat.bin"

llm = CTransformers(
    model = model_path,
    model_type = "llama",
    max_new_tokens = 200,
    temperature = 0.7
)

  model_path = "D:\Generative AI\Gen AI Language\local LLM\llama2-7b-q2chat.bin"


In [86]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

PROMPT = PromptTemplate(
    template = prompt_template,
    input_variables = ["context", "question"]
)

In [87]:
qa = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = "stuff",
    retriever = vectorstore_from_docs.as_retriever(
        search_kwargs = {
            "k": 2
        }
    ),
    return_source_documents = True,
    chain_type_kwargs = {
        "prompt": PROMPT
    }
)

In [88]:
response = qa({
    "query": "What is the blog about?",
})

  warn_deprecated(


In [93]:
response["result"]

'The blog is about the ancient game called Bagh Chal, and the debate surrounding its conservation and availability through both offline and online versions of the game.'

In [94]:
# TODO: how to solve the tokens limit if the context size of llm becomes full RAG course padhne majaale

In [89]:
response2 = qa({
    "query": "What is baagchal game and where is it played mostly?"
})

Number of tokens (513) exceeded maximum context length (512).
Number of tokens (514) exceeded maximum context length (512).
Number of tokens (515) exceeded maximum context length (512).
Number of tokens (516) exceeded maximum context length (512).
Number of tokens (517) exceeded maximum context length (512).
Number of tokens (518) exceeded maximum context length (512).
Number of tokens (519) exceeded maximum context length (512).
Number of tokens (520) exceeded maximum context length (512).
Number of tokens (521) exceeded maximum context length (512).
Number of tokens (522) exceeded maximum context length (512).
Number of tokens (523) exceeded maximum context length (512).
Number of tokens (524) exceeded maximum context length (512).
Number of tokens (525) exceeded maximum context length (512).
Number of tokens (526) exceeded maximum context length (512).
Number of tokens (527) exceeded maximum context length (512).
Number of tokens (528) exceeded maximum context length (512).
Number o

In [95]:
print(response2["result"])

Bagh Chal is a traditional board game popular in Nepal, particularly in rural areas like Gorkha, Lamjung, Nuwak Nuw Nuwak, Nuwak etc.and Nuwak, Nuwak, Nuwak, and Nuwak, Nuwak, Nuwak, Nuwak, and Nuwaknow kathok, Nuwak and Nuwakwnuk and Nuwak and Nuwak and Nuwak, Nuwak, Nuwak, Nuwak, Nuwak, Nuwak, Nuwak and Nuwak and Nuwak, Nuwak, Nuwak, Nuwak, Nuwak, Nuwak, Nuwak, Nuwakwn Nuw Nuwak, Nuwak, Nuwak, Nuwak, Nuwak, Nuwak, and Nuwaktrk NuwalkjNuwak et.and Nuwak, Nuwak, Nuwak and Nuwakc, and Nuwak, Nuwak etc. Itan and Nuwak and Nuwak and Nuwak, Nuwak, Nuwak, Nuwaknow


In [115]:
similarity = vectorstore_from_docs.similarity_search("baagchal game board size exact", k=2)

In [118]:
similarity[0].page_content

'The Bagh Chal board has 5x5 grids which are interconnected creating 25 intersection points where the game pieces are placed. The game pieces consist of four tiger pieces and 20 goat pieces which can move along the lines from one point to another. One player controls four tigers while the other player controls 20 goats. The objective for the player who plays the tiger is to capture four-five goats by jumping over the goats while the objective for the player who controls the goats is to gridlock the tigers so that they do not have any legal moves left.'

In [119]:
similarity[1].page_content

'Bagh Chal: A native board game on the brink of extinction\nStalls adorned with lustrous trinkets, intriguing masks, sacred tokens, singing bowls, intricate miniature statues, and colourful beads are scattered in the Valley’s three Durbar Squares. Their owners eye every passerby with expectations and longing, hoping that an inquisitive look from a passerby will lead to a profitable sale. However, among the vibrant fragments of our history, a square-shaped board, either metallic or wooden, often goes unnoticed by the locals.\n\n“No Nepali buys the Bagh Chal board. Only foreigners buy it. It’s not a popular item among foreigners too though. I used to sell hardly one-two boards a week before the pandemic,” says Ram Maharjan, one of the many stallowners in Basantapur Dabali, where about half of the stalls flaunt this age-old board game.'