In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/mistral-7b-instruct/gguf/mistral-7b-instructv0.1/1/mistral-7b-instruct-v0.1.Q8_0.gguf


In [8]:
#!pip install langchain llama-cpp-python 

In [9]:
#!pip install sentence-transformers

In [10]:
#!pip install chromadb

# Import LLM model

In [7]:
# import packages from langchains for llm
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.embeddings import HuggingFaceEmbeddings

In [11]:
model_path = '/kaggle/input/mistral-7b-instruct/gguf/mistral-7b-instructv0.1/1/mistral-7b-instruct-v0.1.Q8_0.gguf'

In [12]:
callbackManager = CallbackManager([StreamingStdOutCallbackHandler()])

In [13]:
n_gpu_layers = 2  # Metal set to 1 is enough.
n_batch = 512  

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=2048,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    verbose=False,
)

# Scrap Data from Website

In [14]:
# import python packages to scrap data from sites

from langchain.document_loaders import WebBaseLoader
from langchain.document_loaders import PyPDFLoader

In [15]:
web_path = 'https://python.langchain.com/docs/use_cases/query_analysis/'

In [16]:
# now load the document

loader = WebBaseLoader(
    web_path = web_path,
    header_template = None,
    verify_ssl = True,
    proxies = None,
    continue_on_failure = False,
    requests_per_second = 2,
    default_parser = 'html.parser'
)

In [17]:
docs = loader.load()
txt_data = docs[0].page_content

In [18]:
len(txt_data)

5570

In [19]:
txt_data[0:500]

'\n\n\n\n\nQuery analysis | 🦜️🔗 LangChain\n\n\n\n\n\n\n\nSkip to main contentComponentsIntegrationsGuidesAPI ReferenceMorePeopleVersioningContributingTemplatesCookbooksTutorialsYouTube🦜️🔗LangSmithLangSmith DocsLangServe GitHubTemplates GitHubTemplates HubLangChain HubJS/TS Docs💬SearchGet startedIntroductionQuickstartInstallationUse casesQ&A with RAGExtracting structured outputChatbotsTool use and agentsQuery analysisQuickstarthow_toQuery analysistechniquesQ&A over SQL + CSVMoreExpression LanguageGet startedRu'

# Text Splitter

In [20]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter

In [21]:
# now define the chunks and overlap

chunks_size = 500
chunks_overlap = 40

rec_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", "(?<=\. )", " ", ""],
    chunk_size=chunks_size,
    chunk_overlap=chunks_overlap,
    is_separator_regex=False
)

In [22]:
rec_splitter.split_text(txt_data[0:1000])

['Query analysis | 🦜️🔗 LangChain',
 'Skip to main contentComponentsIntegrationsGuidesAPI ReferenceMorePeopleVersioningContributingTemplatesCookbooksTutorialsYouTube🦜️🔗LangSmithLangSmith DocsLangServe GitHubTemplates GitHubTemplates HubLangChain HubJS/TS Docs💬SearchGet startedIntroductionQuickstartInstallationUse casesQ&A with RAGExtracting structured outputChatbotsTool use and agentsQuery analysisQuickstarthow_toQuery analysistechniquesQ&A over SQL + CSVMoreExpression LanguageGet startedRunnable interfacePrimitivesAdvantages of',
 'interfacePrimitivesAdvantages of LCELStreamingAdd message history (memory)MoreEcosystem🦜🛠️ LangSmith🦜🕸️LangGraph🦜️🏓 LangServeSecurityUse casesQuery analysisOn this pageQuery analysis“Search” powers many use cases - including the “retrieval” part of',
 'Retrieval Augmented Generation. The simplest way to do this involves\npassing the user question directly to a retriever. In order to improve\nperformance, you can also “optimize” the query in some way using qu

In [23]:
# now try the Simple charater splitter

splitter = CharacterTextSplitter(
    separator='\n',
    chunk_size=chunks_size,
    chunk_overlap = chunks_overlap
)

In [24]:
splitter.split_text(txt_data[0: 1500])

['Query analysis | 🦜️🔗 LangChain',
 'Skip to main contentComponentsIntegrationsGuidesAPI ReferenceMorePeopleVersioningContributingTemplatesCookbooksTutorialsYouTube🦜️🔗LangSmithLangSmith DocsLangServe GitHubTemplates GitHubTemplates HubLangChain HubJS/TS Docs💬SearchGet startedIntroductionQuickstartInstallationUse casesQ&A with RAGExtracting structured outputChatbotsTool use and agentsQuery analysisQuickstarthow_toQuery analysistechniquesQ&A over SQL + CSVMoreExpression LanguageGet startedRunnable interfacePrimitivesAdvantages of LCELStreamingAdd message history (memory)MoreEcosystem🦜🛠️ LangSmith🦜🕸️LangGraph🦜️🏓 LangServeSecurityUse casesQuery analysisOn this pageQuery analysis“Search” powers many use cases - including the “retrieval” part of',
 'Retrieval Augmented Generation. The simplest way to do this involves\npassing the user question directly to a retriever. In order to improve\nperformance, you can also “optimize” the query in some way using query\nanalysis. This is traditionally 

In [25]:
split = rec_splitter.split_documents(
    docs
)

In [26]:
embedding = HuggingFaceEmbeddings(
    model_name='all-MiniLM-L6-v2'
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [27]:
t1= "I love dogs"
t2 = "I love cats"
t3 = "Study is very hard"

In [28]:
embd1 = embedding.embed_query(t1)
embd2 = embedding.embed_query(t2)
embd3 = embedding.embed_query(t3)

In [29]:
np.dot(embd1,embd2), np.dot(embd2,embd3)

(0.7720984757242813, 0.06933481201584218)

# Add Vector Databases

In [30]:
from langchain.vectorstores import Chroma
from langchain.vectorstores import pinecone

In [31]:
db = Chroma.from_documents(
    documents= split,
    embedding=embedding,
    collection_name='langchain',
    persist_directory='docs/chroma/'
)

In [32]:
query = 'word embedding'

In [33]:
db.similarity_search(
    query=query,
    k=3
)

[Document(page_content='embeddings may not be very similar to those of the relevant\ndocuments. Instead it might help to have the model generate a\nhypothetical relevant document, and then use that to perform\nsimilarity search.Query routing:\nIf we have multiple indexes and only a subset are useful for any\ngiven user input, we can route the input to only retrieve results\nfrom the relevant ones.Step back\nprompting:\nSometimes search quality and model generations can be tripped up by', metadata={'description': '“Search” powers many use cases - including the “retrieval” part of', 'language': 'en', 'source': 'https://python.langchain.com/docs/use_cases/query_analysis/', 'title': 'Query analysis | 🦜️🔗 LangChain'}),
 Document(page_content='decompose the input into separate queries that will each be executed\nindependently.Query\nexpansion: If\nan index is sensitive to query phrasing, we can generate multiple\nparaphrased versions of the user question to increase our chances of\nretrievin

In [37]:
# using MMR

mmr_docs = db.max_marginal_relevance_search(
    query=query,
    k=2,
    fetch_k=2
)

In [38]:
mmr_docs

[Document(page_content='embeddings may not be very similar to those of the relevant\ndocuments. Instead it might help to have the model generate a\nhypothetical relevant document, and then use that to perform\nsimilarity search.Query routing:\nIf we have multiple indexes and only a subset are useful for any\ngiven user input, we can route the input to only retrieve results\nfrom the relevant ones.Step back\nprompting:\nSometimes search quality and model generations can be tripped up by', metadata={'description': '“Search” powers many use cases - including the “retrieval” part of', 'language': 'en', 'source': 'https://python.langchain.com/docs/use_cases/query_analysis/', 'title': 'Query analysis | 🦜️🔗 LangChain'}),
 Document(page_content='decompose the input into separate queries that will each be executed\nindependently.Query\nexpansion: If\nan index is sensitive to query phrasing, we can generate multiple\nparaphrased versions of the user question to increase our chances of\nretrievin

In [40]:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

In [41]:
meta_data_field_info = [
    AttributeInfo(
        name='source',
        description='Text chunks should be one of `/docs/use_cases/query_analysis/`',
        type='string'
    ),
    AttributeInfo(
        name='source',
        description='The heading from contexts',
        type='string'
    )
]

In [50]:
!pip install lark

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [52]:
import lark

In [53]:
docs_content_disc = 'Langchain RAG'

ret = SelfQueryRetriever.from_llm(
    llm,
    db,
    docs_content_disc,
    meta_data_field_info,
    verbose=True
)

ImportError: Cannot import lark, please install it with 'pip install lark'.