In [2]:

from dotenv import dotenv_values
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.chains import LLMChain
from langchain.schema import HumanMessage, SystemMessage, AIMessage

# Load the OpenRouter API key from the local env file
env_values = dotenv_values("./app.env")
openai_api_key = env_values.get("OPENROUTER_API_KEY")

# `.get()` returns None when the entry is absent; stop here with a clear
# message rather than handing an empty key to the client.
if not openai_api_key:
    raise ValueError("OPENROUTER_API_KEY is missing or empty in ./app.env")

# Initialize the chat model against the OpenRouter endpoint.
# Another model id (e.g. "openai/gpt-4o-mini") can be swapped in here.
llm = ChatOpenAI(
    model_name="openai/gpt-oss-20b:free",
    openai_api_base="https://openrouter.ai/api/v1",
    openai_api_key=openai_api_key,
)


## Load Data from CSV to LangChain using Pandas

In [1]:
!wget -O youtube-sub.csv https://raw.githubusercontent.com/Petlja/JupyterBookSrCyr/master/podaci/Top%2025%20YouTubers.csv

--2025-10-19 17:00:02--  https://raw.githubusercontent.com/Petlja/JupyterBookSrCyr/master/podaci/Top%2025%20YouTubers.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1153 (1.1K) [text/plain]
Saving to: ‘youtube-sub.csv’


2025-10-19 17:00:03 (3.27 MB/s) - ‘youtube-sub.csv’ saved [1153/1153]



In [5]:
import pandas as pd
from langchain.document_loaders import DataFrameLoader

In [6]:
# Read the downloaded CSV into a DataFrame and preview the first rows
csv_path = "./youtube-sub.csv"
dataframe = pd.read_csv(csv_path, sep=",")
dataframe.head()

Unnamed: 0,RANK,GRADE,NAME,VIDEOS,SUBSCRIBERS,VIEWES
0,1,A++,T-Series,13629,105783888,76945588449
1,2,A,PewDiePie,3898,97853589,22298927681
2,3,A+,5-Minute Crafts,3341,58629572,14860695079
3,4,A++,Cocomelon - Nursery Rhymes,441,53163816,33519273951
4,5,A++,SET India,31923,51784081,36464793233


In [7]:
# Keep only the columns that will be handed to the document loader
keep_columns = ["NAME", "VIDEOS", "SUBSCRIBERS", "VIEWES"]
dataframe = dataframe[keep_columns]
dataframe.head()

Unnamed: 0,NAME,VIDEOS,SUBSCRIBERS,VIEWES
0,T-Series,13629,105783888,76945588449
1,PewDiePie,3898,97853589,22298927681
2,5-Minute Crafts,3341,58629572,14860695079
3,Cocomelon - Nursery Rhymes,441,53163816,33519273951
4,SET India,31923,51784081,36464793233


In [8]:
llm_loader = DataFrameLoader(dataframe, page_content_column="NAME")

In [9]:
llm_data = llm_loader.load()

In [10]:
llm_data[5].dict()

{'page_content': 'Canal KondZilla',
 'metadata': {'VIDEOS': 1100, 'SUBSCRIBERS': 50560964, 'VIEWES': 25446405744}}

## Load Data from Internet

In [11]:
from langchain.document_loaders import UnstructuredURLLoader

urls = [
    "https://www.apple.com/in/support/products/faqs.html",
    "https://www.apple.com/legal/sales-support/",
]

In [14]:
llm_loader = UnstructuredURLLoader(urls=urls)
llm_data = llm_loader.load()

In [15]:
llm_data[0]

Document(page_content='Frequently Asked Questions\n\nWhere can I purchase the AppleCare Protection Plan?\n\nThe AppleCare Protection Plan is available at the Apple Online Store and many Apple-authorised resellers and wireless service providers.\n\nHow do I initiate repair service under the AppleCare Protection Plan?\n\nCarry-in service. Carry your product into an Apple Authorised Service Provider.\n\nOnsite service. Contact us and we’ll help you arrange an Apple-authorised repair for your desktop Mac at your location, at no additional charge.\n\nDo-It-Yourself service. Contact us and we may be able to send you what you need to service your own product, such as accessories.\n\nThe AppleCare Protection Plan for Mac, for iPod and for Apple TV provide global repair coverage. Service will be limited to the options available in the country where service is requested. Service options, parts availability and response times vary by country.\n\nAll repairs will be completed using genuine Apple p

## Loaders | Wikipedia

In [17]:
from langchain.document_loaders import WikipediaLoader

query = "Nikola Tesla"

# Configure the loader first, then fetch the documents in a separate step
wiki_loader = WikipediaLoader(
    query=query,
    load_max_docs=3,
    doc_content_chars_max=20_000,
)
llm_data = wiki_loader.load()


In [18]:
llm_data[0].page_content

'Nikola Tesla (10 July 1856 – 7 January 1943) was a Serbian-American engineer, futurist, and inventor. He is known for his contributions to the design of the modern alternating current (AC) electricity supply system.\nBorn and raised in the Austrian Empire, Tesla first studied engineering and physics in the 1870s without receiving a degree. He then gained practical experience in the early 1880s working in telephony and at Continental Edison in the new electric power industry. In 1884, he immigrated to the United States, where he became a naturalized citizen. He worked for a short time at the Edison Machine Works in New York City before he struck out on his own. With the help of partners to finance and market his ideas, Tesla set up laboratories and companies in New York to develop a range of electrical and mechanical devices. His AC induction motor and related polyphase AC patents, licensed by Westinghouse Electric in 1888, earned him a considerable amount of money and became the corne

In [19]:
llm_data[0].metadata

{'title': 'Nikola Tesla',
 'summary': 'Nikola Tesla (10 July 1856 – 7 January 1943) was a Serbian-American engineer, futurist, and inventor. He is known for his contributions to the design of the modern alternating current (AC) electricity supply system.\nBorn and raised in the Austrian Empire, Tesla first studied engineering and physics in the 1870s without receiving a degree. He then gained practical experience in the early 1880s working in telephony and at Continental Edison in the new electric power industry. In 1884, he immigrated to the United States, where he became a naturalized citizen. He worked for a short time at the Edison Machine Works in New York City before he struck out on his own. With the help of partners to finance and market his ideas, Tesla set up laboratories and companies in New York to develop a range of electrical and mechanical devices. His AC induction motor and related polyphase AC patents, licensed by Westinghouse Electric in 1888, earned him a considerabl

### Loaders | PDF

##### For more PDF loaders: https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/pdf


In [20]:
!wget -O laravel.pdf https://laraveldaily.com/wp-content/uploads/2020/04/laravel-tips-2020-04.pdf

--2025-10-19 18:15:31--  https://laraveldaily.com/wp-content/uploads/2020/04/laravel-tips-2020-04.pdf
Resolving laraveldaily.com (laraveldaily.com)... 104.21.86.73, 172.67.216.144
Connecting to laraveldaily.com (laraveldaily.com)|104.21.86.73|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 314570 (307K) [application/pdf]
Saving to: ‘laravel.pdf’


2025-10-19 18:15:32 (985 KB/s) - ‘laravel.pdf’ saved [314570/314570]



In [21]:
from langchain.document_loaders import PyPDFLoader

In [22]:
llm_loader = PyPDFLoader("./laravel.pdf")
pages = llm_loader.load_and_split()

In [23]:
len(pages)

41

In [24]:
pages[5].page_content

"Tip 11. No timestamp columns\n \n \nIf your DB table doesn't contain timestamp fields \n\u200b \ncreated_at\n\u200b \n and \n\u200b \nupdated_at\n\u200b \n, you can\n \nspecify that Eloquent model wouldn't use them, with \n\u200b \n$timestamps = false\n\u200b \n property.\n \n \nclass Company extends Model\n \n{\n \n    public $timestamps = false;\n \n}\n \n \n \n \n \n \nTip 12. Migration fields with timezones\n \n \nDid you know that in migrations there's not only \n\u200b \ntimestamps()\n\u200b \n but also \n\u200b \ntimestampsTz()\n\u200b \n, for\n \nthe timezone?\n \n \nSchema::create('employees', function (Blueprint $table) {\n \n    $table->increments('id');\n \n    $table->string('name');\n \n    $table->string('email');\n \n    $table->timestampsTz();\n \n});\n \n \nAlso, there are columns \n\u200b \ndateTimeTz()\n\u200b \n, \n\u200b \ntimeTz()\n\u200b \n, \n\u200b \ntimestampTz()\n\u200b \n, \n\u200b \nsoftDeletesTz()\n\u200b \n.\n \n \n \n \nTip 13. Eloquent has() deeper\n 

In [25]:
pages[5].metadata

{'source': './laravel.pdf', 'page': 5}

## Load PDF from internet

In [None]:
from langchain.document_loaders import OnlinePDFLoader

url = "https://laraveldaily.com/wp-content/uploads/2020/04/laravel-tips-2020-04.pdf"

llm_loader = OnlinePDFLoader(url)

pages = llm_loader.load_and_split()

## Load All Files in a Directory

In [None]:
from langchain.document_loaders import DirectoryLoader

# Load every .txt file found in ./md-data, showing a progress bar.
# The variable is named `llm_loader` to match the other loader cells.
llm_loader = DirectoryLoader("./md-data",
                             glob="*.txt",
                             show_progress=True)

llm_data = llm_loader.load()

## Load All PDFs from a Directory

In [None]:
from langchain.document_loaders import PyPDFDirectoryLoader

llm_loader = PyPDFDirectoryLoader("./pdf-files")

llm_data = llm_loader.load()

## Text Splitter

In [33]:
from langchain.document_loaders import WikipediaLoader

query_1 = "Nikola Tesla"
query_2 = "Thomas Edison"

# Run the same Wikipedia lookup for both queries (load_max_docs=1,
# doc_content_chars_max=20_000) and unpack the two result lists.
docs_1, docs_2 = (
    WikipediaLoader(query=topic, load_max_docs=1,
                    doc_content_chars_max=20_000).load()
    for topic in (query_1, query_2)
)

# Raw article text of the first document returned for each query
text_document_1 = docs_1[0].page_content
text_document_2 = docs_2[0].page_content


In [34]:
documents = [ text_document_1, text_document_2]
metadatas = [ {"document":query_1}, {"document":query_2} ]

## Text Splitters | Characters -> Chunks

In [31]:
from langchain.text_splitter import CharacterTextSplitter

# Split on single spaces; chunk length is measured with len()
char_splitter_options = dict(
    separator=" ",
    chunk_size=100,
    chunk_overlap=30,
    length_function=len,
)
text_splitter = CharacterTextSplitter(**char_splitter_options)

In [35]:
normal_chunks = text_splitter.create_documents(documents, metadatas=metadatas)

In [36]:
print(text_document_1)

Nikola Tesla (10 July 1856 – 7 January 1943) was a Serbian-American engineer, futurist, and inventor. He is known for his contributions to the design of the modern alternating current (AC) electricity supply system.
Born and raised in the Austrian Empire, Tesla first studied engineering and physics in the 1870s without receiving a degree. He then gained practical experience in the early 1880s working in telephony and at Continental Edison in the new electric power industry. In 1884, he immigrated to the United States, where he became a naturalized citizen. He worked for a short time at the Edison Machine Works in New York City before he struck out on his own. With the help of partners to finance and market his ideas, Tesla set up laboratories and companies in New York to develop a range of electrical and mechanical devices. His AC induction motor and related polyphase AC patents, licensed by Westinghouse Electric in 1888, earned him a considerable amount of money and became the corners

In [37]:
normal_chunks[0]

Document(page_content='Nikola Tesla (10 July 1856 – 7 January 1943) was a Serbian-American engineer, futurist, and', metadata={'document': 'Nikola Tesla'})

In [38]:
normal_chunks[1]

Document(page_content='engineer, futurist, and inventor. He is known for his contributions to the design of the modern', metadata={'document': 'Nikola Tesla'})

In [39]:
normal_chunks[2].metadata

{'document': 'Nikola Tesla'}

## Text Splitters | NLTK — splits on sentence boundaries (a somewhat smarter strategy), so a chunk may end up slightly larger than the specified chunk size


In [1]:
from langchain.text_splitter import NLTKTextSplitter
import nltk
nltk.download('punkt')

PydanticUserError: If you use `@root_validator` with pre=False (the default) you MUST specify `skip_on_failure=True`. Note that `@root_validator` is deprecated and should be replaced with `@model_validator`.

For further information visit https://errors.pydantic.dev/2.12/u/root-validator-pre-skip

In [41]:
text_splitter = NLTKTextSplitter(
    chunk_size=500
)

In [42]:
nltk_chunks = text_splitter.create_documents(
    documents, metadatas=metadatas
)

In [43]:
len(nltk_chunks)

128

In [44]:
print(nltk_chunks[5].page_content)

In 1893, he made pronouncements on the possibility of wireless communication with his devices.

Tesla tried to put these ideas to practical use in his unfinished Wardenclyffe Tower project, an intercontinental wireless communication and power transmitter, but ran out of funding before he could complete it.

After Wardenclyffe, Tesla experimented with a series of inventions in the 1910s and 1920s with varying degrees of success.


In [45]:
print(nltk_chunks[5].metadata)

{'document': 'Nikola Tesla'}


## Tokens | TikToken

In [None]:
from langchain.text_splitter import TokenTextSplitter

# Token-based splitter: chunk_size and chunk_overlap are given in tokens
token_splitter_options = {
    "model_name": "text-davinci-003",
    "chunk_size": 100,
    "chunk_overlap": 0,
}
text_splitter = TokenTextSplitter(**token_splitter_options)

In [None]:
tokens_chunks = text_splitter.create_documents(
    documents, metadatas=metadatas
)

In [None]:
len(tokens_chunks)

In [None]:
tokens_chunks[5].page_content

## Tokens | Transformers Tokens

In [None]:
from transformers import AutoTokenizer
from langchain.text_splitter import CharacterTextSplitter

In [None]:
# The model id is taken from the Hugging Face hub
model_id = "stabilityai/stablelm-tuned-alpha-3b"
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
# Build a splitter from the Hugging Face tokenizer loaded above
hf_splitter_options = {"chunk_size": 400, "chunk_overlap": 0}
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
    tokenizer, **hf_splitter_options
)

In [None]:
tokens_chunks = text_splitter.create_documents(
    documents, metadatas=metadatas
)

In [None]:
print( tokens_chunks[5].page_content )

## Embedding

In [46]:
from langchain.document_loaders import WikipediaLoader

query_1 = "Nikola Tesla"
query_2 = "Thomas Edison"

# Loader settings shared by both lookups
wiki_options = dict(load_max_docs=1, doc_content_chars_max=20_000)

# ============== first article
docs_1 = WikipediaLoader(query=query_1, **wiki_options).load()
text_document_1 = docs_1[0].page_content

# ============== second article
docs_2 = WikipediaLoader(query=query_2, **wiki_options).load()
text_document_2 = docs_2[0].page_content

In [47]:
documents = [ text_document_1, text_document_2]
metadatas = [ {"document":query_1}, {"document":query_2} ]

In [48]:
from langchain.text_splitter import NLTKTextSplitter
import nltk
nltk.download('punkt')

text_splitter = NLTKTextSplitter(chunk_size=300, chunk_overlap=50)

[nltk_data] Downloading package punkt to /Users/macbook/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [49]:
tokens_chunks = text_splitter.create_documents(documents, metadatas=metadatas)

Created a chunk of size 315, which is longer than the specified 300
Created a chunk of size 316, which is longer than the specified 300
Created a chunk of size 334, which is longer than the specified 300
Created a chunk of size 430, which is longer than the specified 300


In [50]:
len(tokens_chunks)

176

In [51]:
tokens_chunks[5]

Document(page_content='Attempting to develop inventions he could patent and market, Tesla conducted a range of experiments with mechanical oscillators/generators, electrical discharge tubes, and early X-ray imaging.\n\nHe also built a wirelessly controlled boat, one of the first ever exhibited.', metadata={'document': 'Nikola Tesla'})

## Embeddings | OpenAI

In [None]:
from langchain.embeddings import OpenAIEmbeddings

from dotenv import dotenv_values
env_values = dotenv_values("./app.env")
openai_api_key = env_values['OPENAI_API_KEY']

# The embeddings wrapper needs an *embedding* model; "text-davinci-003" is a
# text-completion model and cannot be used to produce embeddings.
embedding_llm = OpenAIEmbeddings(openai_api_key=openai_api_key,
                                 model="text-embedding-ada-002")

In [None]:
tokens_chunks[5]

In [None]:
# source data
docs_text = [chunk.page_content for chunk in tokens_chunks ]
docs_embeddings = embedding_llm.embed_documents(docs_text)

In [None]:
# query
query_text = "Can you list a number of Nikola Tesla's inventions?"
query_embedding = embedding_llm.embed_query(query_text)

## Embeddings | HuggingFace


In [53]:
from langchain.embeddings import SentenceTransformerEmbeddings
# The sentence-transformers page on Hugging Face lists all available model names:
# https://huggingface.co/sentence-transformers/models
# Alternatively you could use llama-7b-hf.
# Model used here: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
model_name = "sentence-transformers/all-MiniLM-L6-v2"

embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)

  from .autonotebook import tqdm as notebook_tqdm


In [54]:
tokens_chunks[5]

Document(page_content='Attempting to develop inventions he could patent and market, Tesla conducted a range of experiments with mechanical oscillators/generators, electrical discharge tubes, and early X-ray imaging.\n\nHe also built a wirelessly controlled boat, one of the first ever exhibited.', metadata={'document': 'Nikola Tesla'})

In [None]:
# source data: collect the text content of every chunk into a list,
# then embed the whole list in one call
docs_text = [ chunk.page_content for chunk in tokens_chunks ]
docs_embeddings = embedding_llm.embed_documents(docs_text)

In [None]:
# Number of dimensions of one embedding vector (the one at index 5)
len(docs_embeddings[5])

384

In [56]:
query_text = "Can you list a number of Nikola Tesla's inventions?"
query_embedding = embedding_llm.embed_query(query_text)

In [57]:
query_embedding

[-0.07105942070484161,
 0.05051181837916374,
 -0.040130071341991425,
 0.024454867467284203,
 -0.05593001842498779,
 0.013734078966081142,
 -0.003047244856134057,
 0.06978422403335571,
 -0.0800095722079277,
 -0.01563279703259468,
 0.06103595718741417,
 0.01247100718319416,
 0.061280205845832825,
 0.04083362594246864,
 -0.0708618089556694,
 0.004202002193778753,
 -0.060869213193655014,
 0.04482867568731308,
 0.013466309756040573,
 -0.02366340532898903,
 -0.007242718245834112,
 0.04129130393266678,
 0.08313379436731339,
 -0.01222901325672865,
 0.05306204408407211,
 0.036540497094392776,
 -0.032428741455078125,
 -0.010277731344103813,
 0.03167043253779411,
 -0.044503845274448395,
 -0.036458369344472885,
 -0.020086055621504784,
 -0.013852258212864399,
 -0.007332329638302326,
 0.013142140582203865,
 -0.020364413037896156,
 0.050789106637239456,
 0.09220387041568756,
 0.03171518072485924,
 -0.006074072327464819,
 0.0036697189789265394,
 -0.11701101064682007,
 0.0032048749271780252,
 -0.009129

## Embeddings | Cohere

In [None]:
from dotenv import dotenv_values
env_values = dotenv_values("./app.env")

cohere_api_key = env_values['COHERE_API_KEY']

In [None]:
from langchain.embeddings  import CohereEmbeddings

embedding_llm = CohereEmbeddings(cohere_api_key=cohere_api_key)

In [None]:
tokens_chunks[5]

In [None]:
# source data
docs_text = [ chunk.page_content  for chunk in tokens_chunks ]
docs_embeddings = embedding_llm.embed_documents(docs_text)

In [None]:
# query
query_text = "What is the name of Nikola Tesla's mother?"
query_embedding = embedding_llm.embed_query(query_text)

## Vector Stores

In [60]:
from langchain.document_loaders import WikipediaLoader

from langchain.text_splitter import NLTKTextSplitter
import nltk
nltk.download('punkt')

from langchain.embeddings import OpenAIEmbeddings

[nltk_data] Downloading package punkt to /Users/macbook/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [61]:
# load document
query = "Nikola Tesla"

docs = WikipediaLoader(query=query,
                       load_max_docs=1,
                       doc_content_chars_max=20_000).load()

In [62]:
document_1 = docs[0].page_content

In [63]:
text_splitter = NLTKTextSplitter(chunk_size=300, chunk_overlap=50)

# Every chunk is tagged with the query that produced the article
chunk_metadata = {"document": query}
tokens_chunks = text_splitter.create_documents(
    [document_1],
    metadatas=[chunk_metadata],
)

Created a chunk of size 315, which is longer than the specified 300
Created a chunk of size 316, which is longer than the specified 300
Created a chunk of size 334, which is longer than the specified 300


In [None]:
from dotenv import dotenv_values
env_values = dotenv_values("./app.env")
openai_api_key = env_values['OPENAI_API_KEY']

# "gpt-3.5-turbo" is a chat model and cannot produce embeddings; the vector
# stores below need a real embedding model.
embedding_llm = OpenAIEmbeddings(openai_api_key=openai_api_key, model="text-embedding-ada-002")

## Vector Stores | FAISS

In [None]:
from langchain.vectorstores import FAISS

# Pass in the two objects built earlier: the chunks and the embedding model
vector_db = FAISS.from_documents(tokens_chunks, embedding_llm)

In [69]:
query_text = "In which year did Tesla's father die?"

similar_docs = vector_db.similarity_search(query_text)

In [70]:
print(similar_docs[0])

page_content="Tesla's father died the next month, on 17 April 1879, at the age of 60 after an unspecified illness.\n\nIn January 1880, two of Tesla's uncles paid for him to leave Gospić for Prague, where he was to study." metadata={'document': 'Nikola Tesla'}


### Vector Stores | Chroma

In [75]:
from langchain.vectorstores import Chroma

# Directory in which Chroma stores the text documents as vectors
save_to_dir = "./content/wiki_chroma_db"

# One string id per chunk: "0", "1", "2", ...
docs_ids = [str(position) for position in range(len(tokens_chunks))]

vector_db = Chroma.from_documents(
    tokens_chunks,
    embedding_llm,
    ids=docs_ids,
    persist_directory=save_to_dir,
)

In [76]:
# Search for the documents most similar to a query.
# (Query spelling fixed: "father", matching the query used on the reloaded DB.)
query_text = "In which year did Tesla's father die?"

similar_docs = vector_db.similarity_search(
                                      query_text,
                                      # number of results to return
                                      k=5,
                                      # restrict results by chunk metadata
                                      filter={'document': 'Nikola Tesla'}
                                    )

In [77]:
## save to disk
vector_db.persist()

In [79]:
## Load from disk
load_from_dir = "./content/wiki_chroma_db"

loaded_vector_db = Chroma(
    persist_directory=load_from_dir,
    embedding_function=embedding_llm
)

In [None]:
# search for most similar document to a query
query_text = "In which year did Tesla's father die?"

similar_docs = loaded_vector_db.similarity_search(
                                      query_text,
                                      # return the 5 closest chunks
                                      k=5,
                                      # select results by metadata
                                      filter={'document': 'Nikola Tesla'}
                                    )

In [83]:
print( similar_docs[0] )

page_content="Tesla's father died the next month, on 17 April 1879, at the age of 60 after an unspecified illness.\n\nIn January 1880, two of Tesla's uncles paid for him to leave Gospić for Prague, where he was to study." metadata={'document': 'Nikola Tesla'}


## QnA Chain

In [None]:
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import NLTKTextSplitter
import nltk
nltk.download('punkt')

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

from dotenv import dotenv_values
env_values = dotenv_values("./app.env")
openai_api_key = env_values['OPENAI_API_KEY']

# load document
query = "Nikola Tesla"

docs = WikipediaLoader(query=query,
                       load_max_docs=1,
                       doc_content_chars_max=20_000).load()

documents = [ docs[0].page_content ]
metadatas = [ {"document": query} ]

# splitter
text_splitter = NLTKTextSplitter(chunk_size=300, chunk_overlap=50)
tokens_chunks = text_splitter.create_documents(
    documents,
    metadatas=metadatas
)

# embeddings
embedding_llm = OpenAIEmbeddings(openai_api_key=openai_api_key)

# vector database
# Use a path relative to the notebook (like the earlier Chroma cell) instead
# of the absolute "/content/..." location, which only exists on some hosts.
# A separate directory keeps these vectors apart from the store created in
# the Chroma section, which may have been built with another embedding model.
save_to_dir = "./content/wiki_qna_chroma_db"
vector_db = Chroma.from_documents(
    tokens_chunks,
    embedding_llm,
    persist_directory=save_to_dir
)

## QnA Chain | Stuff


In [None]:
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from dotenv import dotenv_values

In [None]:
env_values = dotenv_values("./app.env")
openai_api_key = env_values['OPENAI_API_KEY']

# "text-davinci-003" has been retired by OpenAI; "gpt-3.5-turbo-instruct" is
# the completion-style replacement for use with the `OpenAI` LLM wrapper.
llm = OpenAI(openai_api_key=openai_api_key,
             model_name="gpt-3.5-turbo-instruct",
             temperature=0.5)

In [None]:
# Prompt for the "stuff" chain: the retrieved context and the user question
# are substituted into {context} and {question}.
# (Typo "questionu" in the first instruction line fixed.)
qna_template = "\n".join([
    "Answer the next question using the provided context.",
    "If the answer is not contained in the context, say 'NO ANSWER IS AVAILABLE'",
    "### Context:",
    "{context}",
    "",
    "### Question:",
    "{question}",
    "",
    "### Answer:",
])

# Wrap the template; {context} and {question} are its two input variables
qna_prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=qna_template,
    verbose=True,
)

# Question-answering chain of type "stuff" using the custom prompt
stuff_chain = load_qa_chain(llm, prompt=qna_prompt, chain_type="stuff")

In [None]:
question = "Why did Tesla work as a football player?"

similar_docs = vector_db.similarity_search(question, k=4)

print(len(similar_docs))

In [None]:
# Inputs expected by the chain: the retrieved documents plus the question
chain_inputs = {"input_documents": similar_docs, "question": question}
answer = stuff_chain(chain_inputs, return_only_outputs=True)

answer

## QnA Chains | MapReduce

In [None]:
from langchain.llms import OpenAI
from dotenv import dotenv_values

from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain

In [None]:
env_values = dotenv_values("./app.env")
openai_api_key = env_values['OPENAI_API_KEY']

# "text-davinci-003" has been retired by OpenAI; "gpt-3.5-turbo-instruct" is
# the completion-style replacement for use with the `OpenAI` LLM wrapper.
llm = OpenAI(openai_api_key=openai_api_key,
             model_name="gpt-3.5-turbo-instruct",
             temperature=0.5)

In [None]:
# Per-document prompt of the map step; placeholders: {context}, {question}
qna_template = (
    "Answer the next question using the provided context.\n"
    "If the answer is not contained in the context, say 'NO ANSWER IS AVAILABLE'\n"
    "### Context:\n"
    "{context}\n"
    "\n"
    "### Question:\n"
    "{question}\n"
    "\n"
    "### Answer:"
)

# Prompt object for the map step, built from the template above
qna_prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=qna_template,
)

In [None]:
# Prompt of the reduce step; placeholders: {summaries}, {question}
combine_template = (
    "Given intermediate contexts for a question, generate a final answer.\n"
    "If the answer is not contained in the intermediate contexts, say 'NO ANSWER IS AVAILABLE'\n"
    "### Summaries:\n"
    "{summaries}\n"
    "\n"
    "### Question:\n"
    "{question}\n"
    "\n"
    "### Final Answer:"
)

# Prompt object for the reduce step, built from the template above
combine_prompt = PromptTemplate(
    input_variables=['summaries', 'question'],
    template=combine_template,
)

In [None]:
# "map_reduce" chain: qna_prompt is the question prompt, combine_prompt the
# combine prompt; intermediate steps are returned as well.
map_reduce_chain = load_qa_chain(
    llm,
    chain_type="map_reduce",
    question_prompt=qna_prompt,
    combine_prompt=combine_prompt,
    return_intermediate_steps=True,
)

In [None]:
question = "What did Tesla invent?"

similar_docs = vector_db.similarity_search(question, k=5)

print( len(similar_docs) )

In [None]:
# Inputs expected by the chain: the retrieved documents plus the question
map_reduce_inputs = {"input_documents": similar_docs, "question": question}
final_answer = map_reduce_chain(map_reduce_inputs, return_only_outputs=True)

In [None]:
final_answer

## QnA Chain | Refine

In [None]:
from langchain.llms import OpenAI
from dotenv import dotenv_values
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain

In [None]:
# gpt-3.5-turbo is a chat model, so it is driven through the chat wrapper
# rather than the completion-style `OpenAI` class.
from langchain.chat_models import ChatOpenAI

env_values = dotenv_values("./app.env")
openai_api_key = env_values['OPENAI_API_KEY']

# The dated "-0613" snapshot has been retired; use the maintained alias.
llm = ChatOpenAI(openai_api_key=openai_api_key,
                 model_name="gpt-3.5-turbo",
                 temperature=0.5)

In [None]:
# Prompt for the first document of the refine chain;
# placeholders: {context_str}, {question}
initial_qna_template = (
    "Answer the following question using the provided text only.\n"
    "If answer is not available. Say 'No answer for this context'\n"
    "### Context:\n"
    "{context_str}\n"
    "\n"
    "### Question:\n"
    "{question}\n"
    "### Answer:"
)

# Prompt object for the initial answer, built from the template above
initial_qna_prompt = PromptTemplate(
    input_variables=['context_str', 'question'],
    template=initial_qna_template,
)

In [None]:
# Prompt for each refinement step;
# placeholders: {context_str}, {existing_answer}, {question}
refine_qna_template = (
    "Refine the existing answer, if required, with the following context.\n"
    "If answer is not available. Say 'No answer for this context'\n"
    "### Context\n"
    "{context_str}\n"
    "\n"
    "### Existing Answer:\n"
    "{existing_answer}\n"
    "\n"
    "### Question:\n"
    "{question}\n"
    "\n"
    "### Refined Answer:"
)

# Prompt object for the refinement step, built from the template above
refine_qna_prompt = PromptTemplate(
    input_variables=['context_str', 'existing_answer', 'question'],
    template=refine_qna_template,
)

In [None]:
# "refine" chain: initial prompt for the first answer, refine prompt for the
# follow-up passes; intermediate steps are returned as well.
refine_options = dict(
    chain_type="refine",
    question_prompt=initial_qna_prompt,
    refine_prompt=refine_qna_prompt,
    return_intermediate_steps=True,
)
refine_chain = load_qa_chain(llm, **refine_options)

In [None]:
question = "What did Tesla invent?"

similar_docs = vector_db.similarity_search(question, k=2)

print( len(similar_docs) )

In [None]:
# Inputs expected by the chain: the retrieved documents plus the question
refine_inputs = {"input_documents": similar_docs, "question": question}
final_refined_answer = refine_chain(refine_inputs, return_only_outputs=True)

final_refined_answer