In [None]:
!pip install langchain langchain-community langchain-huggingface
!pip install transformers sentence-transformers faiss-cpu accelerate


In [2]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("speech.txt", encoding="utf-8")
documents = loader.load()

print("Loaded document length:", len(documents[0].page_content))


  from .autonotebook import tqdm as notebook_tqdm


Loaded document length: 3624


In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=30
)

chunks = text_splitter.split_documents(documents)

print("Total chunks:", len(chunks))

Total chunks: 17


In [5]:
from langchain_huggingface import HuggingFaceEmbeddings


embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)


Loading weights: 100%|██████████| 103/103 [00:00<00:00, 264.22it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [6]:
from langchain_community.vectorstores import FAISS


vectorstore = FAISS.from_documents(chunks, embeddings)

print("Vector store ready ")
vectorstore.save_local("faiss_index")
print("FAISS index saved successfully")


Vector store ready 
FAISS index saved successfully


In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms import HuggingFacePipeline

import torch

torch.set_num_threads(4)  # optional speed tweak

model_name = "Qwen/Qwen2-0.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu"
)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=150,
    temperature=0.1,
    do_sample=False
)

llm = HuggingFacePipeline(pipeline=pipe)

print("Qwen loaded ")


Loading weights: 100%|██████████| 290/290 [00:01<00:00, 266.19it/s, Materializing param=model.norm.weight]                              
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Passing `generation_config` together with generation-related arguments=({'max_new_tokens', 'do_sample', 'temperature'}) is deprecated and will be removed in future versions. Please pass either a `generation_config` object OR all generation parameters explicitly, but not both.


Qwen loaded 


  llm = HuggingFacePipeline(pipeline=pipe)


In [8]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

retrievr=vectorstore.as_retriever(search_kwargs={"k":2})

prompt=ChatPromptTemplate.from_template(""" 
Answer the question using only the context below.

Context:
{context}

Question:
{question}
                                        
Answer:
"""
)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain=(
    {"context":retrievr | format_docs,"question":lambda X: X}
    | prompt
    | llm
    | StrOutputParser()
)

print("RAG chain ready")

RAG chain ready


In [9]:
response=rag_chain.invoke("What is the main theme of the speech?")
print(response)

Both `max_new_tokens` (=150) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Human:  
Answer the question using only the context below.

Context:
to have a voice in their own governments, for the rights and liberties of small nations, for a universal dominion of right by such a concert of free peoples as shall bring peace and safety to all nations and make the world itself at last free.

Just because we fight without rancor and without selfish object, seeking nothing for ourselves but what we shall wish to share with all free peoples, we shall, I feel confident, conduct our operations as belligerents without passion and ourselves observe with proud punctilio the principles of right

Question:
What is the main theme of the speech?
                                        
Answer:
The main theme of the speech is that the speaker believes in fighting for the rights and liberties of small nations.


In [10]:
response=rag_chain.invoke("Why does he mention small nations?")
print(response)

Both `max_new_tokens` (=150) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Human:  
Answer the question using only the context below.

Context:
to have a voice in their own governments, for the rights and liberties of small nations, for a universal dominion of right by such a concert of free peoples as shall bring peace and safety to all nations and make the world itself at last free.

and actions toward the millions of men and women of German birth and native sympathy who live among us and share our life, and we shall be proud to prove it toward all who are in fact loyal to their neighbors and to the government in the hour of test. They are, most of them, as true and loyal

Question:
Why does he mention small nations?
                                        
Answer:
Because he wants to show that his country is united and has a voice in its own governments.


In [1]:
%pip install streamlit reportlab

Collecting streamlitNote: you may need to restart the kernel to use updated packages.

  Using cached streamlit-1.54.0-py3-none-any.whl.metadata (9.8 kB)
Collecting reportlab
  Using cached reportlab-4.4.10-py3-none-any.whl.metadata (1.7 kB)
Collecting altair!=5.4.0,!=5.4.1,<7,>=4.0 (from streamlit)
  Using cached altair-6.0.0-py3-none-any.whl.metadata (11 kB)
Collecting blinker<2,>=1.5.0 (from streamlit)
  Using cached blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<7,>=5.5 (from streamlit)
  Using cached cachetools-6.2.6-py3-none-any.whl.metadata (5.6 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Using cached gitpython-3.1.46-py3-none-any.whl.metadata (13 kB)
Collecting pandas<3,>=1.4.0 (from streamlit)
  Using cached pandas-2.3.3-cp310-cp310-win_amd64.whl.metadata (19 kB)
Collecting pillow<13,>=7.1.0 (from streamlit)
  Using cached pillow-12.1.1-cp310-cp310-win_amd64.whl.metadata (9.0 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Using 