In [9]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from transformers import pipeline

In [10]:
# Load pdf file
pdf_path = "/home/steffi/aisd/Books/Fundamentals of Machine Learning for Predictive Data Analytics 2015.pdf" 
loader = PyPDFLoader(pdf_path)
documents = loader.load()


In [11]:
#Chunck the document
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = splitter.split_documents(documents)

In [12]:
# Create embeddings using HuggingFace
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [13]:
#Create vectorstore
vectorstore = FAISS.from_documents(docs, embedding_model)


In [14]:
# Create retriever
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})


In [19]:
# 6. Load small local LLM using transformers pipeline (causal LM)
llm_pipeline = pipeline("text-generation", model="distilgpt2", max_new_tokens=150)



Device set to use cpu


In [20]:
# 7. Define a simple generation function using the pipeline
def local_llm_generate(prompt):
    outputs = llm_pipeline(prompt, max_length=150, do_sample=True, top_p=0.95, num_return_sequences=1)
    return outputs[0]['generated_text']


In [21]:

# 8. Simple retrieval-augmented generation function
def rag_query(query):
    docs = retriever.get_relevant_documents(query)
    context = "\n".join([doc.page_content for doc in docs])
    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
    return local_llm_generate(prompt)


In [22]:

# 9. Ask a question
question = "What is gradient descent?"
answer = rag_query(question)

print(f"Q: {question}\nA: {answer}")

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=150) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Q: What is gradient descent?
A: Context:
7.6	Further	Reading
The	key	component	of	the	gradient	descent	algorithm	presented	in	this	chapter	is	the	use
of	
differentiation
	to	compute	the	slope	of	the	error	surface.	Differentiation	is	a	part	of
calculus,	which	is	a	large	and	very	important	field	of	mathematics.	In	Appendix	C
[551]
	we
provide	an	introduction	to	differentiation	that	covers	all	the	techniques	required	to
understand	how	the	gradient	descent	algorithm	works.	If,	however,	you	wish	to	get	a
gradient	descent
.
To	understand	how	gradient	descent	works,	imagine	a	hiker	unlucky	enough	to	be
stranded	on	the	side	of	a	valley	on	a	foggy	day.	Because	of	the	dense	fog,	it	is	not	possible
for	her	to	see	the	way	to	her	destination	at	the	bottom	of	the	valley.	Instead,	it	is	only
possible	to	see	the	ground	at	her	feet	to	within	about	a	three	foot	radius.	It	might,	at	first,
seem	like	all	is	lost	and	that	it	will	be	impossible	for	the	hiker	to	find	her	way	down	to	the
accurate	predictive	m