### Practice: Parameter Efficient Fine-Tuning
In this notebook, you're gonna fine-tune large language models within limited GPU memory.

Prompt Tuning

In [1]:

import torch
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import transformers
from tqdm.auto import tqdm, trange
# assert torch.cuda.is_available(), "you need cuda for this part"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# 2️⃣ Fonction pour charger et traiter un fichier PDF
def load_pdf(pdf_path):
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return text_splitter.split_documents(docs)

# 🔹 Charger le PDF
pdf_path = r"C:\Users\facto22020\Downloads\OneDrive_2025-02-08\Seminaire_Master_DataScience\J5_IA_Generative_RAG\Prompting\reference_rag.pdf"  # Remplace par ton fichier
documents = load_pdf(pdf_path)

In [3]:
documents

[Document(metadata={'producer': 'Microsoft® Word pour Microsoft\xa0365', 'creator': 'Microsoft® Word pour Microsoft\xa0365', 'creationdate': '2025-02-14T17:05:29+01:00', 'msip_label_09e9a456-2778-4ca9-be06-1190b1e1118a_enabled': 'true', 'msip_label_09e9a456-2778-4ca9-be06-1190b1e1118a_setdate': '2025-02-14T16:04:48Z', 'msip_label_09e9a456-2778-4ca9-be06-1190b1e1118a_method': 'Standard', 'msip_label_09e9a456-2778-4ca9-be06-1190b1e1118a_name': 'D3', 'msip_label_09e9a456-2778-4ca9-be06-1190b1e1118a_siteid': '658ba197-6c73-4fea-91bd-1c7d8de6bf2c', 'msip_label_09e9a456-2778-4ca9-be06-1190b1e1118a_actionid': '8f8bb064-b350-473d-b6ed-e0dd2c451360', 'msip_label_09e9a456-2778-4ca9-be06-1190b1e1118a_contentbits': '0', 'msip_label_09e9a456-2778-4ca9-be06-1190b1e1118a_tag': '10, 3, 0, 1', 'author': 'Mohamed Abbas Konate', 'moddate': '2025-02-14T17:05:29+01:00', 'source': 'C:\\Users\\facto22020\\Downloads\\OneDrive_2025-02-08\\Seminaire_Master_DataScience\\J5_IA_Generative_RAG\\Prompting\\reference

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
model_name = "Qwen/Qwen2.5-0.5B"

# tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B")
# model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B",token = "", local_files_only = True)
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B",token = "", local_files_only = True)

In [16]:


# Création d'un pipeline Hugging Face pour LangChain
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0,
    max_new_tokens=100,
    temperature=0.0,
)

llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# 2️⃣ Charger et traiter les documents
def load_documents(file_path):
    loader = TextLoader(file_path)
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=128, chunk_overlap=50)
    return text_splitter.split_documents(docs)

# 3️⃣ Créer la base de connaissances avec FAISS
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(documents, embeddings)

# 4️⃣ Construire le pipeline RAG avec LangChain
retriever = vectorstore.as_retriever()

prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="Answer all the subsequents question using the following context. Be precise and short. Start your answer usin keyword [START]. \n Explique en detail  :Context : \n\n{context}\n\nQuestion : {question}"
)


# prompt_template = PromptTemplate(
#     input_variables=["context", "question"],
#     template=(
#         "Utilise le contexte suivant pour répondre à la question de manière précise et détaillée.\n"
#         "Commence toujours ta réponse par le mot-clé [START].\n\n"
#         "=== CONTEXTE ===\n"
#         "{context}\n"
#         "=== FIN CONTEXTE ===\n\n"
#         "=== QUESTION ===\n"
#         "{question}\n"
#         "=== FIN QUESTION ===\n\n"
#         "[START] "
#     )
# )

# qa_chain = RetrievalQA.from_chain_type(
#     llm=llm,
#     retriever=retriever,
#     return_source_documents=True
# )



qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt_template}  # Use the custom prompt template
)



Device set to use cuda:0


In [17]:

# 5️⃣ Fonction pour interroger le système
def ask_rag(question):
    response = qa_chain({"query": question})
    return response["result"], response

# 🔹 **Exemple d'utilisation**
query = "What is this document about ?"
answer, full_answer = ask_rag(query)
print(f"{answer}")




Answer all the subsequents question using the following context. Be precise and short. Start your answer usin keyword [START]. 
 Explique en detail  :Context : 

just a hobby. 
I released it under the Apache License and kept improving it. Over the years, I added more 
features, examples, documentation and testing. Good engineering practices. Marketing matters 
too, so I published articles and presented at conferences about it. 
It turns out that good documentation is a superpower for open source projects. 
By 2012 my hobby project had grown into a software library with deep tech algorithms for vehicle

• About Us 
• Jobs 
• Partners 
Resources 
• Blog 
• Videos 
• Customer Stories 
• Events 
• Press 
• Support 
Find us on 
• YouTube 
• LinkedIn 
• X 
• GitHub 
• Stack Overflow 
• RSS Feed 
© 2024 Timefold BV 
LegalPrivacyBrand Assets

Skip to content 
Navigation 
• Planning models 
• Documentation 
• Resources 
o Blog 
o Videos 
o Customer Stories 
o Events 
o Press 
o Support 
• Prici

In [13]:
full_answer

{'query': 'De quoi parle ce document ?',
 'result': "Réponds à la question suivante en utilisant le contexte donné. Commence toujours tes reponse par le mot clé [START]. Explique en detail  :Context : \n\njust a hobby. \nI released it under the Apache License and kept improving it. Over the years, I added more \nfeatures, examples, documentation and testing. Good engineering practices. Marketing matters \ntoo, so I published articles and presented at conferences about it. \nIt turns out that good documentation is a superpower for open source projects. \nBy 2012 my hobby project had grown into a software library with deep tech algorithms for vehicle\n\nunder stress \n• Head of Product that tracks everything and deeply understands that a product is more \nthan code \n• Head of Customer Success that groks complex constraints and speaks business too \n• Head of Marketing that wrote the most crisp copy on what my tech does that I’ve ever \nseen \nMaarten went through this too, but he had go