In [2]:
pip install python-dotenv langchain langchain_community faiss-cpu huggingface_hub sentence-transformers mlflow

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Import necessary libraries
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceHub
import mlflow
import mlflow.langchain

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Step 1: Load environment variables (for Hugging Face API key)
# load_dotenv() is expected to pick up HUGGINGFACEHUB_API_TOKEN from the .env file.
load_dotenv()

# An unset or empty token is treated the same way: stop before any later step runs.
token_is_missing = not os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if token_is_missing:
    raise ValueError("HUGGINGFACEHUB_API_TOKEN not found in .env file")

ValueError: HUGGINGFACEHUB_API_TOKEN not found in .env file

In [None]:
# Step 2: Load and split documents
# The file is checked up front instead of catching FileNotFoundError around
# loader.load(): recent langchain_community releases of TextLoader re-raise read
# failures as RuntimeError, so the except clause would never show the hint below.
doc_path = "data/sample_doc.txt"
if not os.path.isfile(doc_path):
    raise FileNotFoundError("Please create 'data/sample_doc.txt' with sample text")

loader = TextLoader(doc_path)
documents = loader.load()

# Split the loaded documents into chunks of up to 1000 characters, with a
# 200-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

In [None]:
# Step 3: Create embeddings and vector store
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
# Build the FAISS store from the split documents using the embedding model above.
vectorstore = FAISS.from_documents(documents=texts, embedding=embeddings)

In [None]:
# Step 4: Set up the LLM
# Switched to flan-t5-base for better results.
llm_settings = {
    "repo_id": "google/flan-t5-base",
    "model_kwargs": {"temperature": 0.7},
}
llm = HuggingFaceHub(**llm_settings)

In [None]:
# Step 5: Create RAG chain
# The retriever is configured with k=3 and passed to a "stuff"-type RetrievalQA
# chain that also returns its source documents.
top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=top_k_retriever,
    return_source_documents=True,
)

In [None]:
# Step 6: Run query and log with MLflow
query = "What is the main topic?"

# Per the MLflow docs, a retrieval chain cannot be logged on its own: the index
# has to be saved under `persist_dir`, and a `loader_fn` must rebuild the
# retriever from that directory when the logged model is loaded back.
faiss_index_dir = os.path.join("data", "faiss_index")
vectorstore.save_local(faiss_index_dir)


def load_retriever(persist_directory):
    """Rebuild the FAISS retriever from the index stored in `persist_directory`.

    Uses the same embedding model name and `k` value as the cells that built
    the vector store and the chain.

    Args:
        persist_directory: Directory containing the saved FAISS index.

    Returns:
        A retriever over the restored FAISS vector store.
    """
    restored_embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    restored_store = FAISS.load_local(
        persist_directory,
        restored_embeddings,
        # The index is pickle-backed; only enable this for an index written by
        # this notebook, never for files from an untrusted source.
        # NOTE(review): this keyword requires a recent langchain_community release.
        allow_dangerous_deserialization=True,
    )
    return restored_store.as_retriever(search_kwargs={"k": 3})


with mlflow.start_run():
    # Log parameters first so they are recorded even if a later step fails
    mlflow.log_param("embedding_model", "sentence-transformers/all-MiniLM-L6-v2")
    mlflow.log_param("llm_model", "google/flan-t5-base")
    # Run the query (invoke() replaces the deprecated direct call on the chain)
    result = qa_chain.invoke({"query": query})
    # Log response length
    mlflow.log_metric("response_length", len(result["result"]))
    # Log the model together with what is needed to restore its retriever
    mlflow.langchain.log_model(
        qa_chain,
        "rag_model",
        loader_fn=load_retriever,
        persist_dir=faiss_index_dir,
    )

In [None]:
# Step 7: Print result
# Only the "result" entry of the chain output is shown here.
answer_text = result["result"]
print(answer_text)