## 🔹 Install & Import FAISS

In [1]:

from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader

# Embeddings model
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")


## 🔹 Load & Split Documents

In [3]:

FILE_PATH = r'/Users/sahiljain/Data Science/Course GenAI/Projects/Forecasting Models.pdf'

loader = PyPDFLoader(FILE_PATH)
pages = loader.load()

# Split into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = splitter.split_documents(pages)


## 🔹 Create FAISS Vector Store

In [4]:

# Build FAISS index from docs
vector_store = FAISS.from_documents(docs, embeddings)

# Save locally for reuse
vector_store.save_local("faiss_index")

# Reload when needed
vector_store = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True
)

# Retriever setup
retriever = vector_store.as_retriever(
    search_type="mmr", 
    search_kwargs={"k": 5, "fetch_k": 10, "lambda_mult": 0.5}
)


## 🔹 Retrieval + LLM QA Chain

In [5]:

from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
import textwrap

# Custom Prompt
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "You are an assistant. Use the following CONTEXT to answer the QUESTION.\n\n"
        "CONTEXT:\n{context}\n\n"
        "QUESTION:\n{question}\n\n"
        "INSTRUCTIONS:\n"
        "- Answer concisely and clearly in short paragraphs (2–6 lines each).\n"
        "- If examples or steps exist, use bullet points.\n"
        "- At the end, add a 'SOURCES' section listing page numbers if available.\n\n"
        "ANSWER:\n"
    )
)

# Model (Google Gemini, you can switch to OpenAI if needed)
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

# Retrieval QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=model,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt_template}
)

# Example query
query = "What is ARIMA model?"
result = qa_chain(query)

# Extract answer & sources
answer_text = result["result"] if isinstance(result, dict) else result
source_docs = result.get("source_documents", []) if isinstance(result, dict) else []

# Pretty print
print("\n📌 Answer:\n")
print(textwrap.fill(answer_text.strip(), width=100))

print("\n📚 Sources:")
for i, doc in enumerate(source_docs, start=1):
    page = doc.metadata.get("page", "Unknown")
    preview = doc.page_content.strip().replace("\n", " ")[:300]
    print(f"{i}. Page: {page} — {preview}...")


  result = qa_chain(query)



📌 Answer:

ARIMA (Autoregressive Integrated Moving Average) models are statistical models used for analyzing
and forecasting time series data.  They capture patterns in data by combining autoregressive (AR),
integrated (I), and moving average (MA) components.  The 'I' component involves differencing the
data to achieve stationarity (a constant mean and variance over time).  The AR component models the
relationship between the current value and past values, while the MA component models the
relationship between the current value and past forecast errors.  Seasonal ARIMA models extend the
basic ARIMA model to incorporate seasonal patterns.  These are denoted as ARIMA(p,d,q)(P,D,Q)m,
where the lowercase letters refer to non-seasonal components and the uppercase letters refer to
seasonal components, with 'm' representing the seasonal period.  For example, ARIMA(1,1,1)(1,1,1)4
indicates a model with non-seasonal and seasonal AR(1), I(1), MA(1) components and a seasonal period
of 4.  Model 