## 1. Import Modules

In [1]:
import os  # Interacts with the operating system
from langchain_community.document_loaders import PyPDFLoader  # Loads and parses PDF documents
from langchain.text_splitter import RecursiveCharacterTextSplitter  # Splits text into manageable chunks
from langchain_chroma import Chroma  # Manages vector stores for document embeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings  # Generates embeddings using Google’s Generative AI
from dotenv import load_dotenv  # Loads environment variables from .env file


## 2. Load environment variables

In [2]:
load_dotenv()  # Load environment variables from the .env file
api_key = os.getenv("GOOGLE_API_KEY")  # Retrieve the Google API key from environment variables
if not api_key:
    raise ValueError("No GOOGLE_API_KEY found in the environment variables.")



## 3. Load and Split PDF Document

In [3]:
loader = PyPDFLoader("Fuji_xs20_manual.pdf")  # Initialize the PDF loader with the specified PDF file
data = loader.load()  # Load the entire PDF as a single Document

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)  # Initialize the text splitter
docs = text_splitter.split_documents(data)  # Split the loaded data into smaller documents

print("Total number of documents: ", len(docs))  # Print the total number of documents



Total number of documents:  669


## 4. Generate Embeddings 

In [4]:
api_key = os.getenv("GOOGLE_API_KEY")  # Retrieve the Google API key from environment variables
# Embedding models: https://python.langchain.com/v0.1/docs/integrations/text_embedding/
embeddings = GoogleGenerativeAIEmbeddings(api_key=api_key, model="models/embedding-001")  # Initialize the embedding model with the API key and specified model
vector = embeddings.embed_query("hello, world!")  # Generate an embedding vector for a sample query
print(vector[:5])  # Print the first five elements of the embedding vector


[0.05168594419956207, -0.030764883384108543, -0.03062233328819275, -0.02802734449505806, 0.01813092641532421]


## 5. Create Vector Store and Retriever:

In [5]:
vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings)  # Create a Chroma vector store from the split documents using the embeddings
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 10})  # Set up a retriever to find similar documents based on queries


## 6. Retrieve Documents

In [6]:
retrieved_docs = retriever.invoke("What is new in yolov9?")  # Use the retriever to find documents relevant to the query
print("Number of retrieved documents: ", len(retrieved_docs))  # Print the number of documents retrieved
print(retrieved_docs[5].page_content)  # Print the content of the 6th retrieved document


Number of retrieved documents:  10
IMAGE QUALITY .......................................................................................................... 129RAW RECORDING ....................................................................................................... 130SELECT JPEG/HEIF ...................................................................................................... 131FILM SIMULATION ...................................................................................................... 132MONOCHROMATIC COLOR .................................................................................. 134GRAIN EFFECT .............................................................................................................. 134COLOR CHROME EFFECT ....................................................................................... 135COLOR CHROME FX BLUE ..................................................................................... 135WHITE BALANCE


## 7. Initialize Language Model and Set Up Chains

In [7]:
from langchain_google_genai import ChatGoogleGenerativeAI  # Import the ChatGoogleGenerativeAI class for conversational AI
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.3, max_tokens=500)  # Initialize the language model with specified parameters

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)  # Define the system prompt

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),  # System-level instructions
        ("human", "{input}"),  # User input placeholder
    ]
)  # Create a prompt template

question_answer_chain = create_stuff_documents_chain(llm, prompt)  # Create a question-answering chain
rag_chain = create_retrieval_chain(retriever, question_answer_chain)  # Create a Retrieval-Augmented Generation (RAG) chain

response = rag_chain.invoke({"input": "what are film simulation modes and how to set it in Xs20 camera?"})  # Invoke the RAG chain with a query
print(response["answer"])  # Print the assistant's answer



Film simulation modes in cameras mimic the look of different film stocks from manufacturers like FujiFilm.  To set it in your X-S20, go to the shooting menu by pressing "MENU/OK", then navigate to the "IMAGE QUALITY SETTING" tab.  You can then select the "FILM SIMULATION" option to choose your desired film stock. 



## 8. Let's run this program in our conda environment

Export .ipynb as .py file and from your terminal run "python app.py" 