In [1]:
# Load environment variables from .env (do not commit secrets)
from dotenv import load_dotenv
import os
load_dotenv()

# Environment variables this notebook looks for once the .env file is loaded.
required = ["OPENAI_API_KEY", "GOOGLE_API_KEY", "GOOGLE_APPLICATION_CREDENTIALS"]

# Report presence only -- the values themselves are never printed.
missing = []
for name in required:
    is_set = bool(os.environ.get(name))
    print(f"{name}: {'SET' if is_set else 'NOT SET'}")
    if not is_set:
        missing.append(name)

# One summary line: either a hint on how to fix the setup, or an all-clear.
summary = (
    "Some required env vars are missing. Copy .env.example -> .env and fill them."
    if missing
    else "All required env vars are present (values hidden)."
)
print(summary)

OPENAI_API_KEY: SET
GOOGLE_API_KEY: SET
GOOGLE_APPLICATION_CREDENTIALS: NOT SET
Some required env vars are missing. Copy .env.example -> .env and fill them.


In [2]:
import os
from dotenv import find_dotenv
# Diagnostics: where the kernel is running and which .env file would be found.
# Each probe is called only when its line is printed, so the lines appear in
# this order and a failing probe does not hide the earlier ones.
dotenv_probes = (
    ('cwd:', os.getcwd),
    ('.env path:', lambda: os.path.abspath('.env')),
    ('.env exists:', lambda: os.path.exists('.env')),
    ('find_dotenv returned:', find_dotenv),
)
for label, probe in dotenv_probes:
    print(label, probe())


cwd: d:\langchainrAG
.env path: d:\langchainrAG\.env
.env exists: True
find_dotenv returned: d:\langchainrAG\.env


In [3]:
from dotenv import load_dotenv, find_dotenv
import os
# Locate the .env file explicitly and load it with override=True.
dotenv_path = find_dotenv()
print('find_dotenv returned:', dotenv_path)
loaded = load_dotenv(dotenv_path, override=True)
print('load_dotenv returned:', loaded)

# Presence check only -- the values are never printed.
for env_name in ('OPENAI_API_KEY', 'GOOGLE_API_KEY', 'GOOGLE_APPLICATION_CREDENTIALS'):
    if os.getenv(env_name):
        print(env_name, 'SET')
    else:
        print(env_name, 'NOT SET')


find_dotenv returned: d:\langchainrAG\.env
load_dotenv returned: True
OPENAI_API_KEY SET
GOOGLE_API_KEY SET
GOOGLE_APPLICATION_CREDENTIALS NOT SET


In [4]:
import langchain
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


In [5]:
# Load variables from the .env file before the API keys are read from the environment.
load_dotenv()


True

In [6]:
# Read credentials from the environment; the values are never printed.
google_api_key = os.getenv("GOOGLE_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")
# NOTE(review): presumably a workaround for duplicate OpenMP runtimes loaded by
# the embedding / FAISS stack used later -- confirm it is still required.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Settings shared by both Gemini construction paths below.
gemini_kwargs = {"temperature": 0, "model": "gemini-2.0-flash", "max_tokens": 200}

# Prefer an explicit API key; GEMINI_API_KEY is accepted as an alternative name.
gemini_key = google_api_key or os.getenv("GEMINI_API_KEY")
if gemini_key:
    google_llm = ChatGoogleGenerativeAI(api_key=gemini_key, **gemini_kwargs)
elif os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
    # Use ADC (Application Default Credentials)
    google_llm = ChatGoogleGenerativeAI(**gemini_kwargs)
else:
    google_llm = None
    print("Warning: No Gemini credentials found (GOOGLE_API_KEY/GEMINI_API_KEY/GOOGLE_APPLICATION_CREDENTIALS). google_llm not created.")

# Guard the OpenAI client the same way as the Gemini one: without a key the
# name is bound to None instead of the cell failing, so code that checks
# `openai_llm is not None` before falling back keeps working.
if openai_api_key:
    openai_llm = ChatOpenAI(
        temperature=0,
        model="gpt-4",
        api_key=openai_api_key,
    )
else:
    openai_llm = None
    print("Warning: OPENAI_API_KEY not found. openai_llm not created.")

In [7]:
# Quick sanity check that the LLM objects were created
def llm_status(name, namespace=None):
    """Describe the object bound to ``name`` for the sanity-check printout.

    Args:
        name: Variable name to look up.
        namespace: Mapping to search; defaults to this module's globals.

    Returns:
        ``type(obj).__name__`` when ``name`` is bound to a non-None object,
        otherwise the string ``'NOT CREATED or None'`` (covers both an
        undefined name and a name explicitly set to None).
    """
    scope = globals() if namespace is None else namespace
    obj = scope.get(name)
    return 'NOT CREATED or None' if obj is None else type(obj).__name__


# Both clients go through the same check so a None client is never reported
# as a created object.
print('openai_llm:', llm_status('openai_llm'))
print('google_llm:', llm_status('google_llm'))


openai_llm: ChatOpenAI
google_llm: ChatGoogleGenerativeAI


In [8]:
# 'tool' import removed (not available in this langchain version)
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import PyPDFLoader

# Folder holding the source documents. The default is the original location;
# set the RAG_DATA_DIR environment variable to run the notebook from another
# machine or checkout without editing this cell.
DATA_DIR = os.getenv("RAG_DATA_DIR", r"D:\langchainrAG")

pdf_loader_1 = PyPDFLoader(os.path.join(DATA_DIR, "facebook-guide.pdf"))
pdf_loader_2 = PyPDFLoader(os.path.join(DATA_DIR, "gzip.pdf"))
# The text file is read as UTF-8; no fallback encoding is attempted.
text_loader = TextLoader(
    os.path.join(DATA_DIR, "coolie_english.txt"), encoding="utf-8"
)

pdf_1_docs = pdf_loader_1.load()
pdf_2_docs = pdf_loader_2.load()
text_docs = text_loader.load()

# Combined corpus in load order: both PDFs first, then the text file.
all_docs = pdf_1_docs + pdf_2_docs + text_docs

In [9]:
# Document counts, one per line: first PDF, second PDF, text file, combined list.
for doc_group in (pdf_1_docs, pdf_2_docs, text_docs, all_docs):
    print(len(doc_group))

76
29
1
106


In [10]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters, kept together so they are easy to tune.
splitter_settings = {"chunk_size": 2000, "chunk_overlap": 100}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Split every loaded document into chunks.
split_docs = text_splitter.split_documents(all_docs)

In [11]:
# Number of chunks produced by the splitter.
len(split_docs)

109

In [12]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
# Embedding model used to index the chunks.
hf_model_name = "sentence-transformers/all-MiniLM-L6-v2"
emb = HuggingFaceEmbeddings(model_name=hf_model_name)

# NOTE(review): this OpenAI embedder is constructed but is not the one passed
# to the index below -- confirm whether it is still needed.
embeddings = OpenAIEmbeddings()

# Build the FAISS index from the split documents with the HuggingFace embedder.
vectorstore = FAISS.from_documents(documents=split_docs, embedding=emb)

  emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [13]:
results = await vectorstore.asimilarity_search("who is dahaa in coolie?")
code
#VSC-c24f24bd
python
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda


prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant"),
    ("human", """Always answer the question just by using the context provided and not from your knowledge.
,
     ("placeholder", "{agent_scratchpad}")
])


chain = {"context": retriever, "input": RunnablePassthrough()} | prompt | google_llm

# chain = {"context": RunnableLambda(lambda x: x["input"]) | retriever, "input": RunnableLambda(lambda x: x["input"])} | prompt | google_llm | StrOutputParser()

try:
    res = chain.invoke("What is gzip?")
except Exception as e:
    print("Primary Google LLM failed:", e)
    if 'openai_llm' in globals() and openai_llm is not None:
        print("Falling back to OpenAI LLM")
        chain2 = {"context": retriever, "input": RunnablePassthrough()} | prompt | openai_llm
        try:
            res = chain2.invoke("What is gzip?")
        except Exception as e2:
            print("Fallback OpenAI failed:", e2)
            res = f"Both LLM calls failed: {e2}"
    else:
        res = f"Google LLM failed and no OpenAI LLM available: {e}"

res


prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant"),
    ("human", """Always answer the question just by using the context provided and not from your knowledge.
        Context: {context}
        question: {input}
     
        Answer: 
     """),
     ("placeholder", "{agent_scratchpad}")
])


chain = {"context": retriever, "input": RunnablePassthrough()} | prompt | google_llm

# chain = {"context": RunnableLambda(lambda x: x["input"]) | retriever, "input": RunnableLambda(lambda x: x["input"])} | prompt | google_llm | StrOutputParser()

res = chain.invoke("What is gzip?")

res

SyntaxError: unterminated triple-quoted string literal (detected at line 57) (1936940894.py, line 46)