In [35]:
import os
import streamlit as st
import pickle
import time
from dotenv import load_dotenv
import langchain
from langchain import LLMChain
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

import google.generativeai as genai
from google.generativeai import GenerativeModel
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

Loading data

In [3]:
loaders = UnstructuredURLLoader(urls=[
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
])
data = loaders.load() 
len(data)

2

Splitting into chunks

In [6]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

docs = text_splitter.split_documents(data)

In [7]:
len(docs)

15

In [8]:
docs[0]

Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html'}, page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nLoans up to ₹50 LAKHS\n\nFixed Deposits\n\nCredit CardsLifetime Free\n\nCredit Score\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_MARKETS_AS/MC_ENG_ROS_NWS_MKTS_AS_ATF_728\n\nGo PRO @₹99 PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nOptions FestWebinar\n\nHomeNewsBusinessMarketsWall Street rises as Tesla soars on AI optimism\n\nTrending Topics\n\nSensex TodaySBI Card

Create embeddings using FAISS 

In [12]:
load_dotenv()
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY', 'your-key-if-not-using-env')

In [16]:
genai.configure()

In [17]:
text = "Hello world"
result = genai.embed_content(model="models/text-embedding-004", content=text)

# Print just a part of the embedding to keep the output manageable
print(str(result['embedding'])[:50], '... TRIMMED]')

[0.013168517, -0.00871193, -0.046782672, 0.0006996 ... TRIMMED]


In [22]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

vector_store = FAISS.from_documents(docs, embeddings)

vector_store.save_local("faiss_index")

reading faiss index

In [23]:
file_path = 'faiss_index\index.pkl'
if os.path.exists(file_path):
    with open(file_path, "rb") as f:
        vectorIndex = pickle.load(f)

In [24]:
vectorIndex

(<langchain_community.docstore.in_memory.InMemoryDocstore at 0x1ab5d1827d0>,
 {0: '85260b49-5c88-4e8f-9450-c1bc3bfeafe9',
  1: '26d8f1aa-bab0-4208-8e54-429a863366fa',
  2: 'b1aea8e2-f1dc-4f83-b623-e4920e74e5ae',
  3: 'eeca8d75-2b8b-44eb-895d-99434a892777',
  4: '8808374e-6cca-4500-9f1b-15ab2bb31894',
  5: '05ede73f-5edc-4a0b-9bef-9f925087b530',
  6: '380f8b81-8751-4517-b1c1-cecaf6efe2ea',
  7: '14e60618-ab9a-4e40-8da7-a1d8fe5c075d',
  8: '131b1162-3813-4496-a459-d62bfa94bc16',
  9: 'f5e7ab2d-2e08-4b0a-a697-48650f962671',
  10: '9061c177-9384-404b-8c15-a7efb1db2e1c',
  11: '22f03130-8133-4398-9442-c368ff160b10',
  12: '3de15537-2318-4d08-b8a2-169926f2ea2d',
  13: 'd1540233-4bab-4289-8770-710e848d652d',
  14: 'c4f17704-ca3f-4321-9afb-edc3ee2ebdf3'})

In [None]:
api_key = os.getenv('GOOGLE_API_KEY')

AIzaSyAbEAeAuvdvApvNOG8FptBTKTj28YdCu-o


In [28]:
from openai import OpenAI


client = OpenAI(
    api_key=api_key,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

In [29]:
models = client.models.list()
for model in models:
  if 'gemini' in model.id:
    print(model.id)

models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-dark-launch
models/gemini-test-23
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-thinking-001
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-tool-test
models/ge

In [30]:
MODEL="gemini-1.5-flash"

In [36]:
system_message = "You are a RAG LLM who searches the given documents for answers to questions. Do not provide answers that are not supported by the documents."
user_prompt = "what is the price of Tiago iCNG?"

In [40]:
llm = GenerativeModel(
    model_name='gemini-1.5-flash',
    model_kwargs={'system_instruction': system_message}  # Pass system_instruction within model_kwargs
)

TypeError: GenerativeModel.__init__() got an unexpected keyword argument 'model_kwargs'

In [41]:
qa_chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm, 
    retriever=vector_store.as_retriever()
)

ValidationError: 2 validation errors for LLMChain
llm.is-instance[Runnable]
  Input should be an instance of Runnable [type=is_instance_of, input_value=genai.GenerativeModel(
  ...   cached_content=None
), input_type=GenerativeModel]
    For further information visit https://errors.pydantic.dev/2.10/v/is_instance_of
llm.is-instance[Runnable]
  Input should be an instance of Runnable [type=is_instance_of, input_value=genai.GenerativeModel(
  ...   cached_content=None
), input_type=GenerativeModel]
    For further information visit https://errors.pydantic.dev/2.10/v/is_instance_of

In [43]:
from google.generativeai import GenerativeModel
from langchain.chains import RetrievalQAWithSourcesChain

# Assuming vector_store is your vector store (e.g., FAISS, Pinecone, etc.)

# Define the system message you want to use
system_message = "You are a helpful assistant."

# Initialize the Google Gemini model with the correct format
llm = GenerativeModel(
    model_name='gemini-1.5-flash',
    system_instruction=system_message  # Pass system_instruction directly
)

# Initialize the retrieval-based question-answering chain
qa_chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm, 
    retriever=vector_store.as_retriever()
)

# Example usage of the chain
query = "What is the capital of France?"
result = qa_chain.run(query)

print(result)  # The result will contain the answer along with the sources


ValidationError: 2 validation errors for LLMChain
llm.is-instance[Runnable]
  Input should be an instance of Runnable [type=is_instance_of, input_value=genai.GenerativeModel(
  ...   cached_content=None
), input_type=GenerativeModel]
    For further information visit https://errors.pydantic.dev/2.10/v/is_instance_of
llm.is-instance[Runnable]
  Input should be an instance of Runnable [type=is_instance_of, input_value=genai.GenerativeModel(
  ...   cached_content=None
), input_type=GenerativeModel]
    For further information visit https://errors.pydantic.dev/2.10/v/is_instance_of