In [2]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")
os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY")
os.environ['LANGCHAIN_TRACING_V2'] = "true"
os.environ['LANGCHAIN_PROJECT'] = os.getenv("LANGCHAIN_PROJECT")

In [4]:
# Data ingestion 
# internally WebBaseLoader use beautifulsoap4 to scrape the web page.
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://www.cricbuzz.com/live-cricket-scorecard/105762/eng-vs-ind-1st-test-india-tour-of-england-2025")
data = loader.load()
data

[Document(metadata={'source': 'https://www.cricbuzz.com/live-cricket-scorecard/105762/eng-vs-ind-1st-test-india-tour-of-england-2025', 'title': 'Cricket scorecard - England vs India, 1st Test, India tour of England, 2025 | Cricbuzz.com', 'description': 'Catch live and fully detailed scorecard of England vs India, 1st Test, Jun 20, India tour of England, 2025 on Cricbuzz', 'language': 'en'}, page_content="\n Cricket scorecard - England vs India, 1st Test, India tour of England, 2025 | Cricbuzz.com  ✖Live ScoresScheduleArchivesNewsAll Stories  Premium Editorials Latest NewsTopicsSpotlightOpinionsSpecialsStats & AnalysisInterviewsLive BlogsHarsha BhogleSeries  India tour of England, 2025 Major League Cricket 2025 Bangladesh tour of Sri Lanka, 2025 Australia tour of West Indies, 2025 ICC World Test Championship Final 2025 T20 Blast 2025 South Africa tour of Zimbabwe, 2025 West Indies tour of Ireland, 2025 India Women tour of England, 2025 Tamil Nadu Premier League 2025 All Series »Teams   

In [6]:
# divide the document into chunks 

from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(data)
documents

[Document(metadata={'source': 'https://www.cricbuzz.com/live-cricket-scorecard/105762/eng-vs-ind-1st-test-india-tour-of-england-2025', 'title': 'Cricket scorecard - England vs India, 1st Test, India tour of England, 2025 | Cricbuzz.com', 'description': 'Catch live and fully detailed scorecard of England vs India, 1st Test, Jun 20, India tour of England, 2025 on Cricbuzz', 'language': 'en'}, page_content='Cricket scorecard - England vs India, 1st Test, India tour of England, 2025 | Cricbuzz.com  ✖Live ScoresScheduleArchivesNewsAll Stories  Premium Editorials Latest NewsTopicsSpotlightOpinionsSpecialsStats & AnalysisInterviewsLive BlogsHarsha BhogleSeries  India tour of England, 2025 Major League Cricket 2025 Bangladesh tour of Sri Lanka, 2025 Australia tour of West Indies, 2025 ICC World Test Championship Final 2025 T20 Blast 2025 South Africa tour of Zimbabwe, 2025 West Indies tour of Ireland, 2025 India Women tour of England, 2025 Tamil Nadu Premier League 2025 All Series »Teams   Tes

In [9]:
# embedding the documents 

from langchain_openai import OpenAIEmbeddings
embeddings= OpenAIEmbeddings()


In [10]:
# vectorstores 
from langchain.vectorstores import FAISS
vectorstoreDB = FAISS.from_documents(documents, embeddings)

In [23]:
# Query from a vector store db 

query = "What is the score of the match between india and england?"
# similarity search
results = vectorstoreDB.similarity_search(query)
results[0].page_content


'Cricket scorecard - England vs India, 1st Test, India tour of England, 2025 | Cricbuzz.com  ✖Live ScoresScheduleArchivesNewsAll Stories  Premium Editorials Latest NewsTopicsSpotlightOpinionsSpecialsStats & AnalysisInterviewsLive BlogsHarsha BhogleSeries  India tour of England, 2025 Major League Cricket 2025 Bangladesh tour of Sri Lanka, 2025 Australia tour of West Indies, 2025 ICC World Test Championship Final 2025 T20 Blast 2025 South Africa tour of Zimbabwe, 2025 West Indies tour of Ireland, 2025 India Women tour of England, 2025 Tamil Nadu Premier League 2025 All Series »Teams   Test Teams India Afghanistan Ireland Pakistan Australia Sri Lanka Bangladesh England West Indies South Africa Zimbabwe New Zealand   Associate Malaysia Nepal Germany Namibia Denmark Singapore Papua New Guinea Kuwait Vanuatu Jersey Oman Fiji   More... Videos  All Videos Categories Playlists   RankingsICC Rankings - MenICC Rankings - Women More World Test Championship World Cup Super League Photos Mobile'

In [13]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4o")

In [14]:
# retrieval chain and document chain 
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate([
    """
    Answer the following question based on the context below.
    <context>
    {context}
    </context>
    """
])

document_chain = create_stuff_documents_chain(llm, prompt) 
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based on the context below.\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000002ECFDC7FDC0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000002ECFDEFF580>, root_client=<openai.OpenAI object at 0x000002ECFDC7F820>, root_async_client=<openai.AsyncOpenAI object at 0x000002ECFDEFF280>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={

In [27]:
query = "What is the score of the match between india and england?"
from langchain_core.documents import Document

result = document_chain.invoke({
                       "input": query,
                          "context": [Document(page_content =  "England vs India, 1st Test, India tour of England, 2025 | Cricbuzz.com"  )]
                       })

result

'Sure, what would you like to know about the England vs India 1st Test match from the India tour of England in 2025?'

In [None]:
# retriever 
