In [28]:
import os
from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains.question_answering import load_qa_chain

In [29]:
# Setting the environment path and autheticating the OpenAI API

os.environ["OPENAI_API_KEY"] = "Your API Keys"

In [30]:
# Loading the document

document = WikipediaLoader('Indian Primier League')
data = document.load()

In [31]:
data

[Document(page_content="The Kerala cricket team is a domestic cricket team based in the Indian state of Kerala. It is in the Elite Group of the Ranji Trophy, the premier first class cricket tournament in India. It was known as Travancore-Cochin cricket team until 1957/58.\nKerala has produced two Indian Test cricketers, Tinu Yohannan and S. Sreesanth. Sanju Samson has represented India in T20Is and ODIs, while Basil Thampi has a national call-up to his name. The team also lined up ex-Indian International player Sadagoppan Ramesh for two years from 2005 to 2007. Robin Uthappa, a former Indian International player currently plays for Kerala. Kerala has also produced Krishna Chandran, who plays at International level for United Arab Emirates.\n\n\n== Playing history ==\nKerala began competing in the 1957–58 Ranji Trophy, succeeding the Travancore-Cochin cricket team after the states were reorganized. It competed in the South Zone, against Madras/Tamil Nadu, Mysore/Karnataka, Andhra and Hy

In [32]:
# Now we need to split the document into chunks of text

text_splitter  = CharacterTextSplitter(chunk_size = 500)
chunkked_docs = text_splitter.split_documents(data)

Created a chunk of size 724, which is longer than the specified 500
Created a chunk of size 1660, which is longer than the specified 500


In [33]:
# verify the length of document array and chunked document array

print(len(data))
print(len(chunkked_docs))

1
7


In [36]:
# We need to embedd this chunked docs before storing to vector stores

embedding = OpenAIEmbeddings()

db_AI = Chroma.from_documents(chunkked_docs, embedding=embedding, persist_directory='./AI_docs')

In [37]:
db_AI.persist()

In [38]:
# Now we need to do the Question answering system.

from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chat_models import ChatOpenAI

In [39]:
chat_model = ChatOpenAI(temperature=0)

In [40]:
chain = load_qa_chain(llm=chat_model, chain_type='stuff')

In [41]:
question = "What is Indian Primier League"

In [42]:
db_connection = Chroma(embedding_function=embedding, persist_directory='./AI_docs')

In [43]:
chain_docs = db_connection.similarity_search(query=question)
chain_docs

[Document(page_content='The Kerala cricket team is a domestic cricket team based in the Indian state of Kerala. It is in the Elite Group of the Ranji Trophy, the premier first class cricket tournament in India. It was known as Travancore-Cochin cricket team until 1957/58.\nKerala has produced two Indian Test cricketers, Tinu Yohannan and S. Sreesanth. Sanju Samson has represented India in T20Is and ODIs, while Basil Thampi has a national call-up to his name. The team also lined up ex-Indian International player Sadagoppan Ramesh for two years from 2005 to 2007. Robin Uthappa, a former Indian International player currently plays for Kerala. Kerala has also produced Krishna Chandran, who plays at International level for United Arab Emirates.', metadata={'source': 'https://en.wikipedia.org/wiki/Kerala_cricket_team', 'summary': 'The Kerala cricket team is a domestic cricket team based in the Indian state of Kerala. It is in the Elite Group of the Ranji Trophy, the premier first class crick

In [44]:
result = chain.run(input_documents = chain_docs, question = question)
result

'The Indian Premier League (IPL) is a professional Twenty20 cricket league in India. It was founded by the Board of Control for Cricket in India (BCCI) in 2008. The IPL features franchise teams representing different cities and regions in India. It is considered one of the most popular and lucrative cricket leagues in the world, known for its high-intensity matches, star players, and entertainment value.'

In [45]:
chain_with_source =  load_qa_with_sources_chain(llm=chat_model, chain_type='stuff')

In [46]:
result = chain_with_source.run(input_documents=chain_docs, question=question)
result

'The Indian Premier League is a professional Twenty20 cricket league in India.\nSOURCES: https://en.wikipedia.org/wiki/Indian_Premier_League'