In [54]:
# One-time environment setup; uncomment to run (%pip installs into the active kernel's environment).
# %pip install langchain_community
# %pip install pypdf
# %pip install bs4 requests

In [7]:
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnableSequence


import os

# Runs before the os.getenv() lookups below; presumably populates the process
# environment from a local .env file (python-dotenv) — confirm the file is present.
load_dotenv()

def find_missing_api_keys(names, environ=None):
    """Return the names in ``names`` that are unset or empty in ``environ``.

    Args:
        names: Iterable of environment-variable names to check.
        environ: Mapping to look the names up in; defaults to ``os.environ``.

    Returns:
        A list of the missing names, in the order they were given.
    """
    if environ is None:
        environ = os.environ
    return [name for name in names if not environ.get(name)]


# Assigning os.getenv(key) back into os.environ[key] is a no-op when the key is
# set and raises TypeError when it is not (os.environ only accepts str values),
# so a single absent key would abort this setup cell. Report missing keys instead.
MISSING_API_KEYS = find_missing_api_keys(('OPENAI_API_KEY', 'GROQ_API_KEY', 'HF_API_KEY'))
if MISSING_API_KEYS:
    print(f"Warning: environment variable(s) not set: {', '.join(MISSING_API_KEYS)}")



from langchain_community.document_loaders import TextLoader


In [8]:
# Loader for the cricket poem text file, decoded as UTF-8.
loader = TextLoader(
    "data/cricket.txt",
    encoding="utf-8",
)

In [11]:
# Run the loader; the following cells index `docs` like a list
# (the recorded run produced a single Document).
docs = loader.load()

In [20]:
# Sanity-check the load: how many documents came back ...
print(len(docs))

# ... and what class the first one is.
print(type(docs[0]))

1
<class 'langchain_core.documents.base.Document'>


In [23]:
# Display the raw text of the first document (shown as a string repr, so line breaks appear as \n).
docs[0].page_content

'On the pitch where dreams take flight,\nCricket’s spirit shines in golden light.\nThe leather ball arcs through the air,\nChasing glory, chasing dare.\nBats meet ball in rhythmic sound,\nWhere passion and precision are found.\nUnder skies of endless blue,\nEvery stroke feels fresh and new.\nThe fielders sprint with hearts aflame,\nPursuing victory, earning their name.\nIn the silence before the bowler’s run,\nHope glistens like the morning sun.\nBoundaries drawn by will and might,\nEach shot a story, pure delight.\nCrowds rise in cheer from distant stands,\nUnited by dreams in countless lands.\nThe umpire’s call, a measured tone,\nBalances justice, sets the tone.\nField boundaries curve like art,\nA canvas where courage takes part.\nEvery run, every catch, every play,\nWeaves the fabric of a glorious day.\nIn cricket’s dance, time finds its pace,\nThrough swift deliveries and a steady grace.\nThe bat’s arc meets fate’s decree,\nUniting hearts in shared ecstasy.\nA bowler’s spin, a fie

In [24]:
# Metadata attached to the first document; in the recorded run it holds only the source path.
docs[0].metadata

{'source': 'data/cricket.txt'}

In [25]:
# GPT-4 chat model at temperature 0 with streaming and verbose enabled;
# the API key is taken from the OPENAI_API_KEY environment variable.
model = ChatOpenAI(
    openai_api_key=os.environ.get('OPENAI_API_KEY'),
    model="gpt-4",
    streaming=True,
    temperature=0,
    verbose=True,
)

In [26]:
# Parser applied as the last step of the chain built in the next cell.
parser = StrOutputParser()

# Single-variable prompt: the poem text is substituted for {input}.
prompt = PromptTemplate(
    input_variables=["input"],
    template="Summarize the poem :\n{input}",
)

In [29]:
# Compose prompt -> model -> parser with the | operator, then run it on the first document's text.
# NOTE(review): relies on `docs` still holding the TextLoader result; later cells rebind `docs`,
# so re-running this cell out of order would summarize different content.
chain = prompt | model | parser
chain.invoke({"input":docs[0].page_content})

"The poem is a tribute to the sport of cricket, celebrating its spirit, passion, and the unity it brings. It describes the thrill of the game, from the flight of the ball to the crack of the bat, and the anticipation before the bowler's run. The poem highlights the precision, strategy, and raw desire involved in the game, and how it ignites the crowd's excitement. It also emphasizes the unity that cricket brings, uniting strangers and honoring legends. The poem portrays cricket as a timeless dance, a poem of life, and a theater where dreams are lived and never die. It concludes by stating that cricket is a soaring ode to what life can bring."

In [32]:
from langchain_community.document_loaders import PyPDFLoader

In [37]:
# Rebinds `loader` (previously the TextLoader) to a PyPDFLoader for a single PDF file.
loader = PyPDFLoader("data/Sathwik_Bollepalli_Data_Analyst.pdf")

In [38]:
# Load the PDF into `document` (indexed like a list below); `docs` from the text example is left untouched.
document = loader.load()

In [43]:
# Metadata of the first entry; the recorded output includes PDF properties plus source, page and total_pages.
document[0].metadata

{'producer': 'Microsoft® Word 2019',
 'creator': 'Microsoft® Word 2019',
 'creationdate': '2025-03-29T15:57:12-04:00',
 'author': 'Sathwik Bollepalli',
 'moddate': '2025-03-29T15:57:12-04:00',
 'source': 'data/Sathwik_Bollepalli_Data_Analyst.pdf',
 'total_pages': 1,
 'page': 0,
 'page_label': '1'}

In [42]:
# Print the extracted text of the first entry (page 0 according to its metadata).
# NOTE(review): the recorded output contains personal contact details (phone number, email) —
# clear this cell's output before sharing or committing the notebook.
print(document[0].page_content)

Sathwik Bollepalli 
860-781-1558 | sathwik238@gmail.com | LinkedIn | Portfolio | GitHub 
 
SUMMARY 
Data Analyst with a strong foundation in data visualization, statistical analysis, and business intelligence. Proven track re cord of 
improving model accuracy by 15% and reducing data processing time by 70%. Skilled in Python, SQL, and Tableau, with a focus  on 
uncovering insights that support data-driven decisions and improve business performance. 
 
PROFESSIONAL EXPERIENCE 
Plymouth Rock Assurance - Boston, USA                                                                                     
Data Scientist                                                                                                                                           May 2024 – December 2024 
• Analyzed neighborhood-level property loss data and developed over 800 derived features to support risk segmentation and pricing 
analysis for home insurance. Built an XGBoost model to enhance risk segmentation and i

In [46]:
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader


In [49]:
# Loader over every *.pdf matched in the data directory, each handled by PyPDFLoader.
loader = DirectoryLoader(
    "data",
    glob="*.pdf",
    loader_cls=PyPDFLoader,
)

# load() returns the full list (411 entries in the recorded run);
# use lazy_load() instead when there are many documents.
documents = loader.load()
print(len(documents))

411


In [50]:
from langchain_community.document_loaders import WebBaseLoader

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [51]:
# Product-page URL, split by path segment and query parameter for readability;
# adjacent string literals are concatenated into the same single string.
url = (
    "https://www.flipkart.com/"
    "fastrack-optimus-pro-1-43-amoled-display-aod-466x466-functional-crown-bt-calling-smartwatch"
    "/p/itma4744c9053b72"
    "?pid=SMWGV3ZY9YJYEYEC"
    "&lid=LSTSMWGV3ZY9YJYEYECZN6QCW"
    "&marketplace=FLIPKART"
    "&store=ajy%2Fbuh"
    "&srno=b_1_3"
    "&otracker=browse"
    "&fm=organic"
    "&iid=b1aa2713-f664-4624-8205-3e4086973431.SMWGV3ZY9YJYEYEC.SEARCH"
    "&ppt=browse"
    "&ppn=browse"
    "&ssid=9o4vls2d400000001743300911351"
)
# Rebinds `loader` to a WebBaseLoader for that page.
loader = WebBaseLoader(url)

In [55]:
# Load the web page and print the text of the first resulting document.
# NOTE(review): this rebinds `docs`, replacing the documents loaded from data/cricket.txt earlier.
docs = loader.load()
print(docs[0].page_content)

  Fastrack Optimus Pro with 1.43 AMOLED Display & AOD(466x466),Functional Crown,BT Calling Smartwatch Price in India - Buy Fastrack Optimus Pro with 1.43 AMOLED Display & AOD(466x466),Functional Crown,BT Calling Smartwatch online at Flipkart.com        Explore PlusLoginBecome a Seller More CartAdd to cart Buy NowHomeWearable Smart DevicesSmart WatchesFastrack Smart WatchesFastrack Optimus Pro with 1.43 AMOLED Display & AOD(466x466),Functional Crown,BT Calling Smartwatch (Blue Strap, Free Size)
CompareShareFastrack Optimus Pro with 1.43 AMOLED Display & AOD(466x466),Functional Crown,BT Calling Smartwatch  (Blue Strap, Free Size)4.331,364 Ratings & 2,006 ReviewsSpecial price₹2,299₹5,99561% offi+ ₹9 Protect Promise Fee Learn moreAvailable offersBank Offer5% Unlimited Cashback on Flipkart Axis Bank Credit CardT&CBank Offer10% instant discount on SBI Credit Card EMI Transactions, up to ₹1,500 on orders of ₹5,000 and aboveT&CBank Offer10% off up to ₹1000 on HDFC Bank Credit Card EMI on 6 mon

In [56]:
# Question-answering prompt: {question} is asked against the supplied {text}.
# Rebinds `prompt`, replacing the summarization template.
prompt = PromptTemplate(
    input_variables=["text", "question"],
    template="Answer the following question {question} from the following text :\n{text}",
)

In [57]:
# Rebuild the chain so it uses the question-answering prompt, then ask about the price.
# The whole text of the first web document is passed as {text}.
chain = prompt | model | parser
chain.invoke({"text":docs[0].page_content, "question":"What is the price of the watch?"})

'The price of the watch is ₹2,299.'

In [58]:
from langchain_community.document_loaders import CSVLoader

In [69]:
# Rebinds `loader` once more, this time to a CSVLoader for the training CSV.
loader = CSVLoader("data/train.csv")

In [70]:
# Load the CSV and count the resulting documents (381109 in the recorded run;
# apparently one per CSV row — confirm). NOTE(review): rebinds `docs` again.
docs = loader.load()
print(len(docs))

381109


In [73]:
# Inspect the first document: the recorded output lists each column as `name: value` on its own line.
print(docs[0].page_content)

id: 1
Gender: Male
Age: 44
Driving_License: 1
Region_Code: 28.0
Previously_Insured: 0
Vehicle_Age: > 2 Years
Vehicle_Damage: Yes
Annual_Premium: 40454.0
Policy_Sales_Channel: 26.0
Vintage: 217
Response: 1
