In [1]:
import os
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter
os.environ['USER_AGENT'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'

embedding_model=OllamaEmbeddings(model='nomic-embed-text')

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
urls = ["https://www.tcs.com/"]
# "https://ollama.com/",

In [3]:
def load_data(urls, model):
    model = ChatOllama(model = model)
    docs = [WebBaseLoader(url).load() for url in urls]
    docs_list = [items for sublist in docs for items in sublist]
    text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size = 1000, chunk_overlap = 10)
    doc_split = text_splitter.split_documents(docs_list)  
    return doc_split

In [4]:
# print(len(doc_split))

# print(embedding_model.embed_query("how are you doing today?"))


In [4]:
from langchain.vectorstores import FAISS
def vector_db(doc_split):    
    # doc_embeddings = embedding_model.embed_documents([doc.page_content for doc in doc_split])
    faiss_index = FAISS.from_documents(doc_split, embedding_model)
    retriever = faiss_index.as_retriever()
    return retriever

In [5]:
model_name = "llama3"
model = ChatOllama(model=model_name)
doc_split = load_data(urls, model_name)
retriever = vector_db(doc_split)

In [9]:
before_rag = "what is {topic}"
before_rag_prompt = ChatPromptTemplate.from_template(before_rag)
before_rag_chain = before_rag_prompt | model | StrOutputParser()
print(before_rag_chain.invoke({"topic": "TCS"}))

TCS can refer to several things, depending on the context:

1. **Tata Consultancy Services**: TCS is an Indian multinational information technology consulting company that provides a range of services including IT consulting, software development, and business process outsourcing.
2. **Transaction Control Statement** (TCS): In accounting and finance, a TCS is a document or statement that details the transactions involved in a specific financial transaction, such as a merger or acquisition.
3. **Time-Critical Systems**: TCS can also refer to systems or networks that require real-time processing and response times, often used in applications such as trading platforms, medical devices, or control systems.
4. **Tropical Cyclone Scale** (TCS): In meteorology, the Tropical Cyclone Scale is a classification system for tropical cyclones (hurricanes, typhoons, etc.) based on their wind speed and potential damage.

Without more context, it's difficult to determine which definition is most releva

In [6]:
after_rag_template = """{context}
Question: {question}"""
after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template)
after_rag_chain = ({"context": retriever, "question":RunnablePassthrough()} | after_rag_prompt | model|StrOutputParser())
print(after_rag_chain.invoke("TCS"))

Based on the provided document metadata and content, here are some key points about Tata Consultancy Services (TCS):

1. **Source**: The source of this information is https://www.tcs.com/.
2. **Title**: The title of the website is "T TwinX™\n\nTCS MasterCraft™\n\nJile™\n\nResearch & Innovation\n\n\nexpand here\n\n\nTCS Research\n\nTCS Pace™\n\nOverviewPress tab for submenu items"
3. **Description**: The description of TCS is: "TCS, a global leader in IT services, consulting, and business solutions, leverages technology for business transformation and helps catalyze change."
4. **Language**: The language used on the website is English (en).
5. **Content**: The content on the website includes:
	* News alerts
	* Recent news articles (e.g., "Follett Higher Education Selects TCS to Build a Future-Ready, Cloud-Based IT Infrastructure")
	* Analyst recognition (e.g., "TCS Positioned as a Leader in Healthcare Payer Digital Services by Everest Group")
	* Media kit and overview
	* Management comm