In [1]:
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings

In [70]:
def file_loader(path, glob="*.pdf"):
    """Load the files under ``path`` that match ``glob`` using ``PyPDFLoader``.

    Args:
        path: Directory handed to ``DirectoryLoader`` as the search root.
        glob: Filename pattern handed to ``DirectoryLoader``. Defaults to
            ``"*.pdf"``, the pattern that used to be hard-coded, so existing
            single-argument calls behave exactly as before.

    Returns:
        The value returned by ``DirectoryLoader.load()``.
    """
    loader = DirectoryLoader(path, glob=glob, loader_cls=PyPDFLoader)
    return loader.load()


In [86]:
# Run the PDF loader against the local Data/ directory.
extracted_docs = file_loader(path=r'Data/')

In [73]:
def chunking_data(data, chunk_size=500, chunk_overlap=50):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        data: Documents to split; passed straight to ``split_documents``.
        chunk_size: ``chunk_size`` forwarded to the splitter. Defaults to 500,
            the value that used to be hard-coded.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter. Defaults
            to 50, the value that used to be hard-coded.

    Returns:
        The value returned by the splitter's ``split_documents(data)``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(data)

In [87]:
# Chunk the loaded documents, then display how many chunks were produced.
chunk_data = chunking_data(data=extracted_docs)
len(chunk_data)

2120

In [75]:
def get_embedding(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Build the ``HuggingFaceEmbeddings`` object used by the notebook.

    Args:
        model_name: Model identifier forwarded to ``HuggingFaceEmbeddings``.
            Defaults to the model that used to be hard-coded, so calling
            ``get_embedding()`` with no arguments behaves as before.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [88]:
# Build the embedding object once; the cells below reuse it.
embedding = get_embedding()

In [65]:
# Embed a sample string and display the length of the resulting vector.
# The Pinecone index created later in this notebook is declared with
# dimension=384, so this length should agree with that value.
len(embedding.embed_query("samanwaya ghosh"))

384

In [77]:
from dotenv import load_dotenv
import os

# Load variables from a .env file into the process environment.
load_dotenv()

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    # os.getenv returns None when the variable is missing, and os.environ only
    # accepts str values, so the assignment below would otherwise fail with an
    # unhelpful "str expected, not NoneType" TypeError.
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

In [68]:
from langchain_pinecone import PineconeVectorStore
from pinecone import ServerlessSpec
from pinecone.grpc import PineconeGRPC as pinecone

# Pinecone client authenticated with the key read from the environment.
pc = pinecone(api_key=PINECONE_API_KEY)

index_name = "mlragchatbot"

# Creating an index that already exists is an error, so only create it when it
# is missing. This keeps the cell safe to re-run (Restart Kernel -> Run All).
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # length of the vectors produced by `embedding.embed_query`
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [89]:
# Embed the chunks with `embedding` and store them in the index `index_name`.
store_data = PineconeVectorStore.from_documents(
    chunk_data,
    embedding,
    index_name=index_name,
)

In [90]:
# Open the existing index as a vector store, using the same embedding object
# for queries, then display the store.
docs = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embedding,
)
docs

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x28342e4d0d0>

In [94]:
# Similarity retriever configured with k=3, followed by a quick smoke-test query.
retriver = docs.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
output = retriver.invoke("What is Supervised Machine Learning")
output

[Document(id='fdc44d8e-b701-4712-a2cc-c89a96397242', metadata={'page': 17.0, 'page_label': '18', 'source': 'Data\\100-Machine-Learning-Interview-Questions-and-Answers.pdf'}, page_content='75. Explain The Types Of Supervised Learning?\nSupervised learning is of two types, namely,\n1. Regression: It is a kind of Supervised Learning that learns from the given  LabelledDatasets, and then it is able to predict the continuous-valued output for the new data thatis given to the algorithm. It is used in cases where an output requirement is a numberlike money or height etc. Some popular Supervised Learning algorithms are LinearRegression, Logistic Regression.'),
 Document(id='1dd2cb51-995d-4a49-a07a-229c9e43b94f', metadata={'page': 11.0, 'page_label': '12', 'source': 'Data\\Hands-On-Machine-Learning-new (1).pdf'}, page_content='Figure 1-5. A labeled training set for supervised learning (e.g., spam classifica-\ntion) 8 | Chapter 1: The Machine Learning Landscape\nA typical supervised learning tas

In [99]:
import os

GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    # os.getenv returns None when the variable is missing, and os.environ only
    # accepts str values, so the assignment below would otherwise fail with an
    # unhelpful "str expected, not NoneType" TypeError.
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# NOTE(review): no model is passed here, so whichever default the installed
# langchain_groq version provides is used. Consider pinning a model explicitly.
llm = ChatGroq(temperature=0.6, max_tokens=500)

In [96]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate

In [97]:
# System instructions for the question-answering step. The adjacent string
# literals are concatenated, so each fragment must end with the whitespace that
# separates it from the next one. `{context}` is the slot that receives the
# retrieved documents.
system_prompt = (
    "You are an expert Data Scientist assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't find any related context then say that you "
    "don't know. Do not give a hallucinated answer. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Chat prompt: the system instructions followed by the user's question.
chat_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", "{input}"),
])

In [101]:
# Combine-documents chain: fills `chat_prompt` with the retrieved documents.
stuff_chain = create_stuff_documents_chain(llm=llm, prompt=chat_prompt)

# Retrieval chain: runs `retriver`, then hands the results to `stuff_chain`.
retriver_chain = create_retrieval_chain(
    retriever=retriver,
    combine_docs_chain=stuff_chain,
)

question = "What is Tesorflow?"
response_dict = retriver_chain.invoke({"input": question})

# Keep only the generated answer text and display it.
response = response_dict["answer"]
response

'TensorFlow is a machine learning library offering GPU support, distributed computing, and a just-in-time compiler for optimization. It is used in various domains such as natural language processing, recommender systems, and time series forecasting. It provides a NumPy-like core. TensorFlow includes both clustering algorithms and threshold logic units for classification or regression tasks.'

In [102]:
from langchain_core.messages import HumanMessage, AIMessage

# Seed the conversation history with the first question/answer exchange.
chat_history = [
    HumanMessage(content=question),
    AIMessage(content=response),
]

In [103]:
# Display the conversation history collected so far.
chat_history

[HumanMessage(content='What is Tesorflow?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='TensorFlow is a machine learning library offering GPU support, distributed computing, and a just-in-time compiler for optimization. It is used in various domains such as natural language processing, recommender systems, and time series forecasting. It provides a NumPy-like core. TensorFlow includes both clustering algorithms and threshold logic units for classification or regression tasks.', additional_kwargs={}, response_metadata={})]

In [104]:
from langchain_core.prompts import MessagesPlaceholder

# Instructions for rewriting a follow-up question so it can be understood
# without the earlier turns of the conversation.
contextualize_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do not answer the question, "
    "just reformulate it if needed, otherwise return it as is."
)

# The `{context}` variable below means every caller has to supply a `context`
# value whenever this prompt is formatted.
contextualize_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    ("system", "Context: {context}"),
])

In [105]:
from langchain.chains import create_history_aware_retriever

# Retriever that first uses the LLM and `contextualize_prompt` to rewrite the
# question against the chat history, then queries `retriver`.
create_history_retrieve_chain = create_history_aware_retriever(
    llm=llm,
    retriever=retriver,
    prompt=contextualize_prompt,
)

In [107]:
# Answer prompt for the history-aware chain: the same QA instructions as the
# single-turn chain, plus the earlier turns. The question-reformulation prompt
# must not be reused for this step, because it tells the model NOT to answer.
history_qa_prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
])

history_stuff_chain = create_stuff_documents_chain(llm, history_qa_prompt)
history_retrieval_chain = create_retrieval_chain(
    create_history_retrieve_chain,
    history_stuff_chain,
)

ans = history_retrieval_chain.invoke({
    "input": "What is the modele that I need to learn for this?",
    "chat_history": chat_history,
    # Placeholder value for prompts that declare a `context` variable before
    # retrieval has run; the retrieval chain fills in the real context.
    "context": "",
})
print(ans['answer'])

To get started with TensorFlow, you can begin by learning the fundamentals of machine learning and deep learning models such as:

1. Linear regression
2. Logistic regression
3. Neural networks
4. Convolutional Neural Networks (CNNs)
5. Recurrent Neural Networks (RNNs)
6. Long Short-Term Memory (LSTM) networks

These models cover the basics and are commonly used in various applications. TensorFlow provides production-ready implementations of these models, allowing you to easily apply them to solve real-world problems.
