In [None]:
import os
LANGCHAIN_API_KEY = os.getenv(key="LANGCHAIN_API_KEY")
LANGCHAIN_ENDPOINT = os.getenv(key="LANGCHAIN_ENDPOINT")
LANGCHAIN_TRACING_V2 = os.getenv(key="LANGCHAIN_TRACING_V2")
LANGCHAIN_PROJECT = os.getenv(key="LANGCHAIN_PROJECT")


In [None]:
LANGCHAIN_PROJECT

In [None]:
from langchain_community.document_loaders import (
    CSVLoader,
    EverNoteLoader,
    PyMuPDFLoader,
    TextLoader,
    UnstructuredEmailLoader,
    UnstructuredEPubLoader,
    UnstructuredHTMLLoader,
    UnstructuredMarkdownLoader,
    UnstructuredODTLoader,
    UnstructuredPowerPointLoader,
    UnstructuredWordDocumentLoader,
)

In [1]:
import bs4
import streamlit as st
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from langchain.llms.ollama import Ollama
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

In [2]:
## Data ingestion
def data_ingestion():
    loader = WebBaseLoader(
        web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
        bs_kwargs=dict(
            parse_only=bs4.SoupStrainer(
                class_=("post-content", "post-title", "post-header")
            )
        ),
    )
    docs = loader.load()
    # Split Data
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(documents=docs)
    return splits

In [3]:
def phi_llm():
    llm = Ollama(model="phi", temperature=0, timeout=300)
    return llm


def gemma_llm():
    llm = Ollama(model="gemma", temperature=0, timeout=300)
    return llm


def embed_llm():
    llm = OllamaEmbeddings(model="nomic-embed-text")
    return llm

def embed_llm1():
    llm = OllamaEmbeddings(model="all-minilm")
    return llm

def embed_hf_llm():
    llm = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return llm

In [4]:
# create a vector store
def create_vector_store(doc):
    vectordb = Chroma.from_documents(documents=doc, embedding=embed_llm(), persist_directory="chroma_index")
    vectordb.persist()
    return vectordb

In [5]:
def create_retriever():
    vectorstore = Chroma(persist_directory="chroma_index", embedding_function=embed_llm())
    retriever = vectorstore.as_retriever()
    return retriever

In [6]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [8]:
def get_llm_response(llm, retriever, query):
    prompt = hub.pull("rlm/rag-prompt")
    
#     rag_chain = (
#     {"context": retriever, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     |StrOutputParser()
# )
    rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
    )

    answer = rag_chain.invoke({"query": query})
    return answer

In [9]:
docs = data_ingestion()

In [10]:
print(len(docs))

66


In [11]:
print(docs[0])

page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\n\n\nMemory' 

In [12]:

vectordb = create_vector_store(docs)

In [13]:
vectordb.embeddings

OllamaEmbeddings(base_url='http://localhost:11434', model='nomic-embed-text', embed_instruction='passage: ', query_instruction='query: ', mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None, show_progress=False, headers=None, model_kwargs=None)

In [14]:
llm = phi_llm()
retriever = create_retriever()
user_question = "What is langchain?"

In [15]:
llm

Ollama(model='phi', temperature=0.0, timeout=300)

In [16]:
retriever.invoke(user_question)

[Document(page_content='Case Studies#\nScientific Discovery Agent#\nChemCrow (Bran et al. 2023) is a domain-specific example in which LLM is augmented with 13 expert-designed tools to accomplish tasks across organic synthesis, drug discovery, and materials design. The workflow, implemented in LangChain, reflects what was previously described in the ReAct and MRKLs and combines CoT reasoning with tools relevant to the tasks:\n\nThe LLM is provided with a list of tool names, descriptions of their utility, and details about the expected input/output.\nIt is then instructed to answer a user-given prompt using the tools provided when necessary. The instruction suggests the model to follow the ReAct format - Thought, Action, Action Input, Observation.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),
 Document(page_content='Fig. 11. Illustration of how HuggingGPT works. (Image source: Shen et al. 2023)\nThe system comprises of 4 stages:\n(1) Task planning: LLM w

In [17]:
answer = get_llm_response(llm, retriever, user_question)

In [18]:
answer

' "content": "You will get instructions for code to write.\\nYou will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.\\nMake sure that every detail of the architecture is, in the end, implemented as code.\\n\\nThink step by step and reason yourself to the right decisions to make sure we get it right.\\nYou will first lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose.\\n\\nThen you will output the content of each file including ALL code.\\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\\nFILENAME is the lowercase file name including the file extension,\\nLANG is the markup code block language for the code\'s language, and CODE is the code:\\n\\nFILENAME\\n```LANG\\nCODE\\n```\\n\\nPlease note that the code should be fully functional. No placeholders.\\n\\nYou will start with the \\"e

In [None]:
answer

In [None]:
# from langchain_community.vectorstores import Weaviate
# import weaviate

# docs = data_ingestion()
# vectordb = Weaviate.from_documents(documents=docs, embedding=embed_llm())

# from weaviate.embedded import EmbeddedOptions

# client = weaviate.Client(embedded_options=embed_llm())

In [None]:
from langchain.schema.messages import HumanMessage, SystemMessage
from langchain_community.chat_models import ChatOllama

In [None]:
messages = [
  SystemMessage(
   content="""You're an assistant knowledgeable about
   healthcare. Only answer healthcare-related questions."""
  ),
  HumanMessage(content="How do i change car tire?"),
 ]

In [None]:
llm = phi_llm()

In [None]:

llm.invoke(messages)

In [None]:
llm = phi_llm()
ai_msg = llm.invoke(
    "I have a pandas DataFrame 'df' with columns 'Age' and 'Fare'. Write code to compute the correlation between the two columns. Return Markdown for a Python code snippet and nothing else."
)


In [None]:
print(ai_msg)

In [None]:
import pandas as pd
from langchain_core.prompts import ChatPromptTemplate
from langchain_experimental.tools import PythonAstREPLTool

In [None]:
df = pd.read_csv(r"D:\Gen_AI_Tutorials\langchain\Rag_From_Scratch\advance_rag_project\data\csv_files\Titanic.csv")

In [None]:
df.head()

We can combine this ability with a Python-executing tool to create a simple data analysis chain. We’ll first want to load our CSV table as a dataframe, and give the tool access to this dataframe:

In [None]:
tool = PythonAstREPLTool(locals={"df": df})
tool.invoke("df['Fare'].mean()")

To help enforce proper use of our Python tool, we’ll using function calling:

In [None]:
llm_with_tools = llm.bind(tools=tool)

In [None]:
llm

In [None]:
llm_with_tools

In [None]:
llm_with_tools.invoke(
    "I have a dataframe 'df' and want to know the correlation between the 'Age' and 'Fare' columns"
)

In [None]:
from langchain_experimental.agents import create_pandas_dataframe_agent

In [None]:
agent = create_pandas_dataframe_agent(llm, df, agent_type="openai-tools", verbose=True)

In [None]:
agent.invoke(
    {
        "input": "What's the correlation between age and fare? is that greater than the correlation between fare and survival?"
    }
)