langchain 0.2.3 quickstart
manufy committed Jun 9, 2024
1 parent 6c9edbe commit 2a29e93
Showing 12 changed files with 1,246 additions and 0 deletions.
378 changes: 378 additions & 0 deletions 4-LangChain-0.2.3/QuickStart/Agents/Agent.ipynb

Large diffs are not rendered by default.

195 changes: 195 additions & 0 deletions 4-LangChain-0.2.3/QuickStart/Chains/ConversationRetrievalChain.ipynb

Large diffs are not rendered by default.

393 changes: 393 additions & 0 deletions 4-LangChain-0.2.3/QuickStart/Chains/LLMChain.ipynb
@@ -0,0 +1,393 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Package Version\n",
"------------------------ ---------\n",
"aiohttp 3.9.5\n",
"aiosignal 1.3.1\n",
"annotated-types 0.7.0\n",
"anyio 4.4.0\n",
"appnope 0.1.4\n",
"asttokens 2.4.1\n",
"async-timeout 4.0.3\n",
"attrs 23.2.0\n",
"Brotli 1.0.9\n",
"certifi 2024.6.2\n",
"charset-normalizer 3.3.2\n",
"comm 0.2.2\n",
"dataclasses-json 0.6.6\n",
"debugpy 1.6.7\n",
"decorator 5.1.1\n",
"distro 1.9.0\n",
"exceptiongroup 1.2.0\n",
"executing 2.0.1\n",
"frozenlist 1.4.0\n",
"greenlet 3.0.1\n",
"h11 0.14.0\n",
"httpcore 1.0.5\n",
"httpx 0.27.0\n",
"idna 3.7\n",
"importlib_metadata 7.1.0\n",
"ipykernel 6.29.4\n",
"ipython 8.25.0\n",
"jedi 0.19.1\n",
"jsonpatch 1.33\n",
"jsonpointer 2.0\n",
"jupyter_client 8.6.2\n",
"jupyter_core 5.5.0\n",
"langchain 0.2.3\n",
"langchain-community 0.2.3\n",
"langchain-core 0.2.5\n",
"langchain-openai 0.1.8\n",
"langchain-text-splitters 0.2.1\n",
"langsmith 0.1.75\n",
"marshmallow 3.21.3\n",
"matplotlib-inline 0.1.7\n",
"multidict 6.0.4\n",
"mypy-extensions 1.0.0\n",
"nest_asyncio 1.6.0\n",
"numpy 1.26.4\n",
"openai 1.33.0\n",
"orjson 3.9.15\n",
"packaging 23.2\n",
"parso 0.8.4\n",
"pexpect 4.9.0\n",
"pickleshare 0.7.5\n",
"pip 24.0\n",
"platformdirs 4.2.2\n",
"prompt_toolkit 3.0.46\n",
"psutil 5.9.0\n",
"ptyprocess 0.7.0\n",
"pure-eval 0.2.2\n",
"pydantic 1.10.12\n",
"pydantic_core 2.18.4\n",
"Pygments 2.18.0\n",
"PySocks 1.7.1\n",
"python-dateutil 2.9.0\n",
"PyYAML 6.0.1\n",
"pyzmq 25.1.2\n",
"regex 2024.5.15\n",
"requests 2.32.3\n",
"setuptools 69.5.1\n",
"six 1.16.0\n",
"sniffio 1.3.1\n",
"SQLAlchemy 2.0.25\n",
"stack-data 0.6.2\n",
"tenacity 8.3.0\n",
"tiktoken 0.7.0\n",
"tornado 6.3.3\n",
"tqdm 4.66.4\n",
"traitlets 5.14.3\n",
"typing_extensions 4.12.2\n",
"typing-inspect 0.9.0\n",
"urllib3 2.2.1\n",
"wcwidth 0.2.13\n",
"wheel 0.43.0\n",
"yarl 1.9.3\n",
"zipp 3.18.0\n"
]
}
],
"source": [
"!pip list"
]
},
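{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before instantiating the model below, an OpenAI API key has to be available. A minimal sketch, assuming the standard `OPENAI_API_KEY` environment variable is used (adjust to your own credential setup):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"# Prompt for the key only if it is not already set in the environment.\n",
"if \"OPENAI_API_KEY\" not in os.environ:\n",
"    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API key: \")"
]
},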
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='Langsmith can help with testing by providing automated testing tools and frameworks that can be used to test code written in various programming languages. These tools can help developers perform unit tests, integration tests, and end-to-end tests to ensure that their code functions correctly and meets the requirements. Langsmith can also provide resources and guidance on best practices for testing, as well as support for continuous integration and deployment processes to streamline the testing workflow. Additionally, Langsmith can assist in setting up and maintaining testing environments and infrastructure to support the testing process.', response_metadata={'token_usage': {'completion_tokens': 106, 'prompt_tokens': 15, 'total_tokens': 121}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-39933902-c75e-42bf-aa6f-6f01211bcace-0', usage_metadata={'input_tokens': 15, 'output_tokens': 106, 'total_tokens': 121})"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llm.invoke(\"how can langsmith help with testing?\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\"Langsmith can help with testing by providing a platform for automated testing of software applications. The tool allows users to create and run test cases, generate test data, and analyze test results. Langsmith's automation capabilities can save time and effort in testing, ensure more consistent and thorough testing coverage, and help identify issues early in the development process. Additionally, Langsmith can integrate with popular testing frameworks and tools, making it easier to incorporate automated testing into the software development workflow.\", response_metadata={'token_usage': {'completion_tokens': 94, 'prompt_tokens': 28, 'total_tokens': 122}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-cbffe71a-7b08-477a-8f24-7b237ebf528f-0', usage_metadata={'input_tokens': 28, 'output_tokens': 94, 'total_tokens': 122})"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.prompts import ChatPromptTemplate\n",
"prompt = ChatPromptTemplate.from_messages([\n",
" (\"system\", \"You are a world class technical documentation writer.\"),\n",
" (\"user\", \"{input}\")\n",
"])\n",
"chain = prompt | llm \n",
"chain.invoke({\"input\": \"how can langsmith help with testing?\"})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The output of a ChatModel (and therefore, of this chain) is a message. However, it's often much more convenient to work with strings. Let's add a simple output parser to convert the chat message to a string."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Langsmith can help with testing in several ways:\\n\\n1. Automated Testing: Langsmith can be used to generate test data for automated testing. By creating realistic and diverse test data sets, Langsmith can help ensure comprehensive test coverage and identify potential edge cases.\\n\\n2. Performance Testing: Langsmith can be used to generate large volumes of data to simulate real-world scenarios for performance testing. By providing a variety of data types and structures, Langsmith can help identify performance bottlenecks and optimize system performance.\\n\\n3. Load Testing: Langsmith can generate synthetic load on a system by creating multiple instances of data sets. This can help evaluate system scalability, response times, and overall performance under heavy loads.\\n\\n4. Data Validation: Langsmith can be used to validate the accuracy and integrity of data by generating test data sets that cover a wide range of input variations. This can help identify data inconsistencies, errors, and anomalies in the system.\\n\\n5. Regression Testing: Langsmith can help with regression testing by generating test data sets that cover both new and existing functionalities. By automating the generation of test data, Langsmith can help streamline the regression testing process and ensure that new changes do not introduce unexpected issues.\\n\\nOverall, Langsmith can play a crucial role in enhancing the efficiency and effectiveness of testing processes by providing reliable and customizable test data sets for various types of testing scenarios.'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.output_parsers import StrOutputParser\n",
"\n",
"output_parser = StrOutputParser()\n",
"chain = prompt | llm | output_parser\n",
"chain.invoke({\"input\": \"how can langsmith help with testing?\"})\n"
]
},
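{
"cell_type": "markdown",
"metadata": {},
"source": [
"Because the composed chain is an LCEL runnable, the same pipeline can also be streamed instead of invoked. A small illustrative sketch, not part of the original quickstart:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# With StrOutputParser at the end of the chain, .stream() yields plain string chunks as they arrive.\n",
"for chunk in chain.stream({\"input\": \"how can langsmith help with testing?\"}):\n",
"    print(chunk, end=\"\", flush=True)"
]
},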
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Retrieval Chain\n",
"\n",
"To properly answer the original question (\"how can langsmith help with testing?\"), we need to provide additional context to the LLM. We can do this via retrieval. Retrieval is useful when you have too much data to pass to the LLM directly. You can then use a retriever to fetch only the most relevant pieces and pass those in.\n",
"\n",
"In this process, we will look up relevant documents from a Retriever and then pass them into the prompt. A Retriever can be backed by anything - a SQL table, the internet, etc - but in this instance we will populate a vector store and use that as a retriever. For more information on vectorstores, see this documentation.\n",
"\n",
"First, we need to load the data that we want to index. To do this, we will use the WebBaseLoader. This requires installing BeautifulSoup:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting beautifulsoup4\n",
" Using cached beautifulsoup4-4.12.3-py3-none-any.whl.metadata (3.8 kB)\n",
"Collecting soupsieve>1.2 (from beautifulsoup4)\n",
" Using cached soupsieve-2.5-py3-none-any.whl.metadata (4.7 kB)\n",
"Using cached beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)\n",
"Using cached soupsieve-2.5-py3-none-any.whl (36 kB)\n",
"Installing collected packages: soupsieve, beautifulsoup4\n",
"Successfully installed beautifulsoup4-4.12.3 soupsieve-2.5\n"
]
}
],
"source": [
"!pip install beautifulsoup4"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import WebBaseLoader\n",
"loader = WebBaseLoader(\"https://docs.smith.langchain.com/user_guide\")\n",
"\n",
"docs = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting faiss-cpu\n",
" Using cached faiss_cpu-1.8.0-cp312-cp312-macosx_10_14_x86_64.whl.metadata (3.6 kB)\n",
"Requirement already satisfied: numpy in /opt/anaconda3/envs/langchain-0.2.3/lib/python3.12/site-packages (from faiss-cpu) (1.26.4)\n",
"Using cached faiss_cpu-1.8.0-cp312-cp312-macosx_10_14_x86_64.whl (7.4 MB)\n",
"Installing collected packages: faiss-cpu\n",
"Successfully installed faiss-cpu-1.8.0\n"
]
}
],
"source": [
"!pip install faiss-cpu"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.vectorstores import FAISS\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter()\n",
"documents = text_splitter.split_documents(docs)\n",
"vector = FAISS.from_documents(documents, embeddings)"
]
},
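{
"cell_type": "markdown",
"metadata": {},
"source": [
"As an optional sanity check (not in the original quickstart), the vector store can be queried directly before wiring it into a chain:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Return the two chunks most similar to the query, to eyeball what retrieval will feed the LLM.\n",
"for doc in vector.similarity_search(\"how can langsmith help with testing?\", k=2):\n",
"    print(doc.page_content[:200])\n",
"    print(\"---\")"
]
},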
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.combine_documents import create_stuff_documents_chain\n",
"\n",
"prompt = ChatPromptTemplate.from_template(\"\"\"Answer the following question based only on the provided context:\n",
"\n",
"<context>\n",
"{context}\n",
"</context>\n",
"\n",
"Question: {input}\"\"\")\n",
"\n",
"document_chain = create_stuff_documents_chain(llm, prompt)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Langsmith can help with testing by allowing you to visualize test results.'"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.documents import Document\n",
"\n",
"document_chain.invoke({\n",
" \"input\": \"how can langsmith help with testing?\",\n",
" \"context\": [Document(page_content=\"langsmith can let you visualize test results\")]\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import create_retrieval_chain\n",
"\n",
"retriever = vector.as_retriever()\n",
"retrieval_chain = create_retrieval_chain(retriever, document_chain)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LangSmith can help with testing by allowing developers to create datasets, run tests on LLM applications, upload test cases in bulk or create them on the fly, export test cases from application traces, run custom evaluations, compare results for different configurations, provide a playground environment for rapid iteration and experimentation, collect feedback from users, annotate traces, add runs to datasets, monitor key metrics over time, and perform automations on traces in near real-time.\n"
]
}
],
"source": [
"response = retrieval_chain.invoke({\"input\": \"how can langsmith help with testing?\"})\n",
"print(response[\"answer\"])\n",
"\n",
"# LangSmith offers several features that can help with testing:..."
]
},
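{
"cell_type": "markdown",
"metadata": {},
"source": [
"Besides the answer, the retrieval chain's output dict also carries the retrieved documents under the `context` key. A quick inspection sketch, not part of the original quickstart:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Inspect which document chunks the retriever passed to the LLM alongside the answer.\n",
"for doc in response[\"context\"]:\n",
"    print(doc.metadata.get(\"source\"), len(doc.page_content), \"characters\")"
]
},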
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "langchain-0.2.3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
