Skip to content

Commit

Permalink
Langchain cloud oss example update (#188)
Browse files Browse the repository at this point in the history
  • Loading branch information
raghavdixit99 authored May 17, 2024
1 parent bbc96f0 commit 89214a1
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 153 deletions.
143 changes: 82 additions & 61 deletions examples/Code-Documentation-QA-Bot/main.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,22 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"id": "e8a49c31",
"metadata": {},
"outputs": [],
"source": [
"! pip install -U langchain langchain-openai"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "66638d6c",
"metadata": {},
"outputs": [],
"source": [
"!pip install -qq openai==0.28 langchain==0.0.354 tiktoken unstructured pandas lancedb"
"! pip install -qq tiktoken unstructured pandas lancedb"
]
},
{
Expand All @@ -41,22 +51,23 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 1,
"id": "58ee1868",
"metadata": {},
"outputs": [],
"source": [
"import openai\n",
"import os\n",
"\n",
"\n",
"# Configuring the environment variable OPENAI_API_KEY\n",
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n",
"\n",
"if \"OPENAI_API_KEY\" not in os.environ:\n",
" os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n",
"openai.api_key = os.environ[\"OPENAI_API_KEY\"]\n",
"\n",
"assert len(openai.Model.list()[\"data\"]) > 0"
"# assert len(openai.models.list()[\"data\"]) > 0"
]
},
{
Expand All @@ -78,7 +89,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"id": "b55d22f1",
"metadata": {},
"outputs": [],
Expand All @@ -91,10 +102,9 @@
"from pathlib import Path\n",
"\n",
"from langchain.document_loaders import UnstructuredHTMLLoader\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain.vectorstores import LanceDB\n",
"from langchain.llms import OpenAI\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"from langchain.chains import RetrievalQA"
]
},
Expand All @@ -117,7 +127,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 3,
"id": "7da77e75",
"metadata": {},
"outputs": [],
Expand All @@ -142,7 +152,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 4,
"id": "d171d062",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -196,14 +206,16 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"id": "f5f683a7-123b-4e9e-a60b-115bc1340a66",
"metadata": {},
"outputs": [],
"source": [
"from tqdm import tqdm\n",
"\n",
"docs = []\n",
"docs_path = Path(\"docs.pkl\")\n",
"for p in Path(\"numpy_docs\").rglob(\"*.html\"):\n",
"for p in tqdm(Path(\"numpy_docs\").rglob(\"*.html\")):\n",
" if p.is_dir():\n",
" continue\n",
" loader = UnstructuredHTMLLoader(p)\n",
Expand All @@ -227,21 +239,10 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": null,
"id": "c019d728-bb65-494a-b4a9-73a62bf8e155",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2699"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"len(docs)"
]
Expand All @@ -258,7 +259,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 7,
"id": "82230563",
"metadata": {},
"outputs": [],
Expand All @@ -278,29 +279,52 @@
"source": [
"# Storing\n",
"\n",
"Let's connect to LanceDB so we can store our documents. We'll create a Table to store them in:"
"Let's connect to LanceDB so we can store our documents, It requires 0 setup !"
]
},
{
"cell_type": "markdown",
"id": "7ed0c792",
"metadata": {},
"source": [
"### For LanceDB cloud use the below cell :"
]
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 9,
"id": "74780a58",
"metadata": {},
"outputs": [],
"source": [
"db = lancedb.connect(\"lancedb\")\n",
"table = db.create_table(\n",
" \"numpy_docs\",\n",
" data=[\n",
" {\n",
" \"vector\": embeddings.embed_query(\"Hello World\"),\n",
" \"text\": \"Hello World\",\n",
" \"id\": \"1\",\n",
" }\n",
" ],\n",
" mode=\"overwrite\",\n",
"vectorstore = LanceDB(\n",
" embedding=embeddings,\n",
" uri=\"db://test\", # your remote database URI\n",
" api_key=\"sk_...\",\n",
" region=\"us-east-x-xxx\", # the cloud region you have configured\n",
" table_name=\"langchain_vectorstore\", # Optional, defaults to \"vectors\"\n",
" mode=\"overwrite\", # Optional, defaults to \"overwrite\"\n",
")\n",
"docsearch = LanceDB.from_documents(documents, embeddings, connection=table)"
"\n",
"doc_ids = vectorstore.add_documents(documents=documents)"
]
},
{
"cell_type": "markdown",
"id": "9d6cf585",
"metadata": {},
"source": [
"### For LanceDB local use the below cell :"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "71e15e3b",
"metadata": {},
"outputs": [],
"source": [
"vectorstore = LanceDB.from_documents(documents=documents, embedding=embeddings)"
]
},
{
Expand All @@ -313,13 +337,13 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 9,
"id": "6a5891ad",
"metadata": {},
"outputs": [],
"source": [
"qa = RetrievalQA.from_chain_type(\n",
" llm=OpenAI(), chain_type=\"stuff\", retriever=docsearch.as_retriever()\n",
" llm=OpenAI(), chain_type=\"stuff\", retriever=vectorstore.as_retriever()\n",
")"
]
},
Expand All @@ -341,83 +365,80 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 10,
"id": "70d88316",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' The NumPy library is an open source Python library that is used in many fields of science and engineering. It provides efficient data structures for multidimensional arrays and matrices, along with a vast array of mathematical functions to operate on them. It is a core component of the scientific Python and PyData ecosystems, and is used by everyone from beginners to experienced researchers in various applications. It is also interoperable with other Python libraries such as SciPy, Pandas, and OpenCV, making it a crucial tool for scientific computing in Python. '"
"{'query': 'tell me about the numpy library?',\n",
" 'result': ' The NumPy library is an open-source Python library that provides efficient data structures and mathematical functions for working with multidimensional arrays and matrices. It is widely used in the fields of science and engineering and is a fundamental component of the scientific Python ecosystem. It can be installed using a scientific Python distribution or through the use of pip or conda. '}"
]
},
"execution_count": 23,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query = \"tell me about the numpy library?\"\n",
"qa.run(query)"
"qa.invoke(query)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 12,
"id": "85a0397c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\n\\n1.22.0'"
"{'query': \"What's the current version of numpy?\",\n",
" 'result': ' The current version of NumPy is 1.21.6, according to the context provided.'}"
]
},
"execution_count": 24,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query = \"What's the current version of numpy?\"\n",
"qa.run(query)"
"qa.invoke(query)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 13,
"id": "923f86c6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' Linear algebra operations can be performed using the numpy.linalg library.'"
"{'query': 'What kind of linear algebra related operations can be done in numpy?',\n",
" 'result': ' Numpy provides a variety of linear algebra related operations, including decompositions, matrix eigenvalues, norms and other numbers, solving equations and inverting matrices, and linear algebra on several matrices at once.'}"
]
},
"execution_count": 25,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query = \"What kind of linear algebra related operations can be done in numpy?\"\n",
"qa.run(query)"
"qa.invoke(query)"
]
},
{
"cell_type": "markdown",
"id": "f8958d1b-0ad6-44d6-bca0-d81771c564a1",
"metadata": {},
"source": [
"Thanks"
"Thanks !"
]
},
{
"cell_type": "markdown",
"id": "e6a53efb",
"metadata": {},
"source": []
}
],
"metadata": {
Expand All @@ -436,7 +457,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.10.13"
},
"vscode": {
"interpreter": {
Expand Down
Loading

0 comments on commit 89214a1

Please sign in to comment.