diff --git a/.gitignore b/.gitignore
index 53c032b..86accc4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,6 +20,7 @@ old
*.mp4
5-VectorDB/chroma
onnxruntime*
+storage*
diff --git a/4-LangChain-0.2.3/RAG QA/OpeinAI_RAG.ipynb b/4-LangChain-0.2.3/RAG QA/OpeinAI_RAG.ipynb
index 8ac3b44..9a58918 100644
--- a/4-LangChain-0.2.3/RAG QA/OpeinAI_RAG.ipynb
+++ b/4-LangChain-0.2.3/RAG QA/OpeinAI_RAG.ipynb
@@ -16,18 +16,14 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 1,
"metadata": {},
"outputs": [
{
- "ename": "ModuleNotFoundError",
- "evalue": "No module named 'langchain_chroma'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[9], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m hub\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_community\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdocument_loaders\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m WebBaseLoader\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_chroma\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Chroma\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moutput_parsers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m StrOutputParser\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrunnables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RunnablePassthrough\n",
- "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'langchain_chroma'"
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:root:USER_AGENT environment variable not set, consider setting it to identify your requests.\n"
]
}
],
@@ -35,13 +31,22 @@
"import bs4\n",
"from langchain import hub\n",
"from langchain_community.document_loaders import WebBaseLoader\n",
- "from langchain_chroma import Chroma\n",
+ "#from langchain_chroma import Chroma\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.runnables import RunnablePassthrough\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import chromadb"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
diff --git a/5-VectorDB/ChromaDB/chroma-test.ipynb b/5-VectorDB/ChromaDB/chroma-test.ipynb
new file mode 100644
index 0000000..9250886
--- /dev/null
+++ b/5-VectorDB/ChromaDB/chroma-test.ipynb
@@ -0,0 +1,91 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using embedded DuckDB without persistence: data will be transient\n",
+ "Using embedded DuckDB without persistence: data will be transient\n",
+ "No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction\n",
+ "/opt/anaconda3/envs/langchain-0.2.3/lib/python3.12/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+ " from tqdm.autonotebook import tqdm, trange\n",
+ "/opt/anaconda3/envs/langchain-0.2.3/lib/python3.12/site-packages/threadpoolctl.py:1214: RuntimeWarning: \n",
+ "Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at\n",
+ "the same time. Both libraries are known to be incompatible and this\n",
+ "can cause random crashes or deadlocks on Linux when loaded in the\n",
+ "same Python program.\n",
+ "Using threadpoolctl may cause crashes or deadlocks. For more\n",
+ "information and possible workarounds, please see\n",
+ " https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md\n",
+ "\n",
+ " warnings.warn(msg, RuntimeWarning)\n",
+ "/opt/anaconda3/envs/langchain-0.2.3/lib/python3.12/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "ename": "",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31mEl kernel se bloqueó al ejecutar código en la celda actual o en una celda anterior. \n",
+ "\u001b[1;31mRevise el código de las celdas para identificar una posible causa del error. \n",
+ "\u001b[1;31mHaga clic aquí para obtener más información. \n",
+ "\u001b[1;31mVea Jupyter log para obtener más detalles."
+ ]
+ }
+ ],
+ "source": [
+ "import chromadb\n",
+ "# setup Chroma in-memory, for easy prototyping. Can add persistence easily!\n",
+ "client = chromadb.Client()\n",
+ "\n",
+ "chromadb.Client() # NOTE: this creates a second in-memory client; it does not connect to a running Chroma server\n",
+ "\n",
+ "# Create collection. get_collection, get_or_create_collection, delete_collection also available!\n",
+ "collection = client.create_collection(\"all-my-documents\")\n",
+ "\n",
+ "# Add docs to the collection. Can also update and delete. Row-based API coming soon!\n",
+ "collection.add(\n",
+ " documents=[\"This is document1\", \"This is document2\"], # we handle tokenization, embedding, and indexing automatically. You can skip that and add your own embeddings as well\n",
+ " metadatas=[{\"source\": \"notion\"}, {\"source\": \"google-docs\"}], # filter on these!\n",
+ " ids=[\"doc1\", \"doc2\"], # unique for each doc\n",
+ ")\n",
+ "\n",
+ "# Query/search 2 most similar results. You can also .get by id\n",
+ "results = collection.query(\n",
+ " query_texts=[\"This is a query document\"],\n",
+ " n_results=2,\n",
+ " # where={\"metadata_field\": \"is_equal_to_this\"}, # optional filter\n",
+ " # where_document={\"$contains\":\"search_string\"} # optional filter\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "langchain-0.2.3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/5-VectorDB/ChromaDB/client-test.py b/5-VectorDB/ChromaDB/client-test.py
new file mode 100644
index 0000000..15fe1d0
--- /dev/null
+++ b/5-VectorDB/ChromaDB/client-test.py
@@ -0,0 +1,21 @@
+import chromadb
+# setup Chroma in-memory, for easy prototyping. Can add persistence easily!
+client = chromadb.Client()
+
+# Create collection. get_collection, get_or_create_collection, delete_collection also available!
+collection = client.create_collection("all-my-documents")
+
+# Add docs to the collection. Can also update and delete. Row-based API coming soon!
+collection.add(
+ documents=["This is document1", "This is document2"], # we handle tokenization, embedding, and indexing automatically. You can skip that and add your own embeddings as well
+ metadatas=[{"source": "notion"}, {"source": "google-docs"}], # filter on these!
+ ids=["doc1", "doc2"], # unique for each doc
+)
+
+# Query/search 2 most similar results. You can also .get by id
+results = collection.query(
+ query_texts=["This is a query document"],
+ n_results=2,
+ # where={"metadata_field": "is_equal_to_this"}, # optional filter
+ # where_document={"$contains":"search_string"} # optional filter
+)
\ No newline at end of file
diff --git a/5-VectorDB/Chroma-Docker/docker-compose.yml b/5-VectorDB/ChromaDB/docker-compose.yml
similarity index 100%
rename from 5-VectorDB/Chroma-Docker/docker-compose.yml
rename to 5-VectorDB/ChromaDB/docker-compose.yml
diff --git a/5-VectorDB/ChromaDB/requirements.txt b/5-VectorDB/ChromaDB/requirements.txt
new file mode 100644
index 0000000..3e33d68
--- /dev/null
+++ b/5-VectorDB/ChromaDB/requirements.txt
@@ -0,0 +1 @@
+chromadb-client
\ No newline at end of file
diff --git a/6-LlamaIndex/data/mio.txt b/6-LlamaIndex/data/mio.txt
new file mode 100644
index 0000000..b8b4a4e
--- /dev/null
+++ b/6-LlamaIndex/data/mio.txt
@@ -0,0 +1 @@
+hola
\ No newline at end of file
diff --git a/6-LlamaIndex/dataingestion.ipynb b/6-LlamaIndex/dataingestion.ipynb
new file mode 100644
index 0000000..4b35281
--- /dev/null
+++ b/6-LlamaIndex/dataingestion.ipynb
@@ -0,0 +1,94 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from llama_index.core import Document\n",
+ "\n",
+ "doc = Document(text=\"text\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from llama_index.core import VectorStoreIndex\n",
+ "\n",
+ "vector_index = VectorStoreIndex.from_documents([doc])\n",
+ "vector_index.as_query_engine()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from llama_index.core import SimpleDirectoryReader\n",
+ "from llama_index.core.ingestion import IngestionPipeline\n",
+ "from llama_index.core.node_parser import TokenTextSplitter\n",
+ "\n",
+ "documents = SimpleDirectoryReader(\"./data\").load_data()\n",
+ "\n",
+ "pipeline = IngestionPipeline(transformations=[TokenTextSplitter()])\n",
+ "\n",
+ "nodes = pipeline.run(documents=documents)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from llama_index.core import Document\n",
+ "from llama_index.core import VectorStoreIndex\n",
+ "document = Document(\n",
+ " text=\"text\",\n",
+ " metadata={\"filename\": \"\", \"category\": \"\"},\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from llama_index.core.schema import TextNode\n",
+ "from llama_index.core import VectorStoreIndex\n",
+ "\n",
+ "node1 = TextNode(text=\"\", id_=\"\")\n",
+ "node2 = TextNode(text=\"\", id_=\"\")\n",
+ "\n",
+ "index = VectorStoreIndex([node1, node2])"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "langchain-0.2.3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/6-LlamaIndex/requirements.txt b/6-LlamaIndex/requirements.txt
new file mode 100644
index 0000000..9d17f70
--- /dev/null
+++ b/6-LlamaIndex/requirements.txt
@@ -0,0 +1,11 @@
+llama-index-core
+llama-index-llms-openai
+llama-index-embeddings-openai
+llama-index-program-openai
+llama-index-question-gen-openai
+llama-index-agent-openai
+llama-index-readers-file
+llama-index-multi-modal-llms-openai
+
+llama-index-llms-ollama
+llama-index-embeddings-huggingface
\ No newline at end of file
diff --git a/6-LlamaIndex/starter-OpenAI.ipynb b/6-LlamaIndex/starter-OpenAI.ipynb
new file mode 100644
index 0000000..31de38e
--- /dev/null
+++ b/6-LlamaIndex/starter-OpenAI.ipynb
@@ -0,0 +1,352 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
+ "\n",
+ "documents = SimpleDirectoryReader(\"data\").load_data()\n",
+ "index = VectorStoreIndex.from_documents(documents)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The author likely spoke Spanish growing up.\n"
+ ]
+ }
+ ],
+ "source": [
+ "query_engine = index.as_query_engine()\n",
+ "response = query_engine.query(\"What did the author do growing up?\")\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import logging\n",
+ "import sys\n",
+ "\n",
+ "logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)\n",
+ "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': .parser at 0x15b19ccc0>, 'json_data': {'input': ['What did the author do growing up?'], 'model': 'text-embedding-ada-002', 'encoding_format': 'base64'}}\n",
+ "Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': .parser at 0x15b19ccc0>, 'json_data': {'input': ['What did the author do growing up?'], 'model': 'text-embedding-ada-002', 'encoding_format': 'base64'}}\n",
+ "DEBUG:openai._base_client:Sending HTTP Request: POST https://api.openai.com/v1/embeddings\n",
+ "Sending HTTP Request: POST https://api.openai.com/v1/embeddings\n",
+ "DEBUG:httpcore.http11:send_request_headers.started request=\n",
+ "send_request_headers.started request=\n",
+ "DEBUG:httpcore.http11:send_request_headers.complete\n",
+ "send_request_headers.complete\n",
+ "DEBUG:httpcore.http11:send_request_body.started request=\n",
+ "send_request_body.started request=\n",
+ "DEBUG:httpcore.http11:send_request_body.complete\n",
+ "send_request_body.complete\n",
+ "DEBUG:httpcore.http11:receive_response_headers.started request=\n",
+ "receive_response_headers.started request=\n",
+ "DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sun, 09 Jun 2024 12:32:13 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'openai-model', b'text-embedding-ada-002'), (b'openai-organization', b'manuai'), (b'openai-processing-ms', b'21'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'3000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-remaining-requests', b'2999'), (b'x-ratelimit-remaining-tokens', b'999992'), (b'x-ratelimit-reset-requests', b'20ms'), (b'x-ratelimit-reset-tokens', b'0s'), (b'x-request-id', b'req_949b82edbda9f14f45b7db54f02ee193'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'891124a43e243851-MAD'), (b'Content-Encoding', b'br'), (b'alt-svc', b'h3=\":443\"; ma=86400')])\n",
+ "receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sun, 09 Jun 2024 12:32:13 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'openai-model', b'text-embedding-ada-002'), (b'openai-organization', b'manuai'), (b'openai-processing-ms', b'21'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'3000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-remaining-requests', b'2999'), (b'x-ratelimit-remaining-tokens', b'999992'), (b'x-ratelimit-reset-requests', b'20ms'), (b'x-ratelimit-reset-tokens', b'0s'), (b'x-request-id', b'req_949b82edbda9f14f45b7db54f02ee193'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'891124a43e243851-MAD'), (b'Content-Encoding', b'br'), (b'alt-svc', b'h3=\":443\"; ma=86400')])\n",
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+ "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+ "DEBUG:httpcore.http11:receive_response_body.started request=\n",
+ "receive_response_body.started request=\n",
+ "DEBUG:httpcore.http11:receive_response_body.complete\n",
+ "receive_response_body.complete\n",
+ "DEBUG:httpcore.http11:response_closed.started\n",
+ "response_closed.started\n",
+ "DEBUG:httpcore.http11:response_closed.complete\n",
+ "response_closed.complete\n",
+ "DEBUG:openai._base_client:HTTP Response: POST https://api.openai.com/v1/embeddings \"200 OK\" Headers({'date': 'Sun, 09 Jun 2024 12:32:13 GMT', 'content-type': 'application/json', 'transfer-encoding': 'chunked', 'connection': 'keep-alive', 'access-control-allow-origin': '*', 'openai-model': 'text-embedding-ada-002', 'openai-organization': 'manuai', 'openai-processing-ms': '21', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '3000', 'x-ratelimit-limit-tokens': '1000000', 'x-ratelimit-remaining-requests': '2999', 'x-ratelimit-remaining-tokens': '999992', 'x-ratelimit-reset-requests': '20ms', 'x-ratelimit-reset-tokens': '0s', 'x-request-id': 'req_949b82edbda9f14f45b7db54f02ee193', 'cf-cache-status': 'DYNAMIC', 'server': 'cloudflare', 'cf-ray': '891124a43e243851-MAD', 'content-encoding': 'br', 'alt-svc': 'h3=\":443\"; ma=86400'})\n",
+ "HTTP Response: POST https://api.openai.com/v1/embeddings \"200 OK\" Headers({'date': 'Sun, 09 Jun 2024 12:32:13 GMT', 'content-type': 'application/json', 'transfer-encoding': 'chunked', 'connection': 'keep-alive', 'access-control-allow-origin': '*', 'openai-model': 'text-embedding-ada-002', 'openai-organization': 'manuai', 'openai-processing-ms': '21', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '3000', 'x-ratelimit-limit-tokens': '1000000', 'x-ratelimit-remaining-requests': '2999', 'x-ratelimit-remaining-tokens': '999992', 'x-ratelimit-reset-requests': '20ms', 'x-ratelimit-reset-tokens': '0s', 'x-request-id': 'req_949b82edbda9f14f45b7db54f02ee193', 'cf-cache-status': 'DYNAMIC', 'server': 'cloudflare', 'cf-ray': '891124a43e243851-MAD', 'content-encoding': 'br', 'alt-svc': 'h3=\":443\"; ma=86400'})\n",
+ "DEBUG:openai._base_client:request_id: req_949b82edbda9f14f45b7db54f02ee193\n",
+ "request_id: req_949b82edbda9f14f45b7db54f02ee193\n",
+ "DEBUG:llama_index.core.indices.utils:> Top 1 nodes:\n",
+ "> [Node 3dec12cd-66f4-4b2d-b723-0378a9bab1bd] [Similarity score: 0.696261] hola\n",
+ "> Top 1 nodes:\n",
+ "> [Node 3dec12cd-66f4-4b2d-b723-0378a9bab1bd] [Similarity score: 0.696261] hola\n",
+ "DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': \"You are an expert Q&A system that is trusted around the world.\\nAlways answer the query using the provided context information, and not prior knowledge.\\nSome rules to follow:\\n1. Never directly reference the given context in your answer.\\n2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.\"}, {'role': 'user', 'content': 'Context information is below.\\n---------------------\\nfile_path: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/data/mio.txt\\n\\nhola\\n---------------------\\nGiven the context information and not prior knowledge, answer the query.\\nQuery: What did the author do growing up?\\nAnswer: '}], 'model': 'gpt-3.5-turbo', 'stream': False, 'temperature': 0.1}}\n",
+ "Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': \"You are an expert Q&A system that is trusted around the world.\\nAlways answer the query using the provided context information, and not prior knowledge.\\nSome rules to follow:\\n1. Never directly reference the given context in your answer.\\n2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.\"}, {'role': 'user', 'content': 'Context information is below.\\n---------------------\\nfile_path: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/data/mio.txt\\n\\nhola\\n---------------------\\nGiven the context information and not prior knowledge, answer the query.\\nQuery: What did the author do growing up?\\nAnswer: '}], 'model': 'gpt-3.5-turbo', 'stream': False, 'temperature': 0.1}}\n",
+ "DEBUG:openai._base_client:Sending HTTP Request: POST https://api.openai.com/v1/chat/completions\n",
+ "Sending HTTP Request: POST https://api.openai.com/v1/chat/completions\n",
+ "DEBUG:httpcore.http11:send_request_headers.started request=\n",
+ "send_request_headers.started request=\n",
+ "DEBUG:httpcore.http11:send_request_headers.complete\n",
+ "send_request_headers.complete\n",
+ "DEBUG:httpcore.http11:send_request_body.started request=\n",
+ "send_request_body.started request=\n",
+ "DEBUG:httpcore.http11:send_request_body.complete\n",
+ "send_request_body.complete\n",
+ "DEBUG:httpcore.http11:receive_response_headers.started request=\n",
+ "receive_response_headers.started request=\n",
+ "DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sun, 09 Jun 2024 12:32:14 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'manuai'), (b'openai-processing-ms', b'462'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'60000'), (b'x-ratelimit-remaining-requests', b'9998'), (b'x-ratelimit-remaining-tokens', b'59827'), (b'x-ratelimit-reset-requests', b'16.2s'), (b'x-ratelimit-reset-tokens', b'173ms'), (b'x-request-id', b'req_45a33bb8f9a19028058ed7fbb0fddd4c'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'891124a5edd3662f-MAD'), (b'Content-Encoding', b'br'), (b'alt-svc', b'h3=\":443\"; ma=86400')])\n",
+ "receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sun, 09 Jun 2024 12:32:14 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'manuai'), (b'openai-processing-ms', b'462'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'60000'), (b'x-ratelimit-remaining-requests', b'9998'), (b'x-ratelimit-remaining-tokens', b'59827'), (b'x-ratelimit-reset-requests', b'16.2s'), (b'x-ratelimit-reset-tokens', b'173ms'), (b'x-request-id', b'req_45a33bb8f9a19028058ed7fbb0fddd4c'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'891124a5edd3662f-MAD'), (b'Content-Encoding', b'br'), (b'alt-svc', b'h3=\":443\"; ma=86400')])\n",
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "DEBUG:httpcore.http11:receive_response_body.started request=\n",
+ "receive_response_body.started request=\n",
+ "DEBUG:httpcore.http11:receive_response_body.complete\n",
+ "receive_response_body.complete\n",
+ "DEBUG:httpcore.http11:response_closed.started\n",
+ "response_closed.started\n",
+ "DEBUG:httpcore.http11:response_closed.complete\n",
+ "response_closed.complete\n",
+ "DEBUG:openai._base_client:HTTP Response: POST https://api.openai.com/v1/chat/completions \"200 OK\" Headers({'date': 'Sun, 09 Jun 2024 12:32:14 GMT', 'content-type': 'application/json', 'transfer-encoding': 'chunked', 'connection': 'keep-alive', 'openai-organization': 'manuai', 'openai-processing-ms': '462', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '10000', 'x-ratelimit-limit-tokens': '60000', 'x-ratelimit-remaining-requests': '9998', 'x-ratelimit-remaining-tokens': '59827', 'x-ratelimit-reset-requests': '16.2s', 'x-ratelimit-reset-tokens': '173ms', 'x-request-id': 'req_45a33bb8f9a19028058ed7fbb0fddd4c', 'cf-cache-status': 'DYNAMIC', 'server': 'cloudflare', 'cf-ray': '891124a5edd3662f-MAD', 'content-encoding': 'br', 'alt-svc': 'h3=\":443\"; ma=86400'})\n",
+ "HTTP Response: POST https://api.openai.com/v1/chat/completions \"200 OK\" Headers({'date': 'Sun, 09 Jun 2024 12:32:14 GMT', 'content-type': 'application/json', 'transfer-encoding': 'chunked', 'connection': 'keep-alive', 'openai-organization': 'manuai', 'openai-processing-ms': '462', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '10000', 'x-ratelimit-limit-tokens': '60000', 'x-ratelimit-remaining-requests': '9998', 'x-ratelimit-remaining-tokens': '59827', 'x-ratelimit-reset-requests': '16.2s', 'x-ratelimit-reset-tokens': '173ms', 'x-request-id': 'req_45a33bb8f9a19028058ed7fbb0fddd4c', 'cf-cache-status': 'DYNAMIC', 'server': 'cloudflare', 'cf-ray': '891124a5edd3662f-MAD', 'content-encoding': 'br', 'alt-svc': 'h3=\":443\"; ma=86400'})\n",
+ "DEBUG:openai._base_client:request_id: req_45a33bb8f9a19028058ed7fbb0fddd4c\n",
+ "request_id: req_45a33bb8f9a19028058ed7fbb0fddd4c\n",
+ "The author grew up saying \"hola\".\n"
+ ]
+ }
+ ],
+ "source": [
+ "response = query_engine.query(\"What did the author do growing up?\")\n",
+ "print(response)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "DEBUG:fsspec.local:open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage/docstore.json\n",
+ "open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage/docstore.json\n",
+ "DEBUG:fsspec.local:open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage/index_store.json\n",
+ "open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage/index_store.json\n",
+ "DEBUG:fsspec.local:open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage/graph_store.json\n",
+ "open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage/graph_store.json\n",
+ "DEBUG:fsspec.local:open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage/default__vector_store.json\n",
+ "open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage/default__vector_store.json\n",
+ "DEBUG:fsspec.local:open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage/image__vector_store.json\n",
+ "open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage/image__vector_store.json\n"
+ ]
+ }
+ ],
+ "source": [
+ "index.storage_context.persist(persist_dir=\".storage\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "DEBUG:llama_index.core.readers.file.base:> [SimpleDirectoryReader] Total files added: 1\n",
+ "> [SimpleDirectoryReader] Total files added: 1\n",
+ "DEBUG:fsspec.local:open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/data/mio.txt\n",
+ "open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/data/mio.txt\n",
+ "DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: hola\n",
+ "> Adding chunk: hola\n",
+ "DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': .parser at 0x159171a80>, 'json_data': {'input': ['file_path: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/data/mio.txt hola'], 'model': 'text-embedding-ada-002', 'encoding_format': 'base64'}}\n",
+ "Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': .parser at 0x159171a80>, 'json_data': {'input': ['file_path: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/data/mio.txt hola'], 'model': 'text-embedding-ada-002', 'encoding_format': 'base64'}}\n",
+ "DEBUG:openai._base_client:Sending HTTP Request: POST https://api.openai.com/v1/embeddings\n",
+ "Sending HTTP Request: POST https://api.openai.com/v1/embeddings\n",
+ "DEBUG:httpcore.connection:close.started\n",
+ "close.started\n",
+ "DEBUG:httpcore.connection:close.complete\n",
+ "close.complete\n",
+ "DEBUG:httpcore.connection:connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=60.0 socket_options=None\n",
+ "connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=60.0 socket_options=None\n",
+ "DEBUG:httpcore.connection:connect_tcp.complete return_value=\n",
+ "connect_tcp.complete return_value=\n",
+ "DEBUG:httpcore.connection:start_tls.started ssl_context= server_hostname='api.openai.com' timeout=60.0\n",
+ "start_tls.started ssl_context= server_hostname='api.openai.com' timeout=60.0\n",
+ "DEBUG:httpcore.connection:start_tls.complete return_value=\n",
+ "start_tls.complete return_value=\n",
+ "DEBUG:httpcore.http11:send_request_headers.started request=\n",
+ "send_request_headers.started request=\n",
+ "DEBUG:httpcore.http11:send_request_headers.complete\n",
+ "send_request_headers.complete\n",
+ "DEBUG:httpcore.http11:send_request_body.started request=\n",
+ "send_request_body.started request=\n",
+ "DEBUG:httpcore.http11:send_request_body.complete\n",
+ "send_request_body.complete\n",
+ "DEBUG:httpcore.http11:receive_response_headers.started request=\n",
+ "receive_response_headers.started request=\n",
+ "DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sun, 09 Jun 2024 12:33:43 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'openai-model', b'text-embedding-ada-002'), (b'openai-organization', b'manuai'), (b'openai-processing-ms', b'27'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'3000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-remaining-requests', b'2999'), (b'x-ratelimit-remaining-tokens', b'999982'), (b'x-ratelimit-reset-requests', b'20ms'), (b'x-ratelimit-reset-tokens', b'1ms'), (b'x-request-id', b'req_4d7b53b7d9691cdbe58fd4d0b80f67a3'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'891126d24ab903aa-MAD'), (b'Content-Encoding', b'br'), (b'alt-svc', b'h3=\":443\"; ma=86400')])\n",
+ "receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sun, 09 Jun 2024 12:33:43 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'openai-model', b'text-embedding-ada-002'), (b'openai-organization', b'manuai'), (b'openai-processing-ms', b'27'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'3000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-remaining-requests', b'2999'), (b'x-ratelimit-remaining-tokens', b'999982'), (b'x-ratelimit-reset-requests', b'20ms'), (b'x-ratelimit-reset-tokens', b'1ms'), (b'x-request-id', b'req_4d7b53b7d9691cdbe58fd4d0b80f67a3'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'891126d24ab903aa-MAD'), (b'Content-Encoding', b'br'), (b'alt-svc', b'h3=\":443\"; ma=86400')])\n",
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+ "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+ "DEBUG:httpcore.http11:receive_response_body.started request=\n",
+ "receive_response_body.started request=\n",
+ "DEBUG:httpcore.http11:receive_response_body.complete\n",
+ "receive_response_body.complete\n",
+ "DEBUG:httpcore.http11:response_closed.started\n",
+ "response_closed.started\n",
+ "DEBUG:httpcore.http11:response_closed.complete\n",
+ "response_closed.complete\n",
+ "DEBUG:openai._base_client:HTTP Response: POST https://api.openai.com/v1/embeddings \"200 OK\" Headers({'date': 'Sun, 09 Jun 2024 12:33:43 GMT', 'content-type': 'application/json', 'transfer-encoding': 'chunked', 'connection': 'keep-alive', 'access-control-allow-origin': '*', 'openai-model': 'text-embedding-ada-002', 'openai-organization': 'manuai', 'openai-processing-ms': '27', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '3000', 'x-ratelimit-limit-tokens': '1000000', 'x-ratelimit-remaining-requests': '2999', 'x-ratelimit-remaining-tokens': '999982', 'x-ratelimit-reset-requests': '20ms', 'x-ratelimit-reset-tokens': '1ms', 'x-request-id': 'req_4d7b53b7d9691cdbe58fd4d0b80f67a3', 'cf-cache-status': 'DYNAMIC', 'server': 'cloudflare', 'cf-ray': '891126d24ab903aa-MAD', 'content-encoding': 'br', 'alt-svc': 'h3=\":443\"; ma=86400'})\n",
+ "HTTP Response: POST https://api.openai.com/v1/embeddings \"200 OK\" Headers({'date': 'Sun, 09 Jun 2024 12:33:43 GMT', 'content-type': 'application/json', 'transfer-encoding': 'chunked', 'connection': 'keep-alive', 'access-control-allow-origin': '*', 'openai-model': 'text-embedding-ada-002', 'openai-organization': 'manuai', 'openai-processing-ms': '27', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '3000', 'x-ratelimit-limit-tokens': '1000000', 'x-ratelimit-remaining-requests': '2999', 'x-ratelimit-remaining-tokens': '999982', 'x-ratelimit-reset-requests': '20ms', 'x-ratelimit-reset-tokens': '1ms', 'x-request-id': 'req_4d7b53b7d9691cdbe58fd4d0b80f67a3', 'cf-cache-status': 'DYNAMIC', 'server': 'cloudflare', 'cf-ray': '891126d24ab903aa-MAD', 'content-encoding': 'br', 'alt-svc': 'h3=\":443\"; ma=86400'})\n",
+ "DEBUG:openai._base_client:request_id: req_4d7b53b7d9691cdbe58fd4d0b80f67a3\n",
+ "request_id: req_4d7b53b7d9691cdbe58fd4d0b80f67a3\n",
+ "DEBUG:fsspec.local:open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage2/docstore.json\n",
+ "open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage2/docstore.json\n",
+ "DEBUG:fsspec.local:open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage2/index_store.json\n",
+ "open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage2/index_store.json\n",
+ "DEBUG:fsspec.local:open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage2/graph_store.json\n",
+ "open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage2/graph_store.json\n",
+ "DEBUG:fsspec.local:open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage2/default__vector_store.json\n",
+ "open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage2/default__vector_store.json\n",
+ "DEBUG:fsspec.local:open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage2/image__vector_store.json\n",
+ "open file: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/storage2/image__vector_store.json\n",
+ "DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': .parser at 0x15b19d620>, 'json_data': {'input': ['What did the author do growing up?'], 'model': 'text-embedding-ada-002', 'encoding_format': 'base64'}}\n",
+ "Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': .parser at 0x15b19d620>, 'json_data': {'input': ['What did the author do growing up?'], 'model': 'text-embedding-ada-002', 'encoding_format': 'base64'}}\n",
+ "DEBUG:openai._base_client:Sending HTTP Request: POST https://api.openai.com/v1/embeddings\n",
+ "Sending HTTP Request: POST https://api.openai.com/v1/embeddings\n",
+ "DEBUG:httpcore.http11:send_request_headers.started request=\n",
+ "send_request_headers.started request=\n",
+ "DEBUG:httpcore.http11:send_request_headers.complete\n",
+ "send_request_headers.complete\n",
+ "DEBUG:httpcore.http11:send_request_body.started request=\n",
+ "send_request_body.started request=\n",
+ "DEBUG:httpcore.http11:send_request_body.complete\n",
+ "send_request_body.complete\n",
+ "DEBUG:httpcore.http11:receive_response_headers.started request=\n",
+ "receive_response_headers.started request=\n",
+ "DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sun, 09 Jun 2024 12:33:43 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'openai-model', b'text-embedding-ada-002'), (b'openai-organization', b'manuai'), (b'openai-processing-ms', b'17'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'3000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-remaining-requests', b'2999'), (b'x-ratelimit-remaining-tokens', b'999992'), (b'x-ratelimit-reset-requests', b'20ms'), (b'x-ratelimit-reset-tokens', b'0s'), (b'x-request-id', b'req_68c64ab6c84510fbd553f1400232c86c'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'891126d48ea203aa-MAD'), (b'Content-Encoding', b'br'), (b'alt-svc', b'h3=\":443\"; ma=86400')])\n",
+ "receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sun, 09 Jun 2024 12:33:43 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-allow-origin', b'*'), (b'openai-model', b'text-embedding-ada-002'), (b'openai-organization', b'manuai'), (b'openai-processing-ms', b'17'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'3000'), (b'x-ratelimit-limit-tokens', b'1000000'), (b'x-ratelimit-remaining-requests', b'2999'), (b'x-ratelimit-remaining-tokens', b'999992'), (b'x-ratelimit-reset-requests', b'20ms'), (b'x-ratelimit-reset-tokens', b'0s'), (b'x-request-id', b'req_68c64ab6c84510fbd553f1400232c86c'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'891126d48ea203aa-MAD'), (b'Content-Encoding', b'br'), (b'alt-svc', b'h3=\":443\"; ma=86400')])\n",
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+ "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
+ "DEBUG:httpcore.http11:receive_response_body.started request=\n",
+ "receive_response_body.started request=\n",
+ "DEBUG:httpcore.http11:receive_response_body.complete\n",
+ "receive_response_body.complete\n",
+ "DEBUG:httpcore.http11:response_closed.started\n",
+ "response_closed.started\n",
+ "DEBUG:httpcore.http11:response_closed.complete\n",
+ "response_closed.complete\n",
+ "DEBUG:openai._base_client:HTTP Response: POST https://api.openai.com/v1/embeddings \"200 OK\" Headers({'date': 'Sun, 09 Jun 2024 12:33:43 GMT', 'content-type': 'application/json', 'transfer-encoding': 'chunked', 'connection': 'keep-alive', 'access-control-allow-origin': '*', 'openai-model': 'text-embedding-ada-002', 'openai-organization': 'manuai', 'openai-processing-ms': '17', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '3000', 'x-ratelimit-limit-tokens': '1000000', 'x-ratelimit-remaining-requests': '2999', 'x-ratelimit-remaining-tokens': '999992', 'x-ratelimit-reset-requests': '20ms', 'x-ratelimit-reset-tokens': '0s', 'x-request-id': 'req_68c64ab6c84510fbd553f1400232c86c', 'cf-cache-status': 'DYNAMIC', 'server': 'cloudflare', 'cf-ray': '891126d48ea203aa-MAD', 'content-encoding': 'br', 'alt-svc': 'h3=\":443\"; ma=86400'})\n",
+ "HTTP Response: POST https://api.openai.com/v1/embeddings \"200 OK\" Headers({'date': 'Sun, 09 Jun 2024 12:33:43 GMT', 'content-type': 'application/json', 'transfer-encoding': 'chunked', 'connection': 'keep-alive', 'access-control-allow-origin': '*', 'openai-model': 'text-embedding-ada-002', 'openai-organization': 'manuai', 'openai-processing-ms': '17', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '3000', 'x-ratelimit-limit-tokens': '1000000', 'x-ratelimit-remaining-requests': '2999', 'x-ratelimit-remaining-tokens': '999992', 'x-ratelimit-reset-requests': '20ms', 'x-ratelimit-reset-tokens': '0s', 'x-request-id': 'req_68c64ab6c84510fbd553f1400232c86c', 'cf-cache-status': 'DYNAMIC', 'server': 'cloudflare', 'cf-ray': '891126d48ea203aa-MAD', 'content-encoding': 'br', 'alt-svc': 'h3=\":443\"; ma=86400'})\n",
+ "DEBUG:openai._base_client:request_id: req_68c64ab6c84510fbd553f1400232c86c\n",
+ "request_id: req_68c64ab6c84510fbd553f1400232c86c\n",
+ "DEBUG:llama_index.core.indices.utils:> Top 1 nodes:\n",
+ "> [Node 6495ddd4-c6e0-44a6-9dff-38b461758b13] [Similarity score: 0.696261] hola\n",
+ "> Top 1 nodes:\n",
+ "> [Node 6495ddd4-c6e0-44a6-9dff-38b461758b13] [Similarity score: 0.696261] hola\n",
+ "DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': \"You are an expert Q&A system that is trusted around the world.\\nAlways answer the query using the provided context information, and not prior knowledge.\\nSome rules to follow:\\n1. Never directly reference the given context in your answer.\\n2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.\"}, {'role': 'user', 'content': 'Context information is below.\\n---------------------\\nfile_path: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/data/mio.txt\\n\\nhola\\n---------------------\\nGiven the context information and not prior knowledge, answer the query.\\nQuery: What did the author do growing up?\\nAnswer: '}], 'model': 'gpt-3.5-turbo', 'stream': False, 'temperature': 0.1}}\n",
+ "Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': \"You are an expert Q&A system that is trusted around the world.\\nAlways answer the query using the provided context information, and not prior knowledge.\\nSome rules to follow:\\n1. Never directly reference the given context in your answer.\\n2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.\"}, {'role': 'user', 'content': 'Context information is below.\\n---------------------\\nfile_path: /Users/manu/dev/ai/ai-intro/6-LlamaIndex/data/mio.txt\\n\\nhola\\n---------------------\\nGiven the context information and not prior knowledge, answer the query.\\nQuery: What did the author do growing up?\\nAnswer: '}], 'model': 'gpt-3.5-turbo', 'stream': False, 'temperature': 0.1}}\n",
+ "DEBUG:openai._base_client:Sending HTTP Request: POST https://api.openai.com/v1/chat/completions\n",
+ "Sending HTTP Request: POST https://api.openai.com/v1/chat/completions\n",
+ "DEBUG:httpcore.connection:close.started\n",
+ "close.started\n",
+ "DEBUG:httpcore.connection:close.complete\n",
+ "close.complete\n",
+ "DEBUG:httpcore.connection:connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=60.0 socket_options=None\n",
+ "connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=60.0 socket_options=None\n",
+ "DEBUG:httpcore.connection:connect_tcp.complete return_value=\n",
+ "connect_tcp.complete return_value=\n",
+ "DEBUG:httpcore.connection:start_tls.started ssl_context= server_hostname='api.openai.com' timeout=60.0\n",
+ "start_tls.started ssl_context= server_hostname='api.openai.com' timeout=60.0\n",
+ "DEBUG:httpcore.connection:start_tls.complete return_value=\n",
+ "start_tls.complete return_value=\n",
+ "DEBUG:httpcore.http11:send_request_headers.started request=\n",
+ "send_request_headers.started request=\n",
+ "DEBUG:httpcore.http11:send_request_headers.complete\n",
+ "send_request_headers.complete\n",
+ "DEBUG:httpcore.http11:send_request_body.started request=\n",
+ "send_request_body.started request=\n",
+ "DEBUG:httpcore.http11:send_request_body.complete\n",
+ "send_request_body.complete\n",
+ "DEBUG:httpcore.http11:receive_response_headers.started request=\n",
+ "receive_response_headers.started request=\n",
+ "DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sun, 09 Jun 2024 12:33:43 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'manuai'), (b'openai-processing-ms', b'318'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'60000'), (b'x-ratelimit-remaining-requests', b'9999'), (b'x-ratelimit-remaining-tokens', b'59827'), (b'x-ratelimit-reset-requests', b'8.64s'), (b'x-ratelimit-reset-tokens', b'173ms'), (b'x-request-id', b'req_2c79ce01b4b764ec3ab5d1695ca5f3e0'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'891126d68acc2172-MAD'), (b'Content-Encoding', b'br'), (b'alt-svc', b'h3=\":443\"; ma=86400')])\n",
+ "receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Sun, 09 Jun 2024 12:33:43 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'manuai'), (b'openai-processing-ms', b'318'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15724800; includeSubDomains'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'60000'), (b'x-ratelimit-remaining-requests', b'9999'), (b'x-ratelimit-remaining-tokens', b'59827'), (b'x-ratelimit-reset-requests', b'8.64s'), (b'x-ratelimit-reset-tokens', b'173ms'), (b'x-request-id', b'req_2c79ce01b4b764ec3ab5d1695ca5f3e0'), (b'CF-Cache-Status', b'DYNAMIC'), (b'Server', b'cloudflare'), (b'CF-RAY', b'891126d68acc2172-MAD'), (b'Content-Encoding', b'br'), (b'alt-svc', b'h3=\":443\"; ma=86400')])\n",
+ "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "DEBUG:httpcore.http11:receive_response_body.started request=\n",
+ "receive_response_body.started request=\n",
+ "DEBUG:httpcore.http11:receive_response_body.complete\n",
+ "receive_response_body.complete\n",
+ "DEBUG:httpcore.http11:response_closed.started\n",
+ "response_closed.started\n",
+ "DEBUG:httpcore.http11:response_closed.complete\n",
+ "response_closed.complete\n",
+ "DEBUG:openai._base_client:HTTP Response: POST https://api.openai.com/v1/chat/completions \"200 OK\" Headers({'date': 'Sun, 09 Jun 2024 12:33:43 GMT', 'content-type': 'application/json', 'transfer-encoding': 'chunked', 'connection': 'keep-alive', 'openai-organization': 'manuai', 'openai-processing-ms': '318', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '10000', 'x-ratelimit-limit-tokens': '60000', 'x-ratelimit-remaining-requests': '9999', 'x-ratelimit-remaining-tokens': '59827', 'x-ratelimit-reset-requests': '8.64s', 'x-ratelimit-reset-tokens': '173ms', 'x-request-id': 'req_2c79ce01b4b764ec3ab5d1695ca5f3e0', 'cf-cache-status': 'DYNAMIC', 'server': 'cloudflare', 'cf-ray': '891126d68acc2172-MAD', 'content-encoding': 'br', 'alt-svc': 'h3=\":443\"; ma=86400'})\n",
+ "HTTP Response: POST https://api.openai.com/v1/chat/completions \"200 OK\" Headers({'date': 'Sun, 09 Jun 2024 12:33:43 GMT', 'content-type': 'application/json', 'transfer-encoding': 'chunked', 'connection': 'keep-alive', 'openai-organization': 'manuai', 'openai-processing-ms': '318', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '10000', 'x-ratelimit-limit-tokens': '60000', 'x-ratelimit-remaining-requests': '9999', 'x-ratelimit-remaining-tokens': '59827', 'x-ratelimit-reset-requests': '8.64s', 'x-ratelimit-reset-tokens': '173ms', 'x-request-id': 'req_2c79ce01b4b764ec3ab5d1695ca5f3e0', 'cf-cache-status': 'DYNAMIC', 'server': 'cloudflare', 'cf-ray': '891126d68acc2172-MAD', 'content-encoding': 'br', 'alt-svc': 'h3=\":443\"; ma=86400'})\n",
+ "DEBUG:openai._base_client:request_id: req_2c79ce01b4b764ec3ab5d1695ca5f3e0\n",
+ "request_id: req_2c79ce01b4b764ec3ab5d1695ca5f3e0\n",
+ "The author likely spoke Spanish growing up.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os.path\n",
+ "from llama_index.core import (\n",
+ " VectorStoreIndex,\n",
+ " SimpleDirectoryReader,\n",
+ " StorageContext,\n",
+ " load_index_from_storage,\n",
+ ")\n",
+ "\n",
+ "# Persisted-index directory: build and save the index if it is missing, otherwise load it\n",
+ "PERSIST_DIR = \"./storage2\"\n",
+ "if not os.path.exists(PERSIST_DIR):\n",
+ " # load the documents and create the index\n",
+ " documents = SimpleDirectoryReader(\"data\").load_data()\n",
+ " index = VectorStoreIndex.from_documents(documents)\n",
+ " # store it for later\n",
+ " index.storage_context.persist(persist_dir=PERSIST_DIR)\n",
+ "else:\n",
+ " # load the existing index\n",
+ " storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)\n",
+ " index = load_index_from_storage(storage_context)\n",
+ "\n",
+ "# Either way we can now query the index\n",
+ "query_engine = index.as_query_engine()\n",
+ "response = query_engine.query(\"What did the author do growing up?\")\n",
+ "print(response)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "langchain-0.2.3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/6-LlamaIndex/starter-localLLM.ipynb b/6-LlamaIndex/starter-localLLM.ipynb
new file mode 100644
index 0000000..e927926
--- /dev/null
+++ b/6-LlamaIndex/starter-localLLM.ipynb
@@ -0,0 +1,68 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/anaconda3/envs/langchain-0.2.3/lib/python3.12/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "ename": "",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31mEl kernel se bloqueó al ejecutar código en la celda actual o en una celda anterior. \n",
+ "\u001b[1;31mRevise el código de las celdas para identificar una posible causa del error. \n",
+ "\u001b[1;31mHaga clic aquí para obtener más información. \n",
+ "\u001b[1;31mVea Jupyter log para obtener más detalles."
+ ]
+ }
+ ],
+ "source": [
+ "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings\n",
+ "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
+ "from llama_index.llms.ollama import Ollama\n",
+ "\n",
+ "documents = SimpleDirectoryReader(\"data\").load_data()\n",
+ "\n",
+ "# bge-base embedding model\n",
+ "Settings.embed_model = HuggingFaceEmbedding(model_name=\"BAAI/bge-base-en-v1.5\")\n",
+ "\n",
+ "# LLM: llama2 served through Ollama, with a 360-second request timeout\n",
+ "Settings.llm = Ollama(model=\"llama2\", request_timeout=360.0)\n",
+ "\n",
+ "index = VectorStoreIndex.from_documents(\n",
+ " documents,\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "langchain-0.2.3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}