-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
655 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ old | |
*.mp4 | ||
5-VectorDB/chroma | ||
onnxruntime* | ||
storage* | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
{ | ||
"cells": [ | ||
{
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
    "import chromadb\n",
    "\n",
    "# Set up Chroma in-memory for easy prototyping; data is transient unless persistence is configured.\n",
    "# Only one client is needed: chromadb.Client() does not connect to a remote server.\n",
    "client = chromadb.Client()\n",
    "\n",
    "# Reuse the collection if it already exists so the cell can be re-run in the same kernel.\n",
    "collection = client.get_or_create_collection(\"all-my-documents\")\n",
    "\n",
    "# Add docs to the collection. Chroma handles tokenization, embedding, and indexing automatically;\n",
    "# you can also skip that and supply your own embeddings.\n",
    "collection.add(\n",
    "    documents=[\"This is document1\", \"This is document2\"],\n",
    "    metadatas=[{\"source\": \"notion\"}, {\"source\": \"google-docs\"}],  # filter on these!\n",
    "    ids=[\"doc1\", \"doc2\"],  # unique for each doc\n",
    ")\n",
    "\n",
    "# Query/search the 2 most similar results. You can also .get by id.\n",
    "results = collection.query(\n",
    "    query_texts=[\"This is a query document\"],\n",
    "    n_results=2,\n",
    "    # where={\"metadata_field\": \"is_equal_to_this\"},  # optional filter\n",
    "    # where_document={\"$contains\": \"search_string\"},  # optional filter\n",
    ")\n",
    "\n",
    "# Last expression of the cell, so the query result is displayed.\n",
    "results"
  ]
}
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "langchain-0.2.3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Minimal Chroma quick-start: index two documents in an in-memory client,
# run one similarity query, and print the result.
import chromadb

# Set up Chroma in-memory, for easy prototyping. Can add persistence easily!
client = chromadb.Client()

# Create collection. get_collection, get_or_create_collection, delete_collection also available!
collection = client.create_collection("all-my-documents")

# Add docs to the collection. Can also update and delete.
# Chroma handles tokenization, embedding, and indexing automatically;
# you can skip that and add your own embeddings as well.
collection.add(
    documents=["This is document1", "This is document2"],
    metadatas=[{"source": "notion"}, {"source": "google-docs"}],  # filter on these!
    ids=["doc1", "doc2"],  # unique for each doc
)

# Query/search 2 most similar results. You can also .get by id.
results = collection.query(
    query_texts=["This is a query document"],
    n_results=2,
    # where={"metadata_field": "is_equal_to_this"},  # optional filter
    # where_document={"$contains": "search_string"},  # optional filter
)

# Emit the matches; without this the script produces no observable output.
print(results)
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# The quick-start code in this commit uses the embedded chromadb.Client() and the
# default embedding function, which need the full package; chromadb-client is the
# HTTP-only thin client. NOTE(review): confirm this file belongs to that example.
chromadb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
hola |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
{ | ||
"cells": [ | ||
{
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
    "from llama_index.core import Document\n",
    "\n",
    "# Minimal document: a text payload with no metadata.\n",
    "doc = Document(\n",
    "    text=\"text\",\n",
    ")"
  ]
},
{
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
    "from llama_index.core import VectorStoreIndex\n",
    "\n",
    "# from_documents expects a sequence of Document objects, so wrap the single document in a list.\n",
    "vector_index = VectorStoreIndex.from_documents([doc])\n",
    "vector_index.as_query_engine()"
  ]
},
{
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
    "from llama_index.core import SimpleDirectoryReader\n",
    "from llama_index.core.ingestion import IngestionPipeline\n",
    "from llama_index.core.node_parser import TokenTextSplitter\n",
    "\n",
    "# Load every file found under ./data as documents.\n",
    "documents = SimpleDirectoryReader(\"./data\").load_data()\n",
    "\n",
    "# Every entry in `transformations` must be a real transformation component;\n",
    "# append further ones to this list as needed.\n",
    "pipeline = IngestionPipeline(transformations=[TokenTextSplitter()])\n",
    "\n",
    "nodes = pipeline.run(documents=documents)"
  ]
},
{
  "cell_type": "code",
  "execution_count": 5,
  "metadata": {},
  "outputs": [],
  "source": [
    "from llama_index.core import Document, VectorStoreIndex\n",
    "\n",
    "# Document carrying placeholder metadata alongside its text.\n",
    "document = Document(\n",
    "    metadata={\n",
    "        \"filename\": \"<doc_file_name>\",\n",
    "        \"category\": \"<category>\",\n",
    "    },\n",
    "    text=\"text\",\n",
    ")"
  ]
},
{
  "cell_type": "code",
  "execution_count": 6,
  "metadata": {},
  "outputs": [],
  "source": [
    "from llama_index.core.schema import TextNode\n",
    "from llama_index.core import VectorStoreIndex\n",
    "\n",
    "# Each node needs its own id; two nodes sharing one id would collide in the index.\n",
    "node1 = TextNode(text=\"<text_chunk>\", id_=\"<node_id_1>\")\n",
    "node2 = TextNode(text=\"<text_chunk>\", id_=\"<node_id_2>\")\n",
    "\n",
    "index = VectorStoreIndex([node1, node2])"
  ]
}
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "langchain-0.2.3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
llama-index-core | ||
llama-index-llms-openai | ||
llama-index-embeddings-openai | ||
llama-index-program-openai | ||
llama-index-question-gen-openai | ||
llama-index-agent-openai | ||
llama-index-readers-file | ||
llama-index-multi-modal-llms-openai | ||
|
||
llama-index-llms-ollama | ||
llama-index-embeddings-huggingface |
Oops, something went wrong.