# indexing

In [5]:
# https://python.langchain.com/docs/modules/data_connection/indexing

In [110]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import SQLRecordManager, index
from langchain.schema import Document
from langchain.vectorstores import FAISS
#from langchain.vectorstores import ElasticsearchStore

In [111]:
# Collection name; used further down to build the record-manager namespace.
collection_name = "test_index"
# Embedding model handed to the FAISS vector store when it is created.
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Alternative embedding backend, currently disabled:
#embedding = OpenAIEmbeddings()



In [112]:
# Seed document used to create the vector store. Its metadata carries a
# "source" entry, the same key the indexing calls below pass as source_id_key.
doc_init = Document(page_content="Initial document", metadata={"source": "initial.txt"})

In [113]:
# Build the FAISS vector store from the single seed document and the
# embedding model configured above.
vectorstore = FAISS.from_documents([doc_init], embedding)

In [114]:
#vectorstore.add_texts(["hi there"])

In [115]:
# Sanity check: run a similarity search against the freshly built store.
query = "initial"
docs = vectorstore.similarity_search(query)

In [116]:
# Show how many documents the search returned and the text of the first hit.
print(len(docs))
print(docs[0].page_content)

1
Initial document


In [117]:
# https://api.python.langchain.com/en/latest/indexes/langchain.indexes.base.RecordManager.html
# The namespace combines the vector-store type with the collection name.
namespace = f"faiss/{collection_name}"
# Record manager backed by the database named in db_url.
# NOTE(review): presumably a SQLite file in the working directory that
# survives kernel restarts, unlike the vector store built above — confirm.
record_manager = SQLRecordManager(
    namespace, db_url="sqlite:///record_manager_cache.sql"
)

In [118]:
# Create the record manager's schema before it is used.
# NOTE(review): presumably this creates the backing tables if missing — confirm.
record_manager.create_schema()

In [119]:
# Fetch the keys the record manager currently reports and display how many
# there are (the recorded output for this cell is 20).
keys = record_manager.list_keys()
len(keys)

20

In [120]:
# Remove the keys listed in the previous cell from the record manager.
record_manager.delete_keys(keys)

In [89]:
# Two small sample documents for the indexing experiments, each with its own
# "source" metadata value.
doc1 = Document(page_content="kitty", metadata={"source": "kitty.txt"})
doc2 = Document(page_content="doggy", metadata={"source": "doggy.txt"})

In [90]:
def _clear():
    """Clear previously indexed content (hacky helper).

    Runs `index` over an empty document list with ``cleanup="full"`` against
    the notebook-level `record_manager` and `vectorstore`. See the `full` mode
    section to understand why it works.

    NOTE(review): one recorded run of this helper in this notebook ends in
    ``ValueError: Some specified ids do not exist in the current store`` —
    presumably the record manager held keys the vector store did not have;
    confirm before relying on this helper after a kernel restart.
    """
    nothing_to_index = []
    index(
        nothing_to_index,
        record_manager,
        vectorstore,
        cleanup="full",
        source_id_key="source",
    )

## `None` deletion mode (`cleanup=None`)

In [91]:
# Clear indexed content so the next experiment starts clean.
_clear()

In [12]:
# De-duplication demo: the same document is submitted five times in a single
# call with cleanup disabled (cleanup=None). The recorded output for this cell
# reports one document added and nothing updated, skipped, or deleted.
index([doc1] * 5, record_manager, vectorstore, cleanup=None, source_id_key="source")

{'num_added': 1, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}

In [13]:
# Clear indexed content again before indexing two distinct documents.
_clear()

In [14]:
# Index two distinct documents with cleanup disabled; the recorded output
# reports both as added.
index([doc1, doc2], record_manager, vectorstore, cleanup=None, source_id_key="source")

{'num_added': 2, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}

In [18]:
# Submit the same two documents again; the recorded output reports both as
# skipped and none added.
index([doc1, doc2], record_manager, vectorstore, cleanup=None, source_id_key="source")

{'num_added': 0, 'num_updated': 0, 'num_skipped': 2, 'num_deleted': 0}

## Incremental deletion mode

In [54]:
# Clear indexed content at the start of the incremental-deletion section.
# NOTE(review): the recorded output of this cell is a ValueError listing ids
# that do not exist in the current store — presumably the record manager and
# the vector store were out of sync when it ran; confirm and re-run.
_clear()

ValueError: Some specified ids do not exist in the current store. Ids not found: {'786c883e-3b9a-5d49-9821-9ea3c2ef5932', '8755000d-eb19-52f7-b5ac-b061b063a565', '0ceb4962-343e-5242-acef-38a17e7f815d', 'c8c1a700-8633-518c-977b-c7000915f258', 'a47e7000-83ce-5592-895a-076e560bdd5c', '06e278e4-cbed-5430-98fc-61b49abd5992', 'c0072095-6249-563d-98e1-f6b5f35bd6fe', 'e83ee6a8-eaa3-5e7b-99b8-51d6d049effc', '818a9b15-3016-5fc9-a795-294698cb3aba', '8fd4f6c4-2733-5446-9f5a-1b18e43c0e11', '53dd13ab-a48f-5103-aadc-df41be7964ac', 'df88f840-e1d6-5b1d-8d85-d153118cb023', '6032dd17-d4df-546d-ae40-aa8935aa58d7', '71fb02eb-1cec-5688-be60-82fe6f49e9f6', '389b9577-34e0-5d2f-b852-65f197bf30ec', 'c4e72324-a4a2-5f33-8397-3bf024d4b0c7', '132f4877-27f7-5466-a7e5-a72c2fffd806', 'd3e603a1-b313-5f3e-8da0-f1021269dc34', '9a47fafb-2d25-52b3-90bc-fbc766bf2b52', '195d5a43-5f76-5e74-94c0-29ec254bcb7d', 'cdaf51a9-f48c-5eeb-95be-419dbb4f1f3b', '837096e8-4ee8-57c9-b2f1-1d92c960396c', '6cb4cb9e-be82-5c95-bfbb-11c4b01e82d0', 'e851c758-689a-5435-9606-94bdb17ae081', '805cb865-1dec-5659-a480-1556ceff9c82', '4fbdc548-ad30-5e27-a270-6e19864403f0', 'fb05b1ea-fa54-5319-a5d2-af0f8007e50f', '7c30ffef-7845-522f-9be5-092b61765ebe', 'c60a0122-c79a-545e-bcb5-647ce1c4430a', '360defbe-ea2e-56fa-85a6-5eac662a790b', '12f3f28d-5566-574b-ac54-95a5fec01e79', '0c90fe78-8a7a-5964-b3aa-f1d6389f25d5', 'e075c875-de52-5c21-be0f-67a3e02eea22', 'f4f98f44-ef9e-50c7-93ed-5ff574db028e', 'aacd9183-2b84-5647-adfd-46ce99a88b82', 'fc326f01-ca67-5e31-9895-41733ba2ad30', 'a34df21a-21fc-530d-bea4-205cd3e0778a', '3961113e-e7a2-53e8-875b-f1aca1a88f39', '18864a16-ad29-55aa-9e4a-9d5d5b92715e', '4b893878-f2ec-5ab5-9cab-e93606bed157', '6166629b-bf8e-5a62-9789-95809a348515', 'b8aaaba5-ce09-515b-bc77-a93b5d9ecd03', '41c0e9fe-d77e-5fae-a2aa-e1d68a9ff8ea', '047d5747-980b-58d6-9ab5-8bbfd6b5927e', '93e8e1e1-abd9-50ce-9d87-bee71871de73', '604ecf7a-0faa-53c9-8660-2f27fddcc2b2', '496b73c0-2cb4-5b82-8603-0e02941ca969', 
'f6e1396f-750b-558b-9bd3-3d71471ae4c9', 'd50bdb43-df30-5fea-9af8-0edd3e8e6f49', '58d30560-acc1-583f-8e7d-a24e45dd5456', '40abeef0-4ee2-5666-a4cc-917468fc4da1', '29343343-6ace-590f-8df9-af0ef8b2bc4a', '1dc65bc5-5198-5430-be9c-c613d2139406', '9c6c5e5d-bdf0-5a8f-9162-ab0ce702af85', '3cab4103-3aa1-5cf5-879b-a4062e291f61', 'a1a0c328-b046-5886-a0db-527f48be6be6', 'a9a21c82-5963-5355-9432-e11b0bd98e08', 'ac9635fa-91ba-5079-8399-d99acfa0120e', 'adb93aa5-ef2b-5516-b8d0-91839ff6b5e1', 'e4135fce-fd04-5a8d-b913-b4469321957f', '6f90b821-553f-504a-9906-2719fd50bd1b', '0f1fe72f-5e48-5052-8035-eac26a1388dd', 'c0412bcc-9e2e-572c-ad14-b2fd06d57eff', '17240f61-aaac-5a93-8c70-95e2aa4315c2', 'ac7deab2-b071-5501-bcc2-ebd1d186e6c9', '38021308-5a30-50e1-a7cc-7951f2c79934', '810329bb-6903-5ac0-9eb9-23c4f1706de8', '042e72c9-6bc0-5aaf-aad2-786016cead00', '2bbab495-1672-51ab-98ba-ca830b35e617', 'bea4f282-32c6-5533-bbed-e82212ef23d9', '39510668-8164-5b38-81b7-38bc1838c2e7', 'e7781e6e-9700-5add-8238-1567772e2c3a', '63dbb537-c06a-5d92-a720-9e46b45e0591', 'a3a07722-a28a-5e4c-b843-ea86c4a0b2f2', '77dc59d3-8ea0-5554-b84d-7e1163611f75', '349ae906-622a-5a6b-8ef9-9e09fd7cd073', '7033da28-536e-5893-b51c-2a8a8e554e6d', 'c2901e85-ec58-5e6e-9583-c234613a4ddd', '12eac516-240b-5c67-b99a-ef346360ac69', '680add04-608c-5c55-b2b0-72351e0be5f9', '9c75f3e3-66f9-5b2d-a53f-e6f998ae82e1', '9b7e9953-9dab-5069-9232-a728b059fd68', 'aa1f651a-c26f-5573-8bdf-76aa2438ee00', '8cb5952a-dfbc-5377-b932-af0c66b575a5', 'c48d63c0-aa4a-549f-af07-fe0b8852c9d7', 'a2d46cb5-9d40-5249-8b5c-5b0239f96ea4', '7aad63d8-4204-5e92-8181-45ae63e42642', '271dd5d1-9133-5f13-99f4-9b6fd5d7df55', 'f4420a65-904d-5c98-886b-0b7952938867', '459db66e-e91b-54e2-8090-4015c8fb9b5d', '2b685ff9-29b9-540a-ae9a-957030dc5e5f', '11eb9a6f-9d6e-5da1-b6ca-5f22955f8b72', '91a0d5c4-b818-5e17-bd46-27dd37d749d1', 'fdabf6a5-5380-5d17-8364-22429c99d5ee', 'ecdbe1ba-2e7f-51dd-97f2-7724e09c1948', 'f03711a2-0b63-5bbf-b109-85e15d8e6122', 'b59fb998-0cd3-5b2a-adfb-1bea3333f025', 
'81c8269a-9bc0-5f0b-ad4c-8049d3c39cc9', '3a6e1407-83c9-587f-b011-e71e2db47782', 'ba59d337-c225-51ef-aaa2-a9ca07f7d45e', '0365e5bf-98af-5486-8450-7ab612c1fd8a', '0b117af7-35ce-52b4-aec2-920613dbe29c', '81ec9f21-629b-554b-ad05-722fa06f14d5', '652f90e0-0a41-596d-a7be-3b3a38b1b888', '83169149-d84a-57fd-9441-a78ebbcd96b1', 'a4c77021-727d-5074-9028-2d02c5387dc7', 'db2aeee0-1432-5688-abb1-c0c1fbbad166', '0e1ab8e2-2b95-5e6a-a87f-fab601930ce7', '22909a0e-6bdc-5f6b-8a64-21214b74d94f', '0e44ab6e-1137-523e-aa3d-5d361a45503b', '629b58d0-c007-5200-875f-3c6d38d3823a', '435df8ae-f1a8-5750-a4a7-dc06292cbd07', '14c721cc-1a3f-5e2f-841a-7c0b11772526', '447bb57c-bcef-5890-b6f0-892341a39904', 'c233d742-cd2a-508d-a615-b81d011ccedd', '9fb2c613-b47a-510a-872b-7b7dc2e49cea', 'f8ee551f-0840-5d3d-be28-6615ab2cc3d8', '41e7611f-ea25-5e55-9112-1e96b8fdcb16', '7da01694-5ed7-50c2-8246-2e5e8a411a41', '8487eefb-eadb-5f8a-bac4-febdf40b94fc', 'a63ee160-51f0-5e1e-ac89-b86dc7b632a1', 'bac5b7bf-14e9-57e3-81f0-47ee2d80622e', 'f9dd0574-ddf2-5b88-9d86-cf8e8648e428', '8286c509-4396-539d-8682-4d289e38c4de', '92a2ec70-f3cd-5975-aa69-99cd3336221d', '927e3c90-a7b9-5d4d-a1cb-9deb0a02c53f', 'c93b728e-316c-5b97-93bb-fa6ca12596be', '548ced8c-2a71-52c6-8bb5-18d4e27397f2', '17b40b56-6c0b-56a0-bbe3-37ddbaf95732', 'c555f247-76bb-53b9-bf67-5363d7455dc9', 'b83f7929-db8e-5894-a8d8-8ecb7d67e1f0', 'f87058d5-7926-50cb-8640-317b2d71df7b', 'bf48dd5c-df44-50d7-b514-d5cf469613b9', 'a8913482-d134-57e2-b14d-7d9de352e8a8', '3a1b77b8-a4b7-522b-bcff-f054f93a8ef2', '32e7807b-0725-597d-ab0a-1afdb61210a1', '2b59061a-d8b6-5c9c-887e-a5f15e346cde', '9ec750d6-547c-50a3-b51e-1294b64411ec', '918e6738-c9df-5c3d-910e-ec1ac91ce4d7', '224b1cdc-c8f5-5d71-9bb2-fb29c0e6d787', 'a5818bc0-adee-5f65-9d80-aa533f5e9c01', 'afd6b0ca-81be-56e6-99bc-5e649dec4458', '796c7281-17b5-50c3-b9f5-9aa1258ad66b', 'a946cc51-d129-5353-b154-a35778682d0c', '68b8cfd6-0996-5749-aaf8-090541509f9f', '3fe4e6fc-88ed-5c3a-995e-d2969c5a94e5', '44957e19-cb15-5db8-83f5-7611e155d904', 
'06097a51-9859-519c-afe1-2b72ff19d7cc', '4d5f24a8-c226-589c-b41e-05bfe960861e', '85a862b4-6114-59bc-ae63-6d02464b64f8', 'b7ffa13f-87cc-503a-8683-8452968f499e', 'f05a9a1c-8b0f-58a2-a693-d1a4d3a1e8f4', 'd0b77e43-85b1-5039-a38b-de2c1f1258de', '358d9f9d-e3ad-51c7-bc7d-9acce7798569', '65594789-bfbf-5b81-97bd-48fa3b9d7741', '2294c85f-c8a6-5579-abc0-4dddabbd803e', '0153b0dc-1dde-5746-93de-fe314d3cc2d3', 'cd910a1a-3f67-5bac-87c9-53d8118be985', '52e92ceb-1af0-57c8-9323-bba5501321af', '9469ce93-c661-556d-8e1a-cc29125234a5', 'f715ec45-f478-5365-a3ac-69901d371211', '9560a11b-37e3-5cc3-8e4f-270643d810bf', '970afee3-6e11-5679-a0fc-b1fa426e29b2', 'f9e19837-276a-5d8c-9592-7ccee8d6d97a', 'ef373931-fb0f-5f5b-899a-bcc15a033fbc', '2685931d-8432-5725-8601-3bb2e2187eff', '4d519b11-c71f-5202-a960-c1c419eba8ad', 'bee7f51a-18a7-5964-bb59-6d1b145cc757', '97202bbb-7f7d-590c-81c6-bf25ea107fd8', 'ec0bd63a-31af-5ba8-ac72-b32bd9cdfe7d', 'b8e4346d-8e4e-5d5f-8fd9-cb060fc87599', '43a49b4f-e20a-5745-a6bf-1453c4e00e96', 'ed60f16b-129e-53e8-bdc9-0d64eb9f34b3', 'f6ca122f-bb08-5942-88af-d3c42d3d1e97', 'd45ea7b2-7808-5728-9db6-f17bd5e21efc', 'aba1777f-0a35-54c6-8141-22b60404868b', '7a738bc6-ca0b-56b8-8a10-57fff131b07b', '47e973b8-5063-571b-9e01-1d6a41b45ebc', '2732fd51-4bef-57df-854c-fb6b1870a697', '2a17be12-dde5-5fb6-8292-b042689463d8', 'f610f497-ab2d-5e4d-9b72-3fd10879a48a', 'bb68d9f6-d69a-5b15-846f-dd5429ae9dc6', 'cd2f546c-4820-5ea1-9b81-76161f516122', '2edf8c23-4c0e-5e15-8370-01577ccbdc56', 'cc6db4f4-1da6-57bb-bb04-ed5b4bf3ed9c', '47d3da06-646f-5696-9add-a90760b8addc', '53eeb6ce-5547-5af2-8708-819a5f8512fa', '21e55933-905e-58bb-a67c-93f46019dde9', '251e30c1-7cf2-5fb3-9962-af8a79a2e43b', '93da78db-2a2b-574b-8beb-6993cd158cfb', '6dedbc7a-38e7-58ba-bde9-85d733d04014', 'ccf03e46-2788-5bc9-88bd-c3fd46193da4', 'cd9995bc-c2a6-509f-8035-284c8f892093', '06bd5b5d-1d31-559d-9251-7bdbb269cfd4', '4f31d3ae-b583-525b-bc8c-87475f5727d8', 'bfb17a7a-b726-5275-99d7-121ac9faaf21', '926e6e6d-d034-5316-9bf2-f31a1caad165', 
'61d32eb5-429e-53e5-bcc7-2e3873cb9a5f', '3422b7fc-9eab-5017-aa07-5337e6e8c040', 'f10b71ad-a114-5bbc-a51a-7ccb9e303372', '569040d2-ced0-5e47-8609-4a754adacd88', 'ce78feae-ebbf-500e-b778-239d504a9c2a', '82038f50-24dc-5588-a913-839d6f9ad387', '24c5e735-d8ff-5e53-bb17-d026a4faf63b', 'f820f218-fda3-5fa3-ab78-85448ef5b831', 'eef993e8-af78-56c6-9d85-6cd46ceafd36', '44a917d5-3b7f-5910-a434-a01114fdd703', 'f698f67e-1805-58d7-8998-d8a181de87d2', '6f16588c-8ae4-5140-892a-47401662a398', 'a99aa147-504f-5547-ab82-29b340fe0519', '2dae2254-c092-529d-b04c-a9600723ad69', '71990920-a671-5f23-8f89-ab83883d5803', '484bda19-4cef-5e90-972d-8c24a2421beb', 'efe43566-a409-5419-aef4-18f51b3763b5', '650ed5fe-d1d9-5200-9aa7-6f1809677711', '386f3318-0bea-57ce-aed8-a2c902102456', 'fecfddad-e021-5ba0-9a3c-0196b007ae00', 'd15a8a97-c4f6-58d5-bb5a-9099e27cf031', '38d9ebd2-ff83-5142-bae7-6b096243f982', '46a9c86e-e18f-5d2c-abd2-963feb3ca821', 'b1a7ca0a-b1a1-5e05-95fd-56070cf022ea', '75801af2-d21c-5646-bfa3-93b7e78cdb53', '21c02271-e8b5-5d87-9e18-4c3ce9b96cfa', '5564cff7-fcf4-530e-92db-77968103b02d', '18fd72a7-f69c-51a8-9393-2e188fa7e58f', '12052fa7-8f36-58d1-8541-afd82e9ad89a', '03604ebe-a322-52cb-b48a-77eda32ff24c', '492eb81f-eed5-5df5-bfc2-bbfe9e7a0dfe', 'b6284465-e4ec-5252-ac00-d0e136ece4ab', '1b6c319f-140a-526a-913c-77f6ff4ae7d0', '53e949f9-80a5-5f31-bae8-10174d87b24a', '68d4292b-992c-5e27-870d-8a1b46a8904c', 'c30ba092-3a54-5a08-b807-124be3e97730', '380b62a7-e85d-5288-b9c6-43b447490188', '04005334-a5d6-5e49-8d2c-eed05e1d3f24', '79df0567-5b9e-5dd0-8f72-da85c069a444', '8d19a40a-9454-50ca-a5c5-eb03ad152fb2', '1e82b21c-a7d1-521e-a6f3-a85015218278', 'd3f62dee-8fc6-5fef-bb01-8eeba7600439', 'cfcd3a33-17b5-5e8a-b56d-0d4731eeb9a1', '8b8fbfec-b265-5919-9fb0-501b489ae48c', '2011f182-1c2b-5362-8309-ca793d265995', '62cff3c8-437e-58b9-982b-cc97dcfdf615', '4beaac2e-e990-5378-9a0b-60bc04c2e5c6', '75cc3e24-2207-59e0-8093-57d1431f75ff', '46bbba72-af30-521e-97df-9aeab75be089', '16387bf6-5a50-57bd-8992-39435761afc2', 
'4d8eacf8-5854-5138-8336-a679d30bfe2b', 'a63d7b90-5d53-53c9-b389-d67d08f4be05', '3d15f3dd-cb9b-5c78-91ad-ec137640a77e', '7675ba23-666f-571a-9206-db19b88fdb4b', 'a7a4ddd2-3feb-53ef-b781-414eaaea9101', '775154fb-f042-55c4-9bcf-2995ea111637', '2b5087f7-2b07-5588-b4b9-e161ea6c06aa', 'a1005f6e-5ea9-5dc5-9438-90b91966d9db', 'c69da630-68ff-5d68-9a3c-a797f590b60a', '74c48445-c21e-565a-9cb4-69b8851f56d2', '8d284dd2-ba2d-5005-b43b-0a6224f7d19d', 'd01fbb92-48aa-5e91-b09d-d0d70ef798a0', '5fe46c3b-e397-5b7d-8e46-b3fdefe338a3', '16751184-da2a-5e53-b6a9-bdf99e7d4844', '52c96b50-57aa-5831-8594-615838dbabd9', '7a78a061-0e42-501d-bfba-7de359e419cf', '51f3c373-99ca-50d1-a6f6-8f1e69f9fd4b', 'ba1b3cc9-66ed-59ff-aa90-24f2ef7fe4e7', '74abf29e-e433-5159-8fe6-9afda430dbf5', '2c415dbb-8202-5f61-b161-6de0a85fb56a', '5d1e1d87-f4f6-5571-a7c5-961ae3246f09', '6d917d2b-ad63-50ee-84e4-d64523531e3a', 'e38393a0-2ee5-53a0-96dc-c5f04aef4b90', '98d98f52-09c3-5a07-9773-4f5a13274253', '08ce33ee-7c99-54f9-a922-e1be91d1640a', '8f6ce4ef-42d8-5a3d-ad24-bd28d9440f94', '46003927-5346-526a-9b2b-ef91a890c53b', '6b95dee3-60e2-5fac-b272-9368416ee59c', 'ccb60ae6-864e-5fbc-a4c8-fafb557164ba', '17c32cf1-32e9-5e32-a787-252a78950e4a', '6aba5ab3-a8b7-5079-acfe-955b4630a171', 'b02a3602-48a1-5116-b7d7-5f8827fa6df0', '99161c3e-fe28-5db1-b348-c926db0326df', '97edaf88-e718-5f61-9cda-27f5dd4004c4', 'd59169d7-b1fa-540f-806b-f429e01fde61', '06a67c8e-2478-5c6e-a1ab-fb58bb9fc08c', '397f3ef7-5e6c-5839-a1e3-86fd9d0d523e', 'a5395ea7-49ac-5e24-95f2-1ad0731c8220', '9556d58d-dcd0-5271-b7c5-9150e300b9b4', '8637079f-22e9-5a58-a4d4-a8e267f2c7f4', 'b9763211-212b-5e55-86ce-6a575c4c89a7', 'd0e548b3-e19a-5822-b2c2-7f9b42b19e16', 'ade56de4-3428-52fa-8c4b-53331fe5cb58', '744b52ee-b6a6-55ef-871c-06332f6ae8fc', '71d49e70-267f-504a-a3d6-956f739ac337', '4dd2f839-65ef-5f83-bbbe-3b9253de8ac4', 'd5a080f2-5285-5b6e-abba-f3bbeafbb5ce', '0159a4c1-59c9-5b4c-a934-2a159b081137', '876862f2-ee12-5baf-9b78-8242a3243863', 'acfc8521-d315-5b9c-b47a-17735f15cfae', 
'6062943e-c711-56d0-91ba-260fcd01d867', 'd6d7c54c-be5f-5bd3-b9d8-29237c0e3ba5', '7d04b93b-b03b-5263-826d-9b1c98a7eabe', 'f60c63a8-7a9d-579d-a775-a74d76ec1b06', 'c1f58c3e-fda3-5622-bc67-61833dfd525d', '16ad8370-2213-50ee-a4cc-5e7fa6c194a0', 'b0474355-37d5-5d65-9b20-355ca54b2b19', '8afdd58d-5039-5207-b3ec-f087612aa885', '9a2569c9-a6bc-5e60-83df-4d906270ac4d', 'f64d4913-bedb-5a17-b85a-51ec2b5ea8f1', '0ddee050-9be6-55b3-ad1c-9ae17baceca9', '5675de68-3e6d-5779-a542-b5b9062896b7', '1eb3f4b5-7570-545e-b40c-da03b30326a7', '2a5fd18f-0330-5a10-96e9-0c2536ef47eb', 'dd099f33-42c8-5074-ba1e-43825f208495', '3d83a8c0-11ba-5e9b-973b-7eaa19c316da', '630fa6d8-dade-5754-9dbd-9418a8fb0f68', 'c3bae986-5931-534a-a852-6407c6d7b716', 'f7204103-1f0c-5577-8059-ae9780a86bcd', '69e3187d-b765-5aeb-a324-de0e0276c4c9', 'a1b7dd7a-dd42-5e1b-9631-6cbdd75a7599', 'c311395e-f8ac-57d1-b29b-c345aca23bfb', 'd555328c-1dc8-5f9a-a028-1028e0bd8e11', 'e2f4863d-a950-527f-8041-62b82f093957', '8f204501-d7cd-5310-9997-42610f7b4710', 'ed1f0734-8191-564a-904d-3bd4b7eb9a67', 'd8789cfd-9237-568b-8834-11bb68cabb4b', '5ed125b1-10eb-541b-81be-9695f3a9ec10', 'a6ac24d3-47e1-5e91-9b46-5e2a114b576f', '82ebe633-3c6d-5789-9f48-a9c83641879f', 'b659db3b-d3c2-5cad-9bf1-3e1ffe937e32', 'cda6419b-ece9-5f99-82bc-c59daadc831c', '9d90637d-8605-54c5-8b68-94a46d73499c', '16352313-c821-51e4-987e-3e05db462360', '1212b172-14d9-5533-a26a-e3c92af2c038', '3f877f01-b59c-595c-bd2c-499fd224c7ff', '01cdc575-63d4-591e-a1d2-99311471a609', '8a9a30d0-ff0c-51d8-a4f3-d0edcba86cfc', 'af1d71bb-9db2-5880-9693-425a895fb616', 'ecee23fe-3aec-512d-96ff-2bf4c4d9056f', '82c4c908-1497-5252-be2f-b5078973394f', '14e35d16-fbbe-52ed-824d-e438db4c265d', '339f5340-e91d-5795-9b3b-9b6383e0d9ec', '1ae2bfdc-1117-5fdb-987f-441449b89549', '83103cd5-57f1-500e-adbd-954257e0192a', 'a45718d0-b9eb-53fd-8eae-8c1e97d9211b', 'bd225f38-bfd4-5164-9334-f3a3fea00372', 'af358070-e321-5a80-84ed-afb147f3ef00', '8817985e-5da3-55ea-b58e-b19cf5e8dfbf', '1b9d4b56-3ab1-5d6a-93d8-90d8b95d3689', 
'2b673923-f0d3-5357-810e-5354be4e4460', 'd6f6ebe4-4250-5cda-a97e-445d898a2c1f', '9e9e7dda-9275-5103-84d3-f2992e475a42', '73ccc58e-e5b4-5e0e-ae59-91990493ede8', 'f7a90819-52ef-55bc-a5c0-f1720bba1a63', '22d2fe1f-dec9-535c-afba-16e762383827', 'ef2a2afc-0464-5587-8b63-b3543733a857', 'e58c1514-48d5-5c4f-b4a5-86ea19a313e1', 'd8d2cd11-4bba-5c6a-9d4e-4d9af00cdcf4', 'c55ef80a-8c6a-5a5e-8a2a-0424618b84df', 'd90b56b7-de24-5c76-be3a-f76bbafb46a9', 'd0bc72b7-cfaf-57a3-991e-381c2ea446bb', '4c36f957-8ee0-5dde-9931-b27221cc96e8', 'ca76ef85-1a68-53a2-8db5-c2093e2586d3', 'b5009e83-ffba-5b49-8c82-91f287cba176', '4949d503-0f5e-569d-ac3d-9ec41706203a', '5b533623-9ee6-5f45-aade-0c523aaa3aff', '738df936-610c-5435-93ff-a0e54144e068', 'abc5315e-1b53-55f3-9df7-7c68c4b14a25', 'f05dfab3-392a-55f0-a966-a0b7b7b8aaa1', '964eddd4-485c-5a1c-b72a-2225dd73fa42', 'bf23217d-f6ea-57b0-86a9-f0f7e5954d0d', '2628fd40-aed0-50e2-9742-68b66fd78e21', 'e1a0320a-0a90-5282-959f-5ec4d6e1347d', 'd87655ff-d023-55f0-b238-298477de91ff', '785dc7d9-0fb8-595e-a563-836915a5d820', 'a1b3753c-b781-5b36-8230-fce288e7cd32', 'bd270409-4f8f-5317-a80c-b066a5613e04', '0f88003d-8e9d-5af0-b8e6-2c4bf371d56e', '0e1775d3-35c1-5bef-a7bb-918c1c3f4363', 'a3a560f8-e1c6-58c2-bd3a-a01cfc2e03ab', 'ff674f94-39e8-5032-be3d-fa6bfe94e7fb', 'd54ac264-a966-5e21-aae3-c4b4f83cf86e', '6d76f873-eb16-5c03-be93-1a131c432436', '7dc055a6-9992-5187-92d6-ede49b596047', 'd0941ac3-2ef5-5cfd-bf28-2e14f306cf73', 'f5661669-1b90-5cf2-abdb-0207b66c332a', 'dab857a1-9120-58ba-bc58-e5050220c05d', '0d5adc03-d9e2-522a-80fb-15e97266ade6', 'b3ab6bb4-5ae6-5631-9b31-50e7d3965dec', 'b0cbe79b-16e1-5cef-bea9-11423d47f9d3', '597c20f1-3c5c-5853-a7c8-d5265b2b025a', '466be16e-b6f4-5727-9050-8e22bc743fb2', 'af5f3576-a4d9-5b21-b574-a88a2e39699e', '96faf77f-da52-5f8b-9cc5-b968735589e1', 'fd9c4f35-a09e-534e-8cb0-14bc676df1c4', 'b05edd31-b5be-5019-bd63-41896c032628', 'c58a7e00-3de5-5f82-bd92-d81a3e9e8fe9', 'e3a132c5-e2d5-509e-a2e1-f60686a7a1eb', '06409265-2963-537c-be6c-7cf2e3771fad', 
'5a81accd-dd75-524f-be60-e76954e4031f', 'dc8872c0-3b1a-5e1e-b931-d1137e6e1243', '2ac16e76-31c4-50c0-af0e-239bcdb64dd0', 'e3f22909-f51e-5a0d-a638-036dd8c99449', '3c0e0cdc-f7b7-5907-9f15-df8d953cbc7b', '79a4b506-da79-57a5-b2c6-547e62ebb64d', 'bd8d9037-6295-5d3c-8bf6-d077d3043bae', 'b9fdec43-ee81-50b7-b7e5-9c2d23ded7bd', 'a2a5a29a-5134-5453-ba6f-ef23ab295272', '93b33628-d179-5d3f-9e1d-987b85d5e2ab', 'cf7e822a-250f-5da5-9fcf-6e1ede6921fe', '064c4aba-d49e-52d6-9179-e1b94b4fe26d', 'ae2f3196-78d0-5f12-8314-f10456b4e076', '93e4ccd5-a700-550e-aa02-f215ad6b3d68', 'a50e79de-e4d9-5bf2-8b23-09d70e05a34a', '58ec6a13-854a-57ad-8059-927ef21e523f', 'c8ddea4f-a75e-5c21-bec2-24ae32afd476', 'f8922c23-8fdc-5d66-92ea-d0b41f492fcd', '72289fd9-f63c-5204-9a04-6978d46b7402', '9de1e65c-c81c-5cb4-a5d4-52d51521acc2', 'ec752e88-3b7a-5595-8f89-402ccf60dd5e', 'af97a3ae-498b-572f-b21a-e74ceb5dccc8', 'c36bd579-039e-5642-8773-7acb5461f2f0', '1941a65e-de17-5897-a3f8-508cb8c9d64a', '1f109fb7-21a3-51b6-b450-a18bddb510b7', '8d2ff6d1-f17b-5d1d-933b-90ba287a96c1', '41293643-a25e-52c3-b471-8ef0f060111c', 'f3fec255-b83e-5597-98d1-096ae10a2df9', 'e4865b61-8903-533e-88d8-e43b5319be6c', 'ad5c49a8-12b5-5580-b2b3-89d60c8dffe4', 'ab61b791-26d4-5394-a2eb-d6397d466671', '0d1c5ea9-2d30-5647-b605-c06b8a95e82a', '72254b4f-cfc7-5a25-89a2-c7c3d1ec1a2d', '17071c41-a1f5-5113-9a6b-4d5dd8afd2ba', '2f369c0d-5380-5945-a5e2-6dfe985bce12', 'ff449849-5cd2-5df6-8f0e-5960f11a9a7e', '10462c84-693e-5386-8077-b2dbe8f7eb90', 'e384fd1a-5255-568a-a1cb-ca4bc519c4a3', '25f5285d-b3a3-5f66-aae3-4904922d305f', '331d1101-1f18-5337-afb1-9cca04d09ed6', '9b3ea273-a199-5ded-97ea-0726aa930eff', '08c764fe-89c1-5a5e-8e64-1a6b5a9dd9f8', 'fdcb1d2a-5978-5f44-bfb3-defb9b92d818', 'b0716970-c0c4-5887-b276-f2f3d44986a7', '97c9eb49-1fde-5716-9b3c-dace128c85e2', 'e8f7262d-d54b-5dd3-bc53-9613bd0f85c6', '51a30cfd-640b-5e6d-b99a-09a11db2e45c', '6f7fdef6-886f-5091-ab91-0864edc2f524', '43484d6e-04e9-514a-9d9d-9a1b5faca5f9', 'a5cdcfab-d781-5fa0-8c31-111183533774', 
'e82482d0-bc8d-534d-a0a4-1346388536d3', 'e7514c8a-354b-5dce-b40b-498c78174a18', 'b7610883-788f-58ea-a757-7b13c3a0f7bc', 'e02e192c-6f5e-5eba-907a-025fad40e81f', '2a4c971c-46e5-50b0-b69d-a958a8cc8d02', 'fbccd2c1-3b08-5cda-af19-2ff1e34ea92b', '88e33c65-d4d2-54a6-afa7-b4137c76c36e', '6edb3f25-934c-5b1b-a819-b985a5022925', 'f1697c07-7f0a-562c-80ab-a9e67790de52', '4b024ec6-eeee-5202-a7ba-e1918718b4b8', '48ed332b-f89a-5339-b59e-b639b4b38c1e', 'c63a5c89-043a-5a0e-82d9-4bbc20b00203', '19f65e99-04d1-5807-b754-64e80aa5aaca', '70b7842f-8adf-5082-87c2-7ee2e59d8052', '25283d1c-51ed-5df9-99fc-5400ccdb3815', '0306b8a3-910d-5ccf-a7a9-ad784d6d2c7e', '2997e2e3-a8ae-5460-b306-c86340c5df1c', '850093fa-46fa-5bb9-a612-ef763fc402dc', '44c6c509-88fa-541e-993f-ccc68a29d092', 'd5c40627-ceca-5b6e-bae8-cd7ac7adc1be', '8040e5eb-4512-52f5-ade8-8ffbaf8a7abd', '0bc736fb-ba78-52a1-8bb5-d630953ca344', 'ef98c692-7905-5b5c-b884-82194b482e1a', 'bea75087-cd23-5680-9552-887b5bd10030', '1d2a41de-ed74-54fc-a079-23ac15d7cae0', '5f0c4266-ff1b-5436-a385-e76164645299', '645079ac-fc0c-5e42-a736-3ced86d164fa', '7c25f564-b679-5c33-a1cf-256054a89db3', '6afde0fd-404d-57df-b9d7-6b4a183ed838', '5db5145e-3030-5062-9ad1-0263eb8ea995', '3b7dfed9-e382-51d2-a146-6e1466cdcc45', '4453a35e-9c36-5b83-afff-3b85b687dd25', '4e444e79-942b-51c8-81bb-12363a01de5c', 'ebf98939-1845-50b5-af7e-25a98525ba1c', '629d9de0-ae5c-5625-9585-4f277822a181', 'af9acfc8-d898-5bbe-b3f8-dbabc93f8897', 'b0974b62-dd7e-5b34-aa56-b897508675d5', 'c35c10ab-20ea-5bf0-bc1d-891a0b8538fa', '99ee593b-a04a-5139-8dec-f29a77cb2492', 'e77c94fb-4774-5466-80ff-509f5bce1b62', '24eccc29-3383-53ab-a520-0f3235c7c22f', 'be9baa21-8962-5d1a-980a-02e025e15523', 'abec7413-4afd-528e-8da0-7d0c1aff8c7f', '6ec250da-4626-587e-910b-33dd6af31b50', 'be966c8d-62c6-5415-bf5f-b1809cf05978', '85622e77-546b-57a8-883d-d3ab64105c89', '0aea4846-1af0-57df-b404-669929f2262f', 'ed26255a-096c-50b4-9f0a-e9c5050179e8', '8522d1c1-010c-5933-89d8-732be361a64d', 'ab5c7d87-13fe-53e3-ba48-77dabac31238', 
'3f4c53f2-32dd-54c2-bcab-eaf3a0b41c53', 'ece50193-7c8c-5524-b499-cc2579bd3749', '6fd6b8e6-ab83-5670-bb94-96a2518eecfe', 'b1debe65-e0ab-5a45-aeb3-15b429efbabd', 'dcb0fe34-0b8c-5b62-87bf-368bcd3c06e3', '991b5e32-db72-5b10-96d2-481d0884dbc8', '083eb778-941a-55ae-8ff0-d001a8f5d24b', '9007a005-f632-5f29-a055-53e308eae518', '1ddaaf94-26cc-5716-a7c8-c8cb5b355fea', 'e70735b2-2329-5b7e-bb2a-9bbf74c2eba0', '1089fe10-670d-5074-b71c-720cb80bb344', '0febda2c-50ca-5cde-9a28-be02cd59de4d', '55a12a05-8ae8-59d4-8f53-584799bcff7c', '0c36c4f5-ce34-519a-af6b-2787a8e76c76', '25c5761f-25eb-50ca-82dd-9e865ca6a2e1', '33e4722c-74b0-5244-b2b3-343d99a94b46', 'dbbaf6de-3ba5-5537-a7f7-c413c52cb484', '6423e646-dcbe-5a6f-bb5d-8b10c8f2f9dd', 'b080d74f-096c-5ea8-b739-637ada73e20d', 'd3ecee99-e5d1-57d5-ae5e-32c95e99032f', '52795697-c4b5-56d1-803e-2dfe5eac6023', 'c80bf292-50eb-5d9a-a845-9a8f692cd949', '70d31eb0-5f4d-5982-a1e3-a3a6a14712f3', 'ac2ef3e5-974b-53d3-a594-e76850517ae8', '2b60be36-d471-5077-a513-490aeacf5c31', '75925cd7-f621-5dc2-8c84-284f5820a817', '91f07224-9370-502b-9e40-5a6de75de55c', 'a499a8de-2db3-5b2d-a3ef-3f0434732c1d', '428e773a-4ee9-5b6b-bb54-ef814f5995cb', '7923898b-8846-5af4-b7f6-8dfda20e36cc', 'ac175031-cede-50a7-b482-ebcfea437a9e', '8ec1f2ce-9ff8-50a3-97a8-d15cc4e212a0', '75b9149a-9a4b-5852-a44d-faf1fda0558d', 'b9986079-4813-58a8-af52-baf78c2ee57d', '0e884491-7f85-5cc9-9c50-1b294ad55592', '416952e1-066e-5113-8ae1-8878c880b6ca', 'd789067b-ab94-5981-9bfd-54daf4bb58f4', '8ae3defb-1c65-50b6-8dc5-8c2147a6bc81', '21757513-77b5-5328-b7f1-6146fcca5bfd', 'c9e42e34-5629-5349-af6e-b8f2e018882c', '43771d69-0095-5aa5-8c8d-25e7bf0adf65', 'fa896969-6a63-5acc-9b4f-9f120fbb9035', 'e9ffdee0-3d39-574d-b38f-dcbafe6ac6c9', '3a9e9702-3cf4-5864-bfb5-bf9199292bf2', '11e27a41-935b-5e2f-b4c7-fe389c9edf41', '08f0929c-12bc-5bff-b690-11a5bb92aa80', '57159532-dbae-5356-a769-443e8760cb65', '3ef0070e-9ae5-5876-8648-dbbef298fecb', 'dba5a487-0404-5c71-a110-f11c6b9cb9c5', '9e8dbade-e3b5-5497-ad3a-014caff3b8fa', 
'7a4874a7-5d15-5ea0-9454-15f1773476b2', '9cd002af-cfdf-5bb7-85d4-4d09b500e944', '5e7055d1-0faa-5148-ade2-5df753841e49', '912b5457-5932-5daa-b246-0461e42e3bde', '4f8eb9aa-b380-5d4c-aded-eefe8997b5a7', 'bc1412ec-7692-5aa4-825f-474d4f7fafc7', '4becc104-fbdb-50f0-bfbf-8f770aec9f41', 'f064bdf2-56e8-5c5b-bf45-1204573908d8', '529c7d4b-e914-5b09-bfdd-1c1d658fdd09', '9f839fd5-8d31-5eb6-a6e8-cd47ba45a13a', '2f673e24-940c-547c-91a2-f8916114b65e', '201f3b2e-acef-51f3-b1fb-35d2a352e2d0', 'a6b9e1b0-cf2d-5b56-8159-2de9b8ca4d8c', 'b67733e3-1cb4-51ae-91f4-b28ffb607dcb', '5de74df5-b7bc-5109-8879-3328aad9af99', 'c01813bb-44a8-5e5b-abea-fd8d56dc3bee', 'dec16174-b214-5c86-976f-1ee1d78425c7', '98470f36-d086-56e9-81b2-45461a926211', 'fdd4eaf6-d9ba-5166-a58b-72f69305b14f', '3fb1df76-56f3-58c1-83cc-c91a143e5329', 'a6cae0ee-c49b-520b-9175-916a07c71343', 'c2365230-b4aa-5644-a5fb-1329f8da911b', '306c08bc-4bea-5dc7-893a-98c631837d16', '13f0032d-9568-570f-9fb0-574b7148cbbe', '30db33c2-4940-554f-b719-e46bae419776', '3b02ee14-8b7d-537d-bdac-a19db7ff8251', 'e7d62bac-8a46-52fc-8b1b-3fc1d9a6aa7b', 'ec4f79b9-9b79-554b-8da4-dbb317f4407f', 'a24d44c2-46de-5230-839e-ffa05af05eab', '51778625-9671-5c28-95a7-4dcec4c753dc', '699193e5-e896-598e-9b30-0feb8ca54633', 'd7292f8e-8a15-5e0a-9882-c7d82163fece', 'ecd63ece-87b8-5e26-9480-821c1ff85272', '8ba4e0e7-ca7e-52ce-98a5-97e539100fc7', '6d570a21-fc4d-555b-938f-7b44d91b74a4', 'eadc377c-16e1-529c-aedf-11023aa92fbe', 'a2e7af35-0e68-5dbd-9135-ee2cdee05820', '5f7d5dcc-5917-5f98-bfdb-9822470589f2', 'e2775746-9ebb-59f7-aabd-e18043010fe4', '648b323e-75be-571d-8f72-6cbb7a1354a6', '5c1bb764-0406-5cc2-9e7a-0de8bc7ec561', '10ace9c8-bb2a-5805-9e7b-c8185f324625', '5258cfb7-cb7c-594e-957d-e01853e90539', '233bc027-f6d8-5b5b-994c-4e5978e1384b', '7e03311f-d039-5ead-a1a3-2dc04b354a99', '5b29b96f-b533-5300-b744-4a2879037171', '6f06282f-2ca0-53db-97c4-5c825d7f60d0', 'eb6b57f7-4105-5fbf-864f-3f116f2cbae8', '48f4e477-f11e-56fd-8e89-e1031be45700', '1bde02c3-63cb-5267-be4b-77302cfee3bd', 
'98855047-bc15-5ec7-9535-7ad5a427b448', 'fd004088-f420-5bb2-bbab-131bd93f7dd4', '3fc3e74d-552e-5765-a3e7-120406b61d86', '175e97de-1cdc-5b28-9102-4fc2c45aa194', '96c61f3b-d6c5-5997-805d-ac305438ac4f', 'c5942434-bb5b-5906-8045-fb86ab415fbd', 'a7411a3e-0733-5c71-9465-1538678ac28c', '750913f4-31a3-50e2-8226-dd0b68ebf40d', '63503db4-7ed0-58f5-a47c-f337b7af157e', '4250bbed-35f3-5827-a5b0-3866f0162878', 'c6e4b1e2-0fc3-5261-a985-9fec058aa5a0', 'f3e22ff6-d773-54b0-bcf5-dd636821b494', '88117371-c8db-5b38-8761-810aaed8292f', 'bee7d3bc-7c9f-51ef-99a6-4f8303bf9129', '446afb6d-9772-501f-a9db-9759d9831248', 'fde2ae17-3c33-59d0-9970-5b8b31e8f1f6', 'a758987b-3c42-552f-9c59-2a8967c0b669', '7585bd19-a38a-50dc-9011-3f05756a98e9', 'b2ec1fad-0c48-5441-956d-eb311bc0b91e', '1539e8e4-430c-5342-ba06-4b0a50cbb38b', '1c48dad1-d640-52bc-8812-10d0b49cb16c', '65e3d8b0-3951-5db2-b1a6-10397f59498a', 'aa48dd86-53f3-59a0-9036-671d7bf8be70', 'ac03af19-8e15-530a-83e0-7f84652e765c', 'd81bbc46-1a93-55fd-bdc2-bd700b70e5ec', '4c4de136-1f46-5d5a-a1d3-aaf04174c608', 'f143c6f5-53bd-5dcb-91a5-97a92d089d3c', 'bca39fe5-81d3-5c55-8dbc-c210f92ca4cf', '9e7a6998-7e66-5b38-9a7b-9df37565d01b', '96d58b2d-7322-56df-a0a6-78f40a4402e8', '961f3629-c098-537c-95a9-13687f0af327', 'a76e1f59-1b03-50bd-91bc-28febccf5d4b', '3beb20dd-2913-5f88-b103-bf02578185c5', '0f00029d-ad4b-5566-bbef-7eeff49b3288', 'c51a2fee-7a03-5933-bb76-2de5eccc11a8', '6a670914-bee0-58d7-b7e5-11a229202601', '4c33a0a6-64ce-5b3d-b3d2-2c3863bcf71e', 'b3cba3d3-89ef-5f58-b63f-53cd65f22b52', '3ac0d255-6eef-5217-b0da-e70f55c3d802', '666ad901-9607-5720-9b62-17b0a681fc04', 'e2dae33f-c12e-5d3c-b31e-3981292648fe', '39c7da13-0fc6-5e11-99d0-19195e2a91bf', 'a5389d67-d3a0-5b03-830d-53e2b1bd9960', 'a64cfce5-f09c-5ea4-ae64-22f212518ec6', '13c1615e-6076-556c-a1d0-739ebf1edae4', '4f99d60b-635f-5796-a41e-eec5181f05cc', 'ba630435-7fb3-5c07-ae62-e026f4ead29b', '3ad2466b-5a0c-579e-8f00-4f007bbde686', '8fe8cdf3-c7b9-5d23-9eec-72b7e7e76eb0', '00b4f339-d6a9-5ffe-9005-61af5e55b0e7', 
'7b5db3e5-42d2-5692-bb96-9021eec3f484', 'd6ed4684-c41f-5a3e-83c5-fa4f560962bd', 'c8519dac-5e6b-5d4b-8a5e-52eba32d3f46', '99b9fd2c-f811-5b8b-9957-c8aca7641ff8', '5d9c39be-5499-502a-9f76-a64f7eb42cf5', '2048ca98-d1a0-5daf-bfae-630faf6aeabc', 'edda19ca-aac3-5f84-8cde-9cee8ce9d1dc', '3e70a2dd-3c86-58a9-a4db-06681e00a60b', '6e152cc3-08b0-59f8-9099-205bddd377a8', '475dfa14-9705-5f81-9c9d-2deab21171c1', 'e14fe5b0-11fb-5c4a-a246-6c3e0294b3af', '663d9f02-f979-59b2-acd9-2466e35ab963', '4752b3f5-579b-5afe-b037-01d542cafa41', '257ab63c-b6c2-55b4-9db0-5923207ac865', '1a92e736-7090-50c2-991c-5ce6333b56c6', 'f3d3912a-aed3-52d1-bafc-7d8d95dfce5c', 'b433fc59-c036-5659-badd-cd497b219d30', 'b1615b9d-a09f-5af6-8c4f-3760866a153e', '74594c0b-0eb7-5b6c-abe9-236e7e57cedb', 'd8f89cb8-e805-5d40-b23c-8a4f59930abb', '3ae2b092-4ce8-5024-8e1f-cbfa4b778fe1', 'cc1b9172-c09c-5886-b0e2-a3a7115a6e47', 'bb100088-b13f-50e8-b7e5-e52f1399f765', 'd0c56423-04d7-5642-9ec5-ea4928466218', 'c5c46bd3-22fd-5b80-807c-fde019c46628', 'c2ccd354-d3e1-53a6-8610-12bac3cf3398', 'ed37da0e-01d9-5f20-940c-b41d7f9bdb26', 'f418f9f1-848d-5827-b7f9-fa9d1b4a35b6', '508422c9-8d22-5006-8f77-5ed5a2a46699', 'e5064f93-48f3-50a4-96f2-9bb3f2d905da', 'aae1582f-f02b-5346-bcbd-a75d368889ba', 'd9e803f0-72e8-5ad4-a541-a5e3a802dbde', 'abe79ea2-74ef-522c-9340-4cf49da2944b', 'ad08506a-6bde-5859-9bc1-fc2c6f341a0b', '858384cd-75a6-5683-8d88-78f1143aecef', '9b37d03a-2b9b-5e37-96f6-f3516c9e388f', '76994a24-ad8e-583f-937b-362095999a46', 'cd03b212-14b0-59d9-9db4-b5070d21baf6', '46db54fa-3407-56d0-899c-192012d6f98d', 'd2e95ab1-fadf-5278-a1ba-33b1e11a9f50', 'fef2b01f-954f-5195-a93c-4e32352151fe', 'db951a29-500c-5f7b-8ede-ec1387a89b3e', '0217a76b-b6c3-552c-afc0-c0e4d0e6a72a', 'fd6990c3-1900-5252-8ae8-48b80f6d509e', 'bb847586-e73e-5c9d-9041-eb24aa12ae6d', '8da3d5c3-4a98-541d-809d-0867eb8bfb63', '3770269c-b072-56e2-9660-9f6b4981a0c9', 'c72888a5-f054-5ad1-8d1a-13f3e542ea69', '5e2efebb-3148-5674-9900-6b9fcc014ddf', '66c89619-3820-5ecd-989a-10c09689e19a', 
'd8ebe6c4-202d-585f-80c4-b43b4dc48300', 'f3b72ec3-df83-575b-8076-2fb800e8b8a7', '015093bb-fac3-5fd3-8429-74cfde6c4c45', '083978da-d4a7-5f2b-bcec-429d02dec2ea', 'e3ff3001-550a-5dc2-8321-bd8d0bb7521b', 'fedd2d34-e8d7-5809-ab52-952a5d7eecab', 'd963d176-5953-5ccc-9d17-5caea5cf7231', '1d62db44-9988-53ef-8fa6-7d5abf943b11', '8246d27f-4c90-5430-9034-27944ae2dec6', 'f73af14c-1f0c-544b-8b30-a758c915c735', '9ada2bde-370a-5f41-867e-615d8f4e9ad0', '04770d14-b6f3-53dd-a2d1-92d2b56670a6', '8185446a-0be4-5c4b-a545-7f2367b95a30', '44aaf2b7-3b45-535f-81ca-ffe8d0c0ef57', '0ce74f97-75a6-5464-b456-7889d4c2334e', 'f7303f87-35ce-50e0-8ae9-530c6d9a2e81', '5bf607d1-5900-5dd9-b0c3-813af260ca9c', '012e55b9-e63a-509e-877c-b2fcc80d5a57', '7af984b3-1208-539c-98bb-b5e469b8bbe6', 'daf85a1a-d0ed-53ae-bef2-32860c394fcb', '5e32348e-25b2-5dc4-a478-d91e61a9e1f9', 'db1663dd-c046-5e4e-a857-040406506e20', '0728c6b3-f885-5e31-ae6d-4c3990d2ecae', 'a564aaa0-2385-5da7-822c-b47b2b05f581', 'fab99e13-aded-5698-a0f8-509b32457937', 'dfc48899-2f43-53db-8031-1e7b0b25fdbe', '28a10263-28fb-535a-9481-3d87a03bac53', '68f74769-9d91-5190-ba01-f9660a6db819', 'a63c2495-a2ad-510e-b035-f96d8fa71dbc', '3ff5eb67-e0c0-58e3-a045-75e31ea2c62a', '651e86c5-278f-5d27-a629-e60b4d3db3ca', '2d0d2911-9e00-5118-aabb-9ea2efa92f49', '011da8e6-3972-51d5-8965-8ab668be3156', '8de39282-d78b-5289-9411-ff5b9b57786d', '00b2cac3-9c18-52b1-bd58-f59458fe3717', '1640f511-60b2-5b8a-bf5b-877b2470806b', '7bdcdf29-689d-59d4-a287-d131bb4bc5c6', '986d0d36-bd11-5058-a362-3cb8f702ecd1', '43e132f1-901a-5464-a973-6f10b06a554a', '3a22a172-8492-5082-a93e-5b5a04b971e8', 'c5564cb5-d756-5b31-a601-09e7bb405197', '1439e7c1-9b65-51ed-9296-6c5087a1a58b', '588b5985-56f5-50b0-a1ec-879e07fdfb47', '125f54ce-43f8-5f3e-89c9-373214a4b92e', '65cd156b-8920-5da5-9bf0-db4d75f25674', 'b9e665e1-ceeb-5b0c-950e-db10bca0eac4', 'a1ad196f-24f5-5396-a215-e010ee2a139a', '7414fb05-d263-5c1b-b668-d15240c569dd', '6249ce1a-46eb-503a-a418-4c697bcddf42', '6c9891f9-6f7b-58a3-8090-6f24cede5d7d', 
'a48ad23f-df39-50fd-bdf4-9c08e81d065e', '23701f0a-d8ca-544e-b7ac-54861bfc0253', '975833c0-da85-5cf4-8d47-0c98f4520592', '3927c352-fba6-56c1-9c6d-d6f29e62f473', '6bad99d5-11d6-5060-a03f-1f0718d6f691', '0796e64b-80ca-571b-95c2-676fd295fcf5', '9c2605c7-1180-5f52-9e6a-6e4cdea5f824', 'dd570987-d6ff-577c-b908-94c248485484', '9eaeb150-6d8e-556d-839d-f76930907282', '1b91abe2-c430-5a1a-a6e2-b00004b1ce6e', '02cc33e0-28e4-5713-8431-fe59c359a58a', '044abbfc-1807-5dd2-b48c-18a8db9eb689', '7cc84c9f-e0ec-51ec-b46b-a15dcb8dfb88', 'e78d8909-803c-5a44-b645-9aba65662dc1', '116ce2d9-4499-5261-b940-694477e85db0', '08a14c3f-c83c-5f62-980f-639078fc5c44', 'bcd68443-dff8-52e6-b8f0-9244b656cec3', '48925f63-d04b-5a7f-a3cb-d79b7f594db5', '8def6d69-c3b3-5111-816e-6435f93ae133', 'ea4667c1-e5dd-5df9-9556-25d901b6a1bb', '30d4e2d9-a0e1-546b-a23c-e58d9fc7e74b', 'c7b6a299-c2b8-5a99-be95-f50ebb06ed4a', 'bd588a5c-c9de-5a34-830c-aece8525711c', '6fcb2e17-3ccc-5992-afed-dcffcbde7553', '8dc5db05-986c-5ddc-a13a-b8a51b88f990', 'a491171d-26ad-5503-9a81-20847f7009d5', '8a6ee866-f999-5950-9155-37518ce02826', '01124067-92bf-55ce-93df-e0a87768f767', '2bbbc1ad-98cf-5c59-bf18-d813e28f899c', 'c75c44aa-15f6-5011-bbdf-edd3689537b6', '0b704049-c87b-5543-8603-9924d4eabfa2', '20308001-30e3-5445-b903-e24a809865e2', '4e84c2ff-0ea3-58b0-a3da-5508d165bab7', '948524f2-bb7b-5519-a314-d768c8b4796a', '56af683f-8c1b-57e8-9030-75a2e0d438b1', 'a3d0b905-7f7f-50ca-b927-1a14a325d7f4', 'c5499715-e10c-5f66-ab60-4c05602f1fa8', 'bb21fe3b-1816-5838-821e-366fd1f526e1', 'baf0b065-4aaa-5c7e-8cf9-82f1e2d3cde1', '93576c26-bb42-5779-ba83-e9a084786f73', '728f6d59-b158-5cd1-b002-8f5a9d5255d1', 'd968a67d-8b72-5e09-9894-a30cfe38d71d', '5668e7a6-3948-5c78-bdd8-5953ac28be5f', 'da26c2d9-389c-59e3-93ad-8ab22f55918a', '4cedad35-9b5e-5959-a72c-574a0ba823d9', '1d529e90-25cb-54c7-b811-769b042eb3cf', 'f6de3265-48d2-5d04-8c30-60e23abe2fcb', '62a26d3b-8e90-580b-b526-657a54f1764e', '3b5781ca-9bad-5bee-92a6-b87fd7392ce1', 'e0c97841-296f-5dd9-a0ef-0a2dab999ecc', 
'2165b223-af4d-5910-a147-320ea5d81481', 'f5a65949-e362-513e-b275-31b39199c35a', '0648596f-c162-5df8-a5d4-5d92d1cd0497', 'b54c05b9-6ade-54d0-805e-2fe8bd091609', '55b53396-dfb5-538f-9f73-0ba47685c704', '21efb66f-ae5b-53c2-8d72-82a9e27c4458', '920fe5ed-6c39-52d9-9276-f37afcb467d5', '68f05818-5050-5f6b-950e-33aad446b516', '50fb8544-9ed4-5ecd-bf02-e7d5a5081ac3', '6993f7fe-0471-5791-8d6c-64aa10eb32f6', '50c4d259-9799-59d0-bbf1-be5967e303d7', '4e32059a-9de7-5875-9c98-15a674b8fcad', '92245e7f-fb60-5f05-adcb-0ca1b233d4c9', '7befc334-bf46-5ecb-a500-fa79afb57f2c', '67f36bca-ae09-5cf8-878c-de996deb2a4d', 'f4e67e45-8c6d-5b74-a97a-60c15784f440', '491079f8-fd0a-5af9-ba67-bfd1e752f2b1', 'fe067cf0-f6ee-5175-978e-b9f5e972cf0b', '6149f0dd-bab8-5c93-a8e7-c14150089b3a', 'a627c973-702f-55fb-afd1-b360ae0694e1', 'eeb4b0b8-033b-5494-97e8-5db1ee6b0b1c', 'ceea2b1b-a27b-5d13-b024-c38b9f0d5595', '2a0b1e06-2145-55ce-a285-57edcb599bdf', '517c7f8b-91e9-5d65-bc80-88dd31c24f85', 'b1860bcf-4651-5ee4-91ac-2f31fa9fe632', 'e9907ae9-af95-5a8e-bd07-d7a89477bbdb', '9f94061b-e240-55a7-b447-6ad2b2f37a37', '75264b20-c439-5652-873f-371f9099c547', 'c9e4e191-d322-5e1b-9eea-97b24858acd0', '0d3e0bf6-867e-501e-9ecc-60f93190eef2', '8c4898d9-ecad-58ed-acf3-d7b3b9b31581', 'f4069bb5-73ef-53ce-ab1d-7e2161da4c75', 'f4daf5bc-9cfe-5c28-9efe-edf102bf0093', 'd80fae1c-21b9-5733-a832-90e130249c6b', '981505c5-c867-5ffa-bcfb-6066ed94ece4', '9f6bacc9-8579-521f-a39d-9d05f25f5edf', 'b7a6ee7e-851b-5bd9-9035-02f6f1551440', 'b950e8dd-e001-5110-9cd7-9cf864af7789', 'd8aad000-4cdc-56b5-bf3f-5d1d8cdd5ffd', '3b46544b-a940-5e69-a2bc-58b973aae8c3', '02edd0e3-9e16-5bf6-9f79-ab3a026ce761', '753850ba-730f-5697-a710-501ec16d0335', '694d5245-bbab-5ef3-a84b-3c12d8b1c1bf', '2567d0d2-1243-5c2b-8a42-feafeac58bd6', '0bda5d1e-8bb2-53f1-9ab0-1f340e892279', 'eb239c8a-c305-5df7-9895-f6551e418991', '1214e5bc-3c4c-5d88-acc4-d82956775407', 'b3050f17-36f6-5b09-862d-1db70e008162', 'f928c957-210b-5fa2-8f1f-8bca26a2b3a6', '026be3bc-affc-51f0-8c77-e12d7bf3a241', 
'1d30bc17-b386-5e9a-82f5-f9f867ada6fd', '4034a733-0f16-5c43-a66c-12e56cde6b7a', '46f2ccdd-053e-5b34-b9e3-ec4b4292ed34', '3683d324-66c3-5412-a2ef-aedcae20e694', 'f8eee170-1fe5-550b-8bcd-d5d81576af9c', '2401ae4f-edce-5e2a-9910-637ea7ea7b8a', 'f1a46ca6-2b10-5e6c-8ffe-00dea62b0d88', '67532848-e2d2-5099-9dd5-7f44b6b68cd7', '1cb4182b-90ec-57a1-9604-b5d261f563fb', 'c4f6f609-4b85-571f-8f43-0e3ccaf34238', 'c4667689-a368-5b78-9ecd-ee81874da958', '83f55dd0-a9b4-5787-8385-ee4130453907', '84a50a66-3ae9-5043-a30c-71a834e773ab', '6844e9ee-385a-594c-9020-348239766515', '79ce459b-a20f-59bc-a1c8-ccc4bbdd9c41', '79143582-25a4-50d0-a601-fade006aca82', '25611055-e9dc-5004-a74f-59cdc44ae7e2', 'b76fccdc-0e34-55ac-8137-60d912a1573f', 'b14e7b1b-be72-54ea-8f90-ee758a673705', '5c9c14b6-1924-5f91-b03d-ff3868e977a3', 'a8e25b2b-35a5-53e2-a10c-500ba5f32feb', 'a983c5be-dd82-506f-8a88-b1dfacc10dc9', '1c08d3e1-eb32-55e3-9e70-4ddc4ae0679e', '71e2d17b-eacf-59ce-8ecd-75d56cdc1389', '560406c6-1c9a-597e-bf2f-527014f23aa6', '2d83bbaf-676f-5c81-94b0-3c6e826cd4dd', '3251204d-5f0d-5082-ac40-7d1950d1155d', '5a5e0616-7dcf-5fa7-a458-75f5bea8240c', '830cadfc-d008-5e99-aa2c-52c4780e411d', '150a43e3-ee81-5991-bda6-0e069b6d7ab1', '93a9c971-cf71-50ca-8136-eaebc6bc3653', '0eb27823-077f-5e9f-a112-3cc4dee1ae62', '5bb7ef9e-54b3-5f39-aee6-3a7b736c1d64', '3cee2838-7eb2-5fa6-ad48-e548e92d2bc4', 'a8a4fe44-9146-5775-adf9-06907ec24a1a', '814bcb2b-6a13-5563-8f38-4581ba1ed4f0', '5f9b3287-4090-5f5c-8a31-103943eacc12', '88de7a6b-a6d2-571e-ac91-8cdb27082150', '849efb35-06cd-542c-9696-560134ae47c7', '45327273-f640-5be9-af76-93308362bc41', 'f06a7358-8426-5671-847a-2d14eb6d3b65', '98d189f3-433d-55a0-82fb-c846f9feb9af', '05285518-8e38-5d58-bbec-77abaa185cc8', 'b650b22f-7194-51c9-a98b-dd7d28462974', '8aabd9b0-d44b-51b1-8eaf-99a3b40fad00', 'dc9143b2-3094-5efd-b8b4-226c86ecd29f', 'f02d00a2-cbfd-59be-bee4-13140edb40d5', 'ddd0d236-19c3-57ba-9ffb-de639f006dd5', 'c38d6518-e6e0-522e-9d97-e304147e0afd', '7045afe1-8c7e-54b8-b8a3-34f097c4a68b', 
'3c269849-5025-5755-a914-d2de3bf601a1', '5eda907a-52bc-5787-a368-e9bfba54d2b9', 'cc0fffd5-0425-592d-888f-ed12b64dc782', '471dddf9-f4c9-5405-8e1a-d7946fcbb3a3', '02a1e1ea-eb05-5f17-a325-0d733a434d5b', 'df43830f-53e8-52b3-9afd-32020543aa25', '47544767-41fa-5752-a503-c2cae31a6a8e', '468a17c8-d52b-537c-9688-2b0183edd096', '1416ca25-a189-5369-bd26-0667ff97c5d7', 'b7260d93-d9c1-5e8d-a826-1cd6b7f79ca0', 'ba21bc0e-0239-5a68-a903-9d42758f79a7', '03f5e8ac-3ec9-5a68-b85d-3bb3f5109d4f', 'fca65bd6-9448-53cf-a289-089991f8c7e8', 'e312b496-39d1-51f2-bc5a-248d9a9bbc08', '2a407d43-95bb-588f-b2f5-cc1726c71546', '19e32cb4-9c83-52c4-a641-f26cf70d0cde', '6443efef-5a12-5eb3-9587-62db964dab93', '751d027f-5551-5ef3-b020-e93eea57ca87', '31cd49ce-f81c-5553-8849-4d4e9d9b5b64', '0dce4674-afb3-52d5-ba7f-070a2be9220b', 'cf7e49b3-799f-5dcc-9a4a-96e849df22a3', 'fe7fefc0-3f1e-5a99-9185-ce7eacd336f8', '6a979ee7-df18-516d-88da-8544dfedab74', '1cf31f5d-35d0-580c-b09e-e000df9b35bd', '6b649084-fc71-5853-a786-18e6dd199020', 'b222c4dd-56fd-5b15-9533-0a160277460c', '9da26b43-ec3d-5932-9058-4c98b45db456', '558abdae-ef1b-5f8e-8574-29ae1607d59d', '36630238-0121-577d-a435-ce3bb85738be', '3b39582a-3f15-54aa-86c9-e894fc673ebe', '2b980188-d9ed-5399-a35c-7d21acdb999f', '18a6a297-d7c8-55c1-9ccd-01cea1f6a510', '139a5ef1-b148-538f-87d6-3ac52b0de67d', '94d7c71c-f6f5-5b30-a85f-aeee4189abb9', 'ec074383-1c62-5023-beb9-30e1c10067ae', 'b59582e3-7839-530f-ad29-01fafe820a77', '41f3890c-5a34-515b-82ac-4a741f20fc25', '8386d689-979f-5efe-b45f-907fa76ef746', '1e3ae71b-909a-52a3-9e2f-58356f212cfc', '6728e33c-2caa-5831-9b91-17a3cd0f9d1e', '12a70cb5-7a2c-580f-bc4b-edf3caa052b2', '57febc4e-e76f-508a-931c-7307bd175cf5', 'bf820146-8237-559c-91af-03ffbcc0f2d2', 'd7b7b693-17b1-59b2-b23e-d543dbcb9350', '137c74a5-518c-51bd-9b5a-58f0ae8adb01', 'c036e824-3bfc-5a7e-9096-7da4b7ae3898', 'f59abb16-f6d0-56a3-bf48-362cd0e57306', '54d26350-8471-5c10-a8d7-c2d4f3822c4b', '0651d7d4-581b-5117-896a-350b135b4a81', 'af5bf434-1c43-5b52-826a-391154a00938', 
'31cf82e5-e621-501b-a15c-362b3c984b55', '2454f93e-ddc5-51d1-82b6-fbfd442c890d', '1ecd4308-0f05-53dd-acdf-25d3f9b3d27b', '58708f13-a0c7-51a7-bbe4-cff1ff089a8d', '1263cbd3-d8e9-53ee-8c99-c3325c7deeec', '10928f5d-0029-5ec9-be3a-49b73f148158', '3206e3ec-0699-535f-a95c-7cf85bfa91f5', '0165a2d5-2055-5535-80a5-8ad4ae36b537', '66c251ea-8e1d-56de-9c9e-369c8423e089', '8ac0638e-df6f-52c2-a764-52a536d6ce83', '9140caf1-210f-57bc-929d-018e37a80be4', '020429cb-af90-5b67-8743-149b73142549', '447f6125-a852-5422-af13-a7c48ea718d1', '3558494c-e30b-5de8-a3f6-f6ed7f8efd17', '905b3314-3644-5400-b152-2614458789c9', 'fb0896bc-6e41-59b5-9aa7-de58bbdae6e7', '9a1b6a66-7b03-55c3-bab2-50fe0613b0f1', '90618499-05b1-599e-8190-784d62b19544', '15fc0894-f705-54fe-a80a-9a77254bd238', '03edebe2-8219-50a2-8e20-31d9b8b2ef3c', '3433d408-55ed-5314-8f62-77391a3d5cf7', 'fa8367d9-e93b-565b-9804-fbc947ffa4ff', 'aead248e-817b-5c0a-82fa-bec833913a69', '162999e8-fc1c-5d69-83de-ab8fb968fbf2', '931a1a5d-62f0-5fa0-a9a3-41a7f3285e07', '28e14c35-2244-5207-b854-5ac300c02b3c', 'a1130451-73f4-5324-91af-3ab6b8ca98a0', 'd3b7fdd2-ef9b-5ac9-93f1-501f105ad96b', 'ed94e450-9d6c-5d32-9dcb-2b490a461cd3', 'a424803b-2c6f-5a92-a682-8a7e10993d89', 'c064dd5d-702b-5d2b-91a8-8a8a6412a9db', '63ddc5e9-2d3d-5bca-8f69-50f9891e128c', '56c9f3d2-3520-591c-bb4e-2deb5bf2f62c', 'f0bcbeff-9e09-5480-a2b2-fa3ab6bd887c', '231fc69e-d24a-5cfa-a629-a36de7ffbaba', 'c51cab4e-d0fe-51a3-96f1-f755d2ec1507', '1b08acef-720d-5293-9cc1-dc847c468d27', 'afbc6db3-4145-5749-85f7-e02dc14f07fb', '990ed297-d7b3-5465-a379-e5d4493364b9', '8c93a3ea-d655-5b1e-a274-b886cb76eff9', '0e180e93-7378-5dae-8fdb-97c8a4d30db6', '612dba6b-aabc-53c7-a08c-16651d2ee184', '45d95f9a-ab97-59a3-a36a-db3d87f73e9f', '946f870a-95c3-5c42-8ec6-db704939e4da', 'c2f7b541-98ec-55e6-bc46-0034b37d5dca', 'fab2b4e4-64ff-58d9-a28c-dd8e36030a54', '94a2a65e-cc40-5657-82fc-3294df6652dd', '5d3f0b88-1077-54bd-8f6b-86ae3184e048', '1098764e-5adf-583f-9c5d-91aa945d58a3', '8ff1880a-5f56-590d-9ffd-1257e503feb8', 
'75445039-c2a4-58d4-b4eb-4f141c641e25', '915167b5-b297-5d00-8804-2f3f1af988df', '423b2add-7135-5a54-8864-5e6365df5442'}

In [20]:
# Index doc1 and doc2 using incremental cleanup, keyed on the "source" metadata field.
index(
    [doc1, doc2],
    record_manager,
    vectorstore,
    cleanup="incremental",
    source_id_key="source",
)

{'num_added': 2, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}

In [21]:
# Identical call with unchanged documents; the recorded output reports both as skipped.
index(
    [doc1, doc2],
    record_manager,
    vectorstore,
    cleanup="incremental",
    source_id_key="source",
)

{'num_added': 0, 'num_updated': 0, 'num_skipped': 2, 'num_deleted': 0}

In [22]:
# Incremental cleanup with an empty batch; the recorded output shows nothing deleted.
index([], record_manager, vectorstore, cleanup="incremental", source_id_key="source")

{'num_added': 0, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}

In [23]:
# New content under the existing "doggy.txt" source.
changed_doc_2 = Document(page_content="puppy", metadata={"source": "doggy.txt"})

In [24]:
# Index only the changed document; the recorded output shows one addition and one deletion.
index(
    [changed_doc_2],
    record_manager,
    vectorstore,
    cleanup="incremental",
    source_id_key="source",
)

{'num_added': 1, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 1}

## source

In [25]:
from langchain.text_splitter import CharacterTextSplitter

In [26]:
# Rebind doc1/doc2 to longer texts (same "source" values as before) for the splitting demo.
doc1 = Document(
    page_content="kitty kitty kitty kitty kitty", metadata={"source": "kitty.txt"}
)
doc2 = Document(page_content="doggy doggy the doggy", metadata={"source": "doggy.txt"})

In [30]:
# Split both documents on the letter "t" (separator kept) into small overlapping chunks;
# each chunk keeps the "source" metadata of its parent, as the recorded output shows.
new_docs = CharacterTextSplitter(
    separator="t", keep_separator=True, chunk_size=12, chunk_overlap=2
).split_documents([doc1, doc2])
new_docs

[Document(page_content='kitty kit', metadata={'source': 'kitty.txt'}),
 Document(page_content='tty kitty ki', metadata={'source': 'kitty.txt'}),
 Document(page_content='tty kitty', metadata={'source': 'kitty.txt'}),
 Document(page_content='doggy doggy', metadata={'source': 'doggy.txt'}),
 Document(page_content='the doggy', metadata={'source': 'doggy.txt'})]

In [31]:
# Reset with the _clear() helper, which runs index() on an empty list with cleanup="full".
_clear()

In [32]:
# Index every chunk from both sources; the recorded output shows five additions.
index(
    new_docs,
    record_manager,
    vectorstore,
    cleanup="incremental",
    source_id_key="source",
)

{'num_added': 5, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}

In [33]:
# Replacement documents for the "doggy.txt" source only; "kitty.txt" is not included.
changed_doggy_docs = [
    Document(page_content="woof woof", metadata={"source": "doggy.txt"}),
    Document(page_content="woof woof woof", metadata={"source": "doggy.txt"}),
]

In [34]:
# Index the replacement doggy.txt documents; the recorded output shows 2 added and
# 2 deleted, and the kitty.txt chunks still appear in the search results that follow.
index(
    changed_doggy_docs,
    record_manager,
    vectorstore,
    cleanup="incremental",
    source_id_key="source",
)

{'num_added': 2, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 2}

In [35]:
# k=30 is larger than the number of hits in the recorded output (7), so this effectively
# lists what is stored.
# NOTE(review): the hits without metadata ('hi there', 'harrison worked at kensho') were
# presumably added to the store outside index() — confirm.
vectorstore.similarity_search("dog", k=30)

[Document(page_content='woof woof', metadata={'source': 'doggy.txt'}),
 Document(page_content='woof woof woof', metadata={'source': 'doggy.txt'}),
 Document(page_content='tty kitty', metadata={'source': 'kitty.txt'}),
 Document(page_content='hi there'),
 Document(page_content='tty kitty ki', metadata={'source': 'kitty.txt'}),
 Document(page_content='kitty kit', metadata={'source': 'kitty.txt'}),
 Document(page_content='harrison worked at kensho')]

## loader

In [36]:
from langchain.document_loaders.base import BaseLoader

class MyCustomLoader(BaseLoader):
    """Toy loader that emits two hard-coded "doggy.txt" documents after splitting them."""

    def lazy_load(self):
        """Yield, one at a time, the chunks obtained by splitting the sample documents."""
        sample_texts = ("woof woof", "woof woof woof")
        raw_docs = [
            Document(page_content=text, metadata={"source": "doggy.txt"})
            for text in sample_texts
        ]
        # Same splitter settings as the splitting demo: break on "t" and keep the separator.
        splitter = CharacterTextSplitter(
            separator="t", keep_separator=True, chunk_size=12, chunk_overlap=2
        )
        for chunk in splitter.split_documents(raw_docs):
            yield chunk

    def load(self):
        """Eagerly collect everything produced by lazy_load() into a list."""
        return [doc for doc in self.lazy_load()]

In [37]:
# Reset with the _clear() helper, which runs index() on an empty list with cleanup="full".
_clear()

In [38]:
# Instantiate the custom loader and display the documents it produces.
loader = MyCustomLoader()
loader.load()

[Document(page_content='woof woof', metadata={'source': 'doggy.txt'}),
 Document(page_content='woof woof woof', metadata={'source': 'doggy.txt'})]

In [39]:
# index() is given the loader object itself here rather than a list of documents.
index(loader, record_manager, vectorstore, cleanup="full", source_id_key="source")

{'num_added': 2, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}

In [40]:
# List what is stored after the loader run; the recorded output has 4 hits, fewer than k=30.
# NOTE(review): the two hits without metadata were presumably added outside index() — confirm.
vectorstore.similarity_search("dog", k=30)

[Document(page_content='woof woof', metadata={'source': 'doggy.txt'}),
 Document(page_content='woof woof woof', metadata={'source': 'doggy.txt'}),
 Document(page_content='hi there'),
 Document(page_content='harrison worked at kensho')]

# ingest

In [1]:
import logging
import os
import re
from parser import langchain_docs_extractor

import weaviate
from bs4 import BeautifulSoup, SoupStrainer
from langchain.document_loaders import RecursiveUrlLoader, SitemapLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import SQLRecordManager, index
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.utils.html import PREFIXES_TO_IGNORE_REGEX, SUFFIXES_TO_IGNORE_REGEX
from langchain.vectorstores import Weaviate

from constants import WEAVIATE_DOCS_INDEX_NAME
import datetime

logger = logging.getLogger(__name__)

# Connection settings are read from the environment; os.environ[...] raises KeyError
# if a variable is not set, so the cell fails fast when configuration is missing.
WEAVIATE_URL = os.environ["WEAVIATE_URL"]
WEAVIATE_API_KEY = os.environ["WEAVIATE_API_KEY"]
RECORD_MANAGER_DB_URL = os.environ["RECORD_MANAGER_DB_URL"]


In [2]:
# https://github.com/langchain-ai/langchain/issues/8494
!pip install nest-asyncio
import nest_asyncio
nest_asyncio.apply()



In [3]:
from recursive_url_loader import RecursiveUrlLoader2

In [44]:
def metadata_extractor(meta: dict, soup: BeautifulSoup) -> dict:
    """Build the metadata dict for one sitemap page.

    Args:
        meta: Metadata of the sitemap entry; must contain "loc", which becomes "source".
        soup: Parsed page from which title, description and language are read.

    Returns:
        A dict with "source", "title", "description" and "language", updated with
        every entry of ``meta`` (so a key present in ``meta`` wins on a name clash).
    """
    title_tag = soup.find("title")
    description_tag = soup.find("meta", attrs={"name": "description"})
    html_tag = soup.find("html")

    extracted = {"source": meta["loc"]}
    # Each field falls back to an empty string when its tag is absent.
    extracted["title"] = title_tag.get_text() if title_tag else ""
    extracted["description"] = (
        description_tag.get("content", "") if description_tag else ""
    )
    extracted["language"] = html_tag.get("lang", "") if html_tag else ""
    extracted.update(meta)
    return extracted
                                
                                
def load_langchain_docs():
    """Load the pages listed in the LangChain Python docs sitemap.

    Returns:
        Whatever ``SitemapLoader.load()`` returns for the configured sitemap, with page
        text produced by ``langchain_docs_extractor`` and metadata by ``metadata_extractor``.
    """
    # Restrict parsing to the tags named here.
    strainer = SoupStrainer(name=("article", "title", "html", "lang", "content"))
    sitemap_loader = SitemapLoader(
        "https://python.langchain.com/sitemap.xml",
        filter_urls=["https://python.langchain.com/"],
        parsing_function=langchain_docs_extractor,
        default_parser="lxml",
        bs_kwargs={"parse_only": strainer},
        meta_function=metadata_extractor,
    )
    return sitemap_loader.load()
                                
                                
def simple_extractor(html: str) -> str:
    """Return the text of an HTML string (parsed with lxml), blank-line runs collapsed.

    Two or more consecutive newlines become exactly two, and leading/trailing
    whitespace is stripped.
    """
    page_text = BeautifulSoup(html, "lxml").text
    collapsed = re.sub(r"\n\n+", "\n\n", page_text)
    return collapsed.strip()

def simple_extractor2(html: str) -> str:
    """Variant of simple_extractor using html.parser and newline-separated text.

    Text fragments are joined with "\\n" via ``get_text(separator="\\n")``; runs of two
    or more newlines are then collapsed to two and the result is stripped.
    """
    page_text = BeautifulSoup(html, "html.parser").get_text(separator="\n")
    collapsed = re.sub(r"\n\n+", "\n\n", page_text)
    return collapsed.strip()
                                
                                
def load_api_docs():
    """Load the LangChain API reference starting from its index page.

    Returns:
        The result of ``RecursiveUrlLoader.load()`` for the API reference URL, with
        page text extracted by ``simple_extractor``.
    """
    # Drop trailing / to avoid duplicate pages.
    href_pattern = (
        f"href=[\"']{PREFIXES_TO_IGNORE_REGEX}((?:{SUFFIXES_TO_IGNORE_REGEX}.)*?)"
        r"(?:[\#'\"]|\/[\#'\"])"
    )
    skipped_dirs = (
        "https://api.python.langchain.com/en/latest/_sources",
        "https://api.python.langchain.com/en/latest/_modules",
    )
    api_loader = RecursiveUrlLoader(
        url="https://api.python.langchain.com/en/latest/api_reference.html",
        max_depth=1,
        extractor=simple_extractor,
        prevent_outside=True,
        use_async=True,
        timeout=600,
        link_regex=href_pattern,
        check_response_status=True,
        exclude_dirs=skipped_dirs,
    )
    return api_loader.load()

def load_wiki_docs(
    url="http://wiki.skplanet.com/pages/viewpage.action?pageId=295656385",
    extractor=None,
):
    """Load a wiki page with RecursiveUrlLoader2 (synchronous, depth 1).

    Args:
        url: Page to start from. Defaults to the wiki page this notebook works with.
        extractor: Callable turning raw HTML into text. Defaults to ``simple_extractor``;
            pass e.g. ``simple_extractor2`` to try a different extraction strategy
            without duplicating the loader configuration.

    Returns:
        The result of ``RecursiveUrlLoader2.load()`` for ``url``.
    """
    # Resolved at call time so the default always refers to the current simple_extractor.
    if extractor is None:
        extractor = simple_extractor

    return RecursiveUrlLoader2(
        url=url,
        max_depth=1,
        extractor=extractor,
        prevent_outside=True,
        use_async=False,
        timeout=600,
        # Drop trailing / to avoid duplicate pages.
        link_regex=(
            f"href=[\"']{PREFIXES_TO_IGNORE_REGEX}((?:{SUFFIXES_TO_IGNORE_REGEX}.)*?)"
            r"(?:[\#'\"]|\/[\#'\"])"
        ),
        check_response_status=True,
        # NOTE(review): these directories are on api.python.langchain.com, not on the
        # wiki host; they look carried over from load_api_docs — confirm they are needed.
        exclude_dirs=(
            "https://api.python.langchain.com/en/latest/_sources",
            "https://api.python.langchain.com/en/latest/_modules",
        ),
    ).load()

In [45]:
def ingest_docs(docs_from_documentation, docs_from_api):
    """Split the given documents into chunks and index them with full cleanup.

    Args:
        docs_from_documentation: Documents loaded from the documentation site.
        docs_from_api: Documents loaded from the API reference; concatenated after
            ``docs_from_documentation`` with ``+``.

    ``record_manager`` and ``vectorstore`` are not parameters: they are looked up in
    the enclosing (notebook) scope when the function runs. Nothing is returned;
    progress and the indexing statistics are printed.
    """
    print("Start docs transform")
    splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
    combined_docs = docs_from_documentation + docs_from_api
    docs_transformed = splitter.split_documents(combined_docs)

    # 'source' and 'title' are requested when querying the vector store, and Weaviate
    # errors at query time if a retrieved document lacks either attribute, so make
    # sure both keys exist on every chunk.
    for chunk in docs_transformed:
        for required_key in ("source", "title"):
            chunk.metadata.setdefault(required_key, "")

    print("Start index")
    indexing_stats = index(
        docs_transformed,
        record_manager,
        vectorstore,
        cleanup="full",
        source_id_key="source",
    )
    print("Indexing stats: ", indexing_stats)

In [None]:
# Fetch the wiki page with the same loader settings as load_wiki_docs(), except that
# text is extracted with simple_extractor2, then show the first document's text.
docs = RecursiveUrlLoader2(
    url="http://wiki.skplanet.com/pages/viewpage.action?pageId=295656385",
    max_depth=1,
    extractor=simple_extractor2,
    prevent_outside=True,
    use_async=False,
    timeout=600,
    # Drop trailing / to avoid duplicate pages.
    link_regex=(
        f"href=[\"']{PREFIXES_TO_IGNORE_REGEX}((?:{SUFFIXES_TO_IGNORE_REGEX}.)*?)"
        r"(?:[\#'\"]|\/[\#'\"])"
    ),
    check_response_status=True,
    exclude_dirs=(
        "https://api.python.langchain.com/en/latest/_sources",
        "https://api.python.langchain.com/en/latest/_modules",
    ),
).load()
print(docs[0].page_content)

In [23]:
# Load the documentation pages via the sitemap loader and report how many were loaded.
docs_from_documentation = load_langchain_docs()
print(f"Loaded {len(docs_from_documentation)} docs from documentation")

Fetching pages: 100%|#################################################################################################################################################################| 1053/1053 [00:23<00:00, 43.95it/s]


Loaded 1053 docs from documentation


In [24]:
# Load the API reference; the recorded output reports a single document.
docs_from_api = load_api_docs()
print(f"Loaded {len(docs_from_api)} docs from API")

Loaded 1 docs from API


In [25]:
# Print the first API document object itself.
print(docs_from_api[0])



In [27]:
# Print the extracted text of the first API document to check the extractor's result.
print(docs_from_api[0].page_content)

langchain API Reference — 🦜🔗 LangChain 0.0.339rc1

API

Core

Experimental

Python Docs

Toggle Menu

Prev
Up
Next

LangChain 0.0.339rc1

langchain API Reference
langchain.adapters
Classes
Functions

langchain.agents
Classes
Functions

langchain.agents.format_scratchpad
Functions

langchain.agents.output_parsers
Classes
Functions

langchain.cache
Classes
Functions

langchain.callbacks
Classes
Functions

langchain.chains
Classes
Functions

langchain.chat_loaders
Classes
Functions

langchain.chat_models
Classes
Functions

langchain.docstore
Classes

langchain.document_loaders
Classes
Functions

langchain.document_transformers
Classes
Functions

langchain.embeddings
Classes
Functions

langchain.evaluation
Classes
Functions

langchain.graphs
Classes
Functions

langchain.hub
Functions

langchain.indexes
Classes
Functions

langchain.llms
Classes
Functions

langchain.memory
Classes
Functions

langchain.model_laboratory
Classes

langchain.output_parsers
Classes
Functions

langchain.prompts
Cla

In [18]:
# Load the wiki page(s) and report how many documents came back.
docs_from_wiki = load_wiki_docs()
print(f"Loaded {len(docs_from_wiki)} docs from wiki")

###load() start
###requests.get start
Loaded 1 docs from API


In [28]:
# Print the extracted text of the first wiki document.
# NOTE(review): the recorded output contains internal wiki content, including a user's
# name — consider clearing this output before sharing the notebook.
print(docs_from_wiki[0].page_content)

03. 검색 인프라 - 추천 검색 플랫폼 - Global Site

Skip to main content
assistive.skiplink.to.breadcrumbs
assistive.skiplink.to.header.menu
assistive.skiplink.to.action.menu
assistive.skiplink.to.quick.search

윤태형(TaeHyoung Yun)/커머스플랫폼개발팀/SKP

			                                Personal space
                    

			                                Recently viewed
                    

			                                Recently worked on
                    

			                                User dashboard
                    

			                                Profile
                    

			                                Tasks
                    

			                                Saved for later
                    

			                                Watches
                    

			                                Drafts
                    

			                                Network
                    

			                                Settings
                    

			            

In [121]:
# Reset with the _clear() helper, which runs index() on an empty list with cleanup="full".
_clear()

In [122]:
# Print the current local time just before the ingest run.
now = datetime.datetime.now()
print(now.strftime("%Y-%m-%d %H:%M:%S"))

2023-11-28 15:00:28


In [123]:
# Use only the first four documentation pages for this ingest run.
docs_selected = docs_from_documentation[:4]

In [124]:
# Chunk and index the selected documentation pages together with the API documents.
ingest_docs(docs_selected, docs_from_api)

Start docs transform
Start index
Indexing stats:  {'num_added': 20, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}


In [125]:
# Print the current local time right after the ingest run.
now = datetime.datetime.now()
print(now.strftime("%Y-%m-%d %H:%M:%S"))

2023-11-28 15:00:34


In [126]:
# Count the keys currently tracked by the record manager.
keys = record_manager.list_keys()
len(keys)

20

In [127]:
# Request up to 30 results for the query "initial".
docs = vectorstore.similarity_search("initial", k=30)

In [128]:
# NOTE(review): the recorded output shows 21 hits while the record manager listed 20 keys;
# the extra hit is presumably the seed "Initial document" that was put into FAISS directly
# rather than through index() — confirm.
print(len(docs))
print(docs[0].page_content)

21
Initial document
