diff --git a/notebooks/working-with-vector-data/notebook.ipynb b/notebooks/working-with-vector-data/notebook.ipynb index 1737dd7d..d51b6a9a 100644 --- a/notebooks/working-with-vector-data/notebook.ipynb +++ b/notebooks/working-with-vector-data/notebook.ipynb @@ -31,6 +31,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "782ffea9-fbc0-4942-8a1a-da8788ed2fec", "metadata": {}, @@ -64,6 +65,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "d087092f-696c-4735-9c66-33b8efc885ca", "metadata": {}, @@ -85,6 +87,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "9b1cd9d3", "metadata": {}, @@ -99,6 +102,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "e2e322f5-b81d-4249-b512-bd36f88aa168", "metadata": {}, @@ -120,6 +124,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1f2db020-7f76-44d0-9b32-cc81d35979ef", "metadata": {}, @@ -134,6 +139,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "3e65bd3b-49b4-48ca-8409-e3da89ebcce4", "metadata": {}, @@ -159,13 +165,23 @@ "execution_count": 5, "id": "aab67ef8-8bd1-4f9e-957a-ac8248984f4f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "[array([0.94529617, 0.5391597 , 0.3666218 , 0.27040002, 0.33434793,\n 0.81942284, 0.7387169 , 0.0188459 , 0.07248586, 0.5413058 ],\n dtype=float32),\n array([0.39620587, 0.56646174, 0.09738464, 0.6073699 , 0.86925113,\n 0.40876037, 0.17535466, 0.5120548 , 0.03570552, 0.842908 ],\n dtype=float32),\n array([0.10918448, 0.43081337, 0.03388631, 0.55986017, 0.80183506,\n 0.6763027 , 0.25283858, 0.41930103, 0.7678156 , 0.13405219],\n dtype=float32)]" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "vec_f32 = [np.random.rand(10).astype(np.float32) for _ in range(1000)]\n", "vec_f32[:3]" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1539013e-6ad8-49cc-aafd-e0aa5c2dbf60", "metadata": {}, @@ -181,13 +197,23 @@ "execution_count": 6, "id": "c72202fa-3a15-42a0-83f2-2650a6d5faa6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "[[0.9452961683273315,\n 0.5391597151756287,\n 0.36662179231643677,\n 0.2704000174999237,\n 0.3343479335308075,\n 0.8194228410720825,\n 0.7387169003486633,\n 0.018845897167921066,\n 0.07248586416244507,\n 0.5413057804107666],\n [0.396205872297287,\n 0.5664617419242859,\n 0.09738463908433914,\n 0.6073698997497559,\n 0.8692511320114136,\n 0.4087603688240051,\n 0.17535465955734253,\n 0.5120548009872437,\n 0.03570551797747612,\n 0.8429080247879028],\n [0.10918448120355606,\n 0.43081337213516235,\n 0.03388631343841553,\n 0.5598601698875427,\n 0.8018350601196289,\n 0.6763026714324951,\n 0.2528385818004608,\n 0.41930103302001953,\n 0.7678155899047852,\n 0.13405218720436096]]" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "vec_f32_list = [list([float(y) for y in x]) for x in vec_f32]\n", "vec_f32_list[:3]" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "ebe71955-7358-4c7c-add8-162f5bca098a", "metadata": {}, @@ -199,6 +225,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "2860a4f6-bfc6-4bc0-89d8-6c9d765f1240", "metadata": {}, @@ -220,6 +247,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "66e77736-4625-481b-9991-d7e7f28401cb", "metadata": {}, @@ -228,6 +256,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "2d1453cd-21d2-4843-a41a-6aa1a33ce0a1", "metadata": {}, @@ -249,6 +278,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a752e82f-bdf9-442e-94eb-9e29459da840", "metadata": {}, @@ -261,12 +291,22 @@ "execution_count": 9, "id": "9fcdb1ce-254b-4420-815e-76cb2199ac05", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "[array([0.94529617, 0.5391597 , 0.3666218 , 0.27040002, 0.33434793,\n 0.81942284, 0.7387169 , 0.0188459 , 0.07248586, 0.5413058 ],\n dtype=float32),\n array([0.39620587, 0.56646174, 0.09738464, 0.6073699 , 0.86925113,\n 0.40876037, 0.17535466, 0.5120548 , 0.03570552, 0.842908 ],\n dtype=float32),\n array([0.10918448, 0.43081337, 0.03388631, 0.55986017, 0.80183506,\n 0.6763027 , 0.25283858, 0.41930103, 0.7678156 , 0.13405219],\n dtype=float32)]" + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "vec_f32[:3]" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "df0f98b0-d916-4113-a34c-e0c13cffa242", "metadata": {}, @@ -279,12 +319,22 @@ "execution_count": 10, "id": "b55d0954-9e8c-468b-b1da-019a3adf4fd2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "1000" + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "cursor.executemany('INSERT INTO vectors(vec_f32) VALUES (%s)', vec_f32)" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "f929f1ed-2ee1-4209-a27d-121bec2a3a79", "metadata": {}, @@ -298,12 +348,22 @@ "execution_count": 11, "id": "218071ef-0742-460b-b0a4-b079970ae568", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "5" + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "cursor.execute('SELECT vec_f32 FROM vectors LIMIT 5')" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "22892481-3d71-48aa-abe3-ffd63b309419", "metadata": {}, @@ -316,13 +376,23 @@ "execution_count": 12, "id": "52bfac93-5503-4144-8700-95db21f13897", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "[array([0.7970012 , 0.34688511, 0.14492278, 0.73332036, 0.37237272,\n 0.15225586, 0.26400378, 0.1706023 , 0.2066024 , 0.2986435 ],\n dtype=float32),\n array([0.15736586, 0.7116634 , 0.55218774, 0.8527479 , 0.8662606 ,\n 0.21808125, 0.19137949, 0.19946271, 0.31750116, 0.4048979 ],\n dtype=float32),\n array([0.329683 , 0.7214109 , 0.48456433, 0.6002015 , 0.4030805 ,\n 0.32164323, 0.02634622, 0.10913838, 0.13508031, 0.33974582],\n dtype=float32),\n array([0.35889304, 0.37261793, 0.22267127, 0.57628405, 0.10873934,\n 0.66360027, 0.67708856, 0.69097304, 0.8924684 , 0.07560002],\n dtype=float32),\n array([0.46105748, 0.24171682, 0.3059963 , 0.95824414, 0.33805165,\n 0.30686185, 0.89336896, 0.70329565, 0.45199844, 0.6623023 ],\n dtype=float32)]" + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "out_f32 = [np.frombuffer(x[0], dtype=np.float32) for x in cursor]\n", "out_f32" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "390b149f-8039-43ee-ae43-215ea7997a4f", "metadata": {}, @@ -356,7 +426,16 @@ "execution_count": 14, "id": "a0777da4-daba-4b06-8fb6-c7fcc30dcc25", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "[b'\\xee\\xfeq?_\\x06\\n?\\xda\\xb5\\xbb>\\xdfq\\x8a>\\xa7/\\xab>\\xb2\\xc5Q?\\x8d\\x1c=?\\xb6b\\x9a\\xa3\\x03\\x11?\\x99q\\xc7=\\x98|\\x1b?>\\x87^?\\nI\\xd1>,\\x903>\\x06\\x16\\x03?\\xf3?\\x12=\\xd2\\xc8W?',\n b'\\x1d\\x9c\\xdf=\\x92\\x93\\xdc>`\\xcc\\n=\\xffR\\x0f?\\x10EM?,\"-?\\x0ft\\x81>\\xa0\\xae\\xd6>\\x90\\x8fD?\\xfaD\\t>']" + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import struct\n", "\n", @@ -368,6 +447,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "77a3b930-33cd-4436-a021-9e99ed94cd9c", "metadata": {}, @@ -380,7 +460,16 @@ "execution_count": 15, "id": "0a1f4d5b-50f1-4987-b8f8-613b2b6f03bd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "1000" + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "cursor.executemany('INSERT INTO vectors(vec_f32) VALUES (%s)', vec_f32_list_bytes)" ] @@ -390,12 +479,22 @@ "execution_count": 16, "id": "171acbee-c663-4073-843b-a3f83fa0a99a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "5" + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "cursor.execute('SELECT vec_f32 FROM vectors LIMIT 5')" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "b0b40daa-52a9-4bf8-aefd-4722974cb8f5", "metadata": {}, @@ -408,13 +507,23 @@ "execution_count": 17, "id": "63490736-c68b-49d5-8db1-8ec203c7a583", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "[[0.3937252461910248,\n 0.5025281310081482,\n 0.17226243019104004,\n 0.7201003432273865,\n 0.987917423248291,\n 0.36919161677360535,\n 0.03498654067516327,\n 0.7478368878364563,\n 0.34253644943237305,\n 0.33940786123275757],\n [0.761231005191803,\n 0.8932342529296875,\n 0.06776423007249832,\n 0.8769919276237488,\n 0.48779383301734924,\n 0.9544709920883179,\n 0.8270399570465088,\n 0.9150049686431885,\n 0.8350704908370972,\n 0.9739500880241394],\n [0.9656015634536743,\n 0.4987963140010834,\n 0.6006644368171692,\n 0.000701306969858706,\n 0.5339081287384033,\n 0.22828376293182373,\n 0.3365790545940399,\n 0.2838159203529358,\n 0.3415278196334839,\n 0.7082713842391968]]" + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "out_f32_list = [list(struct.unpack(fmt, x[0])) for x in cursor]\n", "out_f32_list[:3]" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "91927fbd-f19c-448a-926a-d4ee8dc3e607", "metadata": {}, @@ -442,6 +551,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6ca1960a-a55e-465c-a4f2-3daeb56e2739", "metadata": {}, @@ -451,6 +561,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "473114ce-4b51-484d-90d9-eaafce4d4b58", "metadata": {}, @@ -469,6 +580,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "86eff78c-4b8f-40d1-bc9f-978fd39dada6", "metadata": {}, @@ -482,13 +594,23 @@ "execution_count": 20, "id": "03905527-9239-4fd7-9a9b-4c35da0b7447", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "query = sa.text('INSERT INTO vectors(vec_f32) VALUES (:vec_f32)')\n", "conn.execute(query, [dict(vec_f32=x) for x in vec_f32])" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "f95fb2be-e513-4555-b580-118f337e0f19", "metadata": {}, @@ -507,6 +629,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "f7bc12cf-6ce6-4c20-8fa4-e83c2cb49e71", "metadata": {}, @@ -519,13 +642,23 @@ "execution_count": 22, "id": "3391ee73-86c5-4913-b412-bf4d12fb9b68", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "[array([0.06707381, 0.3336899 , 0.23638362, 0.54146034, 0.21330866,\n 0.57814604, 0.7436944 , 0.21778256, 0.32921487, 0.18143076],\n dtype=float32),\n array([0.17625922, 0.6122456 , 0.65093136, 0.680956 , 0.99456173,\n 0.785619 , 0.8397423 , 0.34446132, 0.9549833 , 0.53008443],\n dtype=float32),\n array([0.12105445, 0.27007556, 0.33191404, 0.35239697, 0.104354 ,\n 0.560923 , 0.95614606, 0.6793355 , 0.12789273, 0.01870769],\n dtype=float32),\n array([0.05535996, 0.13312466, 0.9434161 , 0.52270526, 0.24034844,\n 0.73964477, 0.8723515 , 0.02157358, 0.14537902, 0.8052284 ],\n dtype=float32),\n array([0.17335513, 0.87024 , 0.11818643, 0.40915504, 0.65390265,\n 0.519701 , 0.1028851 , 0.8442223 , 0.64491796, 0.31468135],\n dtype=float32)]" + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "out_f32 = [np.frombuffer(x[0], dtype=np.float32) for x in result]\n", "out_f32" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "893ff3b8-5f16-4736-b157-52eec72a6fea", "metadata": {}, @@ -548,6 +681,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "3a09006f-6c12-40fd-b3d7-cd3b4c33f040", "metadata": {}, @@ -561,7 +695,16 @@ "execution_count": 24, "id": "465ffe10-cc32-400c-adec-f4e91f25fb98", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "[b'\\xee\\xfeq?_\\x06\\n?\\xda\\xb5\\xbb>\\xdfq\\x8a>\\xa7/\\xab>\\xb2\\xc5Q?\\x8d\\x1c=?\\xb6b\\x9a\\xa3\\x03\\x11?\\x99q\\xc7=\\x98|\\x1b?>\\x87^?\\nI\\xd1>,\\x903>\\x06\\x16\\x03?\\xf3?\\x12=\\xd2\\xc8W?',\n b'\\x1d\\x9c\\xdf=\\x92\\x93\\xdc>`\\xcc\\n=\\xffR\\x0f?\\x10EM?,\"-?\\x0ft\\x81>\\xa0\\xae\\xd6>\\x90\\x8fD?\\xfaD\\t>']" + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "vec_f32_list_bytes[:3]" ] @@ -571,7 +714,16 @@ "execution_count": 25, "id": "97b2069f-2cd2-4af5-95cc-87637d1fc838", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "query = sa.text('INSERT INTO vectors(vec_f32) VALUES (:vec_f32)')\n", "conn.execute(query, [dict(vec_f32=x) for x in vec_f32_list_bytes])" @@ -588,6 +740,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8fa7bd8e-8842-438f-a336-e93ecc321820", "metadata": {}, @@ -600,13 +753,23 @@ "execution_count": 27, "id": "78b0619f-a057-4edb-a230-1e96c5b0b2e7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "[[0.6067176461219788,\n 0.2106485515832901,\n 0.6345869302749634,\n 0.6352039575576782,\n 0.829525887966156,\n 0.2823314368724823,\n 0.017225714400410652,\n 0.22034095227718353,\n 0.24362443387508392,\n 0.7712428569793701],\n [0.26649677753448486,\n 0.6021978259086609,\n 0.8979067206382751,\n 0.9429398775100708,\n 0.589701771736145,\n 0.24339258670806885,\n 0.3752290904521942,\n 0.34352484345436096,\n 0.647399365901947,\n 0.19694264233112335],\n [0.6997039914131165,\n 0.08066725730895996,\n 0.19695895910263062,\n 0.08963707834482193,\n 0.3289657235145569,\n 0.8245747089385986,\n 0.782729983329773,\n 0.43013912439346313,\n 0.765410304069519,\n 0.8552709817886353]]" + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "out_f32_list = [list(struct.unpack(fmt, x[0])) for x in result]\n", "out_f32_list[:3]" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "2b2db64d-8e96-4f59-b91a-3731ee934287", "metadata": {}, @@ -639,6 +802,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "91b876a8-da7d-48b5-89a9-8149fab91566", "metadata": {}, @@ -647,6 +811,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "fdf50e43-68a2-4cfb-a6a0-215d442f27c8", "metadata": {}, @@ -659,13 +824,24 @@ "execution_count": 30, "id": "7f871623-9176-4865-97f4-5e89cf7c3a70", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
vec_f32
0[0.94529617, 0.5391597, 0.3666218, 0.27040002,...
1[0.39620587, 0.56646174, 0.09738464, 0.6073699...
2[0.10918448, 0.43081337, 0.033886313, 0.559860...
3[0.027094776, 0.03226529, 0.49422556, 0.171387...
4[0.65606296, 0.022113293, 0.57438064, 0.867151...
\n
", + "text/plain": " vec_f32\n0 [0.94529617, 0.5391597, 0.3666218, 0.27040002,...\n1 [0.39620587, 0.56646174, 0.09738464, 0.6073699...\n2 [0.10918448, 0.43081337, 0.033886313, 0.559860...\n3 [0.027094776, 0.03226529, 0.49422556, 0.171387...\n4 [0.65606296, 0.022113293, 0.57438064, 0.867151..." + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pd.DataFrame(dict(vec_f32=pd.Series(vec_f32)))\n", "df.head()" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "c37150fa-e5f1-49d5-b13b-e26e9e88ed92", "metadata": {}, @@ -679,12 +855,22 @@ "execution_count": 31, "id": "1a853637-f29e-434a-9dd4-d2fb92bc4597", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "1000" + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.to_sql('vectors', con=conn, if_exists='append', index=False)" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "67fdc9d4-9d48-4af9-a4f9-b643a43992b9", "metadata": {}, @@ -698,13 +884,24 @@ "execution_count": 32, "id": "a75c5726-0ee7-4876-aac7-e71dc9752eae", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
vec_f32
0b\"\\xc4.\\x81<\\x8a\\x0fW?\\x11~\\xcd>_\\x82@?Vq\\x05?...
1b'rp\\xdc>U\\xd7\\x89>\\xe6BC?\\xe7\\xcd\\xfb>P\\xe4\\x...
2b\"\\xaf\\x10,?\\xc9\\x8c\\\\?\\xa3\\xccQ>c\\xd0'?\\xe2y\\...
\n
", + "text/plain": " vec_f32\n0 b\"\\xc4.\\x81<\\x8a\\x0fW?\\x11~\\xcd>_\\x82@?Vq\\x05?...\n1 b'rp\\xdc>U\\xd7\\x89>\\xe6BC?\\xe7\\xcd\\xfb>P\\xe4\\x...\n2 b\"\\xaf\\x10,?\\xc9\\x8c\\\\?\\xa3\\xccQ>c\\xd0'?\\xe2y\\..." + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "out_df = pd.read_sql('vectors', con=conn)\n", "out_df.head(3)" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "9d774b5f-88f9-45b3-a54d-229020aa16af", "metadata": {}, @@ -727,12 +924,23 @@ "execution_count": 34, "id": "c4e77d6b-a93c-47d2-89ce-b1c502950c71", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
vec_f32
0[0.01576937, 0.84008086, 0.40135244, 0.7519893...
1[0.4305454, 0.26922098, 0.76273954, 0.4918053,...
2[0.6721296, 0.8615232, 0.20488219, 0.6555235, ...
\n
", + "text/plain": " vec_f32\n0 [0.01576937, 0.84008086, 0.40135244, 0.7519893...\n1 [0.4305454, 0.26922098, 0.76273954, 0.4918053,...\n2 [0.6721296, 0.8615232, 0.20488219, 0.6555235, ..." + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "out_df.head(3)" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "71b184dd-641c-4ef0-91cf-c581143d3945", "metadata": {}, @@ -754,6 +962,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6b0fa295-99e9-4846-9996-a704df463a36", "metadata": {}, @@ -766,13 +975,24 @@ "execution_count": 36, "id": "1ed1b6c2-3c79-42b9-a671-41b2828c4c31", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
vec_f32
0[0.9452961683273315, 0.5391597151756287, 0.366...
1[0.396205872297287, 0.5664617419242859, 0.0973...
2[0.10918448120355606, 0.43081337213516235, 0.0...
\n
", + "text/plain": " vec_f32\n0 [0.9452961683273315, 0.5391597151756287, 0.366...\n1 [0.396205872297287, 0.5664617419242859, 0.0973...\n2 [0.10918448120355606, 0.43081337213516235, 0.0..." + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df = pd.DataFrame(dict(vec_f32=vec_f32_list))\n", "df.head(3)" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "94263962-9ec2-4e34-a08e-1e2ad41247dd", "metadata": {}, @@ -786,7 +1006,16 @@ "execution_count": 37, "id": "3cb1b6e1-a732-4a2f-a751-095d6727e6ae", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "'<10f'" + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "fmt" ] @@ -806,12 +1035,22 @@ "execution_count": 39, "id": "af739340-e5fd-482a-96c8-5eedf8202f1c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "0 b'\\xee\\xfeq?_\\x06\\n?\\xda\\xb5\\xbb>\\xdfq\\x8a>\\xa...\n1 b'\\x7f\\xdb\\xca>\\xa3\\x03\\x11?\\x99q\\xc7=\\x98|\\x1...\n2 b'\\x1d\\x9c\\xdf=\\x92\\x93\\xdc>`\\xcc\\n=\\xffR\\x0f?...\nName: vec_f32, dtype: object" + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df['vec_f32'].head(3)" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6f2d8675-c1ee-44d2-ac17-eef1c543d71c", "metadata": {}, @@ -824,7 +1063,16 @@ "execution_count": 40, "id": "49dde7bd-9823-4c55-8f34-4e16643e6b8e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "1000" + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.to_sql('vectors', con=conn, if_exists='append', index=False)" ] @@ -834,13 +1082,24 @@ "execution_count": 41, "id": "137a7f8e-d713-4179-bcad-66f194d1f839", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
vec_f32
0b'>kS?;\\xec\\x8b>0\\xe5\\x19?\\r\\xddF?\\xf2\\x044?u\\...
1b'\\xedJ[=\\xbfq\\xce=G#\\xa4<\\xcbV\\xe3=\\xeb:;?\\xa...
2b'\\x0e\\x08n>\\xe8\\xb2\\x98>\\x10\\x133>\\xd4\\xf7\\x1...
\n
", + "text/plain": " vec_f32\n0 b'>kS?;\\xec\\x8b>0\\xe5\\x19?\\r\\xddF?\\xf2\\x044?u\\...\n1 b'\\xedJ[=\\xbfq\\xce=G#\\xa4<\\xcbV\\xe3=\\xeb:;?\\xa...\n2 b'\\x0e\\x08n>\\xe8\\xb2\\x98>\\x10\\x133>\\xd4\\xf7\\x1..." + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "out_df = pd.read_sql('vectors', con=conn)\n", "out_df.head(3)" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "99233fdb-57b2-4290-9038-7c3e5eaf553e", "metadata": {}, @@ -863,12 +1122,23 @@ "execution_count": 43, "id": "2924f8b8-f543-4a2f-90c8-8e6e5c15275d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
vec_f32
0[0.8258551359176636, 0.2732866704463959, 0.601...
1[0.0535382516682148, 0.10080289095640182, 0.02...
2[0.2324526011943817, 0.29823994636535645, 0.17...
\n
", + "text/plain": " vec_f32\n0 [0.8258551359176636, 0.2732866704463959, 0.601...\n1 [0.0535382516682148, 0.10080289095640182, 0.02...\n2 [0.2324526011943817, 0.29823994636535645, 0.17..." + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "out_df.head(3)" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8f070295-78e3-4137-82d6-8be8c64b3898", "metadata": {}, @@ -892,6 +1162,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "3ac2349f-d2bd-452d-9e4f-d869ef0e774f", "metadata": {}, @@ -909,7 +1180,16 @@ "execution_count": 45, "id": "f6781046-e636-4495-8a99-e035db8988aa", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "'eefe713f5f060a3fdab5bb3edf718a3ea72fab3eb2c5513f8d1c3d3fb6629a3c7873943d04930a3f'" + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Convert an element of the numpy array to a hex string\n", "vec_f32[0].tobytes().hex()" @@ -920,13 +1200,23 @@ "execution_count": 46, "id": "957f98e1-c3d5-4e7c-b43a-5583cdff045e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "'eefe713f5f060a3fdab5bb3edf718a3ea72fab3eb2c5513f8d1c3d3fb6629a3c7873943d04930a3f'" + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Convert an element of the Python list to a hex string\n", "struct.pack(fmt, *vec_f32_list[0]).hex()" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "5424355e-fffb-4cc7-b0c3-eba7012d1bd1", "metadata": {}, @@ -942,7 +1232,16 @@ "execution_count": 47, "id": "e269be32-5b56-4e19-baed-6420d6fd4bfb", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "[\"(X'eefe713f5f060a3fdab5bb3edf718a3ea72fab3eb2c5513f8d1c3d3fb6629a3c7873943d04930a3f')\",\n \"(X'7fdbca3ea303113f9971c73d987c1b3f3e875e3f0a49d13e2c90333e0616033ff33f123dd2c8573f')\",\n \"(X'1d9cdf3d9293dc3e60cc0a3dff520f3f10454d3f2c222d3f0f74813ea0aed63e908f443ffa44093e')\"]" + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "params = [\"(X'{}')\".format(x.tobytes().hex()) for x in vec_f32]\n", "params[:3]" @@ -953,13 +1252,24 @@ "execution_count": 48, "id": "bd51d277-eec1-4787-b9b9-7a943f3eea0c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": "\n \n \n \n \n \n \n
", + "text/plain": "++\n||\n++\n++" + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "%%sql\n", "INSERT INTO vectors(vec_f32) VALUES {{ ','.join(params) }}" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "5b982cc3-5e8a-460c-beff-440dbae58144", "metadata": {}, @@ -983,12 +1293,23 @@ "execution_count": 50, "id": "b10b2f71-02fd-4630-8ae0-7845a8385934", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": "\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
vec_f32
b'\\xfe\\xd7@?\\xecr\\xec>\\xbdW1?\\xbb_\\xcb>\\x88\\xb9\\xf4>\\x04G\\xaa>#d\\xf3=\\x07\\xb5\\xcb>\\xcd\\xd7&?{Es?'
b'\\x0e\\x08n>\\xe8\\xb2\\x98>\\x10\\x133>\\xd4\\xf7\\x1b?Q$-?t\\x11\\xfa>,}S?\\xa8\\x14k;\\x1a\\xf8h>\\xf8\\xbc-?'
b'1~\\x01>\\xb7>/?\\xd3\\x10\\x1f?Z\\xcc\\x05=>X\\xa8>\\x01\\r\\x1b>Q\\xf1\\xc3>/-\\xab=\\xea\\x9c6?\\xbc\\xd6|?'
b'\\xe3\\x02\\xba=zz)?<\\xa33?\\x15\\x03\\x14>\\x99\\x97\\x19?(\\x13#?!\\xe9\\xe9>\\xba#i?\\xdc\\xe1y?\\xe1\\xc1)?'
b'\\x8c\\x86\\x12?\\xc3+h?\\xba?=<\\xa45\\xda>\\xb5\\xf2\\r?\\xdb\\xa8\\x08?]\\x84&?\\xf2\\xd1s?\\xec\\xce\\xab>\\x10\\x19k?'
", + "text/plain": "+----------------------------------------------------------------------------------------------------------+\n| vec_f32 |\n+----------------------------------------------------------------------------------------------------------+\n| b'\\xfe\\xd7@?\\xecr\\xec>\\xbdW1?\\xbb_\\xcb>\\x88\\xb9\\xf4>\\x04G\\xaa>#d\\xf3=\\x07\\xb5\\xcb>\\xcd\\xd7&?{Es?' |\n| b'\\x0e\\x08n>\\xe8\\xb2\\x98>\\x10\\x133>\\xd4\\xf7\\x1b?Q$-?t\\x11\\xfa>,}S?\\xa8\\x14k;\\x1a\\xf8h>\\xf8\\xbc-?' |\n| b'1~\\x01>\\xb7>/?\\xd3\\x10\\x1f?Z\\xcc\\x05=>X\\xa8>\\x01\\r\\x1b>Q\\xf1\\xc3>/-\\xab=\\xea\\x9c6?\\xbc\\xd6|?' |\n| b'\\xe3\\x02\\xba=zz)?<\\xa33?\\x15\\x03\\x14>\\x99\\x97\\x19?(\\x13#?!\\xe9\\xe9>\\xba#i?\\xdc\\xe1y?\\xe1\\xc1)?' |\n| b'\\x8c\\x86\\x12?\\xc3+h?\\xba?=<\\xa45\\xda>\\xb5\\xf2\\r?\\xdb\\xa8\\x08?]\\x84&?\\xf2\\xd1s?\\xec\\xce\\xab>\\x10\\x19k?' |\n+----------------------------------------------------------------------------------------------------------+" + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "out" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "3178ea48-2bfd-44d2-8211-a291dd5bf5ba", "metadata": {}, @@ -1022,12 +1343,23 @@ "execution_count": 53, "id": "821ac65c-b8d5-47f5-8b14-945ed8e8d1fa", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
vec_f32
0[0.7532958, 0.46181428, 0.69274503, 0.39721474...
1[0.2324526, 0.29823995, 0.1748774, 0.6092503, ...
2[0.12645794, 0.6845507, 0.62135047, 0.03266558...
\n
", + "text/plain": " vec_f32\n0 [0.7532958, 0.46181428, 0.69274503, 0.39721474...\n1 [0.2324526, 0.29823995, 0.1748774, 0.6092503, ...\n2 [0.12645794, 0.6845507, 0.62135047, 0.03266558..." + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "out_df.head(3)" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "ad5c2b96-0002-4948-87a4-949a68c3e0a2", "metadata": {}, @@ -1055,7 +1387,16 @@ "execution_count": 55, "id": "79f06760-9039-408a-a4c2-6331947dd3e4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "['(JSON_ARRAY_PACK(\"[0.9452961683273315, 0.5391597151756287, 0.36662179231643677, 0.2704000174999237, 0.3343479335308075, 0.8194228410720825, 0.7387169003486633, 0.018845897167921066, 0.07248586416244507, 0.5413057804107666]\"))',\n '(JSON_ARRAY_PACK(\"[0.396205872297287, 0.5664617419242859, 0.09738463908433914, 0.6073698997497559, 0.8692511320114136, 0.4087603688240051, 0.17535465955734253, 0.5120548009872437, 0.03570551797747612, 0.8429080247879028]\"))',\n '(JSON_ARRAY_PACK(\"[0.10918448120355606, 0.43081337213516235, 0.03388631343841553, 0.5598601698875427, 0.8018350601196289, 0.6763026714324951, 0.2528385818004608, 0.41930103302001953, 0.7678155899047852, 0.13405218720436096]\"))']" + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "params = ['(JSON_ARRAY_PACK(\"{}\"))'.format(json.dumps(x)) for x in vec_f32_list]\n", "params[:3]" @@ -1066,13 +1407,24 @@ "execution_count": 56, "id": "92217c8d-f374-49a6-8fb8-f21666681f95", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": "\n \n \n \n \n \n \n
", + "text/plain": "++\n||\n++\n++" + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "%%sql\n", "INSERT INTO vectors(vec_f32) VALUES {{ ','.join(params) }}" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "bd323c0a-7de4-4c56-9fc4-f2a22f4f661c", "metadata": {}, @@ -1096,7 +1448,17 @@ "execution_count": 58, "id": "9f9e9b6e-5a25-483b-9d40-aea95a302b5f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
vec_f32
0[0.177021876, 0.717939079, 0.825487614, 0.7764...
1[0.749338746, 0.589595854, 0.704872251, 0.9270...
2[0.971682549, 0.574555218, 0.174982488, 0.4692...
3[0.814588428, 0.773147047, 0.970053494, 0.9038...
4[0.247024894, 0.828292727, 0.599695325, 0.4499...
\n
", + "text/plain": " vec_f32\n0 [0.177021876, 0.717939079, 0.825487614, 0.7764...\n1 [0.749338746, 0.589595854, 0.704872251, 0.9270...\n2 [0.971682549, 0.574555218, 0.174982488, 0.4692...\n3 [0.814588428, 0.773147047, 0.970053494, 0.9038...\n4 [0.247024894, 0.828292727, 0.599695325, 0.4499..." + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "out = pd.DataFrame(out)\n", "out" @@ -1107,12 +1469,22 @@ "execution_count": 59, "id": "bbfe895a-0f09-4094-a835-793329ee388e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "[0.177021876,\n 0.717939079,\n 0.825487614,\n 0.77646929,\n 0.137723535,\n 0.358667195,\n 0.41495508,\n 0.027805429,\n 0.291372836,\n 0.413403481]" + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "out['vec_f32'][0]" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6bdb300a-a8f6-40cc-a5f9-de54508bb22b", "metadata": {}, @@ -1126,12 +1498,22 @@ "execution_count": 60, "id": "b5234146-b058-4462-b5d0-516ae699efc6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "list" + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "type(out['vec_f32'][0])" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8a8cfd32-c903-4e9b-b27b-253fcbca6ad4", "metadata": {}, @@ -1144,6 +1526,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "42060943", "metadata": {}, @@ -1195,7 +1578,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.6" } }, "nbformat": 4, diff --git a/resources/nb-check.py b/resources/nb-check.py index dc971943..0c8a5f8e 100755 --- a/resources/nb-check.py +++ b/resources/nb-check.py @@ -145,13 +145,10 @@ def new_markdown_cell(cell_id: str, content: list[str]) -> dict[str, Any]: cells = nb.get('cells', []) - # Remove metadata and outputs + # Remove metadata for i, cell in enumerate(cells): if 'metadata' in cell: cell['metadata'] = {} - # TODO: do not remove outputs once helios has migrated to published zips - if 'outputs' in cell: - cell['outputs'] = [] # Remove empty cells at the end of the notebook end = len(cells) - 1 @@ -253,6 +250,11 @@ def new_markdown_cell(cell_id: str, content: list[str]) -> dict[str, Any]: cell['execution_count'] = code_idx code_idx += 1 + if 'outputs' in cell and len(cell['outputs']) > 0: + for output in cell['outputs']: + if 'execution_count' in output: + output['execution_count'] = code_idx + with open(f, 'w') as outfile: outfile.write(json.dumps(nb, indent=2)) outfile.write('\n')