In [1]:
import sqlite3
import json

In [2]:
import numpy as np

import os
# Set up front, before the sentence_transformers import further down.
# Presumably read by the tokenizer backend to allow parallel tokenisation — TODO confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "True"

from datetime import datetime



In [3]:
# Open the local SQLite file and load ids, titles and embeddings from the
# page_title_embeddings table. A timestamp is printed between steps so each
# load can be timed.
smoldb = sqlite3.connect("smol.db")

print(datetime.now())

# All three queries use the same "order by id", so position i in ids, titles
# and embeddings refers to the same row.
ids = np.array([row_id for (row_id,) in smoldb.execute("select id from page_title_embeddings order by id")])

print(datetime.now())

titles = [title for (title,) in smoldb.execute("select title from page_title_embeddings order by id")]

print(datetime.now())

# Decode each JSON-encoded embedding straight into one preallocated float32
# matrix. Building a list of per-row arrays and converting it afterwards keeps
# two full copies of the data alive at the same time.
embeddings = None
rows_filled = 0
for (e,) in smoldb.execute("select embedding from page_title_embeddings order by id"):
    vector = np.array(json.loads(e), dtype=np.float32)
    if embeddings is None:
        # The first row fixes the vector width; the row count comes from titles,
        # which was read from the same table.
        embeddings = np.empty((len(titles),) + vector.shape, dtype=np.float32)
    embeddings[rows_filled] = vector
    rows_filled += 1

if embeddings is None:
    # Empty table: same value the list-based construction would have produced.
    embeddings = np.array([], dtype=np.float32)
elif rows_filled != len(titles):
    # Fewer embedding rows than titles would leave uninitialised rows behind.
    raise RuntimeError(
        f"read {rows_filled} embeddings but {len(titles)} titles from page_title_embeddings"
    )

print(datetime.now())

2023-04-18 22:10:28.798750
2023-04-18 22:10:31.738520
2023-04-18 22:10:34.945773
2023-04-18 22:14:00.632824


In [4]:
print(datetime.now())

# Total len() of every stored embedding value, streamed straight from the cursor.
# Python's built-in sum uses arbitrary-precision ints and needs no intermediate
# list; np.sum over a list materialises every length first and, where NumPy's
# default integer is 32-bit, can wrap around on a total of this size.
print(sum(
    len(e) for (e,) in smoldb.execute("select embedding from page_title_embeddings")
))

print(datetime.now())

# Disabled: parse every embedding and average the per-row sums.
#print(np.average(
#    [np.array(json.loads(e)).sum() for (e,) in smoldb.execute("select embedding from page_title_embeddings")]
#))

print(datetime.now())

2023-04-18 22:14:00.635745
18672184788
2023-04-18 22:14:05.639236
2023-04-18 22:14:05.639260


In [5]:
from multiprocessing import Pool

def npjsonloads(s, d=np.float32):
    """Decode the JSON text held in the first field of ``s`` into a NumPy array.

    ``s`` is indexed with ``[0]``, so it must be a row-like sequence (for
    example a one-column database row) whose first item is a JSON document.
    ``d`` is the dtype of the returned array and defaults to ``np.float32``.
    """
    json_text = s[0]
    parsed = json.loads(json_text)
    return np.array(parsed, dtype=d)
    

In [6]:
# Timing harness around a multiprocessing experiment that is currently disabled;
# with the body commented out, the two timestamps are effectively back to back.
print(datetime.now())

# Disabled: decode the embedding rows in a 32-process Pool via npjsonloads and
# print the average of the parsed values.
#with Pool(32) as p:
#    print(np.average(
#        p.map(npjsonloads, smoldb.execute("select embedding from page_title_embeddings"))
#    ))

print(datetime.now())


2023-04-18 22:14:05.693432
2023-04-18 22:14:05.693461


In [7]:
# Titles as a NumPy array, so they can be indexed with arrays of row positions.
nptitles = np.asarray(titles)

# Show the first five titles.
nptitles[:5]

array(['Anarchism', 'Albedo', 'A', 'Alabama', 'Achilles'], dtype='<U251')

In [8]:
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model by its identifier.
minilml6v2 = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Encode two free-text queries. cheeses_query drives the search cells in this
# notebook; onomato_query is not referenced by any cell visible here.
# NOTE(review): comparing these against the stored page-title embeddings assumes
# those were produced by this same model — confirm.
onomato_query = minilml6v2.encode("word that sounds like what it represents, like meow, oink, woof")
cheeses_query = minilml6v2.encode("cheeses")

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
cheeses_query

array([-6.28128201e-02, -1.18328584e-02, -4.85378765e-02,  7.56069869e-02,
       -6.29612729e-02,  5.24101360e-03,  8.88406560e-02,  3.82924452e-02,
        1.63012110e-02, -2.92984396e-02,  3.20276879e-02, -3.97614948e-02,
       -2.54703797e-02, -8.77302373e-04, -7.12979510e-02, -3.26210000e-02,
        7.33206421e-03,  7.21905679e-02, -9.11571644e-03, -4.77604903e-02,
       -4.73998226e-02, -2.30509676e-02, -2.15109996e-02,  3.86618674e-02,
       -1.05217239e-02,  6.76695630e-02,  1.06834043e-02, -8.60001333e-03,
        3.92565504e-04, -5.64423129e-02, -2.41308101e-02, -4.65472043e-02,
       -1.24787949e-02, -4.98961918e-02, -2.42104828e-02, -7.50152569e-04,
        9.32364836e-02,  2.36256863e-03,  5.46098687e-02, -3.35524371e-03,
       -1.13846706e-02, -2.73707714e-02,  2.27206759e-02,  7.84932077e-02,
       -5.59639558e-02,  2.68044788e-03, -4.89668660e-02,  1.22303493e-01,
        5.38281687e-02, -9.03165713e-03, -5.56312799e-02, -4.10511345e-02,
       -9.82284639e-03, -

In [10]:
# Toy corpus: the three unit axis vectors plus [1, 0, 1].
tinydocs = np.array(
    [
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
        [1, 0, 1],
    ],
    dtype=np.float32,
)
# Toy query that points mostly along the second axis.
tinyquery = np.array([0.1, 0.9, 0.1], dtype=np.float32)

# Euclidean distance from the query to each row; the query broadcasts across rows.
tinynorm = np.linalg.norm(tinydocs - tinyquery, axis=1)

In [11]:
tinynorm

array([1.2767144 , 0.17320509, 1.2767144 , 1.5588456 ], dtype=float32)

In [12]:
np.sqrt(np.sum(np.power(tinydocs - tinyquery, 2), axis=1))

array([1.2767144 , 0.17320509, 1.2767144 , 1.5588456 ], dtype=float32)

In [13]:
np.argsort(tinynorm)

array([1, 0, 2, 3])

In [14]:
np.argpartition(tinynorm, 0)

array([1, 0, 2, 3])

In [15]:
np.dot(tinydocs, tinyquery)

array([0.1, 0.9, 0.1, 0.2], dtype=float32)

In [16]:
print(datetime.now())
# Brute-force search: L2 distance from the query to every embedding, a full
# argsort, then the titles of the ten closest rows.
cheeses_l2 = np.linalg.norm(embeddings - cheeses_query, axis=1)
print(nptitles[np.argsort(cheeses_l2)[:10]])
print(datetime.now())

2023-04-18 22:14:08.823757
['Bergenost' 'Cheese' 'Types of cheese' 'List of blue cheeses'
 'Cheese (disambiguation)' 'Bergkäse' 'Cheesemaking' 'Ibores cheese'
 'List of Swiss cheeses' 'Cheese dream']
2023-04-18 22:14:13.401357


In [17]:
# Scratch buffers for the step-by-step timing cells that follow, so those steps
# can write into existing arrays via out= instead of allocating new ones.
embout = np.zeros_like(embeddings)
# One slot per embedding row. Sized directly from the shape: reducing the whole
# matrix with np.sum(embeddings, axis=1) only to copy the result's shape and
# dtype is a wasted full pass over the data.
normout = np.zeros(embeddings.shape[0], dtype=embeddings.dtype)
# Same shape and dtype as normout.
sortout = np.zeros_like(normout)

In [18]:

print(datetime.now())

# Squared L2 distance computed one in-place step at a time so each step can be
# timed separately. Step 1: difference to the query, written into embout.
np.subtract(embeddings, cheeses_query, out=embout)

print(datetime.now())

# Step 2: elementwise square, in place. np.square is the dedicated x*x ufunc;
# the generic np.power(embout, 2, ...) it replaces dominated this cell's runtime.
np.square(embout, out=embout)

print(datetime.now())

# Step 3: sum each row into normout.
np.sum(embout, axis=1, out=normout)

print(datetime.now())

# No square root is taken: it is monotonic, so the ordering is the same.
# np.argsort returns a new array, so this rebinds sortout to the index array.
sortout = np.argsort(normout)

print(datetime.now())

nptitles[sortout[0:10]]


2023-04-18 22:14:14.889315
2023-04-18 22:14:15.536349
2023-04-18 22:14:35.599805
2023-04-18 22:14:36.109989
2023-04-18 22:14:36.756595


array(['Bergenost', 'Cheese', 'Types of cheese', 'List of blue cheeses',
       'Cheese (disambiguation)', 'Bergkäse', 'Cheesemaking',
       'Ibores cheese', 'List of Swiss cheeses', 'Cheese dream'],
      dtype='<U251')

In [19]:
print(datetime.now())

# Score every row by its dot product with the query, written into the existing
# normout buffer (this overwrites whatever normout held before).
# NOTE(review): a dot-product ranking matches an L2 ranking only when all rows
# have the same norm — confirm whether the stored vectors are normalised.
np.dot(embeddings, cheeses_query, out=normout)

print(datetime.now())

# Top-10 without a full sort: argpartition puts the ten largest scores in the
# last ten positions, in no particular order among themselves.
idx = np.argpartition(normout, -10)[-10:]

print(datetime.now())

# Sort just those ten by ascending score, then reverse to get best-first.
idx = idx[np.argsort(normout[idx])][::-1]

print(datetime.now())

nptitles[idx]

2023-04-18 22:14:36.760139
2023-04-18 22:14:36.811543
2023-04-18 22:14:36.853148
2023-04-18 22:14:36.853224


array(['Bergenost', 'List of blue cheeses', 'Cheeseboard', 'Cheese straw',
       'Chevington cheese', 'Lavaş cheese', 'Grevé',
       'Swiss cheese (mathematics)', 'List of sheep milk cheeses',
       'Cheese: A Global History'], dtype='<U251')

In [20]:
embeddings.max()

0.3436

In [21]:
embeddings.shape

(6593739, 384)

In [22]:
! pip install nanopq

import nanopq


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [23]:
# Product quantizer with M=64 subspaces. Ks is left at the library default;
# the cell output reports Ks: 256 with uint8 codes.
pq = nanopq.PQ(M=64)

M: 64, Ks: 256, code_dtype: <class 'numpy.uint8'>


In [24]:
print(datetime.now())
# embeddings is already float32, so it is passed as-is. The previous
# embeddings.astype(np.float32) made a full copy of the matrix before training.
pq.fit(embeddings)
print(datetime.now())

2023-04-18 22:14:38.545004
iter: 20, seed: 123
Training the subspace: 0 / 64
Training the subspace: 1 / 64
Training the subspace: 2 / 64
Training the subspace: 3 / 64
Training the subspace: 4 / 64
Training the subspace: 5 / 64
Training the subspace: 6 / 64
Training the subspace: 7 / 64
Training the subspace: 8 / 64
Training the subspace: 9 / 64
Training the subspace: 10 / 64
Training the subspace: 11 / 64
Training the subspace: 12 / 64
Training the subspace: 13 / 64
Training the subspace: 14 / 64
Training the subspace: 15 / 64
Training the subspace: 16 / 64
Training the subspace: 17 / 64
Training the subspace: 18 / 64
Training the subspace: 19 / 64
Training the subspace: 20 / 64
Training the subspace: 21 / 64
Training the subspace: 22 / 64
Training the subspace: 23 / 64
Training the subspace: 24 / 64
Training the subspace: 25 / 64
Training the subspace: 26 / 64
Training the subspace: 27 / 64
Training the subspace: 28 / 64
Training the subspace: 29 / 64
Training the subspace: 30 / 64
Tr

In [25]:
print(datetime.now())
# Encode every embedding with the pq codec; the timestamps bracket the encode.
embedding_codes = pq.encode(embeddings)
print(datetime.now())

2023-04-20 10:07:17.686609
Encoding the subspace: 0 / 64
Encoding the subspace: 1 / 64
Encoding the subspace: 2 / 64
Encoding the subspace: 3 / 64
Encoding the subspace: 4 / 64
Encoding the subspace: 5 / 64
Encoding the subspace: 6 / 64
Encoding the subspace: 7 / 64
Encoding the subspace: 8 / 64
Encoding the subspace: 9 / 64
Encoding the subspace: 10 / 64
Encoding the subspace: 11 / 64
Encoding the subspace: 12 / 64
Encoding the subspace: 13 / 64
Encoding the subspace: 14 / 64
Encoding the subspace: 15 / 64
Encoding the subspace: 16 / 64
Encoding the subspace: 17 / 64
Encoding the subspace: 18 / 64
Encoding the subspace: 19 / 64
Encoding the subspace: 20 / 64
Encoding the subspace: 21 / 64
Encoding the subspace: 22 / 64
Encoding the subspace: 23 / 64
Encoding the subspace: 24 / 64
Encoding the subspace: 25 / 64
Encoding the subspace: 26 / 64
Encoding the subspace: 27 / 64
Encoding the subspace: 28 / 64
Encoding the subspace: 29 / 64
Encoding the subspace: 30 / 64
Encoding the subspace:

In [27]:
print(datetime.now())
# Build the query's distance table once, then use it to get an approximate
# distance to every encoded row.
dtable64 = pq.dtable(cheeses_query)
cheeses_dists = dtable64.adist(embedding_codes)
print(datetime.now())
# Titles of the ten rows with the smallest approximate distance.
nearest64 = np.argsort(cheeses_dists)[:10]
print(nptitles[nearest64])
print(datetime.now())

2023-04-20 10:17:55.755425
2023-04-20 10:17:57.623818
['Bergenost' 'Cheese' 'Cheesemaking' 'List of blue cheeses'
 'Types of cheese' 'Cheese (disambiguation)' 'Caboc' 'Swiss-type cheeses'
 'Egyptian cheese' 'Cheese ripening']
2023-04-20 10:17:58.277428


In [29]:
# Same experiment with a coarser codec (M=16); a timestamp follows every stage.
print(datetime.now())
pq16 = nanopq.PQ(M=16)
print(datetime.now())
# Train the codebooks on the full embedding matrix.
pq16.fit(embeddings)
print(datetime.now())
# Encode every embedding with the trained codec.
embedding16_codes = pq16.encode(embeddings)
print(datetime.now())
# Query distance table, then approximate distances to all codes.
dtable16 = pq16.dtable(cheeses_query)
cheeses16_dists = dtable16.adist(embedding16_codes)
print(datetime.now())
# Titles of the ten closest rows.
nearest16 = np.argsort(cheeses16_dists)[:10]
print(nptitles[nearest16])
print(datetime.now())

2023-04-20 10:25:09.340511
M: 16, Ks: 256, code_dtype: <class 'numpy.uint8'>
2023-04-20 10:25:09.341506
iter: 20, seed: 123
Training the subspace: 0 / 16
Training the subspace: 1 / 16
Training the subspace: 2 / 16
Training the subspace: 3 / 16
Training the subspace: 4 / 16
Training the subspace: 5 / 16
Training the subspace: 6 / 16
Training the subspace: 7 / 16
Training the subspace: 8 / 16
Training the subspace: 9 / 16
Training the subspace: 10 / 16
Training the subspace: 11 / 16
Training the subspace: 12 / 16
Training the subspace: 13 / 16
Training the subspace: 14 / 16
Training the subspace: 15 / 16
2023-04-20 11:20:03.972503
Encoding the subspace: 0 / 16
Encoding the subspace: 1 / 16
Encoding the subspace: 2 / 16
Encoding the subspace: 3 / 16
Encoding the subspace: 4 / 16
Encoding the subspace: 5 / 16
Encoding the subspace: 6 / 16
Encoding the subspace: 7 / 16
Encoding the subspace: 8 / 16
Encoding the subspace: 9 / 16
Encoding the subspace: 10 / 16
Encoding the subspace: 11 / 16
E

In [30]:
# Same experiment with a very coarse codec (M=4); a timestamp follows every stage.
print(datetime.now())
pq4 = nanopq.PQ(M=4)
print(datetime.now())
# Train the codebooks on the full embedding matrix.
pq4.fit(embeddings)
print(datetime.now())
# Encode every embedding with the trained codec.
embedding4_codes = pq4.encode(embeddings)
print(datetime.now())
# Query distance table, then approximate distances to all codes.
dtable4 = pq4.dtable(cheeses_query)
cheeses4_dists = dtable4.adist(embedding4_codes)
print(datetime.now())
# Titles of the ten closest rows.
nearest4 = np.argsort(cheeses4_dists)[:10]
print(nptitles[nearest4])
print(datetime.now())

2023-04-20 11:24:12.463072
M: 4, Ks: 256, code_dtype: <class 'numpy.uint8'>
2023-04-20 11:24:12.464711
iter: 20, seed: 123
Training the subspace: 0 / 4
Training the subspace: 1 / 4
Training the subspace: 2 / 4
Training the subspace: 3 / 4
2023-04-20 11:41:03.473109
Encoding the subspace: 0 / 4
Encoding the subspace: 1 / 4
Encoding the subspace: 2 / 4
Encoding the subspace: 3 / 4
2023-04-20 11:41:50.926172
2023-04-20 11:41:51.117570
['Nutshell' 'Finishing (whisky)' 'Omelette de la mère Poulard' 'Posset'
 'Sugar nips' 'Michel Lotito' "Dragon's Breath (dessert)"
 'The English and Australian Cookery Book' 'Marauding Scot'
 "Puits d'amour"]
2023-04-20 11:41:51.606678


In [32]:
nanopq.PQ??

In [33]:
# PQ experiment with M=64 subspaces and only Ks=2 codewords per subspace.
# NOTE(review): this rebinds pq4, embedding4_codes and cheeses4_dists, which an
# earlier cell created for the M=4 codec, so those names no longer describe
# what they hold. The M=192, Ks=2 cell uses a "192b" suffix; matching names
# here (e.g. a "64b" suffix) would avoid clobbering the M=4 results.
print(datetime.now())
pq4 = nanopq.PQ(M=64, Ks=2)
print(datetime.now())
# Train the codebooks on the full embedding matrix.
pq4.fit(embeddings)
print(datetime.now())
# Encode every embedding with the trained codec.
embedding4_codes = pq4.encode(embeddings)
print(datetime.now())
# Approximate distance from the query to every encoded row.
cheeses4_dists = pq4.dtable(cheeses_query).adist(embedding4_codes)
print(datetime.now())
# Titles of the ten closest rows (full argsort, first ten).
print(nptitles[np.argsort(cheeses4_dists)[0:10]])
print(datetime.now())

2023-04-20 12:17:53.662161
M: 64, Ks: 2, code_dtype: <class 'numpy.uint8'>
2023-04-20 12:17:53.662644
iter: 20, seed: 123
Training the subspace: 0 / 64
Training the subspace: 1 / 64
Training the subspace: 2 / 64
Training the subspace: 3 / 64
Training the subspace: 4 / 64
Training the subspace: 5 / 64
Training the subspace: 6 / 64
Training the subspace: 7 / 64
Training the subspace: 8 / 64
Training the subspace: 9 / 64
Training the subspace: 10 / 64
Training the subspace: 11 / 64
Training the subspace: 12 / 64
Training the subspace: 13 / 64
Training the subspace: 14 / 64
Training the subspace: 15 / 64
Training the subspace: 16 / 64
Training the subspace: 17 / 64
Training the subspace: 18 / 64
Training the subspace: 19 / 64
Training the subspace: 20 / 64
Training the subspace: 21 / 64
Training the subspace: 22 / 64
Training the subspace: 23 / 64
Training the subspace: 24 / 64
Training the subspace: 25 / 64
Training the subspace: 26 / 64
Training the subspace: 27 / 64
Training the subspac

In [34]:
# PQ experiment with M=128 subspaces and Ks=2 codewords per subspace.
# NOTE(review): pq4, embedding4_codes and cheeses4_dists are rebound once more
# here; after this cell they refer to the M=128 codec, not an M=4 one. A
# dedicated suffix (the M=192, Ks=2 cell uses "192b") would keep results apart.
print(datetime.now())
pq4 = nanopq.PQ(M=128, Ks=2)
print(datetime.now())
# Train the codebooks on the full embedding matrix.
pq4.fit(embeddings)
print(datetime.now())
# Encode every embedding with the trained codec.
embedding4_codes = pq4.encode(embeddings)
print(datetime.now())
# Approximate distance from the query to every encoded row.
cheeses4_dists = pq4.dtable(cheeses_query).adist(embedding4_codes)
print(datetime.now())
# Titles of the ten closest rows (full argsort, first ten).
print(nptitles[np.argsort(cheeses4_dists)[0:10]])
print(datetime.now())

2023-04-20 12:34:19.980006
M: 128, Ks: 2, code_dtype: <class 'numpy.uint8'>
2023-04-20 12:34:19.980805
iter: 20, seed: 123
Training the subspace: 0 / 128
Training the subspace: 1 / 128
Training the subspace: 2 / 128
Training the subspace: 3 / 128
Training the subspace: 4 / 128
Training the subspace: 5 / 128
Training the subspace: 6 / 128
Training the subspace: 7 / 128
Training the subspace: 8 / 128
Training the subspace: 9 / 128
Training the subspace: 10 / 128
Training the subspace: 11 / 128
Training the subspace: 12 / 128
Training the subspace: 13 / 128
Training the subspace: 14 / 128
Training the subspace: 15 / 128
Training the subspace: 16 / 128
Training the subspace: 17 / 128
Training the subspace: 18 / 128
Training the subspace: 19 / 128
Training the subspace: 20 / 128
Training the subspace: 21 / 128
Training the subspace: 22 / 128
Training the subspace: 23 / 128
Training the subspace: 24 / 128
Training the subspace: 25 / 128
Training the subspace: 26 / 128
Training the subspace: 

Encoding the subspace: 123 / 128
Encoding the subspace: 124 / 128
Encoding the subspace: 125 / 128
Encoding the subspace: 126 / 128
Encoding the subspace: 127 / 128
2023-04-20 12:57:59.627164
2023-04-20 12:58:03.155474
['Olivet cendré' 'Swiss cheese (North America)' 'Lüneberg cheese'
 'Who Moved My Cheese?' 'Explorateur' 'Gubbeen Farmhouse Cheese'
 'Cheese curd' 'Cheese ripening' 'Cheesemaking' 'Cheese crystals']
2023-04-20 12:58:03.800952


In [35]:
# PQ experiment with M=192 subspaces and Ks=2 codewords each; a timestamp
# follows every stage.
print(datetime.now())
pq192b = nanopq.PQ(M=192, Ks=2)
print(datetime.now())
# Train the codebooks on the full embedding matrix.
pq192b.fit(embeddings)
print(datetime.now())
# Encode every embedding with the trained codec.
embedding192b_codes = pq192b.encode(embeddings)
print(datetime.now())
# Query distance table, then approximate distances to all codes.
dtable192b = pq192b.dtable(cheeses_query)
cheeses192b_dists = dtable192b.adist(embedding192b_codes)
print(datetime.now())
# Titles of the ten closest rows.
nearest192b = np.argsort(cheeses192b_dists)[:10]
print(nptitles[nearest192b])
print(datetime.now())

2023-04-20 13:18:19.278715
M: 192, Ks: 2, code_dtype: <class 'numpy.uint8'>
2023-04-20 13:18:19.279397
iter: 20, seed: 123
Training the subspace: 0 / 192
Training the subspace: 1 / 192
Training the subspace: 2 / 192
Training the subspace: 3 / 192
Training the subspace: 4 / 192
Training the subspace: 5 / 192
Training the subspace: 6 / 192
Training the subspace: 7 / 192
Training the subspace: 8 / 192
Training the subspace: 9 / 192
Training the subspace: 10 / 192
Training the subspace: 11 / 192
Training the subspace: 12 / 192
Training the subspace: 13 / 192
Training the subspace: 14 / 192
Training the subspace: 15 / 192
Training the subspace: 16 / 192
Training the subspace: 17 / 192
Training the subspace: 18 / 192
Training the subspace: 19 / 192
Training the subspace: 20 / 192
Training the subspace: 21 / 192
Training the subspace: 22 / 192
Training the subspace: 23 / 192
Training the subspace: 24 / 192
Training the subspace: 25 / 192
Training the subspace: 26 / 192
Training the subspace: 

Encoding the subspace: 58 / 192
Encoding the subspace: 59 / 192
Encoding the subspace: 60 / 192
Encoding the subspace: 61 / 192
Encoding the subspace: 62 / 192
Encoding the subspace: 63 / 192
Encoding the subspace: 64 / 192
Encoding the subspace: 65 / 192
Encoding the subspace: 66 / 192
Encoding the subspace: 67 / 192
Encoding the subspace: 68 / 192
Encoding the subspace: 69 / 192
Encoding the subspace: 70 / 192
Encoding the subspace: 71 / 192
Encoding the subspace: 72 / 192
Encoding the subspace: 73 / 192
Encoding the subspace: 74 / 192
Encoding the subspace: 75 / 192
Encoding the subspace: 76 / 192
Encoding the subspace: 77 / 192
Encoding the subspace: 78 / 192
Encoding the subspace: 79 / 192
Encoding the subspace: 80 / 192
Encoding the subspace: 81 / 192
Encoding the subspace: 82 / 192
Encoding the subspace: 83 / 192
Encoding the subspace: 84 / 192
Encoding the subspace: 85 / 192
Encoding the subspace: 86 / 192
Encoding the subspace: 87 / 192
Encoding the subspace: 88 / 192
Encoding

In [36]:
# PQ experiment with M=48 subspaces and Ks=256 codewords each; a timestamp
# follows every stage.
print(datetime.now())
pq48 = nanopq.PQ(M=48, Ks=256)
print(datetime.now())
# Train the codebooks on the full embedding matrix.
pq48.fit(embeddings)
print(datetime.now())
# Encode every embedding with the trained codec.
embedding48_codes = pq48.encode(embeddings)
print(datetime.now())
# Query distance table, then approximate distances to all codes.
dtable48 = pq48.dtable(cheeses_query)
cheeses48_dists = dtable48.adist(embedding48_codes)
print(datetime.now())
# Titles of the ten closest rows.
nearest48 = np.argsort(cheeses48_dists)[:10]
print(nptitles[nearest48])
print(datetime.now())

2023-04-24 23:32:14.865476
M: 48, Ks: 256, code_dtype: <class 'numpy.uint8'>
2023-04-24 23:32:14.866440
iter: 20, seed: 123
Training the subspace: 0 / 48
Training the subspace: 1 / 48
Training the subspace: 2 / 48
Training the subspace: 3 / 48
Training the subspace: 4 / 48
Training the subspace: 5 / 48
Training the subspace: 6 / 48
Training the subspace: 7 / 48
Training the subspace: 8 / 48
Training the subspace: 9 / 48
Training the subspace: 10 / 48
Training the subspace: 11 / 48
Training the subspace: 12 / 48
Training the subspace: 13 / 48
Training the subspace: 14 / 48
Training the subspace: 15 / 48
Training the subspace: 16 / 48
Training the subspace: 17 / 48
Training the subspace: 18 / 48
Training the subspace: 19 / 48
Training the subspace: 20 / 48
Training the subspace: 21 / 48
Training the subspace: 22 / 48
Training the subspace: 23 / 48
Training the subspace: 24 / 48
Training the subspace: 25 / 48
Training the subspace: 26 / 48
Training the subspace: 27 / 48
Training the subsp

In [37]:
# PQ experiment with M=48 subspaces and Ks=128 codewords each (128 = 2**7,
# presumably the reason for the "x7" suffix); a timestamp follows every stage.
print(datetime.now())
pq48x7 = nanopq.PQ(M=48, Ks=128)
print(datetime.now())
# Train the codebooks on the full embedding matrix.
pq48x7.fit(embeddings)
print(datetime.now())
# Encode every embedding with the trained codec.
embedding48x7_codes = pq48x7.encode(embeddings)
print(datetime.now())
# Query distance table, then approximate distances to all codes.
dtable48x7 = pq48x7.dtable(cheeses_query)
cheeses48x7_dists = dtable48x7.adist(embedding48x7_codes)
print(datetime.now())
# Titles of the ten closest rows.
nearest48x7 = np.argsort(cheeses48x7_dists)[:10]
print(nptitles[nearest48x7])
print(datetime.now())

2023-04-26 11:31:51.649093
M: 48, Ks: 128, code_dtype: <class 'numpy.uint8'>
2023-04-26 11:31:51.649953
iter: 20, seed: 123
Training the subspace: 0 / 48
Training the subspace: 1 / 48
Training the subspace: 2 / 48
Training the subspace: 3 / 48
Training the subspace: 4 / 48
Training the subspace: 5 / 48
Training the subspace: 6 / 48
Training the subspace: 7 / 48
Training the subspace: 8 / 48
Training the subspace: 9 / 48
Training the subspace: 10 / 48
Training the subspace: 11 / 48
Training the subspace: 12 / 48
Training the subspace: 13 / 48
Training the subspace: 14 / 48
Training the subspace: 15 / 48
Training the subspace: 16 / 48
Training the subspace: 17 / 48
Training the subspace: 18 / 48
Training the subspace: 19 / 48
Training the subspace: 20 / 48
Training the subspace: 21 / 48
Training the subspace: 22 / 48
Training the subspace: 23 / 48
Training the subspace: 24 / 48
Training the subspace: 25 / 48
Training the subspace: 26 / 48
Training the subspace: 27 / 48
Training the subsp

In [41]:
cheeses48x7_dists.shape

(6593739,)

In [44]:
dtable48x7 = pq48x7.dtable(cheeses_query)

In [46]:
dtable48x7.dtable.shape

(48, 128)

In [47]:
pq48x7.codewords

array([[[-5.3758724e-03,  4.2093913e-03,  1.9834014e-03, ...,
          8.2521625e-03,  3.7193839e-02,  1.2343452e-02],
        [-4.6457235e-02, -3.0339463e-02, -6.0833983e-02, ...,
          3.7688754e-02, -4.6352342e-02,  4.8886180e-02],
        [-1.1907289e-02, -2.7092151e-02,  8.4226317e-04, ...,
          3.8341369e-02,  5.3051725e-02,  3.3950326e-03],
        ...,
        [ 9.0041459e-03,  2.9580690e-02, -2.8493036e-02, ...,
          5.0105162e-02,  3.5586305e-02,  3.1962596e-02],
        [ 3.3392343e-03,  4.2218052e-02, -2.7564276e-02, ...,
          4.8237763e-02, -3.0377753e-02,  2.4028083e-02],
        [-4.4370107e-02, -2.4671258e-02,  1.8078126e-02, ...,
          3.0240552e-02, -7.8113056e-03, -1.9013138e-02]],

       [[-8.6120813e-04, -2.8457234e-02,  2.7854979e-02, ...,
          3.3348978e-02, -8.3562806e-03,  6.0207546e-03],
        [ 2.4590353e-02, -3.2625984e-02, -2.2480009e-02, ...,
         -5.6121729e-02, -1.0217158e-02,  4.4263031e-02],
        [ 4.3625001e-02, 

In [50]:
# Codebooks of the M=48, Ks=128 codec. Other outputs in this notebook (49152
# flattened values, a distance table of shape (48, 128)) imply shape (48, 128, 8).
cw = pq48x7.codewords

In [76]:
import json

# Widen to float64 once; rounding float64 values lets them serialise as short decimals.
cw64 = cw.astype(np.float64)

# Character count of the default JSON encoding at full precision ...
full_json = json.dumps(cw64.tolist())
print(len(full_json))

# ... versus rounding to 5 decimals and dropping the spaces after separators.
compact_json = json.dumps(cw64.round(5).tolist(), separators=(",", ":"))
print(len(compact_json))


1092674
422960


In [64]:
# PQ experiment with M=32 subspaces and Ks=128 codewords each; a timestamp
# follows every stage.
print(datetime.now())
pq32x7 = nanopq.PQ(M=32, Ks=128)
print(datetime.now())
# Train the codebooks on the full embedding matrix.
pq32x7.fit(embeddings)
print(datetime.now())
# Encode every embedding with the trained codec.
embedding32x7_codes = pq32x7.encode(embeddings)
print(datetime.now())
# Query distance table, then approximate distances to all codes.
dtable32x7 = pq32x7.dtable(cheeses_query)
cheeses32x7_dists = dtable32x7.adist(embedding32x7_codes)
print(datetime.now())
# Titles of the ten closest rows.
nearest32x7 = np.argsort(cheeses32x7_dists)[:10]
print(nptitles[nearest32x7])
print(datetime.now())

2023-04-28 10:40:18.229976
M: 32, Ks: 128, code_dtype: <class 'numpy.uint8'>
2023-04-28 10:40:18.231721
iter: 20, seed: 123
Training the subspace: 0 / 32
Training the subspace: 1 / 32
Training the subspace: 2 / 32
Training the subspace: 3 / 32
Training the subspace: 4 / 32
Training the subspace: 5 / 32
Training the subspace: 6 / 32
Training the subspace: 7 / 32
Training the subspace: 8 / 32
Training the subspace: 9 / 32
Training the subspace: 10 / 32
Training the subspace: 11 / 32
Training the subspace: 12 / 32
Training the subspace: 13 / 32
Training the subspace: 14 / 32
Training the subspace: 15 / 32
Training the subspace: 16 / 32
Training the subspace: 17 / 32
Training the subspace: 18 / 32
Training the subspace: 19 / 32
Training the subspace: 20 / 32
Training the subspace: 21 / 32
Training the subspace: 22 / 32
Training the subspace: 23 / 32
Training the subspace: 24 / 32
Training the subspace: 25 / 32
Training the subspace: 26 / 32
Training the subspace: 27 / 32
Training the subsp

In [72]:
len(cw.reshape(-1))

49152

In [74]:
json.dumps(cw.astype(np.float64).round(4).tolist())

'[[[-0.0054, 0.0042, 0.002, -0.0441, -0.0089, 0.0083, 0.0372, 0.0123], [-0.0465, -0.0303, -0.0608, -0.0176, 0.0334, 0.0377, -0.0464, 0.0489], [-0.0119, -0.0271, 0.0008, 0.0046, -0.0248, 0.0383, 0.0531, 0.0034], [-0.034, -0.0187, -0.0142, 0.0129, 0.0067, 0.0656, 0.0177, 0.0538], [0.0109, -0.0375, -0.0373, -0.0158, 0.0407, 0.0383, 0.0503, 0.014], [-0.0038, 0.0719, 0.0046, -0.0106, -0.0715, -0.031, -0.0626, 0.0202], [-0.0387, 0.0007, -0.0008, -0.0285, -0.0884, 0.0045, 0.0359, 0.0247], [-0.018, 0.0228, -0.0494, -0.0152, -0.0955, -0.0069, -0.0592, -0.0054], [-0.0003, -0.0016, -0.0038, 0.0582, -0.0403, 0.014, -0.0407, -0.0218], [0.0111, 0.0363, -0.0261, 0.0171, -0.0258, -0.0141, -0.0298, 0.0049], [0.0009, 0.0123, -0.0604, -0.0187, -0.0111, 0.0558, 0.0014, 0.0772], [-0.0025, 0.034, 0.0232, 0.0411, 0.0459, -0.0021, -0.0124, 0.0264], [0.0032, 0.0191, -0.0723, 0.0746, -0.037, -0.0439, 0.0036, 0.0004], [-0.0418, 0.0506, 0.0032, 0.0186, -0.0047, 0.0055, -0.0305, -0.0201], [-0.0599, 0.0641, -0.053,

In [78]:
-0xc0de

-49374

In [80]:
list(zip(range(5), np.arange(5), [10,11,12,13,14]))

[(0, 0, 10), (1, 1, 11), (2, 2, 12), (3, 3, 13), (4, 4, 14)]

In [81]:
# range() only accepts integers. Catch the expected TypeError and show its
# message, so this demonstration does not abort a top-to-bottom run.
try:
    list(range(7.5))
except TypeError as exc:
    range_error = f"TypeError: {exc}"
    print(range_error)

TypeError: 'float' object cannot be interpreted as an integer