In [1]:
import sys
import h5py
import numpy as np
from numpy.random import random

##### Filesize experiments

In [51]:
dset1 = random( size=(1000,773) )
dset2 = np.round( random( size=(1000,773) ) ).astype(bool)
dset3 = [1000*[773*'1']]
dset4 = [ np.array([''.join([str(num) for num in x.astype(int)])]).astype('S773') for x in dset2 ]
dset5 = [ np.concatenate((dset4[2*i],dset4[2*i+1])) for i in range(500)]

print(int(dset3[0][0], base=2) < sys.maxsize) # not good to save binary as integer
print(type(dset3[0]), type(dset3[0][0]))
print(np.asarray(dset3).shape)
print(np.asarray(dset4).shape)
print(len(dset4[0]), len(dset4[0][0]))
print(dset4[1])
print(print(len(dset5[0]), len(dset5[0][0])))
print(dset5[1])

False
<class 'list'> <class 'str'>
(1, 1000)
(1000, 1)
1 773
[b'11001000110111100010011001110111010010111111110000010001101011110100000001000100000000001010011000010101011011110111110100100101000011101001010010011010101010010000000001110111001111011001011110001000001001101011000101000000011101001100101010011100100011011000010001100011101111100010011010110111011010101010000111100100010001000100001000001010111110100111101011000111000001011010010110000100100011000110011100110111100100100000111000100011011000011111010100000000101111011011110001101011011010100001111011110110000000001100100011110101010111101011001101010110001110010000010001011000010001000011000010110011000111101001101010000111101000111100110001011101110000111100000000001010010000100010011011011010111100010011000001111010100110101010100101100010110101001100000001100']
2 773
None
[b'11111010010111011000001101101010110110001011111101011010110000011100000101011100111100001010000100100000011100111101100001001101110001110001010111

In [52]:
with h5py.File("../data/float.hdf5") as f:
    f.create_dataset("float", data=dset1, maxshape=(None, 773))

with h5py.File("../data/bool.hdf5") as f:
    f.create_dataset("bool", data=dset2, dtype=bool, maxshape=(None, 773), compression="gzip", compression_opts=9)

with h5py.File("../data/string.hdf5",'w') as f:
    data = f.create_dataset("string", (500,), dtype=h5py.vlen_dtype(np.dtype('S773')), maxshape=(None,), compression="gzip", compression_opts=9)
    for i in range(500):
        data[i] = dset5[i]


#### Converting binary to uint

In [2]:
chunk = 500000
a = np.round(np.random.random(size=(chunk,776))).astype(np.bool_)
a.shape


(500000, 776)

In [3]:
%%time
uint = np.zeros(shape=(chunk,97))
for j in range(97):
    uint[:,j] = np.packbits(a[:,j*8:(j+1)*8])
uint.shape, uint[0]

CPU times: user 1.66 s, sys: 94.8 ms, total: 1.75 s
Wall time: 1.76 s


((500000, 97),
 array([244., 143.,  19.,   1., 211.,  75., 223., 140.,  20.,  10.,  53.,
        237., 207., 168.,  59., 142., 141., 213., 255., 240., 116., 168.,
        117., 212.,  94., 154., 191.,  25., 192., 108., 149.,  56., 194.,
        119.,  88., 121.,  56.,  95.,   4.,  22.,  79., 228.,  96., 170.,
        204., 167., 216., 145., 105.,  27.,  50.,  99.,  19., 143.,  33.,
        194., 180., 222.,  75.,  62.,  46.,  83., 146., 163., 149., 162.,
         80., 210., 210., 203., 186., 110., 205., 189., 121.,  33.,  31.,
         94., 190.,   5., 215.,  89., 151., 123., 219., 224.,  49., 177.,
        158., 132.,  68., 221.,  63.,  41.,  50., 241., 171.]))

In [6]:
%%time
uint = a.reshape((chunk,97,8))
uint = np.packbits(b, axis=-1).reshape((chunk,97))
uint[0], uint.shape

CPU times: user 1.28 s, sys: 10.1 ms, total: 1.29 s
Wall time: 1.28 s


(array([244, 143,  19,   1, 211,  75, 223, 140,  20,  10,  53, 237, 207,
        168,  59, 142, 141, 213, 255, 240, 116, 168, 117, 212,  94, 154,
        191,  25, 192, 108, 149,  56, 194, 119,  88, 121,  56,  95,   4,
         22,  79, 228,  96, 170, 204, 167, 216, 145, 105,  27,  50,  99,
         19, 143,  33, 194, 180, 222,  75,  62,  46,  83, 146, 163, 149,
        162,  80, 210, 210, 203, 186, 110, 205, 189, 121,  33,  31,  94,
        190,   5, 215,  89, 151, 123, 219, 224,  49, 177, 158, 132,  68,
        221,  63,  41,  50, 241, 171], dtype=uint8),
 (500000, 97))

#### Test chess board set up

In [3]:
import chess

In [4]:
board = chess.Board()
print(board)

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R


In [6]:
board.clear()
print(board)

. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .


In [15]:
board.set_piece_map({1:chess.Piece(chess.PAWN, chess.WHITE)})
print(chess.Piece(chess.PAWN, chess.WHITE), chess.Piece.from_symbol('q'))
print(board)

P q
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
. P . . . . . .


In [1]:
import chess
import chess.pgn

In [6]:
with open("../tools/lichess_db_standard_rated_2013-01-02.pgn", 'r') as f:
    i = 0
    while True:
        next_game = chess.pgn.read_game(f)
        i+=1
        if next_game is None:
            print(i)
            break

21314


### Test indices

In [1]:
import faiss
index = faiss.read_index("/media/pafrank/Backup/other/Chess/lichess/embeddings/IVF65536_HNSW32,SQ4.faiss")