In [1]:
import time
import chess
import torch
from model import ChessModel, get_model
from convert import board_to_tensor
from train import load_data

In [2]:
data = load_data()
model = get_model()

Loading data...
Loaded data. Shape: 
positions : torch.Size([2331, 7, 8, 8])
moves     : torch.Size([2331, 4096])
values     : torch.Size([2331])
Loaded model!


In [3]:
positions, probs, values = data

In [4]:
print(positions.device)
start = time.time()
positions = positions.to('cuda')
end = time.time()
print(positions.device)
print("Transfer took", end-start, "seconds.")

cpu
cuda:0
Transfer took 0.0005979537963867188 seconds.


In [34]:
for batch_size in [1,2,4,8,16,32,64,128,256,512,1024,2048,4096]:
    print("Batch size", batch_size)
    start = time.time()
    batch_size = 100
    for batch in range(0, len(positions), batch_size):
        start_batch_idx = batch
        end_batch_idx = start_batch_idx + batch_size
        positions_batch = positions[start_batch_idx:end_batch_idx]
        with torch.no_grad():
            result = model(positions_batch)
        result[0][0][0].item()
    end = time.time()
    print("Inference took", end-start, "seconds.")
    print("Throughput: ", (end-start)/len(positions), "seconds per position.")

Batch size 1
Inference took 0.19612622261047363 seconds.
Throughput:  8.413823363812683e-05 seconds per position.
Batch size 2
Inference took 0.03652310371398926 seconds.
Throughput:  1.5668427161728552e-05 seconds per position.
Batch size 4
Inference took 0.015794038772583008 seconds.
Throughput:  6.77564940908752e-06 seconds per position.
Batch size 8
Inference took 0.01477360725402832 seconds.
Throughput:  6.337883849861999e-06 seconds per position.
Batch size 16
Inference took 0.013971328735351562 seconds.
Throughput:  5.993706021171841e-06 seconds per position.
Batch size 32
Inference took 0.013750791549682617 seconds.
Throughput:  5.899095473909317e-06 seconds per position.
Batch size 64
Inference took 0.01364588737487793 seconds.
Throughput:  5.85409153791417e-06 seconds per position.
Batch size 128
Inference took 0.013627052307128906 seconds.
Throughput:  5.846011285769587e-06 seconds per position.
Batch size 256
Inference took 0.013577699661254883 seconds.
Throughput:  5.82483

In [23]:
torch.cuda.get_device_capability()

(8, 6)

In [24]:
torch.cuda.get_device_name()

'NVIDIA GeForce RTX 3070'

In [26]:
torch.cuda.get_device_properties('cuda')

_CudaDeviceProperties(name='NVIDIA GeForce RTX 3070', major=8, minor=6, total_memory=8191MB, multi_processor_count=46)