# Debug FAISS Upgrade to v1.6.5

In [1]:
%load_ext autoreload
%autoreload 2

In [37]:
# System imports
import os
import sys
from time import time as tt
import importlib

# External imports
import matplotlib.pyplot as plt
import scipy as sp
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from torch_geometric.data import DataLoader

from itertools import chain
from random import shuffle, sample
from scipy.optimize import root_scalar as root

from torch.nn import Linear
import torch.nn.functional as F
from torch_cluster import knn_graph, radius_graph
import trackml.dataset
import torch_geometric
from itertools import permutations
import itertools
from sklearn import metrics
import pytorch_lightning as pl
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.loggers import WandbLogger
from torch.utils.checkpoint import checkpoint

import faiss

sys.path.append('/global/homes/d/danieltm/ExaTrkX/Tracking-ML-Exa.TrkX/src/Pipelines/Examples')
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
from utils.stage_utils import get_resume_id, load_config, combo_config, dict_to_args, get_logger, build_model, build_trainer, autocast
from LightningModules.Embedding.Models.layerless_embedding import LayerlessEmbedding
from LightningModules.Embedding.utils import filter_hit_pt, fetch_pt, fetch_type, load_dataset, graph_intersection, build_edges, res

## Exploration

In [96]:
def build_edges(spatial, r_max, k_max, return_indices=False):
    
    if device == "cuda":
        res = faiss.StandardGpuResources()
        D, I = faiss.knn_gpu(res, spatial, spatial, k_max)
    elif device == "cpu":
        index = faiss.IndexFlatL2(spatial.shape[1])
        index.add(spatial)
        D, I = index.search(spatial, k_max)
        
    D, I = D[:,1:], I[:,1:]
    ind = torch.Tensor.repeat(torch.arange(I.shape[0], device=device), (I.shape[1], 1), 1).T

    edge_list = torch.stack([ind[D <= r_max**2], I[D <= r_max**2]])

    if return_indices:
        return edge_list, D, I, ind
    else:
        return edge_list
    
def build_edges_with_index(spatial, index, r_max, k_max, return_indices=False):
    
    D, I = index.search(spatial, k_max)
        
    D, I = D[:,1:], I[:,1:]
    ind = torch.Tensor.repeat(torch.arange(I.shape[0], device=device), (I.shape[1], 1), 1).T

    edge_list = torch.stack([ind[D <= r_max**2], I[D <= r_max**2]])

    if return_indices:
        return edge_list, D, I, ind
    else:
        return edge_list

def search_index_pytorch(index, x, k, D=None, I=None):
    """call the search function of an index with pytorch tensor I/O (CPU
    and GPU supported)"""
    assert x.is_contiguous()
    n, d = x.size()
    assert d == index.d

    if D is None:
        D = torch.empty((n, k), dtype=torch.float32, device=x.device)
    else:
        assert D.size() == (n, k)

    if I is None:
        I = torch.empty((n, k), dtype=torch.int64, device=x.device)
    else:
        assert I.size() == (n, k)
    torch.cuda.synchronize()
    xptr = swig_ptr_from_FloatTensor(x)
    Iptr = swig_ptr_from_LongTensor(I)
    Dptr = swig_ptr_from_FloatTensor(D)
    index.search_c(n, xptr,
                   k, Dptr, Iptr)
    torch.cuda.synchronize()
    return D, I

def swig_ptr_from_FloatTensor(x):
    assert x.is_contiguous()
    assert x.dtype == torch.float32
    return faiss.cast_integer_to_float_ptr(
        x.storage().data_ptr() + x.storage_offset() * 4)

def swig_ptr_from_LongTensor(x):
    assert x.is_contiguous()
    assert x.dtype == torch.int64, 'dtype=%s' % x.dtype
    return faiss.cast_integer_to_long_ptr(
        x.storage().data_ptr() + x.storage_offset() * 8)

In [4]:
import faiss.contrib.torch_utils

In [113]:
torch.manual_seed(10)
d = 8
nb = 100000
nq = 10
k = 500
r = 0.1

In [106]:
%%time
spatial = torch.rand(nb, d, dtype=torch.float32, device=torch.device('cuda'))

CPU times: user 76 µs, sys: 90 µs, total: 166 µs
Wall time: 172 µs


In [102]:
%%time
edge_list = build_edges(spatial, r, k)

CPU times: user 168 ms, sys: 205 ms, total: 373 ms
Wall time: 372 ms


In [107]:
%%time
gpu_index = faiss.GpuIndexFlatL2(res, d)
gpu_index.add(spatial)

CPU times: user 0 ns, sys: 1.47 ms, total: 1.47 ms
Wall time: 1.21 ms


In [105]:
%%time
edge_list = build_edges_with_index(spatial, gpu_index, r, k)

CPU times: user 190 ms, sys: 75 ms, total: 265 ms
Wall time: 265 ms


In [108]:
%%time
D, I = gpu_index.search(spatial, k)

CPU times: user 2.29 ms, sys: 0 ns, total: 2.29 ms
Wall time: 1.92 ms


In [88]:
%%time
D, I = D[:,1:], I[:,1:]
ind = torch.Tensor.repeat(torch.arange(I.shape[0]), (I.shape[1], 1), 1).T.to(device)
edge_list = torch.stack([ind[D <= r**2], I[D <= r**2]])

CPU times: user 86.4 ms, sys: 66.3 ms, total: 153 ms
Wall time: 90.8 ms


In [109]:
%%time
D, I = D[:,1:], I[:,1:]
ind = torch.Tensor.repeat(torch.arange(I.shape[0], device=device), (I.shape[1], 1), 1).T
edge_list = torch.stack([ind[D <= r**2], I[D <= r**2]])

CPU times: user 1.94 ms, sys: 0 ns, total: 1.94 ms
Wall time: 1.53 ms


In [121]:
spatial = torch.rand(nb, d, dtype=torch.float32, device=torch.device('cuda'))

In [122]:
%%time
gpu_index = faiss.GpuIndexFlatL2(res, d)
gpu_index.add(spatial)



CPU times: user 1.48 ms, sys: 0 ns, total: 1.48 ms
Wall time: 1.18 ms


In [124]:
%%time
D, I = gpu_index.search(spatial, k)

D, I = D[:,1:], I[:,1:]


CPU times: user 2.75 ms, sys: 0 ns, total: 2.75 ms
Wall time: 2.32 ms


In [126]:
%%time
ind = torch.Tensor.repeat(torch.arange(I.shape[0], device=device), (I.shape[1], 1), 1).T
edge_list = torch.stack([ind[D <= r**2], I[D <= r**2]])

CPU times: user 3.22 ms, sys: 0 ns, total: 3.22 ms
Wall time: 2.77 ms
