# Edited Version 222

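This notebook first inspects how vectors are distributed across the levels of a Faiss HNSW index, then benchmarks several Faiss index types (IVF, HNSW, PQ and combinations of them) on synthetic data, comparing recall, search time, training/indexing time and index file size.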

In [1]:
%%time
# Install the nightly faiss-gpu package from the pytorch conda channel (-y answers the confirmation prompt).
! conda install -c pytorch/label/nightly -y faiss-gpu

Collecting package metadata (current_repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - done
Solving environment: | / - \ | 

In [2]:
import os
import re
import time

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import faiss
from faiss.contrib import datasets

# Report which Faiss build was imported and how many GPUs it can see.
print(f"Faiss version: {faiss.__version__}")
print(f"GPUs: {faiss.get_num_gpus()}")

Faiss version: 1.7.0
GPUs: 1


# HNSW - Levels Distribution

In [3]:
# Setup HNSW parameters
d = 128              # dimensionality shared by the synthetic vectors and the index
M = 32               # HNSW connectivity parameter passed to IndexHNSWFlat
efSearch = 32        # number of entry points (neighbors) we use on each layer (the depth of exploration of the search)
efConstruction = 32  # number of entry points used on each layer during construction (the depth of exploration at add time)

# SyntheticDataset(d, n_train, n_database, n_query): only database vectors are needed here.
# The dimension is taken from `d` (not a repeated literal) so the data can never
# disagree with the dimension the index is built with below.
ds = datasets.SyntheticDataset(d, 0, 500000, 0)
xb = ds.get_database()

index = faiss.IndexHNSWFlat(d, M)
index.hnsw.efConstruction = efConstruction
index.hnsw.efSearch = efSearch

In [4]:
%%time
# Add every database vector to the HNSW index; timed because this is the slow step of this section.
index.add(xb)

CPU times: user 3min 45s, sys: 498 ms, total: 3min 46s
Wall time: 1min 59s


In [5]:
# Highest level present in the HNSW structure after the vectors were added.
index.hnsw.max_level

4

In [6]:
# and levels (or layers) are now populated
# `levels` holds one entry per indexed vector; bincount then gives the number of vectors per level value.
levels = faiss.vector_to_array(index.hnsw.levels)  #level of each vector (base level = 1), size = ntotal
np.bincount(levels)

array([     0, 484359,  15162,    461,     17,      1])

In [7]:
# Same distribution as a two-column table: (level value, number of vectors).
# `unique` and `counts` are reused by the bar chart in the next cell.
unique, counts = np.unique(levels, return_counts=True)
np.column_stack((unique, counts))

array([[     1, 484359],
       [     2,  15162],
       [     3,    461],
       [     4,     17],
       [     5,      1]])

In [8]:
# Bar chart of the number of vectors per HNSW level.
# `levels` is 1-based (base level = 1), so subtract 1 to label the x axis from level 0.
fig = px.bar(
    x=unique - 1,
    y=counts,
    height=480,
    width=500,
    text_auto=',d',          # bar labels as integers with a thousands separator
)
fig.update_traces(
    textfont_size=14,
    textposition="outside",
    textfont_color="blue",
    width=1,
)
fig.update_layout(
    xaxis_title_text="level",
    yaxis_title_text="count",
    yaxis_range=[0, 530000],  # fixed upper bound leaves room for the label above the tallest bar
)
fig.show()
fig.write_html("levels.html")

# Comparison

In [9]:
def train_index(fac_string, index, gpu=True):
    """Train ``index`` on the notebook-level training vectors ``xt`` and time it.

    Args:
        fac_string: Label of the index; only used in the progress message.
        index: Faiss index to train.
        gpu: When true, the index is first copied to GPU 0 and the GPU copy
            is the one that gets trained and returned.

    Returns:
        Tuple ``(index, gpu, train_time)``: the trained index, the ``gpu``
        flag as passed in, and the wall-clock training time in seconds.
    """
    if gpu:
        # Single-GPU resources; kept in a named local so the object stays
        # alive for the whole training call.
        gpu_resources = faiss.StandardGpuResources()
        index = faiss.index_cpu_to_gpu(gpu_resources, 0, index)

    started = time.time()
    index.train(xt)
    train_time = time.time() - started

    print(fac_string, "=> Training done in %.3f s. \n" % train_time)
    return index, gpu, train_time

In [10]:
def add_index(fac_string, index):
    """Add the notebook-level database vectors ``xb`` to ``index`` in batches.

    Reads the globals ``xb``, ``ds`` (for ``ds.nb``), ``batch_size`` and
    ``batch_print_interval``. A progress line is printed for every
    ``batch_print_interval``-th batch, starting with the first one.

    Args:
        fac_string: Label of the index; only used in the printed messages.
        index: Faiss index (or any object with ``add`` and ``ntotal``).

    Returns:
        Wall-clock time in seconds spent adding all batches.
    """
    batch_no = 0
    lo = 0
    started = time.time()

    while lo < ds.nb:
        hi = min(lo + batch_size, ds.nb)   # the last batch may be shorter
        index.add(xb[lo:hi])
        if batch_no % batch_print_interval == 0:
            print(f"    Adding records [{lo} : {hi}]. => ntotal: {index.ntotal}")
        lo = hi
        batch_no += 1

    index_time = time.time() - started
    print(fac_string, "=> Indexing done in %.3f s." % index_time)
    print(f"{fac_string} => ntotal: {index.ntotal} \n")
    return index_time

In [11]:
def save_n_get_filesize(fac_string, index, gpu=True):
    """Write ``index`` to ``<fac_string>.index`` and report the file size.

    The size of the serialized index on disk is what the notebook records
    as the "memory usage" of an index. The file is left on disk.

    Args:
        fac_string: Label of the index; also the stem of the output file name.
        index: Faiss index to serialize.
        gpu: When true, ``index`` is converted to a CPU index before writing.

    Returns:
        Size of the written file in MB (bytes * 1e-6).
    """
    index_name = fac_string + '.index'
    cpu_index = faiss.index_gpu_to_cpu(index) if gpu else index
    faiss.write_index(cpu_index, index_name)

    size_mb = os.path.getsize(index_name) * 1e-6   # bytes -> MB
    print(f"{fac_string} => Memory usage: %.3f MB. \n" % size_mb)
    return size_mb

In [12]:
def search_index(fac_string, index, nprobe=None):
    """Run the notebook-level queries ``xq`` against ``index`` and score them.

    Reads the globals ``xq``, ``k``, ``gt`` and ``ds`` (for ``ds.nq``).
    Recall@R is the fraction of queries whose first ground-truth neighbour
    (``gt[:, :1]``) appears among the first R returned ids, for R = 1, 3, 5.

    Only the ``index.search`` call is timed; the recall computation and the
    printing happen after the clock has been stopped, so they do not inflate
    the reported latency.

    Args:
        fac_string: Label of the index. If it contains "IVF", ``nprobe`` is
            applied to the index before searching.
        index: Faiss index (or any object with a compatible ``search``).
        nprobe: Value assigned to ``index.nprobe`` for IVF indexes. ``None``
            leaves the index's current setting untouched.

    Returns:
        Tuple ``(search_time, recall_1, recall_3, recall_5)`` where
        ``search_time`` is in milliseconds per query.
    """
    if "IVF" in fac_string and nprobe is not None:
        index.nprobe = nprobe             # faiss.ParameterSpace().set_index_parameter(gpu_index, "nprobe", 64)

    t0 = time.time()
    _, I = index.search(xq, k)
    elapsed = time.time() - t0            # stop the clock before any bookkeeping

    n_queries = ds.nq
    nearest = gt[:, :1]                   # true nearest neighbour of each query
    recall_1, recall_3, recall_5 = (
        (I[:, :rank] == nearest).sum() / n_queries for rank in (1, 3, 5)
    )
    print(f"(nprobe = {nprobe}) Recall@1 = {recall_1}, Recall@3 = {recall_3}, Recall@5 = {recall_5}")

    # Both figures come from the same measurement, so they are always consistent.
    search_time_all = elapsed * 1000                  # ms for the whole query batch
    search_time = search_time_all / n_queries         # ms per query
    print(fac_string, "=> Searching done in %.3f ms." % search_time_all, "\n")

    return search_time, recall_1, recall_3, recall_5

In [13]:
def append_results(df, fac_string, nprobe, n_segment, recall_1, recall_3, recall_5, train_time, index_time, search_time, memory_usage):
    """Return ``df`` with one more benchmark row, and checkpoint it to ``df.csv``.

    The input frame is not modified. ``DataFrame.append`` (removed in
    pandas 2.0) is not used; the frame is rebuilt from its records instead.

    Args:
        df: Results collected so far.
        fac_string: Faiss factory string of the index; stored in column 'index'.
        nprobe: Stored only if ``fac_string`` contains "IVF", otherwise None.
        n_segment: Stored only if ``fac_string`` contains "PQ", otherwise None.
        recall_1, recall_3, recall_5: Recall figures for the run.
        train_time, index_time, search_time, memory_usage: Measurements for the run.

    Returns:
        A new DataFrame with the extra row and a fresh 0..n-1 index. Column 'M'
        holds the digits that follow "HNSW" in ``fac_string`` (as a string),
        or None when the string has no "HNSW<digits>" part.
    """
    # Take the digits directly after "HNSW" instead of slicing up to the last
    # ',' or '_', which yields '' for strings such as "IVF16_HNSW32".
    hnsw_match = re.search(r"HNSW(\d+)", fac_string)

    record = {
        'index': fac_string,
        'M': hnsw_match.group(1) if hnsw_match else None,
        'nprobe': nprobe if "IVF" in fac_string else None,
        'n_segment': n_segment if "PQ" in fac_string else None,
        'recall@1': recall_1,
        'recall@3': recall_3,
        'recall@5': recall_5,
        'train_time': train_time,
        'index_time': index_time,
        'search_time': search_time,
        'memory_usage': memory_usage,
    }

    rows = df.to_dict("records")
    rows.append(record)
    # Keep the existing column order; any key the frame did not have yet goes last.
    columns = list(df.columns) + [name for name in record if name not in df.columns]
    df = pd.DataFrame(rows, columns=columns)

    # Checkpoint after every row so a crash in a long run does not lose results.
    df.to_csv('df.csv', sep='|', index=False)
    return df

In [14]:
# Empty results table; append_results() adds one row per (index, nprobe) run.
df = pd.DataFrame({
    column: []
    for column in (
        'index',
        'M',
        'nprobe',
        'n_segment',
        'recall@1',
        'recall@3',
        'recall@5',
        'train_time',
        'index_time',
        'search_time',
        'memory_usage',
    )
})

In [15]:
# --- Benchmark configuration -------------------------------------------------
k = 5                            # neighbours requested per query
n_centroids = 65536              # number used in the "IVF<n>" factory strings
n_training = 39 * n_centroids    # x*n_centroids, where x is between 30 and 256.
n_base = 3 * 1000000             # millions (OOM if use 5 millions)
n_query = 1000
nprobes = [1, 8, 16, 64, 128, 256, 512, 1024]
batch_size = 32768               # batch size to add index.
batch_print_interval = 25        # print progress for every 25th batch
d = 128
M = 32
n_segment = 32

# Factory strings under test. Example fac_string = "IVF65536_HNSW32,PQ32"
str1 = f"IVF{n_centroids},PQ{n_segment}"
str2 = f"IVF{n_centroids}_HNSW{M},PQ{n_segment}"
str3 = f"IVF{n_centroids}_HNSW{M},Flat"
str4 = f"IVF{n_centroids},Flat"
str5 = f"HNSW{M},Flat"
str6 = f"HNSW{M}_PQ{n_segment}"

fac_strings = [str1, str2, str3, str4, str5, str6]

In [16]:
%%time

ds = datasets.SyntheticDataset(d, n_training, 0, 0)
xt = ds.get_train()

ds = datasets.SyntheticDataset(d, 0, n_base, n_query)
xb = ds.get_database()
xq = ds.get_queries()
gt = ds.get_groundtruth(k) 

for fac_string in fac_strings:
    t00 = time.time()  
    
    index = faiss.index_factory(d, fac_string)
    
    if (fac_string.startswith("IVF")):   
        index, gpu, train_time = train_index(fac_string, index, gpu=True)   # Training is required, use GPU.
    elif (fac_string.startswith("HNSW") and ("PQ" in fac_string)): 
        index, gpu, train_time = train_index(fac_string, index, gpu=False)  # Training is required, but GPU is not supported.
    else:                                                                   
        gpu = False                                                         # Training is not required.
        train_time = None
    
    index_time = add_index(fac_string, index)
    memory_usage = save_n_get_filesize(fac_string, index, gpu)
    if "IVF" in fac_string:
        for nprobe in nprobes:
            search_time, recall_1, recall_3, recall_5 = search_index(fac_string, index, nprobe)
            df = append_results(df, fac_string, nprobe, n_segment, recall_1, recall_3, recall_5, train_time, index_time, search_time, memory_usage)
    else:
        search_time, recall_1, recall_3, recall_5 = search_index(fac_string, index, None)
        df = append_results(df, fac_string, None, n_segment, recall_1, recall_3, recall_5, train_time, index_time, search_time, memory_usage)         

    del index
    total_time = (time.time() - t00) / 60
    print(fac_string, "=> Total processing time %.1f min." % total_time) 
    print("==============================================================\n") 

IVF65536,PQ32 => Training done in 156.804 s. 

    Adding records [0 : 32768]. => ntotal: 32768
    Adding records [819200 : 851968]. => ntotal: 851968
    Adding records [1638400 : 1671168]. => ntotal: 1671168
    Adding records [2457600 : 2490368]. => ntotal: 2490368
IVF65536,PQ32 => Indexing done in 28.526 s.
IVF65536,PQ32 => ntotal: 3000000 

IVF65536,PQ32 => Memory usage: 154.210 MB. 

(nprobe = 1) Recall@1 = 0.153, Recall@3 = 0.177, Recall@5 = 0.179
IVF65536,PQ32 => Searching done in 8.910 ms. 

(nprobe = 8) Recall@1 = 0.349, Recall@3 = 0.437, Recall@5 = 0.45
IVF65536,PQ32 => Searching done in 6.818 ms. 

(nprobe = 16) Recall@1 = 0.417, Recall@3 = 0.536, Recall@5 = 0.555
IVF65536,PQ32 => Searching done in 8.567 ms. 

(nprobe = 64) Recall@1 = 0.521, Recall@3 = 0.712, Recall@5 = 0.746
IVF65536,PQ32 => Searching done in 18.814 ms. 

(nprobe = 128) Recall@1 = 0.547, Recall@3 = 0.765, Recall@5 = 0.806
IVF65536,PQ32 => Searching done in 33.911 ms. 

(nprobe = 256) Recall@1 = 0.563, Rec

In [17]:
# %%time

# k = 5
# n_base = 3 * 1000000           # millions (OOM if use 5 millions)      
# n_query = 1000
# d = 128

# fac_string = "HNSW32,Flat"
# train_time = None

# ds = datasets.SyntheticDataset(d, 0, n_base, n_query)
# xb = ds.get_database()
# xq = ds.get_queries()
# gt = ds.get_groundtruth(k) 

# t00 = time.time()    

# index = faiss.index_factory(d, fac_string)

# # index, gpu, train_time = train_index(fac_string, index, gpu=False)
# index_time = add_index(fac_string, index)
# memory_usage = save_n_get_filesize(fac_string, index, gpu=False)
# if "IVF" in fac_string:
#     for nprobe in nprobes:
#         search_time, recall_1, recall_3, recall_5 = search_index(fac_string, index, nprobe)
#         df = append_results(df, fac_string, nprobe, n_segment, recall_1, recall_3, recall_5, train_time, index_time, search_time, memory_usage)
# else:
#     search_time, recall_1, recall_3, recall_5 = search_index(fac_string, index, None)
#     df = append_results(df, fac_string, None, n_segment, recall_1, recall_3, recall_5, train_time, index_time, search_time, memory_usage)         

# total_time = (time.time() - t00) / 60
# print(fac_string, "=> Total processing time %.1f min." % total_time) 
# print("==============================================================\n") 

In [18]:
# %%time

# k = 5
# n_base = 3 * 1000000           # millions (OOM if use 5 millions)  
# # n_base = 1000          # takes 8 mins to train 1000 nb for "HNSW32,PQ32"  
# n_query = 1000
# d = 128

# fac_string = "HNSW32_PQ32"

# ds = datasets.SyntheticDataset(d, n_training, 0, 0)
# xt = ds.get_train()

# ds = datasets.SyntheticDataset(d, 0, n_base, n_query)
# xb = ds.get_database()
# xq = ds.get_queries()
# gt = ds.get_groundtruth(k) 

# t00 = time.time()    

# index = faiss.index_factory(d, fac_string)
# # index.quantizer_efSearch = 16
# # index.quantizer_efConstruction = 32

# index, gpu, train_time = train_index(fac_string, index, gpu=False)
# index_time = add_index(fac_string, index)
# memory_usage = save_n_get_filesize(fac_string, index, gpu)
# if "IVF" in fac_string:
#     for nprobe in nprobes:
#         search_time, recall_1, recall_3, recall_5 = search_index(fac_string, index, nprobe)
#         df = append_results(df, fac_string, nprobe, n_segment, recall_1, recall_3, recall_5, train_time, index_time, search_time, memory_usage)
# else:
#     search_time, recall_1, recall_3, recall_5 = search_index(fac_string, index, None)
#     df = append_results(df, fac_string, None, n_segment, recall_1, recall_3, recall_5, train_time, index_time, search_time, memory_usage)         

# total_time = (time.time() - t00) / 60
# print(fac_string, "=> Total processing time %.1f min." % total_time) 
# print("==============================================================\n") 

In [19]:
# Display the accumulated benchmark results (one row per index / nprobe combination).
df

Unnamed: 0,index,M,nprobe,n_segment,recall@1,recall@3,recall@5,train_time,index_time,search_time,memory_usage
0,"IVF65536,PQ32",,1.0,32.0,0.153,0.177,0.179,156.803884,28.526001,0.008907,154.209972
1,"IVF65536,PQ32",,8.0,32.0,0.349,0.437,0.45,156.803884,28.526001,0.006815,154.209972
2,"IVF65536,PQ32",,16.0,32.0,0.417,0.536,0.555,156.803884,28.526001,0.008564,154.209972
3,"IVF65536,PQ32",,64.0,32.0,0.521,0.712,0.746,156.803884,28.526001,0.018811,154.209972
4,"IVF65536,PQ32",,128.0,32.0,0.547,0.765,0.806,156.803884,28.526001,0.033908,154.209972
5,"IVF65536,PQ32",,256.0,32.0,0.563,0.804,0.856,156.803884,28.526001,0.0756,154.209972
6,"IVF65536,PQ32",,512.0,32.0,0.578,0.831,0.891,156.803884,28.526001,0.195632,154.209972
7,"IVF65536,PQ32",,1024.0,32.0,0.588,0.848,0.913,156.803884,28.526001,0.509627,154.209972
8,"IVF65536_HNSW32,PQ32",32.0,1.0,32.0,0.153,0.177,0.179,156.016223,28.53575,0.007721,154.209972
9,"IVF65536_HNSW32,PQ32",32.0,8.0,32.0,0.349,0.437,0.45,156.016223,28.53575,0.00669,154.209972


In [20]:
# Rows to compare at nprobe = 256: the IVF runs with that nprobe, plus every index whose
# name starts with 'HNSW' (append_results stores no nprobe for non-IVF indexes, so the
# nprobe filter alone would drop them).
df256 = df[(df["nprobe"]==256) | (df["index"].str.startswith('HNSW'))]
df256

Unnamed: 0,index,M,nprobe,n_segment,recall@1,recall@3,recall@5,train_time,index_time,search_time,memory_usage
5,"IVF65536,PQ32",,256.0,32.0,0.563,0.804,0.856,156.803884,28.526001,0.0756,154.209972
13,"IVF65536_HNSW32,PQ32",32.0,256.0,32.0,0.563,0.804,0.856,156.016223,28.53575,0.074713,154.209972
21,"IVF65536_HNSW32,Flat",32.0,256.0,,0.904,0.904,0.904,87.001114,21.431804,0.088703,1594.078859
29,"IVF65536,Flat",,256.0,,0.904,0.904,0.904,86.502899,20.842895,0.089356,1594.078859
32,"HNSW32,Flat",32.0,,,0.854,0.854,0.854,,1126.212063,0.199158,2352.424546
33,HNSW32_PQ32,32.0,,32.0,0.63,0.791,0.813,76.508916,1007.656472,0.219738,912.555659


In [21]:
# Rows to compare at nprobe = 128: the IVF runs with that nprobe, plus every index whose
# name starts with 'HNSW' (append_results stores no nprobe for non-IVF indexes, so the
# nprobe filter alone would drop them).
df128 = df[(df["nprobe"]==128) | (df["index"].str.startswith('HNSW'))]
df128

Unnamed: 0,index,M,nprobe,n_segment,recall@1,recall@3,recall@5,train_time,index_time,search_time,memory_usage
4,"IVF65536,PQ32",,128.0,32.0,0.547,0.765,0.806,156.803884,28.526001,0.033908,154.209972
12,"IVF65536_HNSW32,PQ32",32.0,128.0,32.0,0.547,0.765,0.806,156.016223,28.53575,0.033709,154.209972
20,"IVF65536_HNSW32,Flat",32.0,128.0,,0.84,0.84,0.84,87.001114,21.431804,0.055171,1594.078859
28,"IVF65536,Flat",,128.0,,0.84,0.84,0.84,86.502899,20.842895,0.055719,1594.078859
32,"HNSW32,Flat",32.0,,,0.854,0.854,0.854,,1126.212063,0.199158,2352.424546
33,HNSW32_PQ32,32.0,,32.0,0.63,0.791,0.813,76.508916,1007.656472,0.219738,912.555659


In [22]:
# Search time per query as a function of nprobe, one line per index type.
fig = px.line(
    df,
    x="nprobe",
    y="search_time",
    color="index",
    markers=True,
    height=400,
    width=800,
    labels=dict(search_time="search time (ms/query)"),
    template="simple_white",
)
fig.show()
fig.write_html("nprobe_search.html")

In [23]:
# fig = px.line(df, x="nprobe", y="search_time", color="index", markers=True, height=400, width=800,
#              labels=dict(search_time="search time (ms)",),
#              template="simple_white")

# # set unique marker style for different lines (https://plotly.com/python/marker-style/)
# fig.update_traces(mode='lines+markers')
# symbols = ["square-open", "hexagram", "diamond-open", "circle", ]  
# line_styles = ["solid", "solid", "solid", "solid"]        # ['solid', 'dot', 'dash', 'longdash', 'dashdot', 'longdashdot']
# sizes = [12, 8, 12, 8]

# for i, fdata in enumerate(fig.data):
#     fdata.marker.symbol = symbols[i]
#     fdata.marker.size = sizes[i]
#     fdata.line.dash = line_styles[i]  
    
# fig.show()
# fig.write_html("nprobe_search0.html")

In [24]:
# Recall@1 as a function of nprobe, one line per index type.
fig = px.line(
    df,
    x="nprobe",
    y="recall@1",
    color="index",
    markers=True,
    height=400,
    width=800,
    template="simple_white",
)

# Optional (disabled): give each line its own marker style.
# set unique marker style for different lines (https://plotly.com/python/marker-style/)
# fig.update_traces(mode='lines+markers')
# symbols = ["square-open", "hexagram", "diamond-open", "circle", ]  
# line_styles = ["solid", "solid", "solid", "solid"]        # ['solid', 'dot', 'dash', 'longdash', 'dashdot', 'longdashdot']
# sizes = [12, 8, 12, 8]

# for i, fdata in enumerate(fig.data):
#     fdata.marker.symbol = symbols[i]
#     fdata.marker.size = sizes[i]
#     fdata.line.dash = line_styles[i]

fig.show()
fig.write_html("nprobe_recall_1.html")

In [25]:
# Recall@3 as a function of nprobe, one line per index type.
fig = px.line(
    df,
    x="nprobe",
    y="recall@3",
    color="index",
    markers=True,
    height=400,
    width=800,
    template="simple_white",
)
fig.show()
fig.write_html("nprobe_recall_3.html")

In [26]:
# Recall@5 as a function of nprobe, one line per index type.
fig = px.line(
    df,
    x="nprobe",
    y="recall@5",
    color="index",
    markers=True,
    height=400,
    width=800,
    template="simple_white",
)
fig.show()
fig.write_html("nprobe_recall_5.html")

In [27]:
# Recall@1 against search time per query, one line per index type.
# The x axis gets the same unit-bearing label as the nprobe / search-time plot,
# instead of showing the raw column name "search_time" with no unit.
fig = px.line(
    df,
    x="search_time",
    y="recall@1",
    color="index",
    markers=True,
    height=400,
    width=800,
    labels=dict(search_time="search time (ms/query)"),
    template="simple_white",
)

# Optional (disabled): give each line its own marker style.
# set unique marker style for different lines (https://plotly.com/python/marker-style/)
# fig.update_traces(mode='lines+markers')
# symbols = ["square-open", "hexagram", "diamond-open", "circle", ]  
# line_styles = ["solid", "solid", "solid", "solid"]        # ['solid', 'dot', 'dash', 'longdash', 'dashdot', 'longdashdot']
# sizes = [12, 8, 12, 8]

# for i, fdata in enumerate(fig.data):
#     fdata.marker.symbol = symbols[i]
#     fdata.marker.size = sizes[i]
#     fdata.line.dash = line_styles[i]

fig.show()
fig.write_html("search_recall.html")

In [28]:

# fig = px.bar(df.query("nprobe == 256"), x="index", y="search_time", color="index", height=400, width=400,
#             labels=dict(search_time="search time (ms/query)", index=""), title = "nprobe = 256", template="simple_white")
# Search time per query for each index, using the df256 selection.
fig = px.bar(
    df256,
    x="index",
    y="search_time",
    color="index",
    height=400,
    width=400,
    labels=dict(search_time="search time (ms/query)", index=""),
    title="nprobe = 256",
    template="simple_white",
)
fig.update_xaxes(ticks="")
fig.update_layout(showlegend=False)
fig.show()
fig.write_html("nprobe256_search.html")

In [29]:
# fig = px.bar(df.query("nprobe == 256"), x="index", y="recall@1", color="index", height=400, width=400, 
#              labels=dict(index=""), title="nprobe = 256", template="simple_white")
# Recall@1 for each index, using the df256 selection.
fig = px.bar(
    df256,
    x="index",
    y="recall@1",
    color="index",
    height=400,
    width=400,
    labels=dict(index=""),
    title="nprobe = 256",
    template="simple_white",
)
fig.update_xaxes(ticks="")
fig.update_layout(showlegend=False)
fig.show()
fig.write_html("nprobe256_recall.html")

In [30]:
#https://plotly.com/python/discrete-color/
# Show the D3 qualitative palette; the multi-panel bar charts below pass it as marker_color.
px.colors.qualitative.D3

['#1F77B4',
 '#FF7F0E',
 '#2CA02C',
 '#D62728',
 '#9467BD',
 '#8C564B',
 '#E377C2',
 '#7F7F7F',
 '#BCBD22',
 '#17BECF']

In [31]:
# One row of four bar charts for the df128 selection: search time and recall@1/3/5.
# Each entry is (column in df128, trace name, y-axis title).
panels = [
    ("search_time", "search time", "search time (ms/query)"),
    ("recall@1", "recall@1", "recall@1"),
    ("recall@3", "recall@3", "recall@3"),
    ("recall@5", "recall@5", "recall@5"),
]

fig = make_subplots(rows=1, cols=4, horizontal_spacing=0.1)
colors = px.colors.qualitative.D3

for panel_col, (column, trace_name, axis_title) in enumerate(panels, start=1):
    fig.add_trace(
        go.Bar(x=df128.loc[:, 'index'], y=df128.loc[:, column], marker_color=colors, name=trace_name),
        row=1, col=panel_col,
    )
    fig.update_yaxes(title_text=axis_title, row=1, col=panel_col)

fig.update_xaxes(ticks="")
fig.update_layout(height=400, width=1100, template="simple_white", showlegend=False,
                  title_text="nprobe = 128", title_x=0.5)
fig.show()
fig.write_html("nprobe128_search_recall.html")

In [32]:
# One row of four bar charts for the df256 selection: search time and recall@1/3/5.
# Each entry is (column in df256, trace name, y-axis title).
panels = [
    ("search_time", "search time", "search time (ms/query)"),
    ("recall@1", "recall@1", "recall@1"),
    ("recall@3", "recall@3", "recall@3"),
    ("recall@5", "recall@5", "recall@5"),
]

fig = make_subplots(rows=1, cols=4, horizontal_spacing=0.1)
colors = px.colors.qualitative.D3

for panel_col, (column, trace_name, axis_title) in enumerate(panels, start=1):
    fig.add_trace(
        go.Bar(x=df256.loc[:, 'index'], y=df256.loc[:, column], marker_color=colors, name=trace_name),
        row=1, col=panel_col,
    )
    fig.update_yaxes(title_text=axis_title, row=1, col=panel_col)

fig.update_xaxes(ticks="")
fig.update_layout(height=400, width=1100, template="simple_white", showlegend=False,
                  title_text="nprobe = 256")
fig.show()
fig.write_html("nprobe256_search_recall.html")

In [33]:
# Create figure with secondary y-axis: search time on the left axis, recall@1 on the right.
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(x=df256.loc[:, 'index'], y=df256.loc[:, 'search_time'], name="search time (ms/query)"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=df256.loc[:, 'index'], y=df256.loc[:, "recall@1"], name="recall@1"),
    secondary_y=True,
)
fig.update_layout(height=400, width=800, template="simple_white", title_text="")

fig.update_xaxes(title_text="")

# search_time is stored in ms per query, so its values are far below the previous
# fixed upper bound of 110 and the trace collapsed onto the x axis. Derive the
# bound from the data instead, with 10% headroom.
search_time_top = 1.1 * df256["search_time"].max()
fig.update_yaxes(range=[0, search_time_top], secondary_y=False, title_text="search time (ms/query)", color="#1F77B4")
fig.update_yaxes(range=[0, 1], secondary_y=True, title_text="recall@1", color="#FF7F0E")
fig.show()
fig.write_html("nprobe256_index_search_recall.html")

In [34]:
# fig = px.bar(df.query("nprobe == 256"), x="index", y="memory_usage", color="index", height=400, width=800, #text_auto=',d',
#              template="simple_white",
# #             color_discrete_sequence=px.colors.qualitative.Set1,  # To override the colors from template
#             )

# Index file size (recorded as "memory_usage", in MB) for each index in the df256 selection.
fig = px.bar(df256, x="index", y="memory_usage", color="index", height=400, width=800, #text_auto=',d',
             template="simple_white",
#             color_discrete_sequence=px.colors.qualitative.Set1,  # To override the colors from template
            )

fig.update_traces(textfont_size=14, textposition="outside", textfont_color="blue", texttemplate="%{value:,d} MB"  )

# The previous fixed upper bound of 1900 MB cut off any bar above it (the results
# include an index of more than 2300 MB). Scale the axis from the data and leave
# headroom for the labels drawn outside the bars.
memory_top = 1.15 * df256["memory_usage"].max()
fig.update_layout(xaxis_title_text="", yaxis_title_text="memory usage (MB)", yaxis_range=[0, memory_top])
fig.update_xaxes(ticks="")
fig.show()
fig.write_html("index_memory.html")