In [2]:
import openai
from openai.embeddings_utils import get_embeddings, get_embedding
from datetime import datetime
import hashlib
import re
import os
from tqdm import tqdm

import logging

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)


In [4]:
openai.api_key = os.environ.get('OPENAI_API_KEY')
pinecone_key = os.environ.get('PINECONE_API_KEY')

INDEX_NAME = 'semantic-search'
NAMESPACE = 'default'
ENGINE = 'text-embedding-ada-002'

In [5]:
import pinecone

pinecone.init(api_key=pinecone_key, environment="us-west1-gcp")

In [6]:
# Create the index only when it does not exist yet, so re-running this cell
# does not attempt to create the same index twice
if INDEX_NAME not in pinecone.list_indexes():
    pinecone.create_index(
        INDEX_NAME,  # The name of the index
        dimension=1536,  # The dimensionality of the vectors (must equal the embedding length produced by ENGINE)
        metric='cosine',  # The similarity metric to use when searching the index
        pod_type="p1"  # The type of Pinecone pod
    )

# Store the index as a variable
index = pinecone.Index(INDEX_NAME)

In [7]:
def my_hash(s):
    """Return the MD5 digest of the string `s` as a hexadecimal string."""
    digest = hashlib.md5()
    # str.encode() with no argument encodes as UTF-8
    digest.update(s.encode())
    return digest.hexdigest()

my_hash('I love to hash it')

'ae76cc4dfd345ecaeea9b8ba0d5c3437'

In [8]:
def prepare_for_pinecone(texts, engine=ENGINE):
    """
    Turn a list of strings into (id, embedding, metadata) tuples.

    texts: the strings to embed; all of them are sent in one get_embeddings() call
    engine: the embedding engine name forwarded to get_embeddings()

    Returns a list with one tuple per input string:
        (my_hash(text), embedding, {'text': text, 'date_uploaded': <timestamp>})
    """
    # A single UTC timestamp is taken up front and shared by every record of this call
    uploaded_at = datetime.utcnow()

    # Embed every string with the requested engine
    vectors = get_embeddings(texts, engine=engine)

    records = []
    for text, vector in zip(texts, vectors):
        record_id = my_hash(text)  # the ID is derived from the text itself
        metadata = {'text': text, 'date_uploaded': uploaded_at}
        records.append((record_id, vector, metadata))

    return records


In [9]:
texts = ['hi']

In [10]:
prepare_for_pinecone(texts)[0]

('49f68a5c8493ec2c0bf489821c21fc3b',
 [-0.035133566707372665,
  -0.02054932527244091,
  -0.01542513258755207,
  -0.0398373119533062,
  -0.027486693114042282,
  0.021114300936460495,
  -0.022073443979024887,
  -0.019511347636580467,
  -0.00950603373348713,
  -0.013178371824324131,
  0.029536370187997818,
  -0.004674183204770088,
  -0.015214909799396992,
  -0.013953570276498795,
  0.00900675356388092,
  0.01514921523630619,
  0.038444582372903824,
  -0.005718729924410582,
  0.023847203701734543,
  -0.012869605794548988,
  -0.014938991516828537,
  -0.0031615609768778086,
  -0.0068453955464065075,
  -0.008468056097626686,
  -0.022651558741927147,
  -0.0001063434174284339,
  0.013467428274452686,
  -0.017041224986314774,
  0.004509946331381798,
  -0.022349361330270767,
  0.014439711347222328,
  -0.0009221903746947646,
  -0.044856391847133636,
  -0.009729395620524883,
  -0.009768812917172909,
  -0.01567477360367775,
  0.009867355227470398,
  -0.021258829161524773,
  0.01509665884077549,
  -0

In [11]:
_id, embedding, metadata = prepare_for_pinecone(texts)[0]

print('ID:  ',_id, '\nLEN: ', len(embedding), '\nMETA:', metadata)

ID:   49f68a5c8493ec2c0bf489821c21fc3b 
LEN:  1536 
META: {'text': 'hi', 'date_uploaded': datetime.datetime(2023, 5, 8, 16, 10, 32, 169188)}


In [12]:
def upload_texts_to_pinecone(texts, namespace=NAMESPACE, batch_size=None, show_progress_bar=False):
    """
    Embed `texts` and upsert them into the Pinecone index, optionally in batches.

    texts: sequence of strings to upload (it is sliced into batches)
    namespace: the namespace passed to index.upsert()
    batch_size: number of texts handled per prepare/upsert round; a falsy value
        (None or 0) means everything goes in a single batch
    show_progress_bar: when True, the batch loop is wrapped in tqdm

    Returns the sum of the 'upserted_count' values reported by index.upsert().
    """
    # Nothing to upload. Returning early also avoids range() receiving a step of 0
    # below (batch_size would default to len(texts) == 0), which raises ValueError.
    if len(texts) == 0:
        return 0

    if not batch_size:
        batch_size = len(texts)

    total_upserted = 0
    batch_starts = range(0, len(texts), batch_size)
    for i in tqdm(batch_starts) if show_progress_bar else batch_starts:
        batch = texts[i: i + batch_size]

        # Call the prepare_for_pinecone function to prepare this batch for indexing
        prepared_texts = prepare_for_pinecone(batch)

        # Use the upsert() method of the index object to upload the prepared texts to Pinecone
        total_upserted += index.upsert(
            prepared_texts,
            namespace=namespace
        )['upserted_count']

    return total_upserted


# Call the upload_texts_to_pinecone() function with the input texts
upload_texts_to_pinecone(texts)


1

In [13]:
def query_from_pinecone(query, top_k=3, namespace=NAMESPACE):
    """
    Embed `query` and return the closest matches stored in the Pinecone index.

    query: the text to search for
    top_k: maximum number of matches to request
    namespace: the namespace to search; defaults to NAMESPACE, mirroring the
        `namespace` parameter of the upload and delete helpers

    Returns the 'matches' entry of the index.query() response.
    """
    # get embedding from THE SAME embedder as the documents
    query_embedding = get_embedding(query, engine=ENGINE)

    return index.query(
        vector=query_embedding,
        top_k=top_k,
        namespace=namespace,
        include_metadata=True   # gets the metadata (dates, text, etc)
    ).get('matches')

query_from_pinecone('hello')

[{'id': '49f68a5c8493ec2c0bf489821c21fc3b',
  'metadata': {'date_uploaded': datetime.datetime(2023, 5, 8, 16, 10, 33, 584748),
               'text': 'hi'},
  'score': 0.924794734,
  'values': []}]

In [14]:
import hashlib

def delete_texts_from_pinecone(texts, namespace=NAMESPACE):
    """
    Delete the vectors that correspond to `texts` from the Pinecone index.

    texts: the original strings whose vectors should be removed
    namespace: the namespace passed to index.delete()

    Returns whatever index.delete() returns.
    """
    # Compute the hash (id) for each text with the same helper that
    # prepare_for_pinecone() uses, so the IDs always match the uploaded ones
    hashes = [my_hash(text) for text in texts]

    # The ids parameter is used to specify the list of IDs (hashes) to delete
    return index.delete(ids=hashes, namespace=namespace)

# delete our text
delete_texts_from_pinecone(texts)

# test that the index is empty
query_from_pinecone('hello')

[]

In [15]:
# NOTE(review): this cell defines query_from_pinecone a second time; an earlier
# cell already defines a function with this name. Consider keeping only one.
def query_from_pinecone(query, top_k=3, namespace=NAMESPACE):
    """
    Embed `query` and return the closest matches stored in the Pinecone index.

    query: the text to search for
    top_k: maximum number of matches to request
    namespace: the namespace to search; defaults to NAMESPACE, mirroring the
        `namespace` parameter of the upload and delete helpers

    Returns the 'matches' entry of the index.query() response.
    """
    # get embedding from THE SAME embedder as the documents
    query_embedding = get_embedding(query, engine=ENGINE)

    return index.query(
        vector=query_embedding,
        top_k=top_k,
        namespace=namespace,
        include_metadata=True   # gets the metadata (dates, text, etc)
    ).get('matches')

query_from_pinecone('hello')

[]

In [16]:
# Importing the tiktoken library
import tiktoken

# Initializing a tokenizer for the 'cl100k_base' model
# This tokenizer is designed to work with the 'ada-002' embedding model
tokenizer = tiktoken.get_encoding("cl100k_base")

# Using the tokenizer to encode the text 'hey there'
# The resulting output is a list of integers representing the encoded text
# This is the input format required for embedding using the 'ada-002' model
tokenizer.encode('hey there')


[36661, 1070]

In [17]:
# Function to split the text into chunks of a maximum number of tokens. Inspired by OpenAI
def overlapping_chunks(text, max_tokens = 500, overlapping_factor = 5):
    '''
    Split `text` into sentence-aligned chunks that each stay within a token budget.

    text: the string to split; sentences are delimited by '.', '?' or '!'
    max_tokens: tokens we want per chunk
    overlapping_factor: number of sentences to start each chunk with that overlaps with the previous chunk

    Returns a list of strings. Each chunk is its sentences joined with ". " plus a
    trailing "."; the original '?' / '!' terminators are therefore not preserved.
    A sentence that on its own exceeds max_tokens is dropped.
    '''

    # Split the text using punctuation. Fragments are stripped and empty ones are
    # discarded, otherwise the empty string that re.split leaves after the final
    # terminator (and the leading spaces of each sentence) end up in the chunks.
    fragments = (fragment.strip() for fragment in re.split(r'[.?!]', text))
    sentences = [fragment for fragment in fragments if fragment]

    def _render(pairs):
        # Join the sentences of one chunk back into a single string
        return ". ".join(sentence for sentence, _ in pairs) + "."

    chunks = []
    chunk = []          # (sentence, token_count) pairs of the chunk being built
    tokens_so_far = 0   # budget used by `chunk`: token_count + 1 per sentence

    for sentence in sentences:
        # Get the number of tokens for this sentence
        token = len(tokenizer.encode(" " + sentence))

        # A sentence that can never fit is skipped *before* any flushing, so it
        # neither closes the current chunk early nor produces an empty "." chunk
        if token > max_tokens:
            continue

        # If adding this sentence would exceed the budget, close the current chunk
        # and start the next one from the overlapping tail
        if chunk and tokens_so_far + token > max_tokens:
            chunks.append(_render(chunk))
            chunk = chunk[-overlapping_factor:] if overlapping_factor > 0 else []
            # Reuse the stored counts so the overlap is budgeted the same way as
            # freshly added sentences (no re-tokenizing)
            tokens_so_far = sum(count + 1 for _, count in chunk)

            # Drop the oldest overlap sentences until the incoming one fits, so
            # the overlap cannot push the new chunk past max_tokens
            while chunk and tokens_so_far + token > max_tokens:
                _, dropped = chunk.pop(0)
                tokens_so_far -= dropped + 1

        # Add the sentence to the chunk and add the number of tokens to the total
        chunk.append((sentence, token))
        tokens_so_far += token + 1

    if chunk:
        chunks.append(_render(chunk))

    return chunks

In [25]:
def get_results_from_pinecone(query, top_k=3, re_rank=False, verbose=True):
    """
    Run `query` through query_from_pinecone() and return the matches as a list.

    query: the text to search for
    top_k: forwarded to query_from_pinecone()
    re_rank: accepted for interface compatibility; it is not used in this function
    verbose: when True, print the query and one summary row per match
        (id, score with two decimals, first 50 characters of the stored text)

    Returns an empty list when the lookup yields nothing, otherwise a list of the matches.
    """
    matches = query_from_pinecone(query, top_k=top_k)

    # Nothing retrieved: return before printing anything
    if not matches:
        return []

    if verbose:
        print("Query:", query)
        print('Document ID (Hash)\t\tRetrieval Score\tText')

    final_results = []
    for match in matches:
        final_results.append(match)
        if verbose:
            print(f"{match['id']}\t{match['score']:.2f}\t{match['metadata']['text'][:50]}")

    return final_results

In [20]:
from datasets import load_dataset
from evaluate import load


dataset = load_dataset("boolq")

Downloading readme:   0%|          | 0.00/6.60k [00:00<?, ?B/s]

Found cached dataset boolq (/Users/sinanozdemir/.cache/huggingface/datasets/boolq/default/0.1.0/bf0dd57da941c50de94ae3ce3cef7fea48c08f337a4b7aac484e9dddc5aa24e5)


  0%|          | 0/2 [00:00<?, ?it/s]

In [21]:
dataset['validation'][0]

{'question': 'does ethanol take more energy make that produces',
 'answer': False,
 'passage': "All biomass goes through at least some of these steps: it needs to be grown, collected, dried, fermented, distilled, and burned. All of these steps require resources and an infrastructure. The total amount of energy input into the process compared to the energy released by burning the resulting ethanol fuel is known as the energy balance (or ``energy returned on energy invested''). Figures compiled in a 2007 report by National Geographic Magazine point to modest results for corn ethanol produced in the US: one unit of fossil-fuel energy is required to create 1.3 energy units from the resulting ethanol. The energy balance for sugarcane ethanol produced in Brazil is more favorable, with one unit of fossil-fuel energy required to create 8 from the ethanol. Energy balance estimates are not easily produced, thus numerous such reports have been generated that are contradictory. For instance, a sep

In [22]:
for idx in tqdm(range(0, len(dataset['validation']), 256)):
    data_sample = dataset['validation'][idx:idx + 256]

    passages = data_sample['passage']

    upload_texts_to_pinecone(passages)

100%|███████████████████████████████████████████| 13/13 [01:29<00:00,  6.91s/it]


In [26]:
from random import sample

query = sample(dataset['validation']['question'], 1)[0]
print(query)
final_results = get_results_from_pinecone(query, top_k=3, re_rank=True)


is an echocardiogram the same as a sonogram
Query: is an echocardiogram the same as a sonogram
Document ID (Hash)		Retrieval Score	Text
921caa9374f88c17a9d2d57471d208f3	0.87	An echocardiogram, often referred to as a cardiac 
3888c00d4063d5b3f19de2bf5d878e81	0.75	A blighted ovum or anembryonic gestation is charac
27e5f7cc8bdd240fb70412f8f67031ec	0.74	The EF of the left heart, known as the left ventri


In [27]:
q_to_hash = {data['question']: my_hash(data['passage']) for data in dataset['validation']}

q_to_hash[query]

'921caa9374f88c17a9d2d57471d208f3'

In [28]:
logger.setLevel(logging.CRITICAL)

# `val_sample` is not defined anywhere in this notebook, so a fresh kernel would
# raise NameError here; evaluate on the validation split.
# NOTE(review): presumably the original run also used the full split (a later
# evaluation cell's progress bar shows 3270 iterations) -- confirm.
val_sample = dataset['validation']

predictions = []

# Note we will keep top_k the same so latency from Pinecone is consistent
#  and the only major time difference will be in the re-ranking

for question in tqdm(val_sample['question']):
    results = get_results_from_pinecone(question, top_k=1, re_rank=False, verbose=False)
    # An empty result list counts as a miss instead of raising IndexError on [0]
    retrieved_hash = results[0]['id'] if results else None
    correct_hash = q_to_hash[question]
    predictions.append(retrieved_hash == correct_hash)

accuracy = sum(predictions)/len(predictions)

print(f'Accuracy with OpenAI embeddings: {accuracy}')

Accuracy with OpenAI embeddings: 0.8522935779816514


# OPEN SOURCE ALTERNATIVE TO EMBEDDING

In [29]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('sentence-transformers/multi-qa-mpnet-base-cos-v1')

docs = ["Around 9 Million people live in London", "London is known for its financial district"]

doc_emb = model.encode(docs, batch_size=32, show_progress_bar=True)

doc_emb.shape#  == ('2, 768')


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

(2, 768)

In [30]:
#Encode query and documents
docs = dataset['validation']['passage']
doc_emb = model.encode(docs, batch_size=32, show_progress_bar=True)

Batches:   0%|          | 0/103 [00:00<?, ?it/s]

In [31]:
from random import sample

query = sample(dataset['validation']['question'], 1)[0]
print(query)
# Using OpenAI
final_results = get_results_from_pinecone(query, top_k=3, re_rank=True)


is there a spin off from breaking bad
Query: is there a spin off from breaking bad
Document ID (Hash)		Retrieval Score	Text
c0af8c84c15f5943bdcf748ae5814b5b	0.83	Breaking Bad is an American neo-Western crime dram
3a03b748be0c5dd1743b984f0919b0b8	0.82	Better Call Saul is an American television crime d
6b643fa2bf72047270dd30f9e719065f	0.82	Nacho did not appear in Breaking Bad, but is named


In [33]:
from sentence_transformers import util

In [34]:
query_emb = model.encode(query)

#Compute dot score between query and all document embeddings
scores = util.dot_score(query_emb, doc_emb)[0].cpu().tolist()

#Combine docs & scores
doc_score_pairs = list(zip(docs, scores))

#Sort by decreasing score
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)

#Output passages & scores
for doc, score in doc_score_pairs[:3]:
    print(score, doc)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

0.5969604253768921 Breaking Bad is an American neo-Western crime drama television series created and produced by Vince Gilligan. The show originally aired on the AMC network for five seasons, from January 20, 2008 to September 29, 2013. The series tells the story of Walter White (Bryan Cranston), a struggling and depressed high school chemistry teacher diagnosed with lung cancer. Together with his former student Jesse Pinkman (Aaron Paul), White turns to a life of crime by producing and selling crystallized methamphetamine to secure his family's financial future before he dies, while navigating the dangers of the criminal world. The title comes from the Southern colloquialism ``breaking bad'', meaning to ``raise hell'' or turn toward crime. Breaking Bad is set and filmed in Albuquerque, New Mexico.
0.5142889022827148 The third season of the American television drama series Better Call Saul premiered on April 10, 2017, and concluded on June 19, 2017. The ten-episode season was broadcast

In [40]:
import numpy as np
logger.setLevel(logging.CRITICAL)  # just to suppress some logs


def eval_ranking_open_source(query, top_k=3):
    """
    Return the hash of the passage in `docs` that scores highest against `query`.

    query: the text to encode with `model`
    top_k: the ranked list is cut to this many entries before the best one is taken

    The query embedding is dot-scored against the precomputed `doc_emb`, the
    (passage, score) pairs are sorted by descending score, and my_hash() of the
    first passage is returned.
    """
    query_vector = np.array(model.encode(query))

    # Dot score between the query and every document embedding
    similarity = util.dot_score(query_vector, doc_emb)[0].cpu().tolist()

    # Pair each passage with its score and order by decreasing score
    ranked = sorted(zip(docs, similarity), key=lambda pair: pair[1], reverse=True)
    shortlist = ranked[:top_k]

    best_doc, _ = shortlist[0]
    return my_hash(best_doc)


In [41]:
eval_ranking_open_source(query)

'c0af8c84c15f5943bdcf748ae5814b5b'

In [384]:
logger.setLevel(logging.CRITICAL)

# `val_sample` is not defined anywhere in this notebook, so a fresh kernel would
# raise NameError here; evaluate on the validation split.
# NOTE(review): presumably the original run also used the full split (the
# progress bar below shows 3270 iterations) -- confirm.
val_sample = dataset['validation']

print_every = 50
predictions = []
for i, question in enumerate(tqdm(val_sample['question']), start=1):
    # eval_ranking_open_source is defined as (query, top_k=3) in this notebook; the
    # previous call also passed an undefined `finetuned` positionally, which would
    # collide with the top_k keyword
    retrieved_hash = eval_ranking_open_source(question, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append(retrieved_hash == correct_hash)
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum(predictions)/len(predictions)

        print(f'Accuracy: {raw_accuracy}')


  2%|█                                                                       | 51/3270 [00:16<11:01,  4.86it/s]

Step 50
Accuracy without re-ranking: 0.88
Accuracy with re-ranking: 0.84


  3%|██▏                                                                    | 100/3270 [00:31<14:50,  3.56it/s]

Step 100
Accuracy without re-ranking: 0.85
Accuracy with re-ranking: 0.85


  5%|███▎                                                                   | 151/3270 [00:49<10:45,  4.83it/s]

Step 150
Accuracy without re-ranking: 0.86
Accuracy with re-ranking: 0.8533333333333334


  6%|████▎                                                                  | 200/3270 [01:09<15:19,  3.34it/s]

Step 200
Accuracy without re-ranking: 0.865
Accuracy with re-ranking: 0.855


  8%|█████▍                                                                 | 250/3270 [01:21<13:01,  3.86it/s]

Step 250
Accuracy without re-ranking: 0.872
Accuracy with re-ranking: 0.844


  9%|██████▌                                                                | 301/3270 [01:35<11:34,  4.27it/s]

Step 300
Accuracy without re-ranking: 0.85
Accuracy with re-ranking: 0.8366666666666667


 11%|███████▌                                                               | 350/3270 [01:49<16:06,  3.02it/s]

Step 350
Accuracy without re-ranking: 0.86
Accuracy with re-ranking: 0.8457142857142858


 12%|████████▋                                                              | 401/3270 [02:00<08:31,  5.61it/s]

Step 400
Accuracy without re-ranking: 0.8625
Accuracy with re-ranking: 0.8475


 14%|█████████▊                                                             | 451/3270 [02:16<09:10,  5.12it/s]

Step 450
Accuracy without re-ranking: 0.8577777777777778
Accuracy with re-ranking: 0.8466666666666667


 15%|██████████▊                                                            | 500/3270 [02:34<12:59,  3.55it/s]

Step 500
Accuracy without re-ranking: 0.852
Accuracy with re-ranking: 0.844


 17%|███████████▉                                                           | 551/3270 [02:51<12:22,  3.66it/s]

Step 550
Accuracy without re-ranking: 0.8418181818181818
Accuracy with re-ranking: 0.8381818181818181


 18%|█████████████                                                          | 600/3270 [03:03<09:46,  4.55it/s]

Step 600
Accuracy without re-ranking: 0.8383333333333334
Accuracy with re-ranking: 0.8366666666666667


 20%|██████████████▏                                                        | 651/3270 [03:17<11:08,  3.92it/s]

Step 650
Accuracy without re-ranking: 0.8369230769230769
Accuracy with re-ranking: 0.8369230769230769


 21%|███████████████▏                                                       | 701/3270 [03:30<15:07,  2.83it/s]

Step 700
Accuracy without re-ranking: 0.8385714285714285
Accuracy with re-ranking: 0.8371428571428572


 23%|████████████████▎                                                      | 751/3270 [03:42<08:56,  4.69it/s]

Step 750
Accuracy without re-ranking: 0.832
Accuracy with re-ranking: 0.8306666666666667


 24%|█████████████████▎                                                     | 800/3270 [03:58<09:16,  4.44it/s]

Step 800
Accuracy without re-ranking: 0.835
Accuracy with re-ranking: 0.83375


 26%|██████████████████▍                                                    | 851/3270 [04:13<12:11,  3.31it/s]

Step 850
Accuracy without re-ranking: 0.8352941176470589
Accuracy with re-ranking: 0.8341176470588235


 28%|███████████████████▌                                                   | 900/3270 [04:25<15:00,  2.63it/s]

Step 900
Accuracy without re-ranking: 0.8344444444444444
Accuracy with re-ranking: 0.83


 29%|████████████████████▋                                                  | 950/3270 [04:39<09:11,  4.21it/s]

Step 950
Accuracy without re-ranking: 0.8378947368421052
Accuracy with re-ranking: 0.8294736842105264


 31%|█████████████████████▍                                                | 1001/3270 [04:53<12:52,  2.94it/s]

Step 1000
Accuracy without re-ranking: 0.84
Accuracy with re-ranking: 0.83


 32%|██████████████████████▍                                               | 1050/3270 [05:06<08:25,  4.39it/s]

Step 1050
Accuracy without re-ranking: 0.84
Accuracy with re-ranking: 0.8276190476190476


 34%|███████████████████████▌                                              | 1100/3270 [05:19<10:14,  3.53it/s]

Step 1100
Accuracy without re-ranking: 0.8418181818181818
Accuracy with re-ranking: 0.8281818181818181


 35%|████████████████████████▌                                             | 1150/3270 [05:33<21:36,  1.63it/s]

Step 1150
Accuracy without re-ranking: 0.8443478260869566
Accuracy with re-ranking: 0.8295652173913044


 37%|█████████████████████████▋                                            | 1200/3270 [05:51<12:57,  2.66it/s]

Step 1200
Accuracy without re-ranking: 0.8458333333333333
Accuracy with re-ranking: 0.8325


 38%|██████████████████████████▊                                           | 1251/3270 [06:08<06:36,  5.09it/s]

Step 1250
Accuracy without re-ranking: 0.8488
Accuracy with re-ranking: 0.8344


 40%|███████████████████████████▊                                          | 1301/3270 [06:21<08:24,  3.90it/s]

Step 1300
Accuracy without re-ranking: 0.8492307692307692
Accuracy with re-ranking: 0.8369230769230769


 41%|████████████████████████████▉                                         | 1351/3270 [06:35<07:35,  4.21it/s]

Step 1350
Accuracy without re-ranking: 0.8511111111111112
Accuracy with re-ranking: 0.8385185185185186


 43%|█████████████████████████████▉                                        | 1401/3270 [06:49<09:22,  3.32it/s]

Step 1400
Accuracy without re-ranking: 0.8492857142857143
Accuracy with re-ranking: 0.8371428571428572


 44%|███████████████████████████████                                       | 1450/3270 [07:05<06:45,  4.48it/s]

Step 1450
Accuracy without re-ranking: 0.8475862068965517
Accuracy with re-ranking: 0.8358620689655173


 46%|████████████████████████████████                                      | 1500/3270 [07:16<05:52,  5.03it/s]

Step 1500
Accuracy without re-ranking: 0.846
Accuracy with re-ranking: 0.834


 47%|█████████████████████████████████▏                                    | 1550/3270 [07:29<06:51,  4.18it/s]

Step 1550
Accuracy without re-ranking: 0.8464516129032258
Accuracy with re-ranking: 0.8374193548387097


 49%|██████████████████████████████████▎                                   | 1600/3270 [07:43<05:22,  5.19it/s]

Step 1600
Accuracy without re-ranking: 0.845625
Accuracy with re-ranking: 0.835625


 50%|███████████████████████████████████▎                                  | 1650/3270 [07:58<21:24,  1.26it/s]

Step 1650
Accuracy without re-ranking: 0.8460606060606061
Accuracy with re-ranking: 0.8357575757575758


 52%|████████████████████████████████████▍                                 | 1700/3270 [08:13<07:15,  3.61it/s]

Step 1700
Accuracy without re-ranking: 0.8482352941176471
Accuracy with re-ranking: 0.8358823529411765


 54%|█████████████████████████████████████▍                                | 1751/3270 [08:30<05:48,  4.36it/s]

Step 1750
Accuracy without re-ranking: 0.848
Accuracy with re-ranking: 0.8371428571428572


 55%|██████████████████████████████████████▌                               | 1800/3270 [08:45<06:40,  3.67it/s]

Step 1800
Accuracy without re-ranking: 0.8483333333333334
Accuracy with re-ranking: 0.8383333333333334


 57%|███████████████████████████████████████▌                              | 1850/3270 [08:58<11:07,  2.13it/s]

Step 1850
Accuracy without re-ranking: 0.8475675675675676
Accuracy with re-ranking: 0.8394594594594594


 58%|████████████████████████████████████████▋                             | 1901/3270 [09:12<05:18,  4.30it/s]

Step 1900
Accuracy without re-ranking: 0.8494736842105263
Accuracy with re-ranking: 0.8415789473684211


 60%|█████████████████████████████████████████▊                            | 1951/3270 [09:28<03:51,  5.69it/s]

Step 1950
Accuracy without re-ranking: 0.8492307692307692
Accuracy with re-ranking: 0.841025641025641


 61%|██████████████████████████████████████████▊                           | 2001/3270 [09:41<04:26,  4.77it/s]

Step 2000
Accuracy without re-ranking: 0.851
Accuracy with re-ranking: 0.8415


 63%|███████████████████████████████████████████▉                          | 2050/3270 [09:52<04:36,  4.41it/s]

Step 2050
Accuracy without re-ranking: 0.8526829268292683
Accuracy with re-ranking: 0.844390243902439


 64%|████████████████████████████████████████████▉                         | 2101/3270 [10:10<04:09,  4.68it/s]

Step 2100
Accuracy without re-ranking: 0.8514285714285714
Accuracy with re-ranking: 0.8433333333333334


 66%|██████████████████████████████████████████████                        | 2150/3270 [10:20<04:18,  4.33it/s]

Step 2150
Accuracy without re-ranking: 0.8502325581395349
Accuracy with re-ranking: 0.8437209302325581


 67%|███████████████████████████████████████████████                       | 2200/3270 [10:34<03:47,  4.71it/s]

Step 2200
Accuracy without re-ranking: 0.8504545454545455
Accuracy with re-ranking: 0.8445454545454546


 69%|████████████████████████████████████████████████▏                     | 2251/3270 [10:49<03:06,  5.45it/s]

Step 2250
Accuracy without re-ranking: 0.8506666666666667
Accuracy with re-ranking: 0.8444444444444444


 70%|█████████████████████████████████████████████████▏                    | 2300/3270 [11:03<04:24,  3.66it/s]

Step 2300
Accuracy without re-ranking: 0.8504347826086956
Accuracy with re-ranking: 0.8434782608695652


 72%|██████████████████████████████████████████████████▎                   | 2350/3270 [11:20<05:43,  2.68it/s]

Step 2350
Accuracy without re-ranking: 0.8506382978723405
Accuracy with re-ranking: 0.8442553191489361


 73%|███████████████████████████████████████████████████▍                  | 2400/3270 [11:38<03:53,  3.73it/s]

Step 2400
Accuracy without re-ranking: 0.85125
Accuracy with re-ranking: 0.845


 75%|████████████████████████████████████████████████████▍                 | 2450/3270 [11:49<03:46,  3.62it/s]

Step 2450
Accuracy without re-ranking: 0.8526530612244898
Accuracy with re-ranking: 0.846530612244898


 76%|█████████████████████████████████████████████████████▌                | 2501/3270 [12:03<08:41,  1.47it/s]

Step 2500
Accuracy without re-ranking: 0.8524
Accuracy with re-ranking: 0.846


 78%|██████████████████████████████████████████████████████▌               | 2550/3270 [12:15<02:05,  5.73it/s]

Step 2550
Accuracy without re-ranking: 0.8509803921568627
Accuracy with re-ranking: 0.8431372549019608


 80%|███████████████████████████████████████████████████████▋              | 2600/3270 [12:29<03:24,  3.28it/s]

Step 2600
Accuracy without re-ranking: 0.8492307692307692
Accuracy with re-ranking: 0.8430769230769231


 81%|████████████████████████████████████████████████████████▋             | 2650/3270 [12:43<02:06,  4.89it/s]

Step 2650
Accuracy without re-ranking: 0.849811320754717
Accuracy with re-ranking: 0.8422641509433962


 83%|█████████████████████████████████████████████████████████▊            | 2700/3270 [13:02<03:49,  2.48it/s]

Step 2700
Accuracy without re-ranking: 0.85
Accuracy with re-ranking: 0.8422222222222222


 84%|██████████████████████████████████████████████████████████▊           | 2750/3270 [13:15<02:16,  3.81it/s]

Step 2750
Accuracy without re-ranking: 0.8501818181818181
Accuracy with re-ranking: 0.8414545454545455


 86%|███████████████████████████████████████████████████████████▉          | 2801/3270 [13:36<01:48,  4.34it/s]

Step 2800
Accuracy without re-ranking: 0.8492857142857143
Accuracy with re-ranking: 0.8410714285714286


 87%|█████████████████████████████████████████████████████████████         | 2851/3270 [13:47<01:30,  4.61it/s]

Step 2850
Accuracy without re-ranking: 0.8501754385964913
Accuracy with re-ranking: 0.8417543859649123


 89%|██████████████████████████████████████████████████████████████        | 2900/3270 [14:00<01:12,  5.10it/s]

Step 2900
Accuracy without re-ranking: 0.8510344827586207
Accuracy with re-ranking: 0.8420689655172414


 90%|███████████████████████████████████████████████████████████████▏      | 2951/3270 [14:14<01:44,  3.06it/s]

Step 2950
Accuracy without re-ranking: 0.8501694915254238
Accuracy with re-ranking: 0.8410169491525423


 92%|████████████████████████████████████████████████████████████████▏     | 3001/3270 [14:26<00:49,  5.39it/s]

Step 3000
Accuracy without re-ranking: 0.851
Accuracy with re-ranking: 0.8426666666666667


 93%|█████████████████████████████████████████████████████████████████▎    | 3051/3270 [14:39<00:47,  4.66it/s]

Step 3050
Accuracy without re-ranking: 0.8511475409836066
Accuracy with re-ranking: 0.8426229508196721


 95%|██████████████████████████████████████████████████████████████████▎   | 3100/3270 [14:53<00:36,  4.63it/s]

Step 3100
Accuracy without re-ranking: 0.8522580645161291
Accuracy with re-ranking: 0.8425806451612903


 96%|███████████████████████████████████████████████████████████████████▍  | 3150/3270 [15:06<00:24,  4.81it/s]

Step 3150
Accuracy without re-ranking: 0.8526984126984127
Accuracy with re-ranking: 0.8431746031746031


 98%|████████████████████████████████████████████████████████████████████▌ | 3201/3270 [15:21<00:17,  4.06it/s]

Step 3200
Accuracy without re-ranking: 0.8525
Accuracy with re-ranking: 0.8425


 99%|█████████████████████████████████████████████████████████████████████▌| 3251/3270 [15:31<00:03,  5.11it/s]

Step 3250
Accuracy without re-ranking: 0.8529230769230769
Accuracy with re-ranking: 0.8427692307692307


100%|██████████████████████████████████████████████████████████████████████| 3270/3270 [15:35<00:00,  3.49it/s]


In [24]:
# `predictions` holds one boolean per question (retrieved_hash == correct_hash),
# so it is averaged directly; indexing each entry with p[0] raises TypeError on a bool
raw_accuracy = sum(predictions)/len(predictions)
print(f'Accuracy with open source embedder: {raw_accuracy}')

Accuracy without re-ranking: 0.8525993883792049
