In [1]:
BASE_DIR = ""

# 0. Colab 환경 초기화 
* local에서 실행시 불필요함

In [2]:
try:
  import google.colab
  IN_COLAB = True
except:
  IN_COLAB = False

In [3]:
!pip install openai
!pip install streamlit
!pip install datasets
!pip install pinecone-client
!pip install tiktoken
!pip install PyPDF2
!pip install sentence-transformers
!pip install evaluate



In [4]:
###########################################
# 1-1. Google drive mount

if IN_COLAB == True:
    from google.colab import drive
    drive.mount('/content/drive')
    
    BASE_DIR = "/content/drive/MyDrive/Colab Notebooks/quick-start-guide-to-llms/notebooks/"

In [5]:
!cd "/content/drive/MyDrive/Colab Notebooks/quick-start-guide-to-llms/notebooks"

zsh:cd:1: no such file or directory: /content/drive/MyDrive/Colab Notebooks/quick-start-guide-to-llms/notebooks


이 노트북은 최신 openai 패키지 버전을 사용하도록 업데이트되었습니다! 당시 1.6.1

# 1. OpenAI 모델로 애플리케이션 시작하기
* 독점 모델 개요
* OpenAI + 임베딩 / GPT3 / ChatGPT 소개
* 벡터 데이터베이스 소개
* 벡터 데이터베이스, BERT 및 GPT3로 신경/의미 정보 검색 시스템 구축하기

## 1.1 초기화

In [6]:
from openai import OpenAI
from datetime import datetime
import hashlib
import re
import os
from tqdm import tqdm
import numpy as np

import logging

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

In [7]:
if IN_COLAB == True:
    pinecone_key = "6f77dabc-...-08fb8cd4e353"
    openai_key="sk-ctO...Mb5xgXw"
else:
    pinecone_key = os.environ.get('PINECONE_API_KEY')
    openai_key=os.environ.get("OPENAI_API_KEY")
    
client = OpenAI(
    api_key=openai_key
)

INDEX_NAME = 'semantic-search'
NAMESPACE = 'default'
ENGINE = 'text-embedding-ada-002'
ENGINE_2 = 'text-embedding-ada-002'
ENGINE_3_S = 'text-embedding-3-small'  # OpenAI의 3세대 embedding 모델 (소형)
ENGINE_3_L = 'text-embedding-3-large'  # OpenAI의 3세대 embedding 모델 (대형)

In [8]:
# 기존 소스 오류 수정
# import pinecone
# pinecone.init(api_key=pinecone_key, environment="us-west1-gcp")

from pinecone import Pinecone, PodSpec
pinecone = Pinecone(api_key=pinecone_key)

## 1.2 OpenAI 
### 임베딩 함수

In [12]:
# OpenAI API에서 임베딩 목록을 가져오는 헬퍼 함수
def get_embeddings(texts, engine=ENGINE):
    response = client.embeddings.create(
        input=texts,
        model=engine
    )
    
    return [d.embedding for d in list(response.data)]

def get_embedding(text, engine=ENGINE):
    return get_embeddings([text], engine)[0]
    
len(get_embedding('hi')), len(get_embeddings(['hi', 'hello']))

(1536, 2)

### 토크나이저

In [11]:
# 틱토큰 라이브러리 가져오기
import tiktoken

# 'cl100k_base' 모델에 대한 토큰화 도구 초기화하기
# 이 토큰화 도구는 'ada-002' 임베딩 모델과 함께 작동하도록 설계되었습니다.
tokenizer = tiktoken.get_encoding("cl100k_base")

# 토큰라이저를 사용하여 'hey there' 텍스트를 인코딩하기
# 결과 출력은 인코딩된 텍스트를 나타내는 정수 목록입니다.
# 'ada-002' 모델을 사용하여 임베딩하는 데 필요한 입력 형식입니다.
tokenizer.encode('hey there')

[36661, 1070]

In [13]:
tokenizer.encode('안녕')

[31495, 230, 75265, 243]

In [14]:
print(len(tokenizer.encode('hi')))
print(len(tokenizer.encode('안녕')))

1
4


## 1.3 유사도 계산

## 1.5 문서 청킹

### 문서 청킹 함수

In [15]:
# 텍스트를 최대 토큰 수의 청크로 분할하는 함수입니다. OpenAI에서 영감을 얻음
def overlapping_chunks(text, max_tokens = 500, overlapping_factor = 5):
    '''
    max_tokens: 청크당 원하는 토큰 수
    overlapping_factor: 이전 청크와 겹치는 각 청크를 시작할 문장 수
    '''

    # 문장 부호를 사용하여 텍스트 분할
    sentences = re.split(r'[.?!]', text)

    # 각 문장의 토큰 개수 가져오기
    n_tokens = [len(tokenizer.encode(" " + sentence)) for sentence in sentences]
    
    chunks, tokens_so_far, chunk = [], 0, []

    # 튜플로 결합된 문장과 토큰을 반복합니다.
    for sentence, token in zip(sentences, n_tokens):

        # 지금까지의 토큰 수에 현재 문장의 토큰 수를 더한 수가 
        # 최대 토큰 수보다 크면 청크 목록에 청크를 추가하고 재설정합니다.
        # 청크와 지금까지의 토큰을 재설정합니다.
        if tokens_so_far + token > max_tokens:
            chunks.append(". ".join(chunk) + ".")
            if overlapping_factor > 0:
                chunk = chunk[-overlapping_factor:]
                tokens_so_far = sum([len(tokenizer.encode(c)) for c in chunk])
            else:
                chunk = []
                tokens_so_far = 0

        # 현재 문장의 토큰 수가 최대 토큰 수보다 많으면 
        # 토큰 수보다 많으면 다음 문장으로 이동합니다.
        if token > max_tokens:
            continue

        # 그렇지 않으면, 청크에 문장을 추가하고 토큰 수를 합산합니다.
        chunk.append(sentence)
        tokens_so_far += token + 1
    if chunk:
        chunks.append(". ".join(chunk) + ".")

    return chunks

### pdf 문서 읽기

In [23]:
import PyPDF2

principles_of_ko = ''
principles_of_jp1 = ''
principles_of_jp2 = ''

# 읽기 바이너리 모드로 PDF 파일 열기
with open(BASE_DIR + '../data/matrix_ko.pdf', 'rb') as file:

    # PDF 리더 객체를 만듭니다.
    reader = PyPDF2.PdfReader(file)

    # 텍스트를 담을 빈 문자열을 초기화합니다.
    principles_of_ko = ''
    # PDF 파일의 각 페이지를 반복합니다.
    for page in tqdm(reader.pages):
        text = page.extract_text()
        principles_of_ko += '\n\n' + text[text.find(' ]')+2:]

# PDF 파일의 모든 텍스트가 포함된 최종 문자열을 인쇄합니다.
principles_of_ko = principles_of_ko.strip()


# 읽기 바이너리 모드로 PDF 파일 열기
with open(BASE_DIR + '../data/matrix_jp_1.pdf', 'rb') as file:

    # PDF 리더 객체를 만듭니다.
    reader = PyPDF2.PdfReader(file)

    # 텍스트를 담을 빈 문자열을 초기화합니다.
    principles_of_jp1 = ''
    # PDF 파일의 각 페이지를 반복합니다.
    for page in tqdm(reader.pages):
        text = page.extract_text()
        principles_of_jp1 += '\n\n' + text[text.find(' ]')+2:]

# PDF 파일의 모든 텍스트가 포함된 최종 문자열을 인쇄합니다.
principles_of_jp1 = principles_of_jp1.strip()


# 읽기 바이너리 모드로 PDF 파일 열기
with open(BASE_DIR + '../data/matrix_jp_2.pdf', 'rb') as file:

    # PDF 리더 객체를 만듭니다.
    reader = PyPDF2.PdfReader(file)

    # 텍스트를 담을 빈 문자열을 초기화합니다.
    principles_of_jp2 = ''
    # PDF 파일의 각 페이지를 반복합니다.
    for page in tqdm(reader.pages):
        text = page.extract_text()
        principles_of_jp2 += '\n\n' + text[text.find(' ]')+2:]

# PDF 파일의 모든 텍스트가 포함된 최종 문자열을 인쇄합니다.
principles_of_jp2 = principles_of_jp2.strip()



print(len(principles_of_ko))
print(len(principles_of_jp1))
print(len(principles_of_jp2))


100%|█████████████████████████████████████████████████████████████████████| 40/40 [00:01<00:00, 35.50it/s]
100%|█████████████████████████████████████████████████████████████████████| 40/40 [00:01<00:00, 38.01it/s]
100%|█████████████████████████████████████████████████████████████████████| 40/40 [00:01<00:00, 39.47it/s]

14031
15259
15275





In [22]:
principles_of_jp1 = ''

# 읽기 바이너리 모드로 PDF 파일 열기
with open(BASE_DIR + '../data/matrix_jp_1.pdf', 'rb') as file:

    # PDF 리더 객체를 만듭니다.
    reader = PyPDF2.PdfReader(file)

    # 텍스트를 담을 빈 문자열을 초기화합니다.
    principles_of_jp1 = ''
    # PDF 파일의 각 페이지를 반복합니다.
    for page in tqdm(reader.pages):
        text = page.extract_text()
        principles_of_jp1 += '\n\n' + text[text.find(' ]')+2:]

# PDF 파일의 모든 텍스트가 포함된 최종 문자열을 인쇄합니다.
principles_of_jp1 = principles_of_jp1.strip()

print(len(principles_of_jp1))

100%|█████████████████████████████████████████████████████████████████████| 40/40 [00:01<00:00, 35.83it/s]

14031





In [21]:
principles_of_jp2 = ''

# 읽기 바이너리 모드로 PDF 파일 열기
with open(BASE_DIR + '../data/matrix_jp_2.pdf', 'rb') as file:

    # PDF 리더 객체를 만듭니다.
    reader = PyPDF2.PdfReader(file)

    # 텍스트를 담을 빈 문자열을 초기화합니다.
    principles_of_jp2 = ''
    # PDF 파일의 각 페이지를 반복합니다.
    for page in tqdm(reader.pages):
        text = page.extract_text()
        principles_of_jp2 += '\n\n' + text[text.find(' ]')+2:]

# PDF 파일의 모든 텍스트가 포함된 최종 문자열을 인쇄합니다.
principles_of_jp2 = principles_of_jp2.strip()

print(len(principles_of_jp2))

100%|█████████████████████████████████████████████████████████████████████| 40/40 [00:01<00:00, 38.05it/s]

14031





In [18]:
print(principles_of_ko)

eyond Web TechnologyCreate New Business Value
W-Matrix는웹기술로모바일앱을구현할수있는하이브리드개발프레임워크를제공하며모바일환경에서필요한네이티브기능을플러그인으로제공하여비즈니스앱에쉽고편리하게통합할수있도록하는엔터프라이즈크로스앱플랫폼입니다. Mobile App Platform
for Mobile

-Matrixfor MobileMobile App PlatformUniversal App PlatformIntegrated build and deploymentUI mirroring & device sharingEdge-device managementDevelop big data using AIContent제품배경제품이력제품개요제품구성03제품일반
Architecture개발프로세스개발환경운영환경지원사양기대효과08제품기능및특장점
주요실적32적용사례회사개요인원현황Product Lineup36회사소개

품일반01
제품배경제품이력제품개요제품구성

품배경제품일반Matrix for Mobile App PlatformNative App
•모든네이티브기능활용•OS 별코드파편화•매우높은러닝커브•플랫폼별복잡한빌드배포
1stGenObjective-C/SwiftAndroid Java/KotlinWindows .NETHybrid App
•낮은러닝커브의웹표준기술•플러그인기반의네이티브기능•PC 플랫폼미지원2ndGenUniversal App
•모바일/PC 플랫폼빌드/배포지원(iOS, Android, Windows)•오프라인모드지원3thGen
웹표준기술•크로스플랫폼App•Co-Platform 간협업•쉽고편리한App 개발
•Hybrid App 장점계승•웹표준기술로Native Plugin 사용
1세대네이티브앱과2세대하이브리드앱을넘어선멀티플랫폼지원과공유와협업을제공하는3세대App 플랫폼
for Mobile

품이력제품일반
W3CHTML5
Apple iOS 3.0 발표(iPhone 3GS)안드로이드OS 1.0 발표국내아이폰3GS 정식발매삼성안드로이드기반갤럭시S 출시MS Win

In [24]:
print(principles_of_jp1)

eyond Web TechnologyCreate New Business Value
W-Matrixはウェブ技術でモバイルアプリを実装することができるハイブリッド開発フレームワークを提供しモバイル環境で必要なネイティブ機能をプラグインとして提供し、ビジネスアプリに簡単かつ便利に統合できるようにするエンタープライズクロスアプリプラットフォームです。統合できるようにするエンタープライズクロスアプリプラットフォームです。Mobile App Platform
for Mobile

-Matrixfor MobileMobile App PlatformUniversal App PlatformIntegrated build and deploymentUI mirroring & device sharingEdge-device managementDevelop big data using AIContent製品の背景製品履歴製品概要製品構成03製品一般
Architecture開発プロセス開発環境運用環境サポート仕様期待効果08製品機能及び特徴
主な実績32適用事例会社概要人員状況Product Lineup36会社紹介

品一般01
製品の背景製品履歴製品概要製品構成

品の背景製品一般Mobile App PlatformNative App
1stGenObjective-C/SwiftAndroid Java/KotlinWindows .NETHybrid App2ndGenUniversal App3thGen
웹표준기술•クロスプラットフォームアプリ•Co-Platform間のコラボレーション•簡単で便利なアプリ開発
•Hybrid Appのメリット継承•Web標準技術でNative Pluginを使用
第1世代のネイティブアプリと第2世代のハイブリッドアプリを超えたマルチプラットフォームのサポートと共有と協業を提供する第3世代Appプラットフォーム
for Mobile
•すべてのネイティブ機能を活用•OS別コードの断片化•非常に高いランニングカーブ•プラットフォーム毎の複雑なビルド配布•低ランニングカーブのウェブ標準技術•プラグインベースのネイティブ機能•PCプラットフォーム未対応•モバイル/PCプラットフォームのビルド/デプロイメ

In [25]:
print(principles_of_jp2)

eyond Web TechnologyCreate New Business Value
W-Matrixは、ウェブ技術でモバイルアプリ実装することができるハイブリッド開発フレームワークを提供し、モバイル環境で必要なネイティブ機能をプラグインとして提供して、ビジネスアプリに簡単便利に統合できるようにするエンタープライズクロスアプリプラットフォームです。MobileApp Platform
for Mobile

-Matrixfor MobileMobile App PlatformUniversal AppPlatformIntegratedbuildand deploymentUI mirroring& devicesharingEdge-devicemanagementDevelopbigdatausingAIContent製品の背景製品の履歴製品の概要製品の構成03製品⼀般
Architecture開発プロセス開発環境運⽤環境サポート仕様期待効果08製品機能及び特徴
主な実績32適⽤事例会社の概要従業員の状況Product Lineup36会社紹介

品⼀般01
製品の背景製品の履歴製品の概要製品の構成

品の背景製品⼀般Matrix for MobileApp PlatformNative App
1stGenObjective-C/SwiftAndroid Java/KotlinWindows .NETUniversal App3thGen•クロスプラットフォームApp•Co-Platform間の協業•簡単便利なApp開発
•Hybrid Appのメリット継承•ウェブ標準技術でNative Pluginを使⽤
第1世代のネイティブアプリと第2世代のハイブリッドアプリを超えたマルチラットフォームのサポートと共有と協業を提供する第3世代Appプラットフォーム
•すべてのネイティブ機能を活⽤•OS別コードの断⽚化•⾮常に⾼いランニングカーブ•プラットフォーム別の複雑なビルド配布•低いランニングカーブのウェブ標準技術•プラグインベースのネイティブ機能•PCプラットフォームは⾮サポート•モバイル・PCプラットフォームのビルド·配布をサポート(iOS, Android, Windows)•オフラインモードをサポートHybrid App2ndGen
ウェブ標準技術


In [43]:
print(principles_of_ds0[:1000])

rinciples of Data Science
Second Edition
A beginner's guide to statistical techniques and theory to
build eﬀective data-driven applications
Sinan Ozdemir
Sunil Kakade
BIRMINGHAM - MUMBAI

rinciples of Data Science
Second Edition
Copyright © 2018 Packt Publishing
All rights reserved. No part of this book may be reproduced, stored in a retrieval system, or transmitted in any form
or by any means, without the prior written permission of the publisher, except in the case of brief quotations
embedded in critical articles or reviews.
Every effort has been made in the preparation of this book to ensure the accuracy of the information presented.
However, the information contained in this book is sold without warranty, either express or implied. Neither the
authors, nor Packt Publishing or its dealers and distributors, will be held liable for any damages caused or alleged to
have been caused directly or indirectly by this book.
Packt Publishing has endeavored to provide trademark information ab

In [44]:
# 웹스퀘어 개발자 가이드 SP5
with open(BASE_DIR + '../data/sp5.pdf', 'rb') as file:

    # PDF 리더 객체를 만듭니다.
    reader = PyPDF2.PdfReader(file)

    # 텍스트를 담을 빈 문자열을 초기화합니다.
    principles_of_ds1 = ''
    # PDF 파일의 각 페이지를 반복합니다.
    for page in tqdm(reader.pages):
        text = page.extract_text()
        principles_of_ds1 += '\n\n' + text[text.find(' ]')+2:]

# PDF 파일의 모든 텍스트가 포함된 최종 문자열을 인쇄합니다.
principles_of_ds1 = principles_of_ds1.strip()

print(len(principles_of_ds1))

incorrect startxref pointer(3)
100%|████████████████████████████████████████████████████████████████████████| 1411/1411 [00:19<00:00, 73.10it/s]

749019





In [45]:
print(principles_of_ds1[:1000])

스퀘어5 SP5개발 가이드 
 Inswave Systems Co., Ltd. 
 1



차 
 iii
목차
문서이력 
파트I.소개 
1.스튜디오 
1.1.소개 
1.2.PC요구사양 
1.3.Eclipse지원 
1.4.제약사항 
1.5.인스톨러설치 
1.6.Eclipse플러그인설치 
1.7.설정 
1.8.엔진교체 
2.엔진 
2.1.설치 
2.2.사양 
2.3.브라우저지원 
2.4.설정 
2.5.설치관련문제해결 
3.동작 
3.1.코드구조 
3.2.스크립트 
3.3.W-Pack(JS변환) 
3.4.브라우저호출 
3.5.웹표준호환성 
3.6.Scope 
3.7.주요API 
4.WFrame 
4.1.Scope설정 
4.2.ID변경 
4.3.$wVs.$p 
 4.4.WFrame간의화면참조 
4.5.WFrame생성옵션전달 
4.6.WFrame사용시유의사항 
4.7.공통함수 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . iii 
 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . xiii 
 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1 
 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3 
 .	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	

In [46]:
principles_of_ds = principles_of_ds0 + "\n\n" + principles_of_ds1

### 웹 문서 읽기

In [47]:
from urllib.request import urlopen

"""
곤충에 관한 교과서 (일부 내용 발췌)

The Project Gutenberg eBook of The History of Insects
    
This ebook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no restrictions
whatsoever. You may copy it, give it away or re-use it under the terms
of the Project Gutenberg License included with this ebook or online
at www.gutenberg.org. If you are not located in the United States,
you will have to check the laws of the country where you are located
...
"""
text = urlopen('https://www.gutenberg.org/cache/epub/10834/pg10834.txt').read().decode()
print(text[:1000])

﻿The Project Gutenberg eBook of The History of Insects
    
This ebook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no restrictions
whatsoever. You may copy it, give it away or re-use it under the terms
of the Project Gutenberg License included with this ebook or online
at www.gutenberg.org. If you are not located in the United States,
you will have to check the laws of the country where you are located
before using this eBook.

Title: The History of Insects

Author: Unknown

Release date: January 1, 2004 [eBook #10834]
                Most recently updated: December 21, 2020

Language: English



*** START OF THE PROJECT GUTENBERG EBOOK THE HISTORY OF INSECTS ***


E-text prepared by Internet Archive Children's Library, Garrett Alley, and
the Project Gutenberg Online Distributed Proofreading Team



Note: Project Gutenberg also has an HTML version of this
      file which includes the 


### pdf 문서 내용을 청킹하기 (overlapping_factor=0)

In [48]:
split = overlapping_chunks(principles_of_ds, overlapping_factor=0)
avg_length = sum([len(tokenizer.encode(t)) for t in split]) / len(split)
print(f'non-overlapping chunking approach has {len(split)} documents with average length {avg_length:.1f} tokens')

non-overlapping chunking approach has 1256 documents with average length 487.8 tokens


In [49]:
print(split[0])
print("=======================")
print(split[1])

rinciples of Data Science
Second Edition
A beginner's guide to statistical techniques and theory to
build eﬀective data-driven applications
Sinan Ozdemir
Sunil Kakade
BIRMINGHAM - MUMBAI

rinciples of Data Science
Second Edition
Copyright © 2018 Packt Publishing
All rights reserved.  No part of this book may be reproduced, stored in a retrieval system, or transmitted in any form
or by any means, without the prior written permission of the publisher, except in the case of brief quotations
embedded in critical articles or reviews. 
Every effort has been made in the preparation of this book to ensure the accuracy of the information presented. 
However, the information contained in this book is sold without warranty, either express or implied.  Neither the
authors, nor Packt Publishing or its dealers and distributors, will be held liable for any damages caused or alleged to
have been caused directly or indirectly by this book. 
Packt Publishing has endeavored to provide trademark informati

### pdf 문서 내용을 청킹하기 (overlapping_factor=5)

In [50]:
split = overlapping_chunks(principles_of_ds)
avg_length = sum([len(tokenizer.encode(t)) for t in split]) / len(split)
print(f'overlapping chunking approach has {len(split)} documents with average length {avg_length:.1f} tokens')

overlapping chunking approach has 1703 documents with average length 498.1 tokens


In [51]:
print(split[0])
print("=======================")
print(split[1])

rinciples of Data Science
Second Edition
A beginner's guide to statistical techniques and theory to
build eﬀective data-driven applications
Sinan Ozdemir
Sunil Kakade
BIRMINGHAM - MUMBAI

rinciples of Data Science
Second Edition
Copyright © 2018 Packt Publishing
All rights reserved.  No part of this book may be reproduced, stored in a retrieval system, or transmitted in any form
or by any means, without the prior written permission of the publisher, except in the case of brief quotations
embedded in critical articles or reviews. 
Every effort has been made in the preparation of this book to ensure the accuracy of the information presented. 
However, the information contained in this book is sold without warranty, either express or implied.  Neither the
authors, nor Packt Publishing or its dealers and distributors, will be held liable for any damages caused or alleged to
have been caused directly or indirectly by this book. 
Packt Publishing has endeavored to provide trademark informati

### 맞춤형 구분기호 찾기

In [52]:
# 카운터 및 re 라이브러리 가져오기
from collections import Counter
import re

# 'principles_of_ds'에서 하나 이상의 공백이 있는 모든 항목 찾기
matches = re.findall(r'[\s]{1,}', principles_of_ds)

# 문서에서 가장 빈번하게 발생하는 공백 10가지
most_common_spaces = Counter(matches).most_common(100)

# 가장 일반적인 공백과 그 빈도를 출력
print(most_common_spaces)

[(' ', 120939), (' \n', 20070), ('\t', 18399), ('\n', 10728), ('  ', 1592), (' \n ', 1429), (' \n\n\n', 963), ('\n\n', 335), (' \n\n', 327), (' \n \n', 295), ('\t ', 282), ('\n   ', 250), ('\n\n\n', 96), ('\n ', 75), ('\n    ', 73), (' \n \n \n', 45), (' \n \n ', 44), ('     ', 34), ('\n  ', 23), (' \n \n \n ', 23), (' \n\n\n\n\n', 22), ('       ', 19), ('          ', 19), ('   ', 17), ('      ', 16), ('    ', 15), ('         ', 12), (' \n \n \n \n', 12), (' \n \n \n \n ', 12), (' \n \n \n \n \n \n \n', 12), ('\n        ', 10), ('\n                                     ', 10), (' \n\n\n\n', 10), (' \n \n\n', 10), ('        ', 8), ('\n       ', 8), ('\n               ', 8), ('\n\n\n\n\n', 8), ('           ', 6), ('            ', 6), ('\n           ', 6), ('                      ', 5), ('             ', 4), ('                          ', 3), ('\n                   ', 3), ('              ', 3), ('\n                 ', 3), ('\n      ', 3), ('\n                                        ', 3), 

In [53]:
pattern = r'\n\n'
frequency = len(re.findall(pattern, principles_of_ds))
print(frequency)

pattern = r'\n\n\n'
frequency = len(re.findall(pattern, principles_of_ds))
print(frequency)

pattern = r'\n\n '
frequency = len(re.findall(pattern, principles_of_ds))
print(frequency)

pattern = r' \n\n'
frequency = len(re.findall(pattern, principles_of_ds))
print(frequency)

pattern = r' \n\n\n'
frequency = len(re.findall(pattern, principles_of_ds))
print(frequency)

1843
1111
14
1353
1005


### 사용자 정의 구분자로 문서 청킹하기

In [54]:
# Only keep documents of at least 100 characters split by a custom delimiter
split = list(filter(lambda x: len(x) > 50, principles_of_ds.split('\n\n')))
# split = list(principles_of_ds.split('\n\n'))

avg_length = sum([len(tokenizer.encode(t)) for t in split]) / len(split)
print(f'custom delimiter approach has {len(split)} documents with average length {avg_length:.1f} tokens')

custom delimiter approach has 1775 documents with average length 326.2 tokens


In [55]:
print(split[0])
print("=======================")
print(split[1])
print("=======================")
print(split[2])

rinciples of Data Science
Second Edition
A beginner's guide to statistical techniques and theory to
build eﬀective data-driven applications
Sinan Ozdemir
Sunil Kakade
BIRMINGHAM - MUMBAI
rinciples of Data Science
Second Edition
Copyright © 2018 Packt Publishing
All rights reserved. No part of this book may be reproduced, stored in a retrieval system, or transmitted in any form
or by any means, without the prior written permission of the publisher, except in the case of brief quotations
embedded in critical articles or reviews.
Every effort has been made in the preparation of this book to ensure the accuracy of the information presented.
However, the information contained in this book is sold without warranty, either express or implied. Neither the
authors, nor Packt Publishing or its dealers and distributors, will be held liable for any damages caused or alleged to
have been caused directly or indirectly by this book.
Packt Publishing has endeavored to provide trademark information abo

### 청킹된 문서 embedding

In [56]:
# 한글 token size를 고려하여 overlapping이 없는 500 token 문서 청킹 사용 
split = overlapping_chunks(principles_of_ds, overlapping_factor=0)

In [57]:
embeddings = None
for s in tqdm(range(0, len(split), 100)):
    if embeddings is None:
        embeddings = np.array(get_embeddings(split[s:s+100], engine=ENGINE))
    else:
        embeddings = np.vstack([embeddings, np.array(get_embeddings(split[s:s+100], engine=ENGINE))])
    

100%|████████████████████████████████████████████████████████████████████████████| 13/13 [00:26<00:00,  2.04s/it]


In [58]:
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# 'embeddings'라는 텍스트 임베딩 목록이 있다고 가정합니다.
# 먼저, 모든 임베딩 쌍 사이의 코사인 유사도 행렬을 계산합니다.
cosine_sim_matrix = cosine_similarity(embeddings)

# 응집 클러스터링 모델을 인스턴스화합니다.
agg_clustering = AgglomerativeClustering(
    n_clusters=None, # 알고리즘이 데이터를 기반으로 최적의 클러스터 수를 결정합니다.
    distance_threshold=0.1, # 클러스터 간의 모든 쌍별 거리가 0.1보다 커질 때까지 클러스터를 형성합니다.
    metric='precomputed', # 미리 계산된 거리 행렬(1 - 유사도 행렬)을 입력으로 제공합니다. affinity 대신 metric로 사용
    linkage='complete', # 구성 요소 간의 최대 거리를 기준으로 가장 작은 클러스터를 반복적으로 병합하여 클러스터를 형성합니다.
)

# 코사인 거리 행렬(1 - 유사도 행렬)에 모델을 맞춥니다.
agg_clustering.fit(1 - cosine_sim_matrix)

# 각 임베딩에 대한 클러스터 레이블을 가져옵니다.
cluster_labels = agg_clustering.labels_

# 각 클러스터의 임베딩 개수를 출력합니다.
unique_labels, counts = np.unique(cluster_labels, return_counts=True)
for label, count in zip(unique_labels, counts):
    print(f'Cluster {label}: {count} embeddings')


Cluster 0: 3 embeddings
Cluster 1: 2 embeddings
Cluster 2: 3 embeddings
Cluster 3: 2 embeddings
Cluster 4: 3 embeddings
Cluster 5: 2 embeddings
Cluster 6: 2 embeddings
Cluster 7: 71 embeddings
Cluster 8: 2 embeddings
Cluster 9: 4 embeddings
Cluster 10: 2 embeddings
Cluster 11: 4 embeddings
Cluster 12: 2 embeddings
Cluster 13: 2 embeddings
Cluster 14: 4 embeddings
Cluster 15: 3 embeddings
Cluster 16: 2 embeddings
Cluster 17: 2 embeddings
Cluster 18: 2 embeddings
Cluster 19: 2 embeddings
Cluster 20: 2 embeddings
Cluster 21: 2 embeddings
Cluster 22: 2 embeddings
Cluster 23: 2 embeddings
Cluster 24: 4 embeddings
Cluster 25: 3 embeddings
Cluster 26: 6 embeddings
Cluster 27: 3 embeddings
Cluster 28: 2 embeddings
Cluster 29: 4 embeddings
Cluster 30: 2 embeddings
Cluster 31: 2 embeddings
Cluster 32: 2 embeddings
Cluster 33: 2 embeddings
Cluster 34: 3 embeddings
Cluster 35: 2 embeddings
Cluster 36: 2 embeddings
Cluster 37: 2 embeddings
Cluster 38: 2 embeddings
Cluster 39: 4 embeddings
Cluster 4

In [59]:
pruned_documents = []
for _label, count in zip(unique_labels, counts):
    pruned_documents.append('\n\n'.join([text for text, label in zip(split, cluster_labels) if label == _label]))

avg_length = sum([len(tokenizer.encode(t)) for t in pruned_documents]) / len(pruned_documents)
# print(f'Our pruning approach has {len(pruned_documents)} documents with average length {avg_length:.1f} tokens')
print(f'우리의 가지치기 접근 방식에는 평균 길이 {avg_length:.1f} 토큰을 가진 {len(pruned_documents)} 문서가 있습니다.')

우리의 가지치기 접근 방식에는 평균 길이 762.1 토큰을 가진 804 문서가 있습니다.


In [60]:
print(pruned_documents[1])

xml" ,options); 


14
예제파일 
인터넷에서 다운로드 혹은 
WEBSQUARE_DEV_PACK의 GUIDE 프로젝트에서 보기 
(/$p/PopUp/openPopup_fixPosition_$p. xml ) 
그림21-24YouTube 동영상(https://youtu. be/v65iVnWntpk) 


2. 
유틸리티 
 815
22. 1.  WebSquare. date 
표22-1설명 요약 
●날짜및시간관리용API를제공하는유틸리티.  
22. 1. 1.  양력↔음력날짜변환 
22. 1. 1. 1.  toLunar() &toSolar() 
toLunar() 및toSolar() 함수를이용하여양력↔음력날짜간변환이가능합니다.  
코드22-1사용예 1//음력 2022년 7월15일을 양력으로 변환해서 20220812를 반환.  
2
3WebSquare. date. toSolar( 20220715 ); 
예제파일 
인터넷에서 다운로드 혹은 
WEBSQUARE_DEV_PACK에서 보기 
(/_WebSquare. date/toLunar_toSolar_WebSquare_date. xml ) 


16
그림22-1YouTube 동영상(https://youtu. be/vt8qnfyYq4k) 
22. 1. 2.  날짜유효성 확인 
isValidDate() 함수를사용하면,유효하지않은날짜에대해얼럿을표시하고유효성확인결과를반환할수 
있습니다.


코드22-2사용예 1varoptions ={ 
2 useAlert: "false" ,//얼럿 표시 여부 
3 returnType: "json" //반환값 표시 형식 
4} 
5varresult =WebSquare. date. isValidDate(inputCalendar1, inputCalendar1. getValue(), options ); 
6console. log( "result:" ,result); 
예제파일 
인터넷에서 다운로드 혹은 
WEBSQUARE_DEV_PACK의 GUIDE 프로젝트에서 보기 
(/_WebSquare.

### 결과를 pinecone에 저장

In [61]:
upload_texts_to_pinecone(split, batch_size=128)
# upload_texts_to_pinecone(pruned_documents, batch_size=128) # pruned_documents의 경우 embedding max tokensize를 초과할 수 있음

1256

## 1.6 유사 문서 청킹 조회
### embedding 조회

In [62]:
query = 'How do z scores work?'

results_from_pinecone = query_from_pinecone(query, top_k=5)

for result_from_pinecone in results_from_pinecone:
    print(f"{result_from_pinecone['id']}\t{result_from_pinecone['score']:.2f}\t{result_from_pinecone['metadata']['text'][:50]}")

439eaa4b375cf7f3b48e34452ea3df58	0.84	

Let's begin by learning a very  important value 
35b7c7fb9aabb0c2dad8d808138871bc	0.84	
This chart makes it very easy to pick out the ind
3292012bbf240288b4e0cde8d6197bcc	0.83	 Using the z-score and the
empirical rule, we will
0914b8d2847049fe675f39c5680ff2e2	0.81	 It is important to note that by doing this, the p
ca34cce37134ae70556b6fd541becba5	0.80	
Everything from how we obtain/sample data to how 


In [63]:
query = '그리드는 어떤 컴포넌트인가요?'

results_from_pinecone = query_from_pinecone(query, top_k=5)

for result_from_pinecone in results_from_pinecone:
    print(f"{result_from_pinecone['id']}\t{result_from_pinecone['score']:.2f}\t{result_from_pinecone['metadata']['text'][:50]}")

ce09866ca69ec215f9258032fb056598	0.83	 
동일한ID의컴포넌트가 여러개존재할경우미리보기를 할수없습니다.  
7. 11. 4.  D
95f508bd40f24cddd0148fa1b5a14729	0.82	 
그림7-28컴포넌트퀵툴바 


29
GridView의 경우,컬럼을선택한후스페이스바를 누
029d7984aa5e95c89d96cff52ee17d61	0.82	Design 뷰에서직접마우스드래그하여 컴포넌트의 위치와크기를결정합니다.  (더블클릭할경우해
9a67165ce1508b47f0be0810c24023b4	0.82	 관련컴포넌트 
●아래와같은선택컴포넌트의 경우선택대상항목을지정해야합니다. 브라우저는 개발자
b115fe5b0485485fcbae9be03655f83f	0.82	표편집 
GridView나 TableLayout과 같은표형식컴포넌트의 경우,아래의아이콘을사


### reranking을 위한 CrossEncoder 초기화

CrossEncoder는 자연어 처리(NLP) 분야에서 사용되는 모델 중 하나로, 두 개의 문장 간의 관계를 파악하고 점수를 매기는 데 사용됩니다. 초보자를 위해 쉽게 설명하자면 다음과 같습니다:

1. 두 개의 문장을 입력으로 받습니다. 예를 들어, 질문과 답변, 텍스트와 해당 텍스트의 요약 등이 될 수 있습니다.

2. CrossEncoder는 두 문장을 하나의 시퀀스로 연결하고, 이를 사전 훈련된 언어 모델(예: BERT)에 입력합니다.

3. 언어 모델은 연결된 시퀀스를 처리하고, 두 문장 간의 관계를 파악하기 위해 학습합니다. 이때, 두 문장 간의 상호작용을 고려하여 representation을 생성합니다.

4. 최종 representation은 분류기(classifier)나 회귀기(regressor)에 전달되어, 두 문장 간의 관계를 점수로 나타냅니다. 이 점수는 두 문장이 얼마나 잘 어울리는지, 또는 한 문장이 다른 문장에 대한 적절한 응답인지 등을 나타낼 수 있습니다.

CrossEncoder는 질의응답, 문서 검색, 텍스트 유사도 측정 등 다양한 NLP 작업에 활용될 수 있습니다. 두 문장을 교차하여 encoding하기 때문에 CrossEncoder라는 이름이 붙었습니다. 단, CrossEncoder는 inference 시에 모든 문장 쌍을 일일이 계산해야 하므로, 계산 비용이 높다는 단점이 있습니다.

---

아래 코드에서 사용할 CrossEncoder 모델로는 MS MARCO 데이터셋으로 fine-tuning된 모델을 추천합니다. MS MARCO는 대규모 질의응답 및 패시지 랭킹 데이터셋으로, 이 데이터셋으로 학습된 모델은 관련성 있는 문서를 찾는 작업에 적합합니다.

특히, 'sentence-transformers' 라이브러리에서 제공하는 'cross-encoder/ms-marco-MiniLM-L-6-v2' 또는 'cross-encoder/ms-marco-MiniLM-L-12-v2' 모델을 추천합니다. 이 모델들은 MiniLM 아키텍처를 기반으로 하며, MS MARCO 데이터셋으로 fine-tuning되었습니다.

1. 'cross-encoder/ms-marco-MiniLM-L-6-v2': 6개의 레이어를 가진 MiniLM 모델로, 비교적 작은 크기와 빠른 추론 속도를 가지고 있습니다.

2. 'cross-encoder/ms-marco-MiniLM-L-12-v2': 12개의 레이어를 가진 MiniLM 모델로, L-6 버전보다 더 큰 모델 크기와 더 높은 성능을 가지고 있습니다.

모델 선택 시에는 성능과 추론 속도 사이의 trade-off를 고려해야 합니다. 만약 추론 속도가 중요한 경우에는 L-6 버전을, 성능이 더 중요한 경우에는 L-12 버전을 선택할 수 있습니다.


In [104]:
"""
이 예는 의미론적 텍스트 유사성(STS)을 위한 교차 인코더를 사용하여 쿼리와 말뭉치에서 가능한 모든
문장과 의미론적 텍스트 유사성(STS)을 위한 교차 인코더를 사용하여 점수를 계산합니다.
그런 다음 주어진 쿼리에 대해 가장 유사한 문장을 출력합니다.
"""
from sentence_transformers.cross_encoder import CrossEncoder
import numpy as np
from torch import nn

# 사전 학습된 크로스 인코더
# cross_encoder = CrossEncoder('cross-encoder/mmarco-mMiniLMv2-L12-H384-v1')  # 저장소에서 삭제됨
# cross_encoder = CrossEncoder('jeffwan/mmarco-mMiniLMv2-L12-H384-v1')
# cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')  # Claude가 추천한 모델
# cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')  # Claude가 추천한 모델
cross_encoder = CrossEncoder('bongsoo/klue-cross-encoder-v1')  # 한국어 지원 모델

from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# KLUE RoBERTa-large 모델과 토크나이저 로드
model_name = "klue/roberta-large"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Pipeline을 사용하여 CrossEncoder와 유사한 기능 구현
pipeline_encoder = pipeline("text-classification", model=model, tokenizer=tokenizer)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### embedding 조회 + reranking

아래 코드는 Pinecone 데이터베이스에서 주어진 쿼리에 대한 결과를 가져오고, 필요에 따라 CrossEncoder를 사용하여 재순위화(re-ranking)하는 함수입니다. 함수의 동작을 단계별로 설명하겠습니다.

1. `query_from_pinecone` 함수를 사용하여 Pinecone 데이터베이스에서 쿼리에 대한 상위 `top_k`개의 결과를 가져옵니다.

2. 결과가 없으면 빈 리스트를 반환합니다.

3. `verbose` 플래그가 True인 경우, 쿼리와 Pinecone에서 가져온 각 결과의 ID, 점수, 텍스트 일부를 출력합니다.

4. `re_rank` 플래그가 True인 경우, CrossEncoder를 사용하여 결과를 재순위화합니다.
   - 쿼리와 각 결과의 텍스트를 조합하여 `sentence_combinations` 리스트를 만듭니다.
   - CrossEncoder의 `predict` 메서드를 사용하여 각 조합에 대한 유사도 점수를 계산합니다.
   - 유사도 점수를 내림차순으로 정렬합니다.
   - `verbose` 플래그가 True인 경우, 재순위화된 결과의 ID, Pinecone 점수, CrossEncoder 점수, 텍스트 일부를 출력합니다.
   - 재순위화된 결과를 `final_results` 리스트에 추가합니다.

5. `re_rank` 플래그가 False인 경우, Pinecone에서 가져온 결과를 그대로 사용합니다.
   - `verbose` 플래그가 True인 경우, 각 결과의 ID, 점수, 텍스트 일부를 출력합니다.
   - 결과를 `final_results` 리스트에 추가합니다.

6. 최종적으로 `final_results` 리스트를 반환합니다.

이 함수는 Pinecone 데이터베이스에서 관련성이 높은 결과를 가져오고, 필요에 따라 CrossEncoder를 사용하여 결과의 순위를 조정합니다. 이를 통해 쿼리와 더 잘 일치하는 결과를 얻을 수 있습니다. `verbose` 플래그를 사용하여 중간 결과를 출력할 수 있으며, `re_rank` 플래그를 사용하여 재순위화 여부를 결정할 수 있습니다.

In [108]:
def get_results_from_pinecone(query, top_k=3, re_rank=False, verbose=True, cross_encoder=cross_encoder, use_transformers=False, max_length=512):

    results_from_pinecone = query_from_pinecone(query, top_k=top_k)
    if not results_from_pinecone:
        return []

    if verbose:
        print("Query:", query)
        for result_from_pinecone in results_from_pinecone:
            print(f"Pinecone Result==> {result_from_pinecone['id']}\t{result_from_pinecone['score']:.2f}\t{result_from_pinecone['metadata']['text'][:50]}")
    
    
    final_results = []

    if re_rank:
        if verbose:
            print('Document ID (Hash)\t\tRetrieval Score\tCE Score\tText')

        sentence_combinations = [[query, result_from_pinecone['metadata']['text']] for result_from_pinecone in results_from_pinecone]

        # print(sentence_combinations)

        # 이러한 조합에 대한 유사도 점수를 계산합니다.
        if use_transformers:
            similarity_scores = []
            for combination in sentence_combinations:
                input_text = " ".join(combination)
                score = cross_encoder(input_text[:max_length])[0]["score"]
                # score = cross_encoder(input_text)[0]["score"]
                similarity_scores.append(score)
        else:
            similarity_scores = cross_encoder.predict(sentence_combinations, activation_fct=nn.Sigmoid())

        # 점수를 내림차순으로 정렬
        sim_scores_argsort = reversed(np.argsort(similarity_scores))

        # 점수를 인쇄합니다.
        for idx in sim_scores_argsort:
            result_from_pinecone = results_from_pinecone[idx]
            final_results.append(result_from_pinecone)
            if verbose:
                print(f"Reranked Result==> {result_from_pinecone['id']}\t{result_from_pinecone['score']:.2f}\t{similarity_scores[idx]:.2f}\t{result_from_pinecone['metadata']['text'][:50]}")
        return final_results

    if verbose:
        print('Document ID (Hash)\t\tRetrieval Score\tText')
    for result_from_pinecone in results_from_pinecone:
        final_results.append(result_from_pinecone)
        if verbose:
            print(f"BBB: {result_from_pinecone['id']}\t{result_from_pinecone['score']:.2f}\t{result_from_pinecone['metadata']['text'][:50]}")

    return final_results

In [109]:
query = 'How do z scores work?'
final_results = get_results_from_pinecone(query, top_k=3, re_rank=True, verbose=True, use_transformers=False)

Query: How do z scores work?
Pinecone Result==> 439eaa4b375cf7f3b48e34452ea3df58	0.84	

Let's begin by learning a very  important value 
Pinecone Result==> 35b7c7fb9aabb0c2dad8d808138871bc	0.84	
This chart makes it very easy to pick out the ind
Pinecone Result==> 3292012bbf240288b4e0cde8d6197bcc	0.83	 Using the z-score and the
empirical rule, we will
Document ID (Hash)		Retrieval Score	CE Score	Text


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Reranked Result==> 3292012bbf240288b4e0cde8d6197bcc	0.83	0.62	 Using the z-score and the
empirical rule, we will
Reranked Result==> 439eaa4b375cf7f3b48e34452ea3df58	0.84	0.59	

Let's begin by learning a very  important value 
Reranked Result==> 35b7c7fb9aabb0c2dad8d808138871bc	0.84	0.36	
This chart makes it very easy to pick out the ind


In [110]:
final_results = get_results_from_pinecone(query, top_k=3, re_rank=True, verbose=True, cross_encoder=pipeline_encoder, use_transformers=True)

Query: How do z scores work?
Pinecone Result==> 439eaa4b375cf7f3b48e34452ea3df58	0.84	

Let's begin by learning a very  important value 
Pinecone Result==> 35b7c7fb9aabb0c2dad8d808138871bc	0.83	
This chart makes it very easy to pick out the ind
Pinecone Result==> 3292012bbf240288b4e0cde8d6197bcc	0.83	 Using the z-score and the
empirical rule, we will
Document ID (Hash)		Retrieval Score	CE Score	Text
Reranked Result==> 35b7c7fb9aabb0c2dad8d808138871bc	0.83	0.53	
This chart makes it very easy to pick out the ind
Reranked Result==> 3292012bbf240288b4e0cde8d6197bcc	0.83	0.53	 Using the z-score and the
empirical rule, we will
Reranked Result==> 439eaa4b375cf7f3b48e34452ea3df58	0.84	0.53	

Let's begin by learning a very  important value 


In [112]:
query = '그리드는 어떤 컴포넌트인가요?'
final_results = get_results_from_pinecone(query, top_k=3, re_rank=True, verbose=True, use_transformers=False)

Query: 그리드는 어떤 컴포넌트인가요?
Pinecone Result==> 0d33226e2d5cfc3d850e6c8acfe2611c	0.83	3. 1.  권장 
모바일웹개발에권장되는컴포넌트는 아래와같습니다.  
●CheckBox ●
Pinecone Result==> ce09866ca69ec215f9258032fb056598	0.83	 
동일한ID의컴포넌트가 여러개존재할경우미리보기를 할수없습니다.  
7. 11. 4.  D
Pinecone Result==> 95f508bd40f24cddd0148fa1b5a14729	0.82	 
그림7-28컴포넌트퀵툴바 


29
GridView의 경우,컬럼을선택한후스페이스바를 누
Document ID (Hash)		Retrieval Score	CE Score	Text


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Reranked Result==> 0d33226e2d5cfc3d850e6c8acfe2611c	0.83	0.60	3. 1.  권장 
모바일웹개발에권장되는컴포넌트는 아래와같습니다.  
●CheckBox ●
Reranked Result==> 95f508bd40f24cddd0148fa1b5a14729	0.82	0.48	 
그림7-28컴포넌트퀵툴바 


29
GridView의 경우,컬럼을선택한후스페이스바를 누
Reranked Result==> ce09866ca69ec215f9258032fb056598	0.83	0.23	 
동일한ID의컴포넌트가 여러개존재할경우미리보기를 할수없습니다.  
7. 11. 4.  D


In [113]:
final_results = get_results_from_pinecone(query, top_k=3, re_rank=True, verbose=True, cross_encoder=pipeline_encoder, use_transformers=True)

Query: 그리드는 어떤 컴포넌트인가요?
Pinecone Result==> 0d33226e2d5cfc3d850e6c8acfe2611c	0.83	3. 1.  권장 
모바일웹개발에권장되는컴포넌트는 아래와같습니다.  
●CheckBox ●
Pinecone Result==> ce09866ca69ec215f9258032fb056598	0.83	 
동일한ID의컴포넌트가 여러개존재할경우미리보기를 할수없습니다.  
7. 11. 4.  D
Pinecone Result==> 95f508bd40f24cddd0148fa1b5a14729	0.82	 
그림7-28컴포넌트퀵툴바 


29
GridView의 경우,컬럼을선택한후스페이스바를 누
Document ID (Hash)		Retrieval Score	CE Score	Text
Reranked Result==> 0d33226e2d5cfc3d850e6c8acfe2611c	0.83	0.54	3. 1.  권장 
모바일웹개발에권장되는컴포넌트는 아래와같습니다.  
●CheckBox ●
Reranked Result==> 95f508bd40f24cddd0148fa1b5a14729	0.82	0.54	 
그림7-28컴포넌트퀵툴바 


29
GridView의 경우,컬럼을선택한후스페이스바를 누
Reranked Result==> ce09866ca69ec215f9258032fb056598	0.83	0.54	 
동일한ID의컴포넌트가 여러개존재할경우미리보기를 할수없습니다.  
7. 11. 4.  D


In [114]:
query = 'How do z scores work?'
final_results = get_results_from_pinecone(query, top_k=10, re_rank=True, use_transformers=False)

Query: How do z scores work?
Pinecone Result==> 439eaa4b375cf7f3b48e34452ea3df58	0.84	

Let's begin by learning a very  important value 
Pinecone Result==> 35b7c7fb9aabb0c2dad8d808138871bc	0.84	
This chart makes it very easy to pick out the ind
Pinecone Result==> 3292012bbf240288b4e0cde8d6197bcc	0.83	 Using the z-score and the
empirical rule, we will
Pinecone Result==> 0914b8d2847049fe675f39c5680ff2e2	0.81	 It is important to note that by doing this, the p
Pinecone Result==> ca34cce37134ae70556b6fd541becba5	0.80	
Everything from how we obtain/sample data to how 
Pinecone Result==> dd87b4adeb2aefea7c946922b5256f04	0.79	96, meaning that we can expect something between
1
Pinecone Result==> 87ddb7aacd3c77bd18906c45b7709fa2	0.79	75
# finding the percentage of people within two s
Pinecone Result==> c658051c8f0520ed7bd8cea5d086aa0c	0.79	05 (our chosen significance level), which means th
Pinecone Result==> eb06c205bb7b419d2bba0ff1929491cf	0.79	
First, we should prove that this is a binomial se

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Reranked Result==> 3292012bbf240288b4e0cde8d6197bcc	0.83	0.62	 Using the z-score and the
empirical rule, we will
Reranked Result==> 439eaa4b375cf7f3b48e34452ea3df58	0.84	0.59	

Let's begin by learning a very  important value 
Reranked Result==> 0914b8d2847049fe675f39c5680ff2e2	0.81	0.48	 It is important to note that by doing this, the p
Reranked Result==> 35b7c7fb9aabb0c2dad8d808138871bc	0.84	0.36	
This chart makes it very easy to pick out the ind
Reranked Result==> ca34cce37134ae70556b6fd541becba5	0.80	0.31	
Everything from how we obtain/sample data to how 
Reranked Result==> 87ddb7aacd3c77bd18906c45b7709fa2	0.79	0.26	75
# finding the percentage of people within two s
Reranked Result==> dd87b4adeb2aefea7c946922b5256f04	0.79	0.04	96, meaning that we can expect something between
1
Reranked Result==> c658051c8f0520ed7bd8cea5d086aa0c	0.79	0.03	05 (our chosen significance level), which means th
Reranked Result==> 04947b1d3177882fd4aadceb6108656a	0.79	0.03	 For example, if we have a die and

In [115]:
final_results = get_results_from_pinecone(query, top_k=10, re_rank=True, cross_encoder=pipeline_encoder, use_transformers=True)

Query: How do z scores work?
Pinecone Result==> 439eaa4b375cf7f3b48e34452ea3df58	0.84	

Let's begin by learning a very  important value 
Pinecone Result==> 35b7c7fb9aabb0c2dad8d808138871bc	0.84	
This chart makes it very easy to pick out the ind
Pinecone Result==> 3292012bbf240288b4e0cde8d6197bcc	0.83	 Using the z-score and the
empirical rule, we will
Pinecone Result==> 0914b8d2847049fe675f39c5680ff2e2	0.81	 It is important to note that by doing this, the p
Pinecone Result==> ca34cce37134ae70556b6fd541becba5	0.80	
Everything from how we obtain/sample data to how 
Pinecone Result==> dd87b4adeb2aefea7c946922b5256f04	0.79	96, meaning that we can expect something between
1
Pinecone Result==> 87ddb7aacd3c77bd18906c45b7709fa2	0.79	75
# finding the percentage of people within two s
Pinecone Result==> c658051c8f0520ed7bd8cea5d086aa0c	0.79	05 (our chosen significance level), which means th
Pinecone Result==> eb06c205bb7b419d2bba0ff1929491cf	0.79	
First, we should prove that this is a binomial se

In [116]:
query = '그리드는 어떤 컴포넌트인가요?'
final_results = get_results_from_pinecone(query, top_k=10, re_rank=True, use_transformers=False)

Query: 그리드는 어떤 컴포넌트인가요?
Pinecone Result==> 0d33226e2d5cfc3d850e6c8acfe2611c	0.83	3. 1.  권장 
모바일웹개발에권장되는컴포넌트는 아래와같습니다.  
●CheckBox ●
Pinecone Result==> ce09866ca69ec215f9258032fb056598	0.83	 
동일한ID의컴포넌트가 여러개존재할경우미리보기를 할수없습니다.  
7. 11. 4.  D
Pinecone Result==> 95f508bd40f24cddd0148fa1b5a14729	0.82	 
그림7-28컴포넌트퀵툴바 


29
GridView의 경우,컬럼을선택한후스페이스바를 누
Pinecone Result==> 029d7984aa5e95c89d96cff52ee17d61	0.82	Design 뷰에서직접마우스드래그하여 컴포넌트의 위치와크기를결정합니다.  (더블클릭할경우해
Pinecone Result==> 9a67165ce1508b47f0be0810c24023b4	0.82	 관련컴포넌트 
●아래와같은선택컴포넌트의 경우선택대상항목을지정해야합니다. 브라우저는 개발자
Pinecone Result==> b115fe5b0485485fcbae9be03655f83f	0.82	표편집 
GridView나 TableLayout과 같은표형식컴포넌트의 경우,아래의아이콘을사
Pinecone Result==> 643508429198e45aa6836d86e4b5f0fc	0.82	 
■모바일환경에서셀터치로목록열기/닫기 추가.  
○모바일추가.  
○여러개의GridVie
Pinecone Result==> e57f4b73ba97e453b7a243694a97097c	0.82	 
그림16-3닫기버튼 
예제파일 
인터넷에서 다운로드 혹은 
WEBSQUARE_DEV_P
Pinecone Result==> cf4c0fe136cf53d8f18b241289f54714	0.82	6.  그리드 대량데이터 
 원인 해결방안 ●10만셀이상의데이터를사용하는경우 ●환경에맞게적
Pine

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Reranked Result==> 0d33226e2d5cfc3d850e6c8acfe2611c	0.83	0.60	3. 1.  권장 
모바일웹개발에권장되는컴포넌트는 아래와같습니다.  
●CheckBox ●
Reranked Result==> 4e5e8bb9c493de4dd7d7100fa3c582e6	0.82	0.59	 
 
●TextBox 컴포넌트추가.  
●(해당없음. ) 
 
 
 

30
 *Span
Reranked Result==> 95f508bd40f24cddd0148fa1b5a14729	0.82	0.48	 
그림7-28컴포넌트퀵툴바 


29
GridView의 경우,컬럼을선택한후스페이스바를 누
Reranked Result==> b115fe5b0485485fcbae9be03655f83f	0.82	0.44	표편집 
GridView나 TableLayout과 같은표형식컴포넌트의 경우,아래의아이콘을사
Reranked Result==> cf4c0fe136cf53d8f18b241289f54714	0.82	0.43	6.  그리드 대량데이터 
 원인 해결방안 ●10만셀이상의데이터를사용하는경우 ●환경에맞게적
Reranked Result==> 029d7984aa5e95c89d96cff52ee17d61	0.82	0.40	Design 뷰에서직접마우스드래그하여 컴포넌트의 위치와크기를결정합니다.  (더블클릭할경우해
Reranked Result==> e57f4b73ba97e453b7a243694a97097c	0.82	0.38	 
그림16-3닫기버튼 
예제파일 
인터넷에서 다운로드 혹은 
WEBSQUARE_DEV_P
Reranked Result==> ce09866ca69ec215f9258032fb056598	0.83	0.23	 
동일한ID의컴포넌트가 여러개존재할경우미리보기를 할수없습니다.  
7. 11. 4.  D
Reranked Result==> 9a67165ce1508b47f0be0810c24023b4	0.82	0.22	 관련컴포넌트 
●아래와같은선택컴포넌트의 경우선택대상항목을지정

In [117]:
final_results = get_results_from_pinecone(query, top_k=10, re_rank=True, cross_encoder=pipeline_encoder, use_transformers=True)

Query: 그리드는 어떤 컴포넌트인가요?
Pinecone Result==> 0d33226e2d5cfc3d850e6c8acfe2611c	0.83	3. 1.  권장 
모바일웹개발에권장되는컴포넌트는 아래와같습니다.  
●CheckBox ●
Pinecone Result==> ce09866ca69ec215f9258032fb056598	0.83	 
동일한ID의컴포넌트가 여러개존재할경우미리보기를 할수없습니다.  
7. 11. 4.  D
Pinecone Result==> 95f508bd40f24cddd0148fa1b5a14729	0.82	 
그림7-28컴포넌트퀵툴바 


29
GridView의 경우,컬럼을선택한후스페이스바를 누
Pinecone Result==> 029d7984aa5e95c89d96cff52ee17d61	0.82	Design 뷰에서직접마우스드래그하여 컴포넌트의 위치와크기를결정합니다.  (더블클릭할경우해
Pinecone Result==> 9a67165ce1508b47f0be0810c24023b4	0.82	 관련컴포넌트 
●아래와같은선택컴포넌트의 경우선택대상항목을지정해야합니다. 브라우저는 개발자
Pinecone Result==> b115fe5b0485485fcbae9be03655f83f	0.82	표편집 
GridView나 TableLayout과 같은표형식컴포넌트의 경우,아래의아이콘을사
Pinecone Result==> 643508429198e45aa6836d86e4b5f0fc	0.82	 
■모바일환경에서셀터치로목록열기/닫기 추가.  
○모바일추가.  
○여러개의GridVie
Pinecone Result==> e57f4b73ba97e453b7a243694a97097c	0.82	 
그림16-3닫기버튼 
예제파일 
인터넷에서 다운로드 혹은 
WEBSQUARE_DEV_P
Pinecone Result==> cf4c0fe136cf53d8f18b241289f54714	0.82	6.  그리드 대량데이터 
 원인 해결방안 ●10만셀이상의데이터를사용하는경우 ●환경에맞게적
Pine

## 1.7 파인콘 데이터삭제

In [118]:
delete_texts_from_pinecone(pruned_documents[:800])
delete_texts_from_pinecone(pruned_documents[800:])
delete_texts_from_pinecone(split[:800])
delete_texts_from_pinecone(split[800:])

{}

# 2. BoolQ 데이터셋을 이용한 성능 검증

In [119]:
from datasets import load_dataset
from evaluate import load


dataset = load_dataset("boolq")

In [120]:
dataset['validation'][0]

{'question': 'does ethanol take more energy make that produces',
 'answer': False,
 'passage': "All biomass goes through at least some of these steps: it needs to be grown, collected, dried, fermented, distilled, and burned. All of these steps require resources and an infrastructure. The total amount of energy input into the process compared to the energy released by burning the resulting ethanol fuel is known as the energy balance (or ``energy returned on energy invested''). Figures compiled in a 2007 report by National Geographic Magazine point to modest results for corn ethanol produced in the US: one unit of fossil-fuel energy is required to create 1.3 energy units from the resulting ethanol. The energy balance for sugarcane ethanol produced in Brazil is more favorable, with one unit of fossil-fuel energy required to create 8 from the ethanol. Energy balance estimates are not easily produced, thus numerous such reports have been generated that are contradictory. For instance, a sep

## 2.1 BoolQ 데이터셋의 설명(passage)을 Pinecone에 저장

In [121]:
for idx in tqdm(range(0, len(dataset['validation']), 128)):
    data_sample = dataset['validation'][idx:idx + 128]

    passages = data_sample['passage']
    # delete_texts_from_pinecone(passages)
    upload_texts_to_pinecone(passages)

100%|████████████████████████████████████████████████████████████████████████████| 26/26 [01:25<00:00,  3.29s/it]


In [122]:
from random import sample

query = sample(dataset['validation']['question'], 1)[0]
print(query)
final_results = get_results_from_pinecone(query, top_k=3, re_rank=True)


does caesar live in dawn of the planet of the apes
Query: does caesar live in dawn of the planet of the apes
Pinecone Result==> 364688074530d43de825592b1837568b	0.80	Malcolm and Caesar acknowledge their friendship, w
Pinecone Result==> e54eeaa5d9eb793c34cc0fc6a2cf8d39	0.73	In most of the film, Joe was portrayed by creature
Pinecone Result==> e1de96dda4c5cca3d8ba85fc7883a87d	0.73	Following the events of The Rise of Cobra, Duke (C
Document ID (Hash)		Retrieval Score	CE Score	Text


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Reranked Result==> 364688074530d43de825592b1837568b	0.80	0.64	Malcolm and Caesar acknowledge their friendship, w
Reranked Result==> e54eeaa5d9eb793c34cc0fc6a2cf8d39	0.73	0.12	In most of the film, Joe was portrayed by creature
Reranked Result==> e1de96dda4c5cca3d8ba85fc7883a87d	0.73	0.10	Following the events of The Rise of Cobra, Duke (C


## 2.2 질문(question)을 키로 하여 설명(passage)에 대한 hash 값을 추출하여 저장

In [123]:
q_to_hash = {data['question']: my_hash(data['passage']) for data in dataset['validation']}

q_to_hash[query]

'364688074530d43de825592b1837568b'

In [124]:
len(dataset['validation'])

3270

In [125]:
# super_glue_metric = load('super_glue', 'boolq') # 정확도만 확인합니다.

# 1000개의 유효성 검사 데이터 포인트에 대한 성능 재순위를 테스트해 보겠습니다.
# 여기서는 속도를 높이기 위해 Pinecone을 사용할 수 없습니다.
# 하지만 Pinecone으로 파이프라인의 지연 시간을 테스트하기에도 좋은 시기입니다.
val_sample = dataset['validation'][:100]

## 2.3 embedding을 이용한 데이터 조회의 정확도 검증

In [126]:
logger.setLevel(logging.CRITICAL)

predictions = []

# Pinecone의 지연 시간이 일관되게 유지되도록 top_k를 동일하게 유지합니다.
# 그리고 유일한 큰 시간 차이는 리랭킹에서 발생합니다.
for question in tqdm(val_sample['question']):
    retrieved_hash = get_results_from_pinecone(question, top_k=1, re_rank=False, verbose=False)[0]['id']
    correct_hash = q_to_hash[question]
    predictions.append(retrieved_hash == correct_hash)
    
accuracy = sum(predictions)/len(predictions)

print(f'Accuracy without re-ranking: {accuracy}')

100%|██████████████████████████████████████████████████████████████████████████| 100/100 [00:59<00:00,  1.67it/s]

Accuracy without re-ranking: 0.84





## 2.4 embedding + reranking을 이용한 데이터 조회의 정확도 검증

__rerank와 그렇지 않은 경우의 시간 차이에 유의하세요.__

* text-embedding-ada-002
    * embedding만 사용하는 경우 0.85
    * cross_encoder = CrossEncoder('jeffwan/mmarco-mMiniLMv2-L12-H384-v1') 를 사용하는 경우 0.84
    * cross_encoder = CrossEncoder('bongsoo/klue-cross-encoder-v1') 를 사용하는 경우 0.68
* text-embedding-3-small'  # OpenAI의 3세대 embedding 모델 (소형)
    * embedding만 사용하는 경우 0.88
    * cross_encoder = CrossEncoder('jeffwan/mmarco-mMiniLMv2-L12-H384-v1') 를 사용하는 경우 0.86
    * cross_encoder = CrossEncoder('bongsoo/klue-cross-encoder-v1') 를 사용하는 경우 0.68
* text-embedding-3-large'  # OpenAI의 3세대 embedding 모델 (대형)
    * embedding만 사용하는 경우  0.93
    * cross_encoder = CrossEncoder('jeffwan/mmarco-mMiniLMv2-L12-H384-v1') 를 사용하는 경우 0.87
    * cross_encoder = CrossEncoder('bongsoo/klue-cross-encoder-v1') 를 사용하는 경우 0.74

In [127]:
cross_encoder1 = CrossEncoder('jeffwan/mmarco-mMiniLMv2-L12-H384-v1')
cross_encoder2 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')  # Claude가 추천한 모델
cross_encoder3 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')  # Claude가 추천한 모델
cross_encoder4 = CrossEncoder('bongsoo/klue-cross-encoder-v1')  # 한국어 지원 모델

In [128]:
predictions = []

# Pinecone의 지연 시간이 일관되게 유지되도록 top_k를 동일하게 유지합니다.
# 그리고 유일한 큰 시간 차이는 리랭킹에서 발생합니다.
for question in tqdm(val_sample['question']):
    retrieved_hash = get_results_from_pinecone(question, top_k=3, re_rank=True, cross_encoder=cross_encoder1, verbose=False)[0]['id']
    correct_hash = q_to_hash[question]
    predictions.append(retrieved_hash == correct_hash)
    
accuracy = sum(predictions)/len(predictions)

print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1): {accuracy}')

100%|██████████████████████████████████████████████████████████████████████████| 100/100 [01:22<00:00,  1.22it/s]

Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1): 0.84





In [129]:
predictions = []

# Pinecone의 지연 시간이 일관되게 유지되도록 top_k를 동일하게 유지합니다.
# 그리고 유일한 큰 시간 차이는 리랭킹에서 발생합니다.
for question in tqdm(val_sample['question']):
    retrieved_hash = get_results_from_pinecone(question, top_k=3, re_rank=True, cross_encoder=cross_encoder2, verbose=False)[0]['id']
    correct_hash = q_to_hash[question]
    predictions.append(retrieved_hash == correct_hash)
    
accuracy = sum(predictions)/len(predictions)

print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-6-v2): {accuracy}')

100%|██████████████████████████████████████████████████████████████████████████| 100/100 [01:07<00:00,  1.49it/s]

Accuracy with re-ranking(ms-marco-MiniLM-L-6-v2): 0.82





In [130]:
predictions = []

# Pinecone의 지연 시간이 일관되게 유지되도록 top_k를 동일하게 유지합니다.
# 그리고 유일한 큰 시간 차이는 리랭킹에서 발생합니다.
for question in tqdm(val_sample['question']):
    retrieved_hash = get_results_from_pinecone(question, top_k=3, re_rank=True, cross_encoder=cross_encoder3, verbose=False)[0]['id']
    correct_hash = q_to_hash[question]
    predictions.append(retrieved_hash == correct_hash)
    
accuracy = sum(predictions)/len(predictions)

print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2): {accuracy}')

100%|██████████████████████████████████████████████████████████████████████████| 100/100 [00:54<00:00,  1.84it/s]

Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2): 0.85





In [131]:
predictions = []

# Pinecone의 지연 시간이 일관되게 유지되도록 top_k를 동일하게 유지합니다.
# 그리고 유일한 큰 시간 차이는 리랭킹에서 발생합니다.
for question in tqdm(val_sample['question']):
    retrieved_hash = get_results_from_pinecone(question, top_k=3, re_rank=True, cross_encoder=cross_encoder4, verbose=False)[0]['id']
    correct_hash = q_to_hash[question]
    predictions.append(retrieved_hash == correct_hash)
    
accuracy = sum(predictions)/len(predictions)

print(f'Accuracy with re-ranking(bongsoo/klue-cross-encoder-v1): {accuracy}')

100%|██████████████████████████████████████████████████████████████████████████| 100/100 [01:38<00:00,  1.02it/s]

Accuracy with re-ranking(bongsoo/klue-cross-encoder-v1): 0.68





In [132]:
predictions = []

# Pinecone의 지연 시간이 일관되게 유지되도록 top_k를 동일하게 유지합니다.
# 그리고 유일한 큰 시간 차이는 리랭킹에서 발생합니다.
for question in tqdm(val_sample['question']):
    retrieved_hash = get_results_from_pinecone(question, top_k=3, re_rank=True, cross_encoder=pipeline_encoder, verbose=False, use_transformers=True)[0]['id']
    correct_hash = q_to_hash[question]
    predictions.append(retrieved_hash == correct_hash)
    
accuracy = sum(predictions)/len(predictions)

print(f'Accuracy with re-ranking(klue/roberta-large): {accuracy}')

100%|██████████████████████████████████████████████████████████████████████████| 100/100 [03:12<00:00,  1.92s/it]

Accuracy with re-ranking(klue/roberta-large): 0.29





## 2.5 embedding과 reranking 비교

In [133]:
# 사전 학습된 다른 크로스 인코더 시도하기
# sentence-transformers/multi-qa-mpnet-base-cos-v1
newer_cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')

In [134]:
def eval_ranking(query, cross_encoder, top_k=3):
    results_from_pinecone = query_from_pinecone(query, top_k=top_k)
    sentence_combinations = [[query, result_from_pinecone['metadata']['text']] for result_from_pinecone in results_from_pinecone]
    similarity_scores = cross_encoder.predict(sentence_combinations)
    sim_scores_argsort = list(reversed(np.argsort(similarity_scores)))
    re_ranked_final_result = results_from_pinecone[sim_scores_argsort[0]]
    return results_from_pinecone[0]['id'], re_ranked_final_result['id']


In [135]:
len(val_sample['question'])

100

In [137]:
i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash = eval_ranking(question, newer_cross_encoder, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy = sum([p[1] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking: {raw_accuracy}')
        print(f'Accuracy with re-ranking: {reranked_accuracy}')

 50%|█████████████████████████████████████▌                                     | 50/100 [00:26<00:25,  1.93it/s]

Step 50
Accuracy without re-ranking: 0.88
Accuracy with re-ranking: 0.84


100%|██████████████████████████████████████████████████████████████████████████| 100/100 [00:54<00:00,  1.84it/s]

Step 100
Accuracy without re-ranking: 0.84
Accuracy with re-ranking: 0.85





In [138]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy = sum([p[1] for p in predictions])/len(predictions)

print(f'Using cross-encoder: {newer_cross_encoder.config._name_or_path}')
print(f'Accuracy without re-ranking: {raw_accuracy}')
print(f'Accuracy with re-ranking: {reranked_accuracy}')

Using cross-encoder: cross-encoder/ms-marco-MiniLM-L-12-v2
Accuracy without re-ranking: 0.84
Accuracy with re-ranking: 0.85


# 3. CrossEncoder 파인튜닝(reranker)

In [139]:
# https://github.com/UKPLab/sentence-transformers/blob/master/examples/training/ms_marco/train_cross-encoder_scratch.py

## 3.1 학습용 데이터 정리

In [140]:
dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'passage'],
        num_rows: 9427
    })
    validation: Dataset({
        features: ['question', 'answer', 'passage'],
        num_rows: 3270
    })
})

In [141]:
dataset['train'][0]

{'question': 'do iran and afghanistan speak the same language',
 'answer': True,
 'passage': 'Persian (/ˈpɜːrʒən, -ʃən/), also known by its endonym Farsi (فارسی fārsi (fɒːɾˈsiː) ( listen)), is one of the Western Iranian languages within the Indo-Iranian branch of the Indo-European language family. It is primarily spoken in Iran, Afghanistan (officially known as Dari since 1958), and Tajikistan (officially known as Tajiki since the Soviet era), and some other regions which historically were Persianate societies and considered part of Greater Iran. It is written in the Persian alphabet, a modified variant of the Arabic script, which itself evolved from the Aramaic alphabet.'}

In [142]:
dataset['train'][1]

{'question': 'do good samaritan laws protect those who help at an accident',
 'answer': True,
 'passage': "Good Samaritan laws offer legal protection to people who give reasonable assistance to those who are, or who they believe to be, injured, ill, in peril, or otherwise incapacitated. The protection is intended to reduce bystanders' hesitation to assist, for fear of being sued or prosecuted for unintentional injury or wrongful death. An example of such a law in common-law areas of Canada: a good Samaritan doctrine is a legal principle that prevents a rescuer who has voluntarily helped a victim in distress from being successfully sued for wrongdoing. Its purpose is to keep people from being reluctant to help a stranger in need for fear of legal repercussions should they make some mistake in treatment. By contrast, a duty to rescue law requires people to offer assistance and holds those who fail to do so liable."}

In [144]:
from sentence_transformers import InputExample, losses, evaluation
from torch.utils.data import DataLoader
from random import shuffle

shuffled_training_passages = dataset['train']['passage'].copy()
shuffle(shuffled_training_passages)


train_positive_samples = [
  InputExample(texts=[d['question'], d['passage']], label=1) for d in dataset['train']
]

# 부정적인 예제 추가
train_negative_samples = [
  InputExample(texts=[d['question'], shuffled_training_passages[i]], label=0) for i, d in enumerate(dataset['train'])
]

print(len(train_positive_samples))
print(len(train_negative_samples))


# 내 데이터에 과적합의 위험이 있지만 원할 수도 있습니다. 
# 충분한 입력 및 출력 유효성 검사와 결합하면 내 데이터에 과적합한 모델을 사용하여 실행 가능한 제품을 만들 수 있습니다.

9427
9427


In [145]:
train_samples = train_positive_samples + train_negative_samples
shuffle(train_samples)
print(len(train_samples))

18854


## 3.2 파인튜닝 환경 구성

In [146]:
import torch

In [147]:
device = torch.device('cpu')
# device = torch.device('cuda')  # NVIDIA GPU

model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2', num_labels=1, device=device)

In [148]:
train_samples[0].__dict__

{'guid': '',
 'texts': ['is tim mcgraw in the movie country strong',
  "Country Strong (originally titled Love Don't Let Me Down) is a 2010 drama film starring Gwyneth Paltrow, Tim McGraw, Garrett Hedlund, and Leighton Meester. The film, about an emotionally unstable country music star who attempts to resurrect her career, was directed and written by American filmmaker Shana Feste. It premiered in Nashville, Tennessee on November 8, 2010, and had a wide release in the United States on January 7, 2011. This is the second film in which McGraw and Hedlund have worked together, the first being Friday Night Lights in 2004."],
 'label': 1}

In [149]:
model.predict(train_samples[0].texts, activation_fct=nn.Sigmoid())

0.99996674

In [150]:
from sentence_transformers.cross_encoder.evaluation import CECorrelationEvaluator, CEBinaryClassificationEvaluator
import math
import torch
from random import sample

logger.setLevel(logging.DEBUG)  # just to get some logs

num_epochs = 2

model_save_path = './fine_tuned_ir_cross_encoder'

train_samples = sample(train_samples, 1000)

# int(len(train_samples)*.8)
train_dataloader = DataLoader(train_samples[:int(len(train_samples)*.8)], shuffle=True, batch_size=32)

# 훈련 성능을 위한 평가자
evaluator = CEBinaryClassificationEvaluator.from_input_examples(train_samples[-int(len(train_samples)*.8):], name='test')

# 워밍업 단계에 대한 경험 법칙
warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)  # 워밍업을 위한 훈련 데이터의 10%
print(f"Warmup-steps: {warmup_steps}")

Warmup-steps: 5


In [151]:
len(train_samples)

1000

## 3.3 트레이닝 실행

In [152]:
# 파인튜닝 이전에 모델을 로드하고 테스트 세트에서 평가합니다.
print(evaluator(model))

0.9995924036836289


해당 코드는 모델 학습을 수행하는 부분입니다. 주요 내용을 설명하겠습니다.

1. `model.fit()` 함수를 사용하여 모델 학습을 진행합니다. 이 함수는 `sentence-transformers` 라이브러리에서 제공하는 함수로, 모델 학습에 필요한 여러 매개변수를 설정할 수 있습니다.

2. `train_dataloader` 매개변수는 학습 데이터를 배치 단위로 제공하는 DataLoader 객체입니다. 이 DataLoader는 `sentence-transformers` 라이브러리의 `InputExample` 클래스를 사용하여 생성됩니다.

3. `loss_fct` 매개변수는 학습 시 사용할 손실 함수(loss function)를 지정합니다. 여기서는 PyTorch의 `nn.CrossEntropyLoss()`를 사용하고 있습니다. 이는 다중 클래스 분류 문제에 주로 사용되는 손실 함수입니다.

4. `activation_fct` 매개변수는 모델의 출력에 적용할 활성화 함수(activation function)를 지정합니다. 여기서는 `nn.Sigmoid()`를 사용하고 있습니다. Sigmoid 함수는 출력 값을 0과 1 사이로 압축하여 확률 해석이 가능하게 합니다.

5. `evaluator` 매개변수는 학습 중 모델의 성능을 평가하기 위한 평가 객체를 지정합니다. 이 객체는 `sentence-transformers` 라이브러리에서 제공하는 평가 클래스 중 하나를 사용하거나 사용자 정의 평가 클래스를 사용할 수 있습니다.

6. `epochs` 매개변수는 학습을 반복할 에폭(epoch) 수를 지정합니다. 에폭은 전체 학습 데이터를 한 번 순회하는 것을 의미합니다.

7. `warmup_steps` 매개변수는 학습률 warm-up을 적용할 스텝 수를 지정합니다. Warm-up은 학습 초기에 학습률을 점진적으로 증가시키는 기법으로, 모델이 안정적으로 학습할 수 있도록 도와줍니다.

8. `output_path` 매개변수는 학습된 모델을 저장할 경로를 지정합니다.

9. `use_amp` 매개변수는 Automatic Mixed Precision (AMP)을 사용할지 여부를 지정합니다. AMP는 GPU 사용 시 메모리 사용량을 줄이고 학습 속도를 높이는 기법입니다. 여기서는 GPU 사용 시 `True`로, CPU 사용 시 `False`로 설정되어 있습니다.

이 코드는 지정된 hyperparameter를 사용하여 모델을 학습하고, 학습된 모델을 지정된 경로에 저장합니다. 학습 과정에서는 지정된 평가 객체를 사용하여 모델의 성능을 평가합니다.

In [153]:
model.fit(
    train_dataloader=train_dataloader,
    # loss_fct=losses.nn.CrossEntropyLoss(),
    loss_fct= nn.CrossEntropyLoss(),
    activation_fct=nn.Sigmoid(),
    evaluator=evaluator,
    epochs=num_epochs,
    warmup_steps=warmup_steps,
    output_path=model_save_path,
    # use_amp=True, # GPU 사용시
    use_amp=False,  # CPU 사용시
)

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Iteration:   0%|          | 0/25 [00:00<?, ?it/s]

Iteration:   0%|          | 0/25 [00:00<?, ?it/s]

In [154]:
# 파인튜닝된 모델을 로드하고 테스트 세트에서 평가하기
print(evaluator(model))

0.9999474432867325


## 3.4 파인튜닝 성능 확인

In [155]:
# 오픈 소스에서도 더 미세 조정된 버전을 실행하여 일치시킬 수 있을까요?
# 여기서 더 잘 작동하는지에 따라 다릅니다.

In [156]:
finetuned = CrossEncoder(model_save_path)

print(finetuned.predict(['hello', 'hi'], activation_fct=nn.Sigmoid()))
print(finetuned.predict(['hello', 'hi'], activation_fct=nn.Identity()))

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

0.9975732


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

6.018771


In [157]:
# 미세 조정된 크로스 인코더 사용해보기
logger.setLevel(logging.CRITICAL)  # just to suppress some logs
from tqdm import tqdm

i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash = eval_ranking(question, finetuned, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy = sum([p[1] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking: {raw_accuracy}')
        print(f'Accuracy with re-ranking: {reranked_accuracy}')

 50%|█████████████████████████████████████▌                                     | 50/100 [00:42<00:24,  2.03it/s]

Step 50
Accuracy without re-ranking: 0.88
Accuracy with re-ranking: 0.84


100%|██████████████████████████████████████████████████████████████████████████| 100/100 [01:08<00:00,  1.47it/s]

Step 100
Accuracy without re-ranking: 0.84
Accuracy with re-ranking: 0.83





In [158]:
# 재랭킹은 2번의 에포크 이후 약간 개선되었습니다.
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy = sum([p[1] for p in predictions])/len(predictions)

print(f'Using cross-encoder: {finetuned.config._name_or_path}')
print(f'Accuracy without re-ranking: {raw_accuracy}')
print(f'Accuracy with re-ranking: {reranked_accuracy}')


Using cross-encoder: ./fine_tuned_ir_cross_encoder
Accuracy without re-ranking: 0.84
Accuracy with re-ranking: 0.83


In [159]:
# 필요시 
# pinecone.delete_index(INDEX_NAME)  # delete the index

# 4. 오픈소스 임베딩 모델

## 4.1 모델 초기화

In [160]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('sentence-transformers/multi-qa-mpnet-base-cos-v1')

docs = ["Around 9 Million people live in London", "London is known for its financial district"]

doc_emb = model.encode(docs, batch_size=32, show_progress_bar=True)

doc_emb.shape #  == ('2, 768')

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

(2, 768)

In [161]:
len(dataset['validation']['passage'])

3270

## 4.2 문서 임베딩

https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-cos-v1 참고

In [162]:
# 문서 임베딩
docs = dataset['validation']['passage']
doc_emb = model.encode(docs, batch_size=32, show_progress_bar=True)

Batches:   0%|          | 0/103 [00:00<?, ?it/s]

## 4.3 OpenAI 임베딩 검색 (성능 비교용)

In [163]:
from random import sample

query = sample(dataset['validation']['question'], 1)[0]
print(query)
final_results = get_results_from_pinecone(query, top_k=3, re_rank=True)

can i use passport card to fly domestically
Query: can i use passport card to fly domestically
Pinecone Result==> b33e191154d815b30dcd9c7124ba5b07	0.86	The U.S. Passport Card is a limited travel documen
Pinecone Result==> 057b0a3b0b56c0ff95cd898752f3bb80	0.86	The passport card is a limited travel document, va
Pinecone Result==> 21c9ab226877a30230bd1746a6fdd272	0.85	The U.S. Passport Card is the de facto national id
Document ID (Hash)		Retrieval Score	CE Score	Text
Reranked Result==> 21c9ab226877a30230bd1746a6fdd272	0.85	0.71	The U.S. Passport Card is the de facto national id
Reranked Result==> b33e191154d815b30dcd9c7124ba5b07	0.86	0.70	The U.S. Passport Card is a limited travel documen
Reranked Result==> 057b0a3b0b56c0ff95cd898752f3bb80	0.86	0.68	The passport card is a limited travel document, va


## 4.4 오픈소스 모델 임베딩 검색

In [164]:
from sentence_transformers import util
query_emb = model.encode(query)

#쿼리와 모든 문서 임베딩 사이의 도트 점수를 계산합니다.
scores = util.dot_score(query_emb, doc_emb)[0].cpu().tolist()

#문서와 점수 결합
doc_score_pairs = list(zip(docs, scores))

#점수에 따른 내림차순 정렬
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)

#구절 및 점수 출력
for doc, score in doc_score_pairs[:3]:
    print(score, doc)

0.7097598910331726 The passport card is a limited travel document, valid only for land and sea travel within North America (Canada, the United States, Mexico, the Caribbean, and Bermuda). It cannot be used for international air travel. The Department of State indicates that this is because ``designing a card format passport for wide use, including by air travelers, would inadvertently undercut the broad based international effort to strengthen civil aviation security and travel document specifications to address the post 9/11 threat environment''.
0.6742514371871948 The U.S. Passport Card is a limited travel document issued by the federal government of the United States in the size of a credit card. It may often be used as an identity card for purposes other than international travel, such as domestic air travel. Like a U.S. passport book, the passport card is only issued to U.S. citizens and U.S. nationals exclusively by the U.S. Department of State and is compliant to the standards f

## 4.5 오픈소스 임베딩 모델을 이용한 데이터 조회의 정확도 검증

In [165]:
logger.setLevel(logging.CRITICAL)  # 일부 로그만 출력

def eval_ranking_open_source(query, cross_encoder, top_k=3):
    query_emb = model.encode(query)

    #쿼리와 모든 문서 임베딩 사이의 도트 점수를 계산합니다.
    scores = util.dot_score(query_emb, doc_emb)[0].cpu().tolist()

    #문서와 점수 결합
    doc_score_pairs = list(zip(docs, scores))

    #점수에 따른 내림차순 정렬
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)[:top_k]

    retrieved_hash = my_hash(doc_score_pairs[0][0])
    if cross_encoder:
        sentence_combinations = [[query, doc_score_pair[0]] for doc_score_pair in doc_score_pairs]
        similarity_scores = cross_encoder.predict(sentence_combinations)
        sim_scores_argsort = list(reversed(np.argsort(similarity_scores)))
        reranked_hash = my_hash(doc_score_pairs[sim_scores_argsort[0]][0])
    else:
        reranked_hash = None
    return retrieved_hash, reranked_hash


In [166]:
eval_ranking_open_source(query, finetuned)

('057b0a3b0b56c0ff95cd898752f3bb80', 'b33e191154d815b30dcd9c7124ba5b07')

In [167]:
len(val_sample['question'])

100

In [168]:
logger.setLevel(logging.CRITICAL)

i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash = eval_ranking_open_source(question, finetuned, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy = sum([p[1] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking: {raw_accuracy}')
        print(f'Accuracy with re-ranking: {reranked_accuracy}')


 52%|███████████████████████████████████████                                    | 52/100 [00:20<00:06,  7.46it/s]

Step 50
Accuracy without re-ranking: 0.82
Accuracy with re-ranking: 0.84


100%|██████████████████████████████████████████████████████████████████████████| 100/100 [00:31<00:00,  3.16it/s]

Step 100
Accuracy without re-ranking: 0.83
Accuracy with re-ranking: 0.83





In [169]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy = sum([p[1] for p in predictions])/len(predictions)

print(f'Using cross-encoder: {finetuned.config._name_or_path}')
print(f'Accuracy without re-ranking: {raw_accuracy}')
print(f'Accuracy with re-ranking: {reranked_accuracy}')


Using cross-encoder: ./fine_tuned_ir_cross_encoder
Accuracy without re-ranking: 0.83
Accuracy with re-ranking: 0.83


# 5. 한국어 BoolQ를 이용한 한국어 임베딩 성능 비교
## 5.1 초기화

In [170]:
from datasets import load_dataset
from evaluate import load

dataset_ko = load_dataset("skt/kobest_v1", "boolq")

Downloading readme:   0%|          | 0.00/7.20k [00:00<?, ?B/s]

Downloading data: 100%|█████████████████████████████████████████████████████| 3.70M/3.70M [00:01<00:00, 2.72MB/s]
Downloading data: 100%|█████████████████████████████████████████████████████| 1.41M/1.41M [00:00<00:00, 1.46MB/s]
Downloading data: 100%|████████████████████████████████████████████████████████| 692k/692k [00:00<00:00, 896kB/s]


Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

In [171]:
dataset_ko

DatasetDict({
    train: Dataset({
        features: ['paragraph', 'question', 'label'],
        num_rows: 3665
    })
    test: Dataset({
        features: ['paragraph', 'question', 'label'],
        num_rows: 1404
    })
    validation: Dataset({
        features: ['paragraph', 'question', 'label'],
        num_rows: 700
    })
})

In [172]:
from datasets import concatenate_datasets

combined_dataset = concatenate_datasets([dataset_ko['train'], dataset_ko['validation'], dataset_ko['test']])

In [173]:
len(combined_dataset)

5769

In [174]:
combined_dataset[0]

{'paragraph': '로마 시대의 오리엔트의 범위는 제국 내에 동부 지방은 물론 제국 외부에 있는 다른 국가에 광범위하게 쓰이는 단어였다. 그 후에 로마 제국이 분열되고 서유럽이 그들의 중심적인 세계를 형성하는 과정에서 자신들을 옥시덴트(occident), 서방이라 부르며 오리엔트는 이와 대조되는 문화를 가진 동방세계라는 뜻이 부가되어, 인도와 중국, 일본을 이루는 광범위한 지역을 지칭하는 단어가 되었다.',
 'question': '오리엔트는 인도와 중국, 일본을 이루는 광범위한 지역을 지칭하는 단어로 쓰인다.',
 'label': 1}

In [176]:
cross_encoder_1 = CrossEncoder('jeffwan/mmarco-mMiniLMv2-L12-H384-v1')
cross_encoder_2 = CrossEncoder('bongsoo/klue-cross-encoder-v1')  # 한국어 지원 모델
cross_encoder_3 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
model_save_path = './fine_tuned_ir_cross_encoder'
cross_encoder_4 = CrossEncoder(model_save_path)

In [177]:
def eval_ranking_4(query, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3):
    results_from_pinecone = query_from_pinecone(query, top_k=top_k)
    sentence_combinations = [[query, result_from_pinecone['metadata']['text']] for result_from_pinecone in results_from_pinecone]

    similarity_scores_1 = cross_encoder_1.predict(sentence_combinations)
    sim_scores_argsort_1 = list(reversed(np.argsort(similarity_scores_1)))
    re_ranked_final_result_1 = results_from_pinecone[sim_scores_argsort_1[0]]
    
    similarity_scores_2 = cross_encoder_2.predict(sentence_combinations)
    sim_scores_argsort_2 = list(reversed(np.argsort(similarity_scores_2)))
    re_ranked_final_result_2 = results_from_pinecone[sim_scores_argsort_2[0]]
    
    similarity_scores_3 = cross_encoder_3.predict(sentence_combinations)
    sim_scores_argsort_3 = list(reversed(np.argsort(similarity_scores_3)))
    re_ranked_final_result_3 = results_from_pinecone[sim_scores_argsort_3[0]]
    
    similarity_scores_4 = cross_encoder_4.predict(sentence_combinations)
    sim_scores_argsort_4 = list(reversed(np.argsort(similarity_scores_4)))
    re_ranked_final_result_4 = results_from_pinecone[sim_scores_argsort_4[0]]
    
    return results_from_pinecone[0]['id'], re_ranked_final_result_1['id'], re_ranked_final_result_2['id'], re_ranked_final_result_3['id'], re_ranked_final_result_4['id']


In [178]:
# val_sample = dataset_ko['validation'][:]
val_sample = combined_dataset[:]

In [179]:
len(val_sample)

3

In [180]:
dataset_ko['validation'][3]

{'paragraph': '가리비의 껍데기는 부채처럼 생겼으며 표면에는 골판지처럼 골이 있다. 패각의 길이는 2~2.5cm이며, 껍데기 색은 보라색, 붉은색, 노란색, 묽은 주황색 등이다. 가리비는 껍데기를 열고 닫는 힘이 아주 세고, 껍데기를 크게 벌리고 먹이를 찾아다니는데, 불가사리 같은 적을 만나면 껍데기를 열고 닫으면서 재빨리 달아난다. 껍데기를 열고 닫으면서 몸에 담아 두었던 물을 뒤로 보내며 힘차게 앞으로 나아갈 수 있다.',
 'question': '가리비는 적을 만나면 껍데기를 닫는다.',
 'label': 0}

In [181]:
from random import sample

query = sample(dataset_ko['validation']['question'], 1)[0]
print(query)

판포르테는 수녀들이 수도원에 바쳤던 음식인가요?


In [182]:
# q_to_hash = {data['question']: my_hash(data['paragraph']) for data in dataset_ko['validation']}
q_to_hash = {data['question']: my_hash(data['paragraph']) for data in combined_dataset}

q_to_hash[query]

'6836dba56b5f9aee320a2b40e38c8719'

## 5.2 text-embedding-ada-002 한국어 성능

In [183]:
ENGINE = 'text-embedding-ada-002'
# ENGINE_2 = 'text-embedding-ada-002'
# ENGINE_3_S = 'text-embedding-3-small'  # OpenAI의 3세대 embedding 모델 (소형)
# ENGINE_3_L = 'text-embedding-3-large'  # OpenAI의 3세대 embedding 모델 (대형)

In [184]:
pinecone.delete_index(INDEX_NAME)  # delete the index

pinecone.create_index(
    INDEX_NAME, # 인덱스 이름
    dimension=1536, # 벡터의 치수, text-embedding-ada-002, text-embedding-3-small
    # dimension=3072, # 벡터의 치수, text-embedding-3-large
    
    metric='cosine', # 인덱스를 검색할 때 사용할 유사성 메트릭
    spec=PodSpec(
      environment="gcp-starter"
    )
    # pod_type="p1" # 파인콘 파드의 유형
)

# 인덱스를 변수로 저장
index = pinecone.Index(INDEX_NAME)

In [185]:
for idx in tqdm(range(0, len(dataset_ko['validation']), 128)):
    data_sample = dataset_ko['validation'][idx:idx + 128]

    passages = data_sample['paragraph']
    upload_texts_to_pinecone(passages, engine=ENGINE)

for idx in tqdm(range(0, len(dataset_ko['train']), 128)):
    data_sample = dataset_ko['train'][idx:idx + 128]

    passages = data_sample['paragraph']
    upload_texts_to_pinecone(passages, engine=ENGINE)

for idx in tqdm(range(0, len(dataset_ko['test']), 128)):
    data_sample = dataset_ko['test'][idx:idx + 128]

    passages = data_sample['paragraph']
    upload_texts_to_pinecone(passages, engine=ENGINE)

100%|██████████████████████████████████████████████████████████████████████████████| 6/6 [00:21<00:00,  3.51s/it]
100%|████████████████████████████████████████████████████████████████████████████| 29/29 [01:37<00:00,  3.36s/it]
100%|████████████████████████████████████████████████████████████████████████████| 11/11 [00:33<00:00,  3.00s/it]


In [186]:
i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4 = eval_ranking_4(question, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash_1 == correct_hash, reranked_hash_2 == correct_hash, reranked_hash_3 == correct_hash, reranked_hash_4 == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
        reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
        reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
        reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking                            : {raw_accuracy}')
        print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
        print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')


  1%|▌                                                                       | 50/5769 [01:59<3:47:07,  2.38s/it]

Step 50
Accuracy without re-ranking                            : 0.84
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.92
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.92
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.52
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44


  2%|█▏                                                                     | 100/5769 [03:22<2:37:46,  1.67s/it]

Step 100
Accuracy without re-ranking                            : 0.77
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.84
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.87
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.49
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43


  3%|█▊                                                                     | 150/5769 [04:44<2:00:17,  1.28s/it]

Step 150
Accuracy without re-ranking                            : 0.7666666666666667
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8466666666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8666666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44666666666666666


  3%|██▍                                                                    | 200/5769 [06:06<3:22:05,  2.18s/it]

Step 200
Accuracy without re-ranking                            : 0.745
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.85
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.87
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.465


  4%|███                                                                    | 250/5769 [07:18<2:01:45,  1.32s/it]

Step 250
Accuracy without re-ranking                            : 0.752
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.844
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.848
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.484
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.48


  5%|███▋                                                                   | 300/5769 [08:22<1:14:09,  1.23it/s]

Step 300
Accuracy without re-ranking                            : 0.7366666666666667
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8533333333333334
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8566666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.49
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.49333333333333335


  6%|████▎                                                                  | 350/5769 [09:25<1:40:15,  1.11s/it]

Step 350
Accuracy without re-ranking                            : 0.7257142857142858
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8514285714285714
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8514285714285714
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4828571428571429


  7%|████▉                                                                  | 400/5769 [10:08<1:32:18,  1.03s/it]

Step 400
Accuracy without re-ranking                            : 0.7375
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8625
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.86
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.495


  8%|█████▌                                                                 | 450/5769 [10:59<1:22:07,  1.08it/s]

Step 450
Accuracy without re-ranking                            : 0.7333333333333333
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8533333333333334
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8511111111111112
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4866666666666667


  9%|██████▏                                                                | 500/5769 [11:47<1:21:53,  1.07it/s]

Step 500
Accuracy without re-ranking                            : 0.734
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.85
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.846
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.488
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.496


 10%|██████▊                                                                | 550/5769 [12:33<1:29:18,  1.03s/it]

Step 550
Accuracy without re-ranking                            : 0.7272727272727273
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8436363636363636
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8381818181818181
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4818181818181818
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4890909090909091


 10%|███████▍                                                               | 600/5769 [13:24<2:05:14,  1.45s/it]

Step 600
Accuracy without re-ranking                            : 0.73
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8416666666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8366666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.485
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.495


 11%|███████▉                                                               | 650/5769 [14:13<1:45:09,  1.23s/it]

Step 650
Accuracy without re-ranking                            : 0.7307692307692307
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8430769230769231
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8353846153846154
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4723076923076923
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4846153846153846


 12%|████████▌                                                              | 700/5769 [14:59<1:36:20,  1.14s/it]

Step 700
Accuracy without re-ranking                            : 0.7285714285714285
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8385714285714285
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8285714285714286
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4714285714285714
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4785714285714286


 13%|█████████▏                                                             | 750/5769 [15:49<1:14:36,  1.12it/s]

Step 750
Accuracy without re-ranking                            : 0.7213333333333334
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8293333333333334
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8186666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4693333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4746666666666667


 14%|█████████▊                                                             | 800/5769 [16:42<1:03:14,  1.31it/s]

Step 800
Accuracy without re-ranking                            : 0.7275
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.83375
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.82375
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47


 15%|██████████▍                                                            | 850/5769 [17:34<3:26:57,  2.52s/it]

Step 850
Accuracy without re-ranking                            : 0.7270588235294118
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8352941176470589
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8223529411764706
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46941176470588236
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46705882352941175


 16%|███████████                                                            | 900/5769 [18:19<1:30:47,  1.12s/it]

Step 900
Accuracy without re-ranking                            : 0.7322222222222222
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8366666666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8222222222222222
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4711111111111111
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4711111111111111


 16%|████████████                                                             | 950/5769 [19:01<51:27,  1.56it/s]

Step 950
Accuracy without re-ranking                            : 0.7305263157894737
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8357894736842105
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8210526315789474
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4726315789473684
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4673684210526316


 17%|████████████▏                                                         | 1000/5769 [19:43<1:11:09,  1.12it/s]

Step 1000
Accuracy without re-ranking                            : 0.732
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.836
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.822
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.475
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.466


 18%|█████████████                                                           | 1050/5769 [20:21<54:42,  1.44it/s]

Step 1050
Accuracy without re-ranking                            : 0.7304761904761905
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8371428571428572
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8238095238095238
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47333333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46476190476190476


 19%|█████████████▋                                                          | 1100/5769 [20:54<55:26,  1.40it/s]

Step 1100
Accuracy without re-ranking                            : 0.730909090909091
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8381818181818181
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8254545454545454
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4727272727272727
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4618181818181818


 20%|█████████████▉                                                        | 1150/5769 [21:32<1:45:29,  1.37s/it]

Step 1150
Accuracy without re-ranking                            : 0.7269565217391304
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.837391304347826
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8252173913043478
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4765217391304348
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46608695652173915


 21%|██████████████▉                                                         | 1200/5769 [22:15<44:59,  1.69it/s]

Step 1200
Accuracy without re-ranking                            : 0.725
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8341666666666666
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8233333333333334
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47583333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.465


 22%|███████████████▏                                                      | 1250/5769 [22:51<1:16:28,  1.02s/it]

Step 1250
Accuracy without re-ranking                            : 0.7184
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.828
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8192
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4712
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46


 23%|████████████████▏                                                       | 1300/5769 [23:26<55:19,  1.35it/s]

Step 1300
Accuracy without re-ranking                            : 0.7169230769230769
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.83
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.82
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4684615384615385
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45615384615384613


 23%|████████████████▊                                                       | 1350/5769 [24:05<44:16,  1.66it/s]

Step 1350
Accuracy without re-ranking                            : 0.7214814814814815
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8340740740740741
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8237037037037037
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47333333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4622222222222222


 24%|█████████████████▍                                                      | 1400/5769 [24:43<45:37,  1.60it/s]

Step 1400
Accuracy without re-ranking                            : 0.7207142857142858
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.835
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8242857142857143
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4735714285714286
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46714285714285714


 25%|██████████████████                                                      | 1450/5769 [25:23<47:30,  1.52it/s]

Step 1450
Accuracy without re-ranking                            : 0.7186206896551725
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8344827586206897
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8241379310344827
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47172413793103446
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46551724137931033


 26%|██████████████████▋                                                     | 1500/5769 [25:59<47:20,  1.50it/s]

Step 1500
Accuracy without re-ranking                            : 0.7173333333333334
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.834
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8246666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.468


 27%|███████████████████▎                                                    | 1550/5769 [26:36<42:42,  1.65it/s]

Step 1550
Accuracy without re-ranking                            : 0.7180645161290322
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8348387096774194
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.824516129032258
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4703225806451613
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46838709677419355


 28%|███████████████████▉                                                    | 1600/5769 [27:09<42:38,  1.63it/s]

Step 1600
Accuracy without re-ranking                            : 0.719375
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.83625
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.825625
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.465625
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.465


 29%|████████████████████                                                  | 1650/5769 [27:46<1:00:12,  1.14it/s]

Step 1650
Accuracy without re-ranking                            : 0.7187878787878788
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8351515151515152
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8248484848484848
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46606060606060606
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46484848484848484


 29%|█████████████████████▏                                                  | 1700/5769 [28:19<39:32,  1.72it/s]

Step 1700
Accuracy without re-ranking                            : 0.7170588235294117
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8352941176470589
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8252941176470588
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46705882352941175
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4647058823529412


 30%|█████████████████████▊                                                  | 1750/5769 [28:55<40:00,  1.67it/s]

Step 1750
Accuracy without re-ranking                            : 0.7188571428571429
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8365714285714285
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8257142857142857
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4662857142857143
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4645714285714286


 31%|██████████████████████▍                                                 | 1800/5769 [29:28<39:30,  1.67it/s]

Step 1800
Accuracy without re-ranking                            : 0.7161111111111111
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8361111111111111
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.825
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46444444444444444
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4627777777777778


 32%|██████████████████████▍                                               | 1850/5769 [30:03<1:15:10,  1.15s/it]

Step 1850
Accuracy without re-ranking                            : 0.7162162162162162
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8362162162162162
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8254054054054054
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4643243243243243
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46054054054054056


 33%|███████████████████████▋                                                | 1900/5769 [30:53<47:23,  1.36it/s]

Step 1900
Accuracy without re-ranking                            : 0.7152631578947368
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8352631578947368
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8257894736842105
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4636842105263158
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4594736842105263


 34%|███████████████████████▋                                              | 1950/5769 [31:38<1:03:31,  1.00it/s]

Step 1950
Accuracy without re-ranking                            : 0.7138461538461538
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8343589743589743
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8251282051282052
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46205128205128204
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45897435897435895


 35%|████████████████████████▉                                               | 2000/5769 [32:15<45:38,  1.38it/s]

Step 2000
Accuracy without re-ranking                            : 0.7155
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8345
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8245
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.463
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.459


 36%|█████████████████████████▌                                              | 2050/5769 [32:53<45:06,  1.37it/s]

Step 2050
Accuracy without re-ranking                            : 0.7190243902439024
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8365853658536585
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8258536585365853
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4624390243902439
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4551219512195122


 36%|██████████████████████████▏                                             | 2100/5769 [33:28<40:36,  1.51it/s]

Step 2100
Accuracy without re-ranking                            : 0.7157142857142857
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8338095238095238
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8242857142857143
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46285714285714286
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45666666666666667


 37%|██████████████████████████▊                                             | 2150/5769 [33:59<35:58,  1.68it/s]

Step 2150
Accuracy without re-ranking                            : 0.7158139534883721
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.833953488372093
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8232558139534883
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4627906976744186
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4558139534883721


 38%|███████████████████████████▍                                            | 2200/5769 [34:31<36:13,  1.64it/s]

Step 2200
Accuracy without re-ranking                            : 0.7145454545454546
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8313636363636364
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8213636363636364
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4618181818181818
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4531818181818182


 39%|████████████████████████████                                            | 2250/5769 [35:02<37:31,  1.56it/s]

Step 2250
Accuracy without re-ranking                            : 0.7142222222222222
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8311111111111111
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8208888888888889
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46044444444444443
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4528888888888889


 40%|████████████████████████████▋                                           | 2300/5769 [35:39<40:04,  1.44it/s]

Step 2300
Accuracy without re-ranking                            : 0.7139130434782609
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.831304347826087
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8204347826086956
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4608695652173913
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4543478260869565


 41%|█████████████████████████████▎                                          | 2350/5769 [36:12<37:26,  1.52it/s]

Step 2350
Accuracy without re-ranking                            : 0.7140425531914893
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8314893617021276
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8208510638297872
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4604255319148936
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45276595744680853


 42%|█████████████████████████████▉                                          | 2400/5769 [36:43<36:30,  1.54it/s]

Step 2400
Accuracy without re-ranking                            : 0.7141666666666666
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8304166666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.82
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46208333333333335
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45416666666666666


 42%|██████████████████████████████▌                                         | 2450/5769 [37:18<40:58,  1.35it/s]

Step 2450
Accuracy without re-ranking                            : 0.7126530612244898
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8293877551020408
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8183673469387756
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4595918367346939
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45346938775510204


 43%|███████████████████████████████▏                                        | 2500/5769 [37:55<36:51,  1.48it/s]

Step 2500
Accuracy without re-ranking                            : 0.7144
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8304
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8196
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4612
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4548


 44%|███████████████████████████████▊                                        | 2550/5769 [38:29<39:30,  1.36it/s]

Step 2550
Accuracy without re-ranking                            : 0.7137254901960784
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8290196078431372
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8180392156862745
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4611764705882353
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4541176470588235


 45%|████████████████████████████████▍                                       | 2600/5769 [39:03<38:06,  1.39it/s]

Step 2600
Accuracy without re-ranking                            : 0.7115384615384616
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8276923076923077
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8173076923076923
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46192307692307694
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45346153846153847


 46%|█████████████████████████████████                                       | 2650/5769 [39:34<31:05,  1.67it/s]

Step 2650
Accuracy without re-ranking                            : 0.7120754716981132
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8271698113207547
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8162264150943396
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4588679245283019
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4520754716981132


 47%|█████████████████████████████████▋                                      | 2700/5769 [40:07<30:38,  1.67it/s]

Step 2700
Accuracy without re-ranking                            : 0.7107407407407408
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8248148148148148
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8140740740740741
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.457037037037037
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45185185185185184


 48%|██████████████████████████████████▎                                     | 2750/5769 [40:43<36:20,  1.38it/s]

Step 2750
Accuracy without re-ranking                            : 0.7105454545454546
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.824
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8134545454545454
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.45854545454545453
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45236363636363636


 49%|██████████████████████████████████▉                                     | 2800/5769 [41:46<48:05,  1.03it/s]

Step 2800
Accuracy without re-ranking                            : 0.7110714285714286
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8235714285714286
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8132142857142857
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4592857142857143
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45285714285714285


 49%|███████████████████████████████████▌                                    | 2850/5769 [42:56<33:39,  1.45it/s]

Step 2850
Accuracy without re-ranking                            : 0.7119298245614035
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8245614035087719
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8147368421052632
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4610526315789474
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4557894736842105


 50%|████████████████████████████████████▏                                   | 2900/5769 [43:27<30:08,  1.59it/s]

Step 2900
Accuracy without re-ranking                            : 0.7117241379310345
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.823103448275862
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8137931034482758
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4606896551724138
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4555172413793103


 51%|████████████████████████████████████▊                                   | 2950/5769 [44:00<28:51,  1.63it/s]

Step 2950
Accuracy without re-ranking                            : 0.7105084745762712
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8213559322033899
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8122033898305084
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46135593220338983
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4552542372881356


 52%|█████████████████████████████████████▍                                  | 3000/5769 [44:32<29:56,  1.54it/s]

Step 3000
Accuracy without re-ranking                            : 0.712
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8213333333333334
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.812
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46166666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.456


 53%|██████████████████████████████████████                                  | 3050/5769 [45:03<26:41,  1.70it/s]

Step 3050
Accuracy without re-ranking                            : 0.7127868852459016
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8213114754098361
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8121311475409836
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4616393442622951
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4560655737704918


 54%|██████████████████████████████████████▋                                 | 3100/5769 [45:35<28:59,  1.53it/s]

Step 3100
Accuracy without re-ranking                            : 0.7119354838709677
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8209677419354838
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8116129032258065
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4632258064516129
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4570967741935484


 55%|███████████████████████████████████████▎                                | 3150/5769 [46:13<28:02,  1.56it/s]

Step 3150
Accuracy without re-ranking                            : 0.713015873015873
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8206349206349206
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8117460317460318
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4641269841269841
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4577777777777778


 55%|███████████████████████████████████████▉                                | 3200/5769 [46:49<26:50,  1.59it/s]

Step 3200
Accuracy without re-ranking                            : 0.7134375
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.820625
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8121875
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.464375
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.456875


 56%|████████████████████████████████████████▌                               | 3250/5769 [47:27<27:10,  1.55it/s]

Step 3250
Accuracy without re-ranking                            : 0.7132307692307692
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.820923076923077
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.812923076923077
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4655384615384615
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4572307692307692


 57%|█████████████████████████████████████████▏                              | 3300/5769 [48:00<25:42,  1.60it/s]

Step 3300
Accuracy without re-ranking                            : 0.7124242424242424
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8203030303030303
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8121212121212121
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46545454545454545
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45636363636363636


 58%|█████████████████████████████████████████▊                              | 3350/5769 [48:36<26:19,  1.53it/s]

Step 3350
Accuracy without re-ranking                            : 0.713134328358209
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8214925373134329
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8128358208955224
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4665671641791045
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4591044776119403


 59%|██████████████████████████████████████████▍                             | 3400/5769 [49:12<26:43,  1.48it/s]

Step 3400
Accuracy without re-ranking                            : 0.7111764705882353
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8202941176470588
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8117647058823529
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4647058823529412
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4591176470588235


 60%|███████████████████████████████████████████                             | 3450/5769 [49:45<31:03,  1.24it/s]

Step 3450
Accuracy without re-ranking                            : 0.7098550724637681
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8191304347826087
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8110144927536231
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4646376811594203
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45971014492753626


 61%|███████████████████████████████████████████▋                            | 3500/5769 [50:26<26:51,  1.41it/s]

Step 3500
Accuracy without re-ranking                            : 0.7077142857142857
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8188571428571428
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8105714285714286
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4637142857142857
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45885714285714285


 62%|████████████████████████████████████████████▎                           | 3550/5769 [50:59<22:37,  1.63it/s]

Step 3550
Accuracy without re-ranking                            : 0.7064788732394366
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8180281690140845
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8095774647887324
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4628169014084507
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46


 62%|████████████████████████████████████████████▉                           | 3600/5769 [51:36<24:50,  1.46it/s]

Step 3600
Accuracy without re-ranking                            : 0.7063888888888888
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8175
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8091666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46444444444444444
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4597222222222222


 63%|█████████████████████████████████████████████▌                          | 3650/5769 [52:08<22:36,  1.56it/s]

Step 3650
Accuracy without re-ranking                            : 0.7063013698630137
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8178082191780822
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8098630136986301
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46328767123287673
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4589041095890411


 64%|██████████████████████████████████████████████▏                         | 3700/5769 [52:40<23:01,  1.50it/s]

Step 3700
Accuracy without re-ranking                            : 0.7067567567567568
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8189189189189189
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8110810810810811
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46378378378378377
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4586486486486486


 65%|██████████████████████████████████████████████▊                         | 3750/5769 [53:13<20:20,  1.65it/s]

Step 3750
Accuracy without re-ranking                            : 0.7061333333333333
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8186666666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8112
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4624
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4573333333333333


 66%|███████████████████████████████████████████████▍                        | 3800/5769 [53:53<20:53,  1.57it/s]

Step 3800
Accuracy without re-ranking                            : 0.7060526315789474
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8181578947368421
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8110526315789474
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4636842105263158
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4586842105263158


 67%|████████████████████████████████████████████████                        | 3850/5769 [54:25<19:36,  1.63it/s]

Step 3850
Accuracy without re-ranking                            : 0.7059740259740259
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8174025974025974
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8103896103896104
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4633766233766234
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45896103896103896


 68%|████████████████████████████████████████████████▋                       | 3900/5769 [55:00<21:43,  1.43it/s]

Step 3900
Accuracy without re-ranking                            : 0.7046153846153846
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8166666666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8097435897435897
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4635897435897436
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45974358974358975


 68%|█████████████████████████████████████████████████▎                      | 3950/5769 [55:36<19:44,  1.54it/s]

Step 3950
Accuracy without re-ranking                            : 0.7048101265822785
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8162025316455697
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.809367088607595
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46379746835443036
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45974683544303796


 69%|█████████████████████████████████████████████████▉                      | 4000/5769 [56:17<23:49,  1.24it/s]

Step 4000
Accuracy without re-ranking                            : 0.704
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.81425
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8075
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46225
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4585


 70%|██████████████████████████████████████████████████▌                     | 4050/5769 [56:49<18:35,  1.54it/s]

Step 4050
Accuracy without re-ranking                            : 0.7046913580246914
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8140740740740741
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8076543209876543
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46271604938271604
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4587654320987654


 71%|███████████████████████████████████████████████████▏                    | 4100/5769 [57:28<23:19,  1.19it/s]

Step 4100
Accuracy without re-ranking                            : 0.7041463414634146
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.813170731707317
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8070731707317074
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4631707317073171
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45951219512195124


 72%|███████████████████████████████████████████████████▊                    | 4150/5769 [58:03<17:36,  1.53it/s]

Step 4150
Accuracy without re-ranking                            : 0.7050602409638554
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8139759036144578
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8081927710843374
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46433734939759036
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46072289156626506


 73%|████████████████████████████████████████████████████▍                   | 4200/5769 [58:38<18:33,  1.41it/s]

Step 4200
Accuracy without re-ranking                            : 0.7057142857142857
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.814047619047619
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8083333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46476190476190476
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4614285714285714


 74%|█████████████████████████████████████████████████████                   | 4250/5769 [59:14<14:41,  1.72it/s]

Step 4250
Accuracy without re-ranking                            : 0.7063529411764706
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8148235294117647
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8091764705882353
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4656470588235294
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4628235294117647


 75%|█████████████████████████████████████████████████████▋                  | 4300/5769 [59:49<16:04,  1.52it/s]

Step 4300
Accuracy without re-ranking                            : 0.706046511627907
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8148837209302325
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8090697674418604
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4653488372093023
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4625581395348837


 75%|████████████████████████████████████████████████████▊                 | 4350/5769 [1:00:27<18:35,  1.27it/s]

Step 4350
Accuracy without re-ranking                            : 0.705287356321839
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8140229885057472
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.807816091954023
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46436781609195404
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46229885057471265


 76%|█████████████████████████████████████████████████████▍                | 4400/5769 [1:01:09<15:16,  1.49it/s]

Step 4400
Accuracy without re-ranking                            : 0.7068181818181818
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8143181818181818
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8084090909090909
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4647727272727273
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4636363636363636


 77%|█████████████████████████████████████████████████████▉                | 4450/5769 [1:01:45<14:39,  1.50it/s]

Step 4450
Accuracy without re-ranking                            : 0.7089887640449438
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8164044943820224
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8101123595505618
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4651685393258427
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4651685393258427


 78%|██████████████████████████████████████████████████████▌               | 4500/5769 [1:02:19<15:08,  1.40it/s]

Step 4500
Accuracy without re-ranking                            : 0.71
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.816
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8097777777777778
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4662222222222222
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4653333333333333


 79%|███████████████████████████████████████████████████████▏              | 4550/5769 [1:02:55<14:20,  1.42it/s]

Step 4550
Accuracy without re-ranking                            : 0.7112087912087912
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.816923076923077
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8105494505494506
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4674725274725275
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46593406593406594


 80%|███████████████████████████████████████████████████████▊              | 4600/5769 [1:03:38<13:28,  1.45it/s]

Step 4600
Accuracy without re-ranking                            : 0.7117391304347827
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8165217391304348
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8097826086956522
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46608695652173915
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4641304347826087


 81%|████████████████████████████████████████████████████████▍             | 4650/5769 [1:04:12<13:26,  1.39it/s]

Step 4650
Accuracy without re-ranking                            : 0.712258064516129
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.816989247311828
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8103225806451613
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4664516129032258
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46408602150537637


 81%|█████████████████████████████████████████████████████████             | 4700/5769 [1:04:47<12:13,  1.46it/s]

Step 4700
Accuracy without re-ranking                            : 0.7129787234042553
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8176595744680851
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.811063829787234
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4676595744680851
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4646808510638298


 82%|█████████████████████████████████████████████████████████▋            | 4750/5769 [1:05:25<19:15,  1.13s/it]

Step 4750
Accuracy without re-ranking                            : 0.7138947368421052
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8181052631578948
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8115789473684211
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.468
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4652631578947368


 83%|██████████████████████████████████████████████████████████▏           | 4800/5769 [1:06:03<12:04,  1.34it/s]

Step 4800
Accuracy without re-ranking                            : 0.7141666666666666
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8172916666666666
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8110416666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.466875
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46458333333333335


 84%|██████████████████████████████████████████████████████████▊           | 4850/5769 [1:06:43<09:38,  1.59it/s]

Step 4850
Accuracy without re-ranking                            : 0.7148453608247423
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8175257731958763
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.811340206185567
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4672164948453608
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4645360824742268


 85%|███████████████████████████████████████████████████████████▍          | 4900/5769 [1:07:19<10:45,  1.35it/s]

Step 4900
Accuracy without re-ranking                            : 0.7153061224489796
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8179591836734694
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8116326530612245
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4679591836734694
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4642857142857143


 86%|████████████████████████████████████████████████████████████          | 4950/5769 [1:08:00<10:16,  1.33it/s]

Step 4950
Accuracy without re-ranking                            : 0.7151515151515152
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8177777777777778
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8113131313131313
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4672727272727273
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46444444444444444


 87%|████████████████████████████████████████████████████████████▋         | 5000/5769 [1:08:35<08:53,  1.44it/s]

Step 5000
Accuracy without re-ranking                            : 0.7164
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8188
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8122
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4666
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4638


 88%|█████████████████████████████████████████████████████████████▎        | 5050/5769 [1:09:15<07:41,  1.56it/s]

Step 5050
Accuracy without re-ranking                            : 0.7164356435643564
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8182178217821782
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8118811881188119
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46752475247524755
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4641584158415842


 88%|█████████████████████████████████████████████████████████████▉        | 5100/5769 [1:10:03<08:08,  1.37it/s]

Step 5100
Accuracy without re-ranking                            : 0.7182352941176471
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8194117647058824
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8131372549019608
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46862745098039216
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46490196078431373


 89%|██████████████████████████████████████████████████████████████▍       | 5150/5769 [1:10:55<07:50,  1.32it/s]

Step 5150
Accuracy without re-ranking                            : 0.7188349514563107
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8198058252427185
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8135922330097087
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.46873786407766993
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4652427184466019


 90%|███████████████████████████████████████████████████████████████       | 5200/5769 [1:11:33<09:30,  1.00s/it]

Step 5200
Accuracy without re-ranking                            : 0.7190384615384615
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8205769230769231
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8140384615384615
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4690384615384615
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46557692307692305


 91%|███████████████████████████████████████████████████████████████▋      | 5250/5769 [1:12:07<05:29,  1.58it/s]

Step 5250
Accuracy without re-ranking                            : 0.7198095238095238
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8211428571428572
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8148571428571428
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4695238095238095
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4657142857142857


 92%|████████████████████████████████████████████████████████████████▎     | 5300/5769 [1:12:45<05:26,  1.44it/s]

Step 5300
Accuracy without re-ranking                            : 0.720754716981132
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8215094339622642
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8152830188679245
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47094339622641507
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4671698113207547


 93%|████████████████████████████████████████████████████████████████▉     | 5350/5769 [1:13:24<05:13,  1.33it/s]

Step 5350
Accuracy without re-ranking                            : 0.7220560747663551
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.822429906542056
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8162616822429907
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4708411214953271
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46654205607476634


 94%|█████████████████████████████████████████████████████████████████▌    | 5400/5769 [1:14:10<08:51,  1.44s/it]

Step 5400
Accuracy without re-ranking                            : 0.7222222222222222
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8224074074074074
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8164814814814815
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4709259259259259
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46685185185185185


 94%|██████████████████████████████████████████████████████████████████▏   | 5450/5769 [1:14:51<04:19,  1.23it/s]

Step 5450
Accuracy without re-ranking                            : 0.7234862385321101
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8231192660550458
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8172477064220184
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4722935779816514
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46825688073394495


 95%|██████████████████████████████████████████████████████████████████▋   | 5500/5769 [1:15:28<03:04,  1.46it/s]

Step 5500
Accuracy without re-ranking                            : 0.7241818181818181
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8238181818181818
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8178181818181818
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.472
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46745454545454546


 96%|███████████████████████████████████████████████████████████████████▎  | 5550/5769 [1:16:04<02:30,  1.46it/s]

Step 5550
Accuracy without re-ranking                            : 0.7237837837837838
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.823963963963964
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8176576576576576
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4718918918918919
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4673873873873874


 97%|███████████████████████████████████████████████████████████████████▉  | 5600/5769 [1:16:42<02:10,  1.30it/s]

Step 5600
Accuracy without re-ranking                            : 0.725
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.825
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.81875
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4733928571428571
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46875


 98%|████████████████████████████████████████████████████████████████████▌ | 5650/5769 [1:17:21<01:20,  1.48it/s]

Step 5650
Accuracy without re-ranking                            : 0.7261946902654868
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8254867256637168
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8191150442477876
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47415929203539825
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4695575221238938


 99%|█████████████████████████████████████████████████████████████████████▏| 5700/5769 [1:18:02<01:01,  1.13it/s]

Step 5700
Accuracy without re-ranking                            : 0.7273684210526316
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.826140350877193
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8198245614035088
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47368421052631576
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4691228070175439


100%|█████████████████████████████████████████████████████████████████████▊| 5750/5769 [1:18:40<00:15,  1.26it/s]

Step 5750
Accuracy without re-ranking                            : 0.7269565217391304
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8262608695652174
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.82
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4737391304347826
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4693913043478261


100%|██████████████████████████████████████████████████████████████████████| 5769/5769 [1:18:54<00:00,  1.22it/s]


In [187]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

print('text-embedding-ada-002')
print(f'Accuracy without re-ranking                            : {raw_accuracy}')
print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')

text-embedding-ada-002
Accuracy without re-ranking                            : 0.7275091003640146
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8268330733229329
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8205928237129485
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4733922690240943
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46888542208355


## 5.3 text-embedding-3-small 한국어 성능

In [188]:
ENGINE = 'text-embedding-3-small'
# ENGINE_2 = 'text-embedding-ada-002'
# ENGINE_3_S = 'text-embedding-3-small'  # OpenAI의 3세대 embedding 모델 (소형)
# ENGINE_3_L = 'text-embedding-3-large'  # OpenAI의 3세대 embedding 모델 (대형)

In [189]:
pinecone.delete_index(INDEX_NAME)  # delete the index

pinecone.create_index(
    INDEX_NAME, # 인덱스 이름
    dimension=1536, # 벡터의 치수, text-embedding-ada-002, text-embedding-3-small
    # dimension=3072, # 벡터의 치수, text-embedding-3-large
    
    metric='cosine', # 인덱스를 검색할 때 사용할 유사성 메트릭
    spec=PodSpec(
      environment="gcp-starter"
    )
    # pod_type="p1" # 파인콘 파드의 유형
)

# 인덱스를 변수로 저장
index = pinecone.Index(INDEX_NAME)

In [190]:
for idx in tqdm(range(0, len(dataset_ko['validation']), 128)):
    data_sample = dataset_ko['validation'][idx:idx + 128]

    passages = data_sample['paragraph']
    upload_texts_to_pinecone(passages, engine=ENGINE)

for idx in tqdm(range(0, len(dataset_ko['train']), 128)):
    data_sample = dataset_ko['train'][idx:idx + 128]

    passages = data_sample['paragraph']
    upload_texts_to_pinecone(passages, engine=ENGINE)

for idx in tqdm(range(0, len(dataset_ko['test']), 128)):
    data_sample = dataset_ko['test'][idx:idx + 128]

    passages = data_sample['paragraph']
    upload_texts_to_pinecone(passages, engine=ENGINE)

100%|██████████████████████████████████████████████████████████████████████████████| 6/6 [00:25<00:00,  4.33s/it]
100%|████████████████████████████████████████████████████████████████████████████| 29/29 [01:47<00:00,  3.71s/it]
100%|████████████████████████████████████████████████████████████████████████████| 11/11 [00:38<00:00,  3.49s/it]


In [191]:
i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4 = eval_ranking_4(question, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash_1 == correct_hash, reranked_hash_2 == correct_hash, reranked_hash_3 == correct_hash, reranked_hash_4 == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
        reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
        reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
        reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking                            : {raw_accuracy}')
        print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
        print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')


  1%|▌                                                                       | 50/5769 [01:14<1:05:47,  1.45it/s]

Step 50
Accuracy without re-ranking                            : 0.92
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.96
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.96
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.52
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.56


  2%|█▏                                                                     | 100/5769 [01:56<1:22:14,  1.15it/s]

Step 100
Accuracy without re-ranking                            : 0.87
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.91
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.95
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.54
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.49


  3%|█▊                                                                     | 150/5769 [02:34<1:07:31,  1.39it/s]

Step 150
Accuracy without re-ranking                            : 0.8333333333333334
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9133333333333333
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9466666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.54
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.48


  3%|██▍                                                                    | 200/5769 [03:18<1:11:20,  1.30it/s]

Step 200
Accuracy without re-ranking                            : 0.825
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.905
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.935
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.54
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47


  4%|███                                                                    | 250/5769 [04:00<1:06:41,  1.38it/s]

Step 250
Accuracy without re-ranking                            : 0.832
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.924
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.54
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.5


  5%|███▋                                                                   | 300/5769 [04:46<1:10:59,  1.28it/s]

Step 300
Accuracy without re-ranking                            : 0.8333333333333334
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9066666666666666
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9233333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5433333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.5


  6%|████▎                                                                  | 350/5769 [05:38<1:08:51,  1.31it/s]

Step 350
Accuracy without re-ranking                            : 0.8142857142857143
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8942857142857142
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9057142857142857
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5171428571428571
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4742857142857143


  7%|████▉                                                                  | 400/5769 [06:17<1:20:05,  1.12it/s]

Step 400
Accuracy without re-ranking                            : 0.8175
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9075
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5225
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4725


  8%|█████▌                                                                 | 450/5769 [06:56<1:11:33,  1.24it/s]

Step 450
Accuracy without re-ranking                            : 0.8088888888888889
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8888888888888888
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8977777777777778
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5133333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46444444444444444


  9%|██████▏                                                                | 500/5769 [07:40<1:09:16,  1.27it/s]

Step 500
Accuracy without re-ranking                            : 0.808
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.892
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.898
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.522
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.476


 10%|██████▉                                                                  | 550/5769 [08:36<58:46,  1.48it/s]

Step 550
Accuracy without re-ranking                            : 0.8072727272727273
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8927272727272727
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8945454545454545
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5054545454545455
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4690909090909091


 10%|███████▍                                                               | 600/5769 [09:16<1:36:25,  1.12s/it]

Step 600
Accuracy without re-ranking                            : 0.8116666666666666
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8916666666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8933333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.505
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4716666666666667


 11%|███████▉                                                               | 650/5769 [09:53<1:02:31,  1.36it/s]

Step 650
Accuracy without re-ranking                            : 0.8123076923076923
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8876923076923077
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8876923076923077
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.49846153846153846
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46307692307692305


 12%|████████▊                                                                | 700/5769 [10:27<59:41,  1.42it/s]

Step 700
Accuracy without re-ranking                            : 0.8128571428571428
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8885714285714286
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8828571428571429
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46


 13%|█████████▏                                                             | 750/5769 [11:06<1:01:52,  1.35it/s]

Step 750
Accuracy without re-ranking                            : 0.8053333333333333
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8813333333333333
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8773333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.496
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45866666666666667


 14%|██████████                                                               | 800/5769 [11:44<58:18,  1.42it/s]

Step 800
Accuracy without re-ranking                            : 0.80375
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8825
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.87875
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.495
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.45375


 15%|██████████▊                                                              | 850/5769 [12:18<51:23,  1.60it/s]

Step 850
Accuracy without re-ranking                            : 0.8047058823529412
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8858823529411765
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8811764705882353
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5011764705882353
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4541176470588235


 16%|███████████                                                            | 900/5769 [13:00<1:01:31,  1.32it/s]

Step 900
Accuracy without re-ranking                            : 0.8066666666666666
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8888888888888888
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8833333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4988888888888889
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4533333333333333


 16%|███████████▋                                                           | 950/5769 [13:47<1:03:55,  1.26it/s]

Step 950
Accuracy without re-ranking                            : 0.8
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8873684210526316
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.88
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4536842105263158


 17%|████████████▍                                                           | 1000/5769 [14:23<52:03,  1.53it/s]

Step 1000
Accuracy without re-ranking                            : 0.802
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.89
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.881
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.502
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.451


 18%|████████████▋                                                         | 1050/5769 [15:21<1:20:07,  1.02s/it]

Step 1050
Accuracy without re-ranking                            : 0.8057142857142857
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8914285714285715
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8819047619047619
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5009523809523809
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4504761904761905


 19%|█████████████▋                                                          | 1100/5769 [15:58<58:02,  1.34it/s]

Step 1100
Accuracy without re-ranking                            : 0.8054545454545454
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8881818181818182
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8790909090909091
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4990909090909091
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44727272727272727


 20%|██████████████▎                                                         | 1150/5769 [16:35<55:37,  1.38it/s]

Step 1150
Accuracy without re-ranking                            : 0.8043478260869565
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8860869565217391
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8773913043478261
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4991304347826087
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44956521739130434


 21%|██████████████▉                                                         | 1200/5769 [17:25<53:42,  1.42it/s]

Step 1200
Accuracy without re-ranking                            : 0.8008333333333333
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8841666666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8766666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.49333333333333335
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4475


 22%|███████████████▌                                                        | 1250/5769 [18:06<54:29,  1.38it/s]

Step 1250
Accuracy without re-ranking                            : 0.7936
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.88
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8736
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4888
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4432


 23%|████████████████▏                                                       | 1300/5769 [18:44<55:07,  1.35it/s]

Step 1300
Accuracy without re-ranking                            : 0.7938461538461539
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8792307692307693
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8707692307692307
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48307692307692307
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43846153846153846


 23%|████████████████▊                                                       | 1350/5769 [19:27<56:08,  1.31it/s]

Step 1350
Accuracy without re-ranking                            : 0.7948148148148149
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8814814814814815
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8725925925925926
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4874074074074074
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4444444444444444


 24%|████████████████▉                                                     | 1400/5769 [20:18<3:39:50,  3.02s/it]

Step 1400
Accuracy without re-ranking                            : 0.79
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.88
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.87
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48928571428571427
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44642857142857145


 25%|█████████████████▌                                                    | 1450/5769 [21:24<1:26:24,  1.20s/it]

Step 1450
Accuracy without re-ranking                            : 0.7910344827586206
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8806896551724138
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8710344827586207
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48758620689655174
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44413793103448274


 26%|██████████████████▋                                                     | 1500/5769 [22:10<52:24,  1.36it/s]

Step 1500
Accuracy without re-ranking                            : 0.7906666666666666
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8806666666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8713333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48733333333333334
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.444


 27%|███████████████████▎                                                    | 1550/5769 [22:50<50:16,  1.40it/s]

Step 1550
Accuracy without re-ranking                            : 0.7916129032258065
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8812903225806452
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8716129032258064
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4838709677419355
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44129032258064516


 28%|███████████████████▉                                                    | 1600/5769 [23:28<53:06,  1.31it/s]

Step 1600
Accuracy without re-ranking                            : 0.7925
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.880625
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.871875
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.478125
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43625


 29%|████████████████████▌                                                   | 1650/5769 [24:13<56:04,  1.22it/s]

Step 1650
Accuracy without re-ranking                            : 0.7933333333333333
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8812121212121212
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8721212121212121
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43696969696969695


 29%|█████████████████████▏                                                  | 1700/5769 [24:53<49:01,  1.38it/s]

Step 1700
Accuracy without re-ranking                            : 0.7923529411764706
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8811764705882353
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8723529411764706
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48176470588235293
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4370588235294118


 30%|█████████████████████▊                                                  | 1750/5769 [25:38<55:52,  1.20it/s]

Step 1750
Accuracy without re-ranking                            : 0.792
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8822857142857143
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.872
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48228571428571426
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.436


 31%|██████████████████████▍                                                 | 1800/5769 [26:21<46:11,  1.43it/s]

Step 1800
Accuracy without re-ranking                            : 0.79
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.88
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.87
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4777777777777778
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43444444444444447


 32%|██████████████████████▍                                               | 1850/5769 [26:58<1:05:10,  1.00it/s]

Step 1850
Accuracy without re-ranking                            : 0.7875675675675675
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8783783783783784
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8686486486486487
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47675675675675677
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43189189189189187


 33%|███████████████████████▋                                                | 1900/5769 [27:36<54:10,  1.19it/s]

Step 1900
Accuracy without re-ranking                            : 0.7873684210526316
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8768421052631579
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8678947368421053
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4768421052631579
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4305263157894737


 34%|████████████████████████▎                                               | 1950/5769 [28:14<47:13,  1.35it/s]

Step 1950
Accuracy without re-ranking                            : 0.7866666666666666
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8748717948717949
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8661538461538462
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47435897435897434
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4297435897435897


 35%|████████████████████████▎                                             | 2000/5769 [29:05<1:00:03,  1.05it/s]

Step 2000
Accuracy without re-ranking                            : 0.785
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8755
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.866
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.475
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.429


 36%|█████████████████████████▌                                              | 2050/5769 [29:44<49:53,  1.24it/s]

Step 2050
Accuracy without re-ranking                            : 0.7873170731707317
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8760975609756098
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8663414634146341
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4721951219512195
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.42585365853658536


 36%|██████████████████████████▏                                             | 2100/5769 [30:27<47:18,  1.29it/s]

Step 2100
Accuracy without re-ranking                            : 0.7866666666666666
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8752380952380953
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8661904761904762
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4738095238095238
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4280952380952381


 37%|██████████████████████████▊                                             | 2150/5769 [31:06<48:01,  1.26it/s]

Step 2150
Accuracy without re-ranking                            : 0.7869767441860465
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8753488372093023
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8655813953488372
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47488372093023257
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.42790697674418604


 38%|██████████████████████████▋                                           | 2200/5769 [31:50<2:33:24,  2.58s/it]

Step 2200
Accuracy without re-ranking                            : 0.7872727272727272
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8745454545454545
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8654545454545455
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47363636363636363
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.42727272727272725


 39%|████████████████████████████                                            | 2250/5769 [32:27<43:18,  1.35it/s]

Step 2250
Accuracy without re-ranking                            : 0.7857777777777778
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8737777777777778
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8635555555555555
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47333333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4266666666666667


 40%|███████████████████████████▉                                          | 2300/5769 [33:08<1:05:26,  1.13s/it]

Step 2300
Accuracy without re-ranking                            : 0.7856521739130434
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8743478260869565
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8634782608695653
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47347826086956524
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4260869565217391


 41%|█████████████████████████████▎                                          | 2350/5769 [33:50<43:54,  1.30it/s]

Step 2350
Accuracy without re-ranking                            : 0.7846808510638298
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8736170212765958
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8629787234042553
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4714893617021277
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4238297872340426


 42%|█████████████████████████████▉                                          | 2400/5769 [34:39<48:27,  1.16it/s]

Step 2400
Accuracy without re-ranking                            : 0.78375
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8725
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8616666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47458333333333336
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.42625


 42%|██████████████████████████████▌                                         | 2450/5769 [35:15<38:42,  1.43it/s]

Step 2450
Accuracy without re-ranking                            : 0.7804081632653062
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8722448979591837
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8604081632653061
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4710204081632653
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4253061224489796


 43%|███████████████████████████████▏                                        | 2500/5769 [35:48<36:43,  1.48it/s]

Step 2500
Accuracy without re-ranking                            : 0.7816
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8728
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8612
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4712
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4244


 44%|██████████████████████████████▉                                       | 2550/5769 [37:07<3:31:30,  3.94s/it]

Step 2550
Accuracy without re-ranking                            : 0.779607843137255
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8721568627450981
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8603921568627451
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47215686274509805
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4243137254901961


 45%|████████████████████████████████▍                                       | 2600/5769 [38:37<52:16,  1.01it/s]

Step 2600
Accuracy without re-ranking                            : 0.7788461538461539
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8707692307692307
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8596153846153847
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47115384615384615
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4226923076923077


 46%|████████████████████████████████▏                                     | 2650/5769 [39:41<1:12:05,  1.39s/it]

Step 2650
Accuracy without re-ranking                            : 0.7784905660377358
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.869811320754717
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8584905660377359
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.469811320754717
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.42


 47%|█████████████████████████████████▋                                      | 2700/5769 [40:35<41:02,  1.25it/s]

Step 2700
Accuracy without re-ranking                            : 0.7788888888888889
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8696296296296296
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8585185185185186
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.42185185185185187


 48%|██████████████████████████████████▎                                     | 2750/5769 [41:20<40:45,  1.23it/s]

Step 2750
Accuracy without re-ranking                            : 0.7796363636363637
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8701818181818182
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.858909090909091
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.472
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.424


 49%|██████████████████████████████████▉                                     | 2800/5769 [42:09<37:52,  1.31it/s]

Step 2800
Accuracy without re-ranking                            : 0.7803571428571429
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8703571428571428
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8589285714285714
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4732142857142857
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.42642857142857143


 49%|███████████████████████████████████▌                                    | 2850/5769 [42:50<37:42,  1.29it/s]

Step 2850
Accuracy without re-ranking                            : 0.7817543859649123
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8701754385964913
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8592982456140351
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4743859649122807
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4280701754385965


 50%|████████████████████████████████████▏                                   | 2900/5769 [43:31<45:10,  1.06it/s]

Step 2900
Accuracy without re-ranking                            : 0.7820689655172414
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.87
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8586206896551725
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47517241379310343
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4286206896551724


 51%|████████████████████████████████████▊                                   | 2950/5769 [44:27<45:16,  1.04it/s]

Step 2950
Accuracy without re-ranking                            : 0.7806779661016949
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8691525423728813
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8583050847457627
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47491525423728814
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.428135593220339


 52%|█████████████████████████████████████▍                                  | 3000/5769 [45:10<32:56,  1.40it/s]

Step 3000
Accuracy without re-ranking                            : 0.7803333333333333
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8686666666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8583333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47533333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.429


 53%|██████████████████████████████████████                                  | 3050/5769 [45:47<32:04,  1.41it/s]

Step 3050
Accuracy without re-ranking                            : 0.781311475409836
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8688524590163934
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8583606557377049
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47508196721311474
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4278688524590164


 54%|██████████████████████████████████████▋                                 | 3100/5769 [46:29<33:59,  1.31it/s]

Step 3100
Accuracy without re-ranking                            : 0.7812903225806451
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8690322580645161
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8580645161290322
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4754838709677419
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.42838709677419357


 55%|███████████████████████████████████████▎                                | 3150/5769 [47:09<32:09,  1.36it/s]

Step 3150
Accuracy without re-ranking                            : 0.780952380952381
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8688888888888889
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8577777777777778
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4746031746031746
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.42793650793650795


 55%|███████████████████████████████████████▉                                | 3200/5769 [47:48<29:47,  1.44it/s]

Step 3200
Accuracy without re-ranking                            : 0.7796875
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.86875
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.858125
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4740625
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.426875


 56%|████████████████████████████████████████▌                               | 3250/5769 [48:33<35:54,  1.17it/s]

Step 3250
Accuracy without re-ranking                            : 0.7781538461538462
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8686153846153846
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8584615384615385
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47446153846153843
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4276923076923077


 57%|█████████████████████████████████████████▏                              | 3300/5769 [49:19<33:39,  1.22it/s]

Step 3300
Accuracy without re-ranking                            : 0.776969696969697
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8678787878787879
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8575757575757575
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47575757575757577
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4290909090909091


 58%|█████████████████████████████████████████▊                              | 3350/5769 [50:02<34:26,  1.17it/s]

Step 3350
Accuracy without re-ranking                            : 0.7773134328358209
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8680597014925373
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8576119402985075
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4764179104477612
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43044776119402983


 59%|██████████████████████████████████████████▍                             | 3400/5769 [50:58<52:27,  1.33s/it]

Step 3400
Accuracy without re-ranking                            : 0.7761764705882352
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8685294117647059
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8579411764705882
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4767647058823529
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4308823529411765


 60%|███████████████████████████████████████████                             | 3450/5769 [51:41<30:24,  1.27it/s]

Step 3450
Accuracy without re-ranking                            : 0.7773913043478261
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8692753623188406
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8585507246376811
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4771014492753623
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4327536231884058


 61%|███████████████████████████████████████████▋                            | 3500/5769 [52:17<24:33,  1.54it/s]

Step 3500
Accuracy without re-ranking                            : 0.7745714285714286
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.868
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8571428571428571
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47514285714285714
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43257142857142855


 62%|████████████████████████████████████████████▎                           | 3550/5769 [52:53<24:58,  1.48it/s]

Step 3550
Accuracy without re-ranking                            : 0.7743661971830986
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8676056338028169
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.856338028169014
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47464788732394364
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4332394366197183


 62%|████████████████████████████████████████████▉                           | 3600/5769 [53:25<23:31,  1.54it/s]

Step 3600
Accuracy without re-ranking                            : 0.7752777777777777
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8680555555555556
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8566666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47555555555555556
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4336111111111111


 63%|█████████████████████████████████████████████▌                          | 3650/5769 [54:01<23:31,  1.50it/s]

Step 3650
Accuracy without re-ranking                            : 0.7747945205479452
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8687671232876713
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8575342465753425
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47506849315068495
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4347945205479452


 64%|██████████████████████████████████████████████▏                         | 3700/5769 [54:36<23:01,  1.50it/s]

Step 3700
Accuracy without re-ranking                            : 0.7754054054054054
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8691891891891892
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8581081081081081
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4756756756756757
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4362162162162162


 65%|██████████████████████████████████████████████▊                         | 3750/5769 [55:11<23:24,  1.44it/s]

Step 3750
Accuracy without re-ranking                            : 0.7749333333333334
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8690666666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8584
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47573333333333334
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.436


 66%|███████████████████████████████████████████████▍                        | 3800/5769 [55:44<21:44,  1.51it/s]

Step 3800
Accuracy without re-ranking                            : 0.776578947368421
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8697368421052631
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8592105263157894
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47710526315789475
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4363157894736842


 67%|████████████████████████████████████████████████                        | 3850/5769 [56:16<19:02,  1.68it/s]

Step 3850
Accuracy without re-ranking                            : 0.7766233766233767
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8690909090909091
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8584415584415584
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47584415584415585
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43636363636363634


 68%|████████████████████████████████████████████████▋                       | 3900/5769 [56:48<20:39,  1.51it/s]

Step 3900
Accuracy without re-ranking                            : 0.7769230769230769
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8692307692307693
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8587179487179487
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4756410256410256
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43666666666666665


 68%|█████████████████████████████████████████████████▎                      | 3950/5769 [57:22<25:51,  1.17it/s]

Step 3950
Accuracy without re-ranking                            : 0.7767088607594936
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8688607594936709
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8584810126582278
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4769620253164557
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43746835443037974


 69%|█████████████████████████████████████████████████▉                      | 4000/5769 [58:00<27:53,  1.06it/s]

Step 4000
Accuracy without re-ranking                            : 0.776
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.86825
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.858
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47675
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43675


 70%|██████████████████████████████████████████████████▌                     | 4050/5769 [58:32<16:06,  1.78it/s]

Step 4050
Accuracy without re-ranking                            : 0.7760493827160494
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8676543209876543
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8577777777777778
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4775308641975309
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4372839506172839


 71%|███████████████████████████████████████████████████▏                    | 4100/5769 [59:15<25:16,  1.10it/s]

Step 4100
Accuracy without re-ranking                            : 0.7760975609756098
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8668292682926829
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8573170731707317
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4775609756097561
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4375609756097561


 72%|███████████████████████████████████████████████████▊                    | 4150/5769 [59:50<19:52,  1.36it/s]

Step 4150
Accuracy without re-ranking                            : 0.7775903614457831
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.867710843373494
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8585542168674699
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.47879518072289157
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.43903614457831325


 73%|██████████████████████████████████████████████████▉                   | 4200/5769 [1:00:25<17:47,  1.47it/s]

Step 4200
Accuracy without re-ranking                            : 0.7778571428571428
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8680952380952381
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8590476190476191
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4788095238095238
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44047619047619047


 74%|███████████████████████████████████████████████████▌                  | 4250/5769 [1:01:09<18:17,  1.38it/s]

Step 4250
Accuracy without re-ranking                            : 0.7781176470588236
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8684705882352941
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.859764705882353
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4797647058823529
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4416470588235294


 75%|████████████████████████████████████████████████████▏                 | 4300/5769 [1:01:43<15:26,  1.59it/s]

Step 4300
Accuracy without re-ranking                            : 0.7779069767441861
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.868139534883721
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8593023255813953
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48023255813953486
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44232558139534883


 75%|████████████████████████████████████████████████████▊                 | 4350/5769 [1:02:20<16:27,  1.44it/s]

Step 4350
Accuracy without re-ranking                            : 0.7777011494252873
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8680459770114942
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8588505747126437
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44229885057471263


 76%|█████████████████████████████████████████████████████▍                | 4400/5769 [1:02:52<14:28,  1.58it/s]

Step 4400
Accuracy without re-ranking                            : 0.7784090909090909
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8681818181818182
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8590909090909091
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4797727272727273
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4431818181818182


 77%|█████████████████████████████████████████████████████▉                | 4450/5769 [1:03:27<15:02,  1.46it/s]

Step 4450
Accuracy without re-ranking                            : 0.7793258426966292
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8696629213483146
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8602247191011236
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4793258426966292
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44269662921348313


 78%|██████████████████████████████████████████████████████▌               | 4500/5769 [1:04:03<15:25,  1.37it/s]

Step 4500
Accuracy without re-ranking                            : 0.7793333333333333
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8695555555555555
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.86
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4428888888888889


 79%|███████████████████████████████████████████████████████▏              | 4550/5769 [1:04:37<13:45,  1.48it/s]

Step 4550
Accuracy without re-ranking                            : 0.78
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8701098901098901
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8606593406593407
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.481978021978022
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4441758241758242


 80%|███████████████████████████████████████████████████████▊              | 4600/5769 [1:05:14<12:09,  1.60it/s]

Step 4600
Accuracy without re-ranking                            : 0.7797826086956522
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8697826086956522
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8602173913043478
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48195652173913045
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4439130434782609


 81%|████████████████████████████████████████████████████████▍             | 4650/5769 [1:05:49<12:44,  1.46it/s]

Step 4650
Accuracy without re-ranking                            : 0.78
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8701075268817204
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8606451612903225
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48193548387096774
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.443010752688172


 81%|█████████████████████████████████████████████████████████             | 4700/5769 [1:06:29<11:59,  1.49it/s]

Step 4700
Accuracy without re-ranking                            : 0.7808510638297872
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8704255319148936
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8610638297872341
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4823404255319149
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4431914893617021


 82%|█████████████████████████████████████████████████████████▋            | 4750/5769 [1:07:03<11:57,  1.42it/s]

Step 4750
Accuracy without re-ranking                            : 0.7821052631578947
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8711578947368421
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8618947368421053
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48273684210526313
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4433684210526316


 83%|██████████████████████████████████████████████████████████▏           | 4800/5769 [1:07:35<11:23,  1.42it/s]

Step 4800
Accuracy without re-ranking                            : 0.7822916666666667
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.870625
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8616666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48333333333333334
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44333333333333336


 84%|██████████████████████████████████████████████████████████▊           | 4850/5769 [1:08:16<10:06,  1.51it/s]

Step 4850
Accuracy without re-ranking                            : 0.7826804123711341
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8715463917525773
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8628865979381444
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48329896907216496
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44268041237113404


 85%|███████████████████████████████████████████████████████████▍          | 4900/5769 [1:09:00<21:45,  1.50s/it]

Step 4900
Accuracy without re-ranking                            : 0.7824489795918367
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8724489795918368
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8630612244897959
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.483265306122449
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44285714285714284


 86%|████████████████████████████████████████████████████████████          | 4950/5769 [1:09:42<11:10,  1.22it/s]

Step 4950
Accuracy without re-ranking                            : 0.7818181818181819
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8725252525252525
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.863030303030303
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4824242424242424
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44242424242424244


 87%|████████████████████████████████████████████████████████████▋         | 5000/5769 [1:10:16<08:32,  1.50it/s]

Step 5000
Accuracy without re-ranking                            : 0.7832
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8732
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8634
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4826
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4422


 88%|█████████████████████████████████████████████████████████████▎        | 5050/5769 [1:10:50<07:55,  1.51it/s]

Step 5050
Accuracy without re-ranking                            : 0.783960396039604
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8732673267326733
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8635643564356436
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48277227722772276
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4415841584158416


 88%|█████████████████████████████████████████████████████████████▉        | 5100/5769 [1:11:28<07:26,  1.50it/s]

Step 5100
Accuracy without re-ranking                            : 0.7843137254901961
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8735294117647059
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.863921568627451
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48274509803921567
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4415686274509804


 89%|██████████████████████████████████████████████████████████████▍       | 5150/5769 [1:12:01<06:50,  1.51it/s]

Step 5150
Accuracy without re-ranking                            : 0.7848543689320389
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8743689320388349
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8650485436893204
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48330097087378643
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4413592233009709


 90%|███████████████████████████████████████████████████████████████       | 5200/5769 [1:12:36<06:42,  1.41it/s]

Step 5200
Accuracy without re-ranking                            : 0.7846153846153846
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8748076923076923
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.865
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4828846153846154
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4411538461538462


 91%|███████████████████████████████████████████████████████████████▋      | 5250/5769 [1:13:11<05:43,  1.51it/s]

Step 5250
Accuracy without re-ranking                            : 0.7847619047619048
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8746666666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8653333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4824761904761905
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44133333333333336


 92%|████████████████████████████████████████████████████████████████▎     | 5300/5769 [1:13:51<15:48,  2.02s/it]

Step 5300
Accuracy without re-ranking                            : 0.7852830188679245
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8750943396226415
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8658490566037735
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4839622641509434
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4432075471698113


 93%|████████████████████████████████████████████████████████████████▉     | 5350/5769 [1:14:35<04:35,  1.52it/s]

Step 5350
Accuracy without re-ranking                            : 0.7859813084112149
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8751401869158878
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8657943925233644
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48411214953271026
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44299065420560746


 94%|█████████████████████████████████████████████████████████████████▌    | 5400/5769 [1:15:10<04:10,  1.47it/s]

Step 5400
Accuracy without re-ranking                            : 0.7862962962962963
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8755555555555555
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8664814814814815
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4840740740740741
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44296296296296295


 94%|██████████████████████████████████████████████████████████████████▏   | 5450/5769 [1:15:48<03:36,  1.48it/s]

Step 5450
Accuracy without re-ranking                            : 0.7871559633027523
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8759633027522936
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8669724770642202
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48495412844036695
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44330275229357796


 95%|██████████████████████████████████████████████████████████████████▋   | 5500/5769 [1:16:24<03:23,  1.32it/s]

Step 5500
Accuracy without re-ranking                            : 0.7881818181818182
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8765454545454545
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8676363636363637
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4850909090909091
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4430909090909091


 96%|███████████████████████████████████████████████████████████████████▎  | 5550/5769 [1:17:01<02:35,  1.41it/s]

Step 5550
Accuracy without re-ranking                            : 0.7884684684684685
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8765765765765766
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8675675675675676
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4845045045045045
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44288288288288286


 97%|███████████████████████████████████████████████████████████████████▉  | 5600/5769 [1:17:40<03:31,  1.25s/it]

Step 5600
Accuracy without re-ranking                            : 0.7894642857142857
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8775
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8685714285714285
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.485
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44339285714285714


 98%|████████████████████████████████████████████████████████████████████▌ | 5650/5769 [1:18:20<01:39,  1.19it/s]

Step 5650
Accuracy without re-ranking                            : 0.7902654867256638
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8782300884955753
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8692035398230088
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48619469026548673
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44442477876106196


 99%|█████████████████████████████████████████████████████████████████████▏| 5700/5769 [1:18:54<00:50,  1.36it/s]

Step 5700
Accuracy without re-ranking                            : 0.7908771929824562
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8782456140350877
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8694736842105263
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.4856140350877193
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.443859649122807


100%|█████████████████████████████████████████████████████████████████████▊| 5750/5769 [1:19:29<00:13,  1.44it/s]

Step 5750
Accuracy without re-ranking                            : 0.790608695652174
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8780869565217392
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8693913043478261
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48591304347826086
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44417391304347825


100%|██████████████████████████████████████████████████████████████████████| 5769/5769 [1:19:42<00:00,  1.21it/s]


In [192]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

print('text-embedding-3-small')
print(f'Accuracy without re-ranking                            : {raw_accuracy}')
print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')

text-embedding-3-small
Accuracy without re-ranking                            : 0.7911249783324666
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.8784884728722482
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.8698214595250476
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.48569942797711907
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.44427110417750043


## 5.4 text-embedding-3-large 한국어 성능

In [193]:
ENGINE = 'text-embedding-3-large'
# ENGINE_2 = 'text-embedding-ada-002'
# ENGINE_3_S = 'text-embedding-3-small'  # OpenAI의 3세대 embedding 모델 (소형)
# ENGINE_3_L = 'text-embedding-3-large'  # OpenAI의 3세대 embedding 모델 (대형)

In [194]:
pinecone.delete_index(INDEX_NAME)  # delete the index

pinecone.create_index(
    INDEX_NAME, # 인덱스 이름
    # dimension=1536, # 벡터의 치수, text-embedding-ada-002, text-embedding-3-small
    dimension=3072, # 벡터의 치수, text-embedding-3-large
    
    metric='cosine', # 인덱스를 검색할 때 사용할 유사성 메트릭
    spec=PodSpec(
      environment="gcp-starter"
    )
    # pod_type="p1" # 파인콘 파드의 유형
)

# 인덱스를 변수로 저장
index = pinecone.Index(INDEX_NAME)

In [195]:
for idx in tqdm(range(0, len(dataset_ko['validation']), 128)):
    data_sample = dataset_ko['validation'][idx:idx + 128]

    passages = data_sample['paragraph']
    upload_texts_to_pinecone(passages, engine=ENGINE)

for idx in tqdm(range(0, len(dataset_ko['train']), 128)):
    data_sample = dataset_ko['train'][idx:idx + 128]

    passages = data_sample['paragraph']
    upload_texts_to_pinecone(passages, engine=ENGINE)

for idx in tqdm(range(0, len(dataset_ko['test']), 128)):
    data_sample = dataset_ko['test'][idx:idx + 128]

    passages = data_sample['paragraph']
    upload_texts_to_pinecone(passages, engine=ENGINE)

100%|██████████████████████████████████████████████████████████████████████████████| 6/6 [00:36<00:00,  6.16s/it]
100%|████████████████████████████████████████████████████████████████████████████| 29/29 [02:17<00:00,  4.75s/it]
100%|████████████████████████████████████████████████████████████████████████████| 11/11 [00:49<00:00,  4.51s/it]


In [196]:
i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4 = eval_ranking_4(question, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash_1 == correct_hash, reranked_hash_2 == correct_hash, reranked_hash_3 == correct_hash, reranked_hash_4 == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
        reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
        reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
        reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking                            : {raw_accuracy}')
        print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
        print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')


  1%|▌                                                                       | 50/5769 [01:14<2:13:36,  1.40s/it]

Step 50
Accuracy without re-ranking                            : 0.92
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.96
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.96
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.56
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.5


  2%|█▏                                                                     | 100/5769 [02:04<1:31:25,  1.03it/s]

Step 100
Accuracy without re-ranking                            : 0.9
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.92
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.95
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.6
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.51


  3%|█▊                                                                     | 150/5769 [02:50<1:20:12,  1.17it/s]

Step 150
Accuracy without re-ranking                            : 0.8866666666666667
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9266666666666666
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.96
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.6
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.5266666666666666


  3%|██▍                                                                    | 200/5769 [03:37<1:18:54,  1.18it/s]

Step 200
Accuracy without re-ranking                            : 0.885
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.93
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.96
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.585
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.54


  4%|███                                                                    | 250/5769 [04:23<1:27:36,  1.05it/s]

Step 250
Accuracy without re-ranking                            : 0.892
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.932
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.948
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.58
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.54


  5%|███▋                                                                   | 300/5769 [05:08<1:46:23,  1.17s/it]

Step 300
Accuracy without re-ranking                            : 0.8933333333333333
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9333333333333333
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9466666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.57
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.55


  6%|████▎                                                                  | 350/5769 [05:51<1:12:41,  1.24it/s]

Step 350
Accuracy without re-ranking                            : 0.8885714285714286
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9285714285714286
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9371428571428572
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.56
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.5285714285714286


  7%|████▉                                                                  | 400/5769 [06:33<1:14:19,  1.20it/s]

Step 400
Accuracy without re-ranking                            : 0.8875
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9375
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.945
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.55
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.5225


  8%|█████▌                                                                 | 450/5769 [07:17<1:15:22,  1.18it/s]

Step 450
Accuracy without re-ranking                            : 0.88
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9333333333333333
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9422222222222222
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.54
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.5155555555555555


  9%|██████▏                                                                | 500/5769 [07:58<1:18:54,  1.11it/s]

Step 500
Accuracy without re-ranking                            : 0.884
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.936
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.944
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.552
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.524


 10%|██████▊                                                                | 550/5769 [08:44<1:22:20,  1.06it/s]

Step 550
Accuracy without re-ranking                            : 0.88
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9345454545454546
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9381818181818182
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5454545454545454
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.5127272727272727


 10%|███████▍                                                               | 600/5769 [09:27<1:14:07,  1.16it/s]

Step 600
Accuracy without re-ranking                            : 0.8733333333333333
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9333333333333333
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9366666666666666
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5466666666666666
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.5083333333333333


 11%|███████▉                                                               | 650/5769 [10:10<1:12:47,  1.17it/s]

Step 650
Accuracy without re-ranking                            : 0.8753846153846154
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9353846153846154
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9384615384615385
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5307692307692308
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4938461538461538


 12%|████████▌                                                              | 700/5769 [10:53<1:09:59,  1.21it/s]

Step 700
Accuracy without re-ranking                            : 0.8728571428571429
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9357142857142857
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9342857142857143
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5314285714285715
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.5014285714285714


 13%|█████████▏                                                             | 750/5769 [11:36<1:07:31,  1.24it/s]

Step 750
Accuracy without re-ranking                            : 0.8666666666666667
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.932
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.932
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.532
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.49866666666666665


 14%|█████████▊                                                             | 800/5769 [12:24<1:16:10,  1.09it/s]

Step 800
Accuracy without re-ranking                            : 0.87125
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.935
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.93375
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.52625
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.49125


 15%|██████████▍                                                            | 850/5769 [13:05<1:07:01,  1.22it/s]

Step 850
Accuracy without re-ranking                            : 0.8717647058823529
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9341176470588235
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9329411764705883
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5270588235294118
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.49176470588235294


 16%|███████████                                                            | 900/5769 [13:47<1:04:11,  1.26it/s]

Step 900
Accuracy without re-ranking                            : 0.8766666666666667
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9377777777777778
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9355555555555556
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5333333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4955555555555556


 16%|███████████▋                                                           | 950/5769 [14:28<1:05:27,  1.23it/s]

Step 950
Accuracy without re-ranking                            : 0.8768421052631579
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9368421052631579
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9336842105263158
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5305263157894737
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4936842105263158


 17%|████████████▏                                                         | 1000/5769 [15:18<2:12:17,  1.66s/it]

Step 1000
Accuracy without re-ranking                            : 0.877
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.937
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.933
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.535
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.492


 18%|████████████▋                                                         | 1050/5769 [15:59<1:14:42,  1.05it/s]

Step 1050
Accuracy without re-ranking                            : 0.8761904761904762
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9380952380952381
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9333333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5352380952380953
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4895238095238095


 19%|█████████████▎                                                        | 1100/5769 [16:46<1:03:28,  1.23it/s]

Step 1100
Accuracy without re-ranking                            : 0.8763636363636363
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9381818181818182
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9327272727272727
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5363636363636364
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.48363636363636364


 20%|█████████████▉                                                        | 1150/5769 [17:28<1:01:33,  1.25it/s]

Step 1150
Accuracy without re-ranking                            : 0.8756521739130435
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9382608695652174
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9339130434782609
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5373913043478261
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.48434782608695653


 21%|██████████████▌                                                       | 1200/5769 [18:15<1:09:29,  1.10it/s]

Step 1200
Accuracy without re-ranking                            : 0.8725
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9366666666666666
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9333333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5366666666666666
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4875


 22%|███████████████▌                                                        | 1250/5769 [18:56<57:41,  1.31it/s]

Step 1250
Accuracy without re-ranking                            : 0.872
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9344
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9304
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5336
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4832


 23%|███████████████▊                                                      | 1300/5769 [19:39<1:01:19,  1.21it/s]

Step 1300
Accuracy without re-ranking                            : 0.87
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.933076923076923
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.926923076923077
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5269230769230769
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4776923076923077


 23%|████████████████▍                                                     | 1350/5769 [20:26<1:00:00,  1.23it/s]

Step 1350
Accuracy without re-ranking                            : 0.8696296296296296
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9340740740740741
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9274074074074075
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5288888888888889
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.48


 24%|████████████████▉                                                     | 1400/5769 [21:10<1:03:04,  1.15it/s]

Step 1400
Accuracy without re-ranking                            : 0.8721428571428571
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.935
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9278571428571428
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.53
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4835714285714286


 25%|█████████████████▌                                                    | 1450/5769 [21:52<1:01:47,  1.16it/s]

Step 1450
Accuracy without re-ranking                            : 0.8737931034482759
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9351724137931035
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9282758620689655
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5303448275862069
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.48344827586206895


 26%|██████████████████▋                                                     | 1500/5769 [22:33<57:45,  1.23it/s]

Step 1500
Accuracy without re-ranking                            : 0.8746666666666667
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.934
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9266666666666666
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5286666666666666
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4786666666666667


 27%|██████████████████▊                                                   | 1550/5769 [23:17<1:02:30,  1.12it/s]

Step 1550
Accuracy without re-ranking                            : 0.8729032258064516
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9329032258064516
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9251612903225807
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5277419354838709
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47870967741935483


 28%|███████████████████▍                                                  | 1600/5769 [24:04<1:23:15,  1.20s/it]

Step 1600
Accuracy without re-ranking                            : 0.875
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.93375
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.92625
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.52625
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47625


 29%|████████████████████▌                                                   | 1650/5769 [24:49<59:52,  1.15it/s]

Step 1650
Accuracy without re-ranking                            : 0.8739393939393939
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9327272727272727
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9242424242424242
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5284848484848484
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4775757575757576


 29%|█████████████████████▏                                                  | 1700/5769 [25:31<56:59,  1.19it/s]

Step 1700
Accuracy without re-ranking                            : 0.8741176470588236
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9329411764705883
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9247058823529412
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5294117647058824
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4788235294117647


 30%|█████████████████████▏                                                | 1750/5769 [26:15<1:04:33,  1.04it/s]

Step 1750
Accuracy without re-ranking                            : 0.8731428571428571
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9331428571428572
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9234285714285714
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5291428571428571
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47714285714285715


 31%|██████████████████████▍                                                 | 1800/5769 [27:00<55:24,  1.19it/s]

Step 1800
Accuracy without re-ranking                            : 0.8722222222222222
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9316666666666666
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9222222222222223
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.525
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.475


 32%|██████████████████████▍                                               | 1850/5769 [27:45<1:01:48,  1.06it/s]

Step 1850
Accuracy without re-ranking                            : 0.8724324324324324
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9302702702702703
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9210810810810811
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5286486486486487
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4751351351351351


 33%|███████████████████████▋                                                | 1900/5769 [28:29<54:50,  1.18it/s]

Step 1900
Accuracy without re-ranking                            : 0.8710526315789474
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9289473684210526
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.92
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5268421052631579
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47421052631578947


 34%|███████████████████████▋                                              | 1950/5769 [29:15<1:03:51,  1.00s/it]

Step 1950
Accuracy without re-ranking                            : 0.8717948717948718
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9297435897435897
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9205128205128205
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5251282051282051
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4764102564102564


 35%|████████████████████████▉                                               | 2000/5769 [29:58<49:56,  1.26it/s]

Step 2000
Accuracy without re-ranking                            : 0.87
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9295
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9195
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.524
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.475


 36%|█████████████████████████▌                                              | 2050/5769 [30:40<52:13,  1.19it/s]

Step 2050
Accuracy without re-ranking                            : 0.8702439024390244
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9292682926829269
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9185365853658537
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5219512195121951
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4717073170731707


 36%|██████████████████████████▏                                             | 2100/5769 [31:26<48:55,  1.25it/s]

Step 2100
Accuracy without re-ranking                            : 0.8690476190476191
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9280952380952381
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9185714285714286
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5214285714285715
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47333333333333333


 37%|██████████████████████████▊                                             | 2150/5769 [32:09<48:56,  1.23it/s]

Step 2150
Accuracy without re-ranking                            : 0.867906976744186
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9288372093023256
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9181395348837209
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.52
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47162790697674417


 38%|███████████████████████████▍                                            | 2200/5769 [32:52<48:12,  1.23it/s]

Step 2200
Accuracy without re-ranking                            : 0.8677272727272727
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9277272727272727
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9177272727272727
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.519090909090909
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47


 39%|████████████████████████████                                            | 2250/5769 [33:34<46:03,  1.27it/s]

Step 2250
Accuracy without re-ranking                            : 0.868
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9275555555555556
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9164444444444444
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5173333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4688888888888889


 40%|████████████████████████████▋                                           | 2300/5769 [34:15<47:45,  1.21it/s]

Step 2300
Accuracy without re-ranking                            : 0.8678260869565217
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.928695652173913
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9165217391304348
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5182608695652174
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4682608695652174


 41%|█████████████████████████████▎                                          | 2350/5769 [34:59<48:30,  1.17it/s]

Step 2350
Accuracy without re-ranking                            : 0.8685106382978723
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9280851063829787
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.916595744680851
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5161702127659574
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46638297872340423


 42%|█████████████████████████████▉                                          | 2400/5769 [35:41<46:42,  1.20it/s]

Step 2400
Accuracy without re-ranking                            : 0.8675
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9279166666666666
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9158333333333334
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5183333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4683333333333333


 42%|██████████████████████████████▌                                         | 2450/5769 [36:22<45:24,  1.22it/s]

Step 2450
Accuracy without re-ranking                            : 0.8665306122448979
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9277551020408163
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9146938775510204
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5155102040816326
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46775510204081633


 43%|███████████████████████████████▏                                        | 2500/5769 [37:03<44:55,  1.21it/s]

Step 2500
Accuracy without re-ranking                            : 0.8668
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.928
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9156
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5152
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4676


 44%|███████████████████████████████▊                                        | 2550/5769 [37:45<45:07,  1.19it/s]

Step 2550
Accuracy without re-ranking                            : 0.8635294117647059
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9274509803921569
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9149019607843137
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5149019607843137
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4666666666666667


 45%|████████████████████████████████▍                                       | 2600/5769 [38:26<42:36,  1.24it/s]

Step 2600
Accuracy without re-ranking                            : 0.8630769230769231
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9261538461538461
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9146153846153846
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5153846153846153
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4665384615384615


 46%|█████████████████████████████████                                       | 2650/5769 [39:07<42:37,  1.22it/s]

Step 2650
Accuracy without re-ranking                            : 0.8618867924528302
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9256603773584906
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9135849056603773
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5150943396226415
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4660377358490566


 47%|████████████████████████████████▊                                     | 2700/5769 [39:54<1:17:42,  1.52s/it]

Step 2700
Accuracy without re-ranking                            : 0.8625925925925926
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9255555555555556
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9133333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5155555555555555
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4662962962962963


 48%|██████████████████████████████████▎                                     | 2750/5769 [40:38<45:14,  1.11it/s]

Step 2750
Accuracy without re-ranking                            : 0.8636363636363636
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9261818181818182
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9138181818181819
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5163636363636364
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4672727272727273


 49%|██████████████████████████████████▉                                     | 2800/5769 [41:25<53:51,  1.09s/it]

Step 2800
Accuracy without re-ranking                            : 0.8632142857142857
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9257142857142857
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9132142857142858
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5142857142857142
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4675


 49%|███████████████████████████████████▌                                    | 2850/5769 [42:11<59:12,  1.22s/it]

Step 2850
Accuracy without re-ranking                            : 0.8635087719298246
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9256140350877193
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9133333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5157894736842106
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4673684210526316


 50%|████████████████████████████████████▏                                   | 2900/5769 [42:51<40:23,  1.18it/s]

Step 2900
Accuracy without re-ranking                            : 0.8637931034482759
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9251724137931034
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9127586206896552
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5155172413793103
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46793103448275863


 51%|████████████████████████████████████▊                                   | 2950/5769 [43:34<37:27,  1.25it/s]

Step 2950
Accuracy without re-ranking                            : 0.8616949152542372
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9247457627118644
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.912542372881356
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5169491525423728
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46983050847457625


 52%|█████████████████████████████████████▍                                  | 3000/5769 [44:16<43:50,  1.05it/s]

Step 3000
Accuracy without re-ranking                            : 0.8613333333333333
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9243333333333333
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9123333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5163333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.469


 53%|██████████████████████████████████████                                  | 3050/5769 [45:01<41:13,  1.10it/s]

Step 3050
Accuracy without re-ranking                            : 0.861639344262295
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9242622950819672
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9121311475409836
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5160655737704918
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.46852459016393444


 54%|██████████████████████████████████████▋                                 | 3100/5769 [45:44<37:14,  1.19it/s]

Step 3100
Accuracy without re-ranking                            : 0.8622580645161291
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9245161290322581
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9119354838709678
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5180645161290323
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4706451612903226


 55%|███████████████████████████████████████▎                                | 3150/5769 [46:25<35:38,  1.22it/s]

Step 3150
Accuracy without re-ranking                            : 0.8615873015873016
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9234920634920635
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9111111111111111
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5184126984126984
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4711111111111111


 55%|███████████████████████████████████████▉                                | 3200/5769 [47:08<35:43,  1.20it/s]

Step 3200
Accuracy without re-ranking                            : 0.860625
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.923125
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.91125
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.518125
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4709375


 56%|████████████████████████████████████████▌                               | 3250/5769 [47:50<34:53,  1.20it/s]

Step 3250
Accuracy without re-ranking                            : 0.86
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9227692307692308
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9110769230769231
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5175384615384615
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4704615384615385


 57%|█████████████████████████████████████████▏                              | 3300/5769 [48:33<33:57,  1.21it/s]

Step 3300
Accuracy without re-ranking                            : 0.8612121212121212
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9233333333333333
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9112121212121213
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5181818181818182
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4709090909090909


 58%|█████████████████████████████████████████▊                              | 3350/5769 [49:19<42:41,  1.06s/it]

Step 3350
Accuracy without re-ranking                            : 0.8611940298507462
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9229850746268656
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9110447761194029
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5182089552238806
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4722388059701493


 59%|██████████████████████████████████████████▍                             | 3400/5769 [50:06<41:13,  1.04s/it]

Step 3400
Accuracy without re-ranking                            : 0.8597058823529412
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9229411764705883
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9111764705882353
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5176470588235295
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4723529411764706


 60%|███████████████████████████████████████████                             | 3450/5769 [50:53<30:42,  1.26it/s]

Step 3450
Accuracy without re-ranking                            : 0.8597101449275363
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9234782608695652
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9115942028985508
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.518840579710145
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47246376811594204


 61%|███████████████████████████████████████████▋                            | 3500/5769 [51:35<35:57,  1.05it/s]

Step 3500
Accuracy without re-ranking                            : 0.8582857142857143
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9231428571428572
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9117142857142857
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5171428571428571
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4714285714285714


 62%|████████████████████████████████████████████▎                           | 3550/5769 [52:16<32:15,  1.15it/s]

Step 3550
Accuracy without re-ranking                            : 0.8571830985915493
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9225352112676056
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9109859154929577
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5154929577464789
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4715492957746479


 62%|████████████████████████████████████████████▉                           | 3600/5769 [52:58<28:28,  1.27it/s]

Step 3600
Accuracy without re-ranking                            : 0.8575
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9230555555555555
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9119444444444444
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5169444444444444
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4727777777777778


 63%|█████████████████████████████████████████████▌                          | 3650/5769 [53:40<30:16,  1.17it/s]

Step 3650
Accuracy without re-ranking                            : 0.8575342465753425
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9235616438356165
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9123287671232877
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5158904109589041
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47342465753424656


 64%|██████████████████████████████████████████████▏                         | 3700/5769 [54:21<28:13,  1.22it/s]

Step 3700
Accuracy without re-ranking                            : 0.857027027027027
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9235135135135135
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9124324324324324
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5162162162162162
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4737837837837838


 65%|██████████████████████████████████████████████▊                         | 3750/5769 [55:03<28:02,  1.20it/s]

Step 3750
Accuracy without re-ranking                            : 0.8568
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9229333333333334
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9122666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5168
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4728


 66%|███████████████████████████████████████████████▍                        | 3800/5769 [55:46<27:09,  1.21it/s]

Step 3800
Accuracy without re-ranking                            : 0.8576315789473684
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9234210526315789
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9128947368421053
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5168421052631579
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4731578947368421


 67%|████████████████████████████████████████████████                        | 3850/5769 [56:28<27:50,  1.15it/s]

Step 3850
Accuracy without re-ranking                            : 0.8568831168831169
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9225974025974026
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.911948051948052
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5166233766233767
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47324675324675325


 68%|████████████████████████████████████████████████▋                       | 3900/5769 [57:13<31:00,  1.00it/s]

Step 3900
Accuracy without re-ranking                            : 0.8569230769230769
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9225641025641026
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9117948717948718
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5169230769230769
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4746153846153846


 68%|█████████████████████████████████████████████████▎                      | 3950/5769 [57:56<26:46,  1.13it/s]

Step 3950
Accuracy without re-ranking                            : 0.8569620253164557
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9220253164556962
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9116455696202531
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5169620253164557
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47468354430379744


 69%|█████████████████████████████████████████████████▉                      | 4000/5769 [58:39<24:50,  1.19it/s]

Step 4000
Accuracy without re-ranking                            : 0.85675
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9215
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.91125
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5165
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47375


 70%|██████████████████████████████████████████████████▌                     | 4050/5769 [59:22<24:17,  1.18it/s]

Step 4050
Accuracy without re-ranking                            : 0.8562962962962963
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9202469135802469
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9103703703703704
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5182716049382716
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4748148148148148


 71%|█████████████████████████████████████████████████▋                    | 4100/5769 [1:00:05<21:57,  1.27it/s]

Step 4100
Accuracy without re-ranking                            : 0.855609756097561
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9192682926829269
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9097560975609756
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5182926829268293
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4748780487804878


 72%|██████████████████████████████████████████████████▎                   | 4150/5769 [1:00:51<23:13,  1.16it/s]

Step 4150
Accuracy without re-ranking                            : 0.8568674698795181
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9195180722891566
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9106024096385542
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5183132530120482
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47518072289156627


 73%|██████████████████████████████████████████████████▉                   | 4200/5769 [1:01:34<22:08,  1.18it/s]

Step 4200
Accuracy without re-ranking                            : 0.8571428571428571
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9195238095238095
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9111904761904762
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5190476190476191
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4754761904761905


 74%|███████████████████████████████████████████████████▌                  | 4250/5769 [1:02:19<29:36,  1.17s/it]

Step 4250
Accuracy without re-ranking                            : 0.8574117647058823
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9197647058823529
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9112941176470588
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5195294117647059
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47623529411764703


 75%|████████████████████████████████████████████████████▏                 | 4300/5769 [1:03:04<20:13,  1.21it/s]

Step 4300
Accuracy without re-ranking                            : 0.8574418604651163
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.92
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9113953488372093
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5195348837209303
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47674418604651164


 75%|████████████████████████████████████████████████████▊                 | 4350/5769 [1:03:50<20:25,  1.16it/s]

Step 4350
Accuracy without re-ranking                            : 0.8574712643678161
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9197701149425287
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9110344827586206
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5197701149425288
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4763218390804598


 76%|█████████████████████████████████████████████████████▍                | 4400/5769 [1:04:33<21:08,  1.08it/s]

Step 4400
Accuracy without re-ranking                            : 0.8581818181818182
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9197727272727273
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9111363636363636
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5195454545454545
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4756818181818182


 77%|█████████████████████████████████████████████████████▉                | 4450/5769 [1:05:17<18:40,  1.18it/s]

Step 4450
Accuracy without re-ranking                            : 0.8584269662921349
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9204494382022472
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9114606741573034
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5179775280898876
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47393258426966295


 78%|██████████████████████████████████████████████████████▌               | 4500/5769 [1:06:01<17:16,  1.22it/s]

Step 4500
Accuracy without re-ranking                            : 0.8577777777777778
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9204444444444444
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9115555555555556
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5188888888888888
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.474


 79%|███████████████████████████████████████████████████████▏              | 4550/5769 [1:06:43<17:03,  1.19it/s]

Step 4550
Accuracy without re-ranking                            : 0.8578021978021978
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9206593406593406
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9118681318681319
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5215384615384615
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47494505494505496


 80%|███████████████████████████████████████████████████████▊              | 4600/5769 [1:07:28<16:33,  1.18it/s]

Step 4600
Accuracy without re-ranking                            : 0.8578260869565217
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9206521739130434
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9117391304347826
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5223913043478261
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4752173913043478


 81%|████████████████████████████████████████████████████████▍             | 4650/5769 [1:08:10<15:37,  1.19it/s]

Step 4650
Accuracy without re-ranking                            : 0.8580645161290322
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9204301075268817
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9118279569892473
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5217204301075269
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47419354838709676


 81%|█████████████████████████████████████████████████████████             | 4700/5769 [1:08:52<15:09,  1.18it/s]

Step 4700
Accuracy without re-ranking                            : 0.8580851063829787
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9204255319148936
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9119148936170213
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5223404255319148
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47425531914893615


 82%|█████████████████████████████████████████████████████████▋            | 4750/5769 [1:09:37<15:11,  1.12it/s]

Step 4750
Accuracy without re-ranking                            : 0.8585263157894737
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9206315789473685
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9122105263157895
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5221052631578947
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4747368421052632


 83%|██████████████████████████████████████████████████████████▏           | 4800/5769 [1:10:22<13:34,  1.19it/s]

Step 4800
Accuracy without re-ranking                            : 0.858125
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9204166666666667
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9120833333333334
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5216666666666666
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4741666666666667


 84%|██████████████████████████████████████████████████████████▊           | 4850/5769 [1:11:04<13:25,  1.14it/s]

Step 4850
Accuracy without re-ranking                            : 0.8579381443298969
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9202061855670103
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9119587628865979
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5212371134020618
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4734020618556701


 85%|███████████████████████████████████████████████████████████▍          | 4900/5769 [1:11:47<12:38,  1.15it/s]

Step 4900
Accuracy without re-ranking                            : 0.8581632653061224
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9202040816326531
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9114285714285715
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5212244897959184
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4736734693877551


 86%|████████████████████████████████████████████████████████████          | 4950/5769 [1:12:29<11:18,  1.21it/s]

Step 4950
Accuracy without re-ranking                            : 0.8573737373737373
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9202020202020202
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9113131313131313
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5202020202020202
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47353535353535353


 87%|████████████████████████████████████████████████████████████▋         | 5000/5769 [1:13:20<11:04,  1.16it/s]

Step 5000
Accuracy without re-ranking                            : 0.858
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9204
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9112
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.52
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.473


 88%|█████████████████████████████████████████████████████████████▎        | 5050/5769 [1:14:04<09:52,  1.21it/s]

Step 5050
Accuracy without re-ranking                            : 0.8584158415841584
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9203960396039604
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9114851485148515
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5205940594059406
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47386138613861384


 88%|█████████████████████████████████████████████████████████████▉        | 5100/5769 [1:14:50<12:00,  1.08s/it]

Step 5100
Accuracy without re-ranking                            : 0.8586274509803922
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9205882352941176
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9117647058823529
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5205882352941177
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47352941176470587


 89%|██████████████████████████████████████████████████████████████▍       | 5150/5769 [1:15:44<09:30,  1.08it/s]

Step 5150
Accuracy without re-ranking                            : 0.8586407766990292
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9207766990291262
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9122330097087379
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5207766990291263
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47436893203883496


 90%|███████████████████████████████████████████████████████████████       | 5200/5769 [1:16:32<07:59,  1.19it/s]

Step 5200
Accuracy without re-ranking                            : 0.8588461538461538
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9211538461538461
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.911923076923077
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5209615384615385
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47384615384615386


 91%|███████████████████████████████████████████████████████████████▋      | 5250/5769 [1:17:16<07:47,  1.11it/s]

Step 5250
Accuracy without re-ranking                            : 0.8590476190476191
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.920952380952381
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9121904761904762
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5211428571428571
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47295238095238096


 92%|████████████████████████████████████████████████████████████████▎     | 5300/5769 [1:18:00<06:23,  1.22it/s]

Step 5300
Accuracy without re-ranking                            : 0.8594339622641509
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9213207547169812
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9126415094339623
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5220754716981132
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47358490566037736


 93%|████████████████████████████████████████████████████████████████▉     | 5350/5769 [1:18:45<06:03,  1.15it/s]

Step 5350
Accuracy without re-ranking                            : 0.86
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9220560747663551
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9134579439252336
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5218691588785047
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4730841121495327


 94%|█████████████████████████████████████████████████████████████████▌    | 5400/5769 [1:19:33<05:45,  1.07it/s]

Step 5400
Accuracy without re-ranking                            : 0.8596296296296296
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.922037037037037
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9137037037037037
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5218518518518519
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4735185185185185


 94%|██████████████████████████████████████████████████████████████████▏   | 5450/5769 [1:20:19<06:01,  1.13s/it]

Step 5450
Accuracy without re-ranking                            : 0.8598165137614678
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9222018348623853
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9139449541284403
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5227522935779817
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47394495412844034


 95%|██████████████████████████████████████████████████████████████████▋   | 5500/5769 [1:21:05<03:44,  1.20it/s]

Step 5500
Accuracy without re-ranking                            : 0.8605454545454545
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9227272727272727
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9145454545454546
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.523090909090909
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4743636363636364


 96%|███████████████████████████████████████████████████████████████████▎  | 5550/5769 [1:21:52<03:07,  1.17it/s]

Step 5550
Accuracy without re-ranking                            : 0.8605405405405405
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9227027027027027
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9144144144144144
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.523063063063063
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4751351351351351


 97%|███████████████████████████████████████████████████████████████████▉  | 5600/5769 [1:22:35<02:17,  1.23it/s]

Step 5600
Accuracy without re-ranking                            : 0.8608928571428571
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9233928571428571
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9151785714285714
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5239285714285714
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.4757142857142857


 98%|████████████████████████████████████████████████████████████████████▌ | 5650/5769 [1:23:18<01:43,  1.16it/s]

Step 5650
Accuracy without re-ranking                            : 0.8617699115044247
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9238938053097345
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9155752212389381
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5249557522123893
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47646017699115045


 99%|█████████████████████████████████████████████████████████████████████▏| 5700/5769 [1:24:00<00:56,  1.21it/s]

Step 5700
Accuracy without re-ranking                            : 0.8621052631578947
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9240350877192982
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9159649122807018
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5247368421052632
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47649122807017547


100%|█████████████████████████████████████████████████████████████████████▊| 5750/5769 [1:24:42<00:16,  1.18it/s]

Step 5750
Accuracy without re-ranking                            : 0.8620869565217392
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9243478260869565
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9163478260869565
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5259130434782608
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47756521739130436


100%|██████████████████████████████████████████████████████████████████████| 5769/5769 [1:24:58<00:00,  1.13it/s]


In [197]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

print('text-embedding-3-large')
print(f'Accuracy without re-ranking                            : {raw_accuracy}')
print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')

text-embedding-3-large
Accuracy without re-ranking                            : 0.8623678280464552
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.9245969838793552
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.9166233315999307
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.5260877101750737
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.47737909516380655


## 5.5 오픈소스 임베딩 한국어 성능 (multi-qa-mpnet-base-cos-v1)

In [198]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('sentence-transformers/multi-qa-mpnet-base-cos-v1')

In [199]:
validation_paragraphs = dataset_ko['validation'].remove_columns(['question', 'label'])
train_paragraphs = dataset_ko['train'].remove_columns(['question', 'label'])
test_paragraphs = dataset_ko['test'].remove_columns(['question', 'label'])
combined_paragraphs = concatenate_datasets([validation_paragraphs, train_paragraphs, test_paragraphs])


In [200]:
# 문서 임베딩
# docs_ko = dataset_ko['validation']['paragraph']
docs_ko = combined_paragraphs['paragraph']
doc_ko_emb = model.encode(docs_ko, batch_size=32, show_progress_bar=True)

Batches:   0%|          | 0/181 [00:00<?, ?it/s]

In [201]:
from random import sample

query = sample(dataset_ko['validation']['question'], 1)[0]
print(query)
final_results = get_results_from_pinecone(query, top_k=3, re_rank=True)

봉룡은 숙정을 죽였는가?
Query: 봉룡은 숙정을 죽였는가?
Pinecone Result==> af810b9c9f9716f84c5592fcfec5d112	0.53	구한말, 통영 안뒤산 기슭 간창골에 김봉제 형제가 살았다. 김봉제는 관약국을 경영하며 부를
Pinecone Result==> f348d9af9988690a984fca69f83f3b13	0.46	이후 이순신은 붙잡힌 임준영 대신, 준사의 도움으로 풀려난 수봉에게 전갈을 받고 왜군의 전
Pinecone Result==> c52688fa9fe06e2699f18f2bf0feea29	0.45	숙종은 궁궐 근처에 살았던 길고양이를 불쌍히 여겨 자신의 궐로 데려와 금손이라는 이름을 붙
Document ID (Hash)		Retrieval Score	CE Score	Text
Reranked Result==> af810b9c9f9716f84c5592fcfec5d112	0.53	0.64	구한말, 통영 안뒤산 기슭 간창골에 김봉제 형제가 살았다. 김봉제는 관약국을 경영하며 부를
Reranked Result==> f348d9af9988690a984fca69f83f3b13	0.46	0.02	이후 이순신은 붙잡힌 임준영 대신, 준사의 도움으로 풀려난 수봉에게 전갈을 받고 왜군의 전
Reranked Result==> c52688fa9fe06e2699f18f2bf0feea29	0.45	0.02	숙종은 궁궐 근처에 살았던 길고양이를 불쌍히 여겨 자신의 궐로 데려와 금손이라는 이름을 붙


In [202]:
from sentence_transformers import util
query_emb = model.encode(query)
print(query)

#쿼리와 모든 문서 임베딩 사이의 도트 점수를 계산합니다.
scores = util.dot_score(query_emb, doc_ko_emb)[0].cpu().tolist()

#문서와 점수 결합
doc_score_pairs = list(zip(docs_ko, scores))

#점수에 따른 내림차순 정렬
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)

#구절 및 점수 출력
for doc, score in doc_score_pairs[:30]:
    print(score, doc)

봉룡은 숙정을 죽였는가?
0.782538890838623 슬링백은 여성화의 일종으로, 발목 뒤에서 끈이 가로질러 받쳐주는 신발류를 말한다. 앵클 스트랩은 발목을 끈이 완전히 감싸주지만, 슬링백은 뒤에서만 받쳐준다.
0.7788514494895935 또한, 러시아에서는 스토브를 이용하여 집을 데우기도 한다. 러시아 스토브는 음식을 골고루 익힐 수 있어 음식의 질을 향상시킨다. 그러나 이는 이제 더 이상 사용되지 않고 있으며 시골 지역에서도 찾아볼 수 없다.
0.7784056663513184 베어링은 축을 정확하고 매끄럽게 회전시키기 위해서 사용된다. 마찰에 의한 에너지 손실이나 발열을 줄여, 부품의 손상을 감소시킨다. 수송 기계부터 시작해서 각종 기계에 널리 사용되며, 회전하는 부분이 있는 기계장치에는 반드시 존재한다. 
0.776402473449707 슬(瑟)은 중국 고대 악기의 하나로 금과 더불어 아악에서 쓰이는 발현 악기이다. 한국에도 전래되어 사용되었다.
0.7732049226760864 낙뢰(落雷) 또는 벼락은 번개와 천둥을 동반하는 급격한 방전현상이다. 일반적으로 강한 소나기를 내리며, 우박을 동반하는 경우도 있다. 낙뢰는 주로 적란운 안에서 발생한다.
0.7711448073387146 베트남 요리는 기본적으로 지역에 따라서 세 가지로 분류될 수 있다. 북부 지방의 요리는 중부나 남부 지방에 비해서 준비가 간단하다. 프라이팬에 볶은 요리가 많고 간장을 많이 쓴다. 북부 지방의 요리는 중국의 영향을 많이 받았다. 
0.7707719802856445 서인 세력은 정원군의 장남 능양군을 받들어 ‘반정’을 단행하고 궁궐을 장악했다. 이들은 광해군을 인목왕후 앞으로 끌고 가 정죄한 뒤 유배시켰다. 인목왕후는 광해군과 폐세자에 대한 처형을 주장하였으나, 인조와 반정 세력은 반정의 명분인 폐모살제(廢母殺弟) 때문에 이를 받아들이지 않고 유배를 보내는 선에서 반정을 마무리 짓게 된다. 곧 반정군에게 이이첨, 정인홍, 김개시 등은 죽고, 40여 명의 관리가 구금된다.

In [203]:
logger.setLevel(logging.CRITICAL)  # 일부 로그만 출력

def eval_ranking_4_open_source(query, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3):
    query_emb = model.encode(query)

    #쿼리와 모든 문서 임베딩 사이의 도트 점수를 계산합니다.
    scores = util.dot_score(query_emb, doc_ko_emb)[0].cpu().tolist()

    #문서와 점수 결합
    doc_score_pairs = list(zip(docs_ko, scores))

    #점수에 따른 내림차순 정렬
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)[:top_k]

    retrieved_hash = my_hash(doc_score_pairs[0][0])
    sentence_combinations = [[query, doc_score_pair[0]] for doc_score_pair in doc_score_pairs]

    similarity_scores_1 = cross_encoder_1.predict(sentence_combinations)
    sim_scores_argsort_1 = list(reversed(np.argsort(similarity_scores_1)))
    reranked_hash_1 = my_hash(doc_score_pairs[sim_scores_argsort_1[0]][0])

    similarity_scores_2 = cross_encoder_2.predict(sentence_combinations)
    sim_scores_argsort_2 = list(reversed(np.argsort(similarity_scores_2)))
    reranked_hash_2 = my_hash(doc_score_pairs[sim_scores_argsort_2[0]][0])

    similarity_scores_3 = cross_encoder_3.predict(sentence_combinations)
    sim_scores_argsort_3 = list(reversed(np.argsort(similarity_scores_3)))
    reranked_hash_3 = my_hash(doc_score_pairs[sim_scores_argsort_3[0]][0])

    similarity_scores_4 = cross_encoder_4.predict(sentence_combinations)
    sim_scores_argsort_4 = list(reversed(np.argsort(similarity_scores_4)))
    reranked_hash_4 = my_hash(doc_score_pairs[sim_scores_argsort_4[0]][0])

    return retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4


In [None]:
logger.setLevel(logging.CRITICAL)

i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4 = eval_ranking_4_open_source(question, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash_1 == correct_hash, reranked_hash_2 == correct_hash, reranked_hash_3 == correct_hash, reranked_hash_4 == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
        reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
        reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
        reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking                            : {raw_accuracy}')
        print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
        print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')


  1%|▋                                                                       | 51/5769 [02:18<3:17:13,  2.07s/it]

Step 50
Accuracy without re-ranking                            : 0.04
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.1
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.1
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.08
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.06


  2%|█▏                                                                     | 101/5769 [03:27<1:25:25,  1.11it/s]

Step 100
Accuracy without re-ranking                            : 0.05
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.1
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.1
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.08
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.05


  3%|█▉                                                                       | 150/5769 [03:46<25:06,  3.73it/s]

Step 150
Accuracy without re-ranking                            : 0.06
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.1
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.1
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.08666666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.05333333333333334


  3%|██▌                                                                      | 201/5769 [04:08<11:58,  7.75it/s]

Step 200
Accuracy without re-ranking                            : 0.05
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.08
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.065
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.04


  4%|███▏                                                                     | 251/5769 [04:27<12:09,  7.56it/s]

Step 250
Accuracy without re-ranking                            : 0.052
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.084
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.084
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.064
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.048


  5%|███▊                                                                     | 301/5769 [04:44<15:38,  5.83it/s]

Step 300
Accuracy without re-ranking                            : 0.05333333333333334
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.09666666666666666
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.09666666666666666
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.07
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.05


  6%|████▍                                                                    | 351/5769 [04:56<10:03,  8.97it/s]

Step 350
Accuracy without re-ranking                            : 0.054285714285714284
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.1
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.1
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.06857142857142857
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.05142857142857143


  7%|█████                                                                    | 399/5769 [05:07<09:16,  9.65it/s]

Step 400
Accuracy without re-ranking                            : 0.0525
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.095
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.095
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.065
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.05


  8%|█████▋                                                                   | 450/5769 [05:16<09:35,  9.23it/s]

Step 450
Accuracy without re-ranking                            : 0.05333333333333334
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.09111111111111111
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.09111111111111111
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.06444444444444444
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.04888888888888889


  9%|██████▎                                                                  | 501/5769 [05:34<14:26,  6.08it/s]

Step 500
Accuracy without re-ranking                            : 0.054
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.088
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.088
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.062
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.048


 10%|██████▉                                                                  | 551/5769 [05:52<19:27,  4.47it/s]

Step 550
Accuracy without re-ranking                            : 0.05090909090909091
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.0890909090909091
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08727272727272728
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.06
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.04727272727272727


 10%|███████▌                                                                 | 601/5769 [06:04<12:17,  7.01it/s]

Step 600
Accuracy without re-ranking                            : 0.05
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.08833333333333333
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08666666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.06166666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.05


 11%|████████▏                                                                | 651/5769 [06:18<12:05,  7.06it/s]

Step 650
Accuracy without re-ranking                            : 0.047692307692307694
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.08307692307692308
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08153846153846153
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.05692307692307692
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.046153846153846156


 12%|████████▊                                                                | 701/5769 [06:29<20:33,  4.11it/s]

Step 700
Accuracy without re-ranking                            : 0.04857142857142857
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.08285714285714285
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08142857142857143
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.055714285714285716
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.047142857142857146


 13%|█████████▍                                                               | 750/5769 [06:38<10:33,  7.92it/s]

Step 750
Accuracy without re-ranking                            : 0.048
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.084
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08266666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.05466666666666667
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.04666666666666667


 14%|██████████▏                                                              | 801/5769 [06:49<09:57,  8.32it/s]

Step 800
Accuracy without re-ranking                            : 0.0475
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.08375
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.0825
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.055
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.04625


 15%|██████████▊                                                              | 851/5769 [06:55<09:33,  8.58it/s]

Step 850
Accuracy without re-ranking                            : 0.04941176470588235
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.08470588235294117
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08352941176470588
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.05764705882352941
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.04941176470588235


 16%|███████████▍                                                             | 901/5769 [07:01<08:31,  9.51it/s]

Step 900
Accuracy without re-ranking                            : 0.04888888888888889
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.08444444444444445
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08333333333333333
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.058888888888888886
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.05


 16%|████████████                                                             | 951/5769 [07:12<13:50,  5.80it/s]

Step 950
Accuracy without re-ranking                            : 0.049473684210526316
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.08526315789473685
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08421052631578947
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.06
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.05157894736842105


 17%|████████████▍                                                           | 1001/5769 [07:19<10:04,  7.89it/s]

Step 1000
Accuracy without re-ranking                            : 0.051
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.085
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.084
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.06
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.051


 18%|█████████████                                                           | 1051/5769 [07:25<12:32,  6.27it/s]

Step 1050
Accuracy without re-ranking                            : 0.04857142857142857
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.08095238095238096
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.05714285714285714
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.04857142857142857


 19%|█████████████▋                                                          | 1101/5769 [07:36<08:46,  8.86it/s]

Step 1100
Accuracy without re-ranking                            : 0.05
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.08181818181818182
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.0809090909090909
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.05909090909090909
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.05090909090909091


 20%|██████████████▎                                                         | 1151/5769 [07:42<08:08,  9.46it/s]

Step 1150
Accuracy without re-ranking                            : 0.04869565217391304
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.0808695652173913
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.057391304347826085
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.04869565217391304


 21%|██████████████▉                                                         | 1201/5769 [07:54<08:49,  8.63it/s]

Step 1200
Accuracy without re-ranking                            : 0.04833333333333333
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.08083333333333333
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.056666666666666664
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.049166666666666664


 22%|███████████████▌                                                        | 1250/5769 [08:16<57:14,  1.32it/s]

Step 1250
Accuracy without re-ranking                            : 0.0472
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.0808
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.08
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.0568
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.0504


 23%|████████████████▏                                                       | 1301/5769 [08:24<11:22,  6.55it/s]

Step 1300
Accuracy without re-ranking                            : 0.047692307692307694
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.08
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.07923076923076923
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.05615384615384615
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.05


 23%|████████████████▊                                                       | 1351/5769 [08:33<16:20,  4.51it/s]

Step 1350
Accuracy without re-ranking                            : 0.047407407407407405
Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : 0.07925925925925927
Accuracy with re-ranking(klue-cross-encoder-v1)        : 0.07851851851851852
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : 0.056296296296296296
Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): 0.05037037037037037


 24%|█████████████████▏                                                      | 1375/5769 [08:37<16:29,  4.44it/s]

In [None]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

print('Opensource embedding model(sentence-transformers/multi-qa-mpnet-base-cos-v1)')
print(f'Accuracy without re-ranking                            : {raw_accuracy}')
print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')

## 5.6 오픈소스 임베딩 한국어 성능 (BAAI/bge-m3)

In [None]:
!pip install FlagEmbedding

In [None]:
from FlagEmbedding import BGEM3FlagModel
model = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation

In [None]:
# 문서 임베딩
docs_ko = combined_paragraphs['paragraph']
# docs_ko = dataset_ko['validation']['paragraph']
doc_ko_emb_m3 = model.encode(docs_ko, batch_size=32, max_length=8192)['dense_vecs']

In [None]:
from sentence_transformers import util
query_emb = model.encode(query, batch_size=32, max_length=8192)['dense_vecs']
print(query)

#쿼리와 모든 문서 임베딩 사이의 도트 점수를 계산합니다.
scores = util.dot_score(query_emb, doc_ko_emb_m3)[0].cpu().tolist()
#문서와 점수 결합
doc_score_pairs = list(zip(docs_ko, scores))

#점수에 따른 내림차순 정렬
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)

#구절 및 점수 출력
for doc, score in doc_score_pairs[:3]:
    print(score, doc)

In [None]:
logger.setLevel(logging.CRITICAL)  # 일부 로그만 출력

def eval_ranking_4_open_source_m3(query, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3):
    query_emb = model.encode(query, batch_size=32, max_length=8192)['dense_vecs']

    #쿼리와 모든 문서 임베딩 사이의 도트 점수를 계산합니다.
    scores = util.dot_score(query_emb, doc_ko_emb_m3)[0].cpu().tolist()

    #문서와 점수 결합
    doc_score_pairs = list(zip(docs_ko, scores))

    #점수에 따른 내림차순 정렬
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)[:top_k]

    retrieved_hash = my_hash(doc_score_pairs[0][0])
    sentence_combinations = [[query, doc_score_pair[0]] for doc_score_pair in doc_score_pairs]

    similarity_scores_1 = cross_encoder_1.predict(sentence_combinations)
    sim_scores_argsort_1 = list(reversed(np.argsort(similarity_scores_1)))
    reranked_hash_1 = my_hash(doc_score_pairs[sim_scores_argsort_1[0]][0])

    similarity_scores_2 = cross_encoder_2.predict(sentence_combinations)
    sim_scores_argsort_2 = list(reversed(np.argsort(similarity_scores_2)))
    reranked_hash_2 = my_hash(doc_score_pairs[sim_scores_argsort_2[0]][0])

    similarity_scores_3 = cross_encoder_3.predict(sentence_combinations)
    sim_scores_argsort_3 = list(reversed(np.argsort(similarity_scores_3)))
    reranked_hash_3 = my_hash(doc_score_pairs[sim_scores_argsort_3[0]][0])

    similarity_scores_4 = cross_encoder_4.predict(sentence_combinations)
    sim_scores_argsort_4 = list(reversed(np.argsort(similarity_scores_4)))
    reranked_hash_4 = my_hash(doc_score_pairs[sim_scores_argsort_4[0]][0])

    return retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4


In [None]:
logger.setLevel(logging.CRITICAL)

i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4 = eval_ranking_4_open_source_m3(question, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash_1 == correct_hash, reranked_hash_2 == correct_hash, reranked_hash_3 == correct_hash, reranked_hash_4 == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
        reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
        reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
        reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking                            : {raw_accuracy}')
        print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
        print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')


In [None]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

print('Opensource embedding model(BAAI/bge-m3)')
print(f'Accuracy without re-ranking                            : {raw_accuracy}')
print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')

## 5.7 오픈소스 임베딩 한국어 성능 (acge_text_embedding)

In [None]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('aspire/acge_text_embedding')

In [None]:
validation_paragraphs = dataset_ko['validation'].remove_columns(['question', 'label'])
train_paragraphs = dataset_ko['train'].remove_columns(['question', 'label'])
test_paragraphs = dataset_ko['test'].remove_columns(['question', 'label'])
combined_paragraphs = concatenate_datasets([validation_paragraphs, train_paragraphs, test_paragraphs])


In [None]:
combined_paragraphs

In [None]:
# 문서 임베딩
# docs_ko = dataset_ko['validation']['paragraph']
docs_ko = combined_paragraphs['paragraph']
doc_ko_emb = model.encode(docs_ko, batch_size=32, normalize_embeddings=True, show_progress_bar=True)

In [None]:
from sentence_transformers import util
query_emb = model.encode(query)
print(query)

#쿼리와 모든 문서 임베딩 사이의 도트 점수를 계산합니다.
scores = util.dot_score(query_emb, doc_ko_emb)[0].cpu().tolist()

#문서와 점수 결합
doc_score_pairs = list(zip(docs_ko, scores))

#점수에 따른 내림차순 정렬
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)

#구절 및 점수 출력
for doc, score in doc_score_pairs[:30]:
    print(score, doc)

In [None]:
logger.setLevel(logging.CRITICAL)

i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4 = eval_ranking_4_open_source(question, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash_1 == correct_hash, reranked_hash_2 == correct_hash, reranked_hash_3 == correct_hash, reranked_hash_4 == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
        reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
        reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
        reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking                            : {raw_accuracy}')
        print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
        print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')


In [None]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

print('Opensource embedding model(aspire/acge_text_embedding)')
print(f'Accuracy without re-ranking                            : {raw_accuracy}')
print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')

# 6. BoolQ를 이용한 영어 임베딩 성능 비교
## 6.1 초기화

In [None]:
from datasets import load_dataset
from evaluate import load

dataset = load_dataset("boolq")

In [None]:
dataset

In [None]:
from datasets import concatenate_datasets

combined_dataset = concatenate_datasets([dataset['train'], dataset['validation']])

In [None]:
len(combined_dataset)

In [None]:
combined_dataset[0]

In [None]:
cross_encoder_1 = CrossEncoder('jeffwan/mmarco-mMiniLMv2-L12-H384-v1')
cross_encoder_2 = CrossEncoder('bongsoo/klue-cross-encoder-v1')  # 한국어 지원 모델
cross_encoder_3 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
model_save_path = './fine_tuned_ir_cross_encoder_01'
cross_encoder_4 = CrossEncoder(model_save_path)

In [None]:
def eval_ranking_4(query, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3):
    results_from_pinecone = query_from_pinecone(query, top_k=top_k)
    sentence_combinations = [[query, result_from_pinecone['metadata']['text']] for result_from_pinecone in results_from_pinecone]

    similarity_scores_1 = cross_encoder_1.predict(sentence_combinations)
    sim_scores_argsort_1 = list(reversed(np.argsort(similarity_scores_1)))
    re_ranked_final_result_1 = results_from_pinecone[sim_scores_argsort_1[0]]
    
    similarity_scores_2 = cross_encoder_2.predict(sentence_combinations)
    sim_scores_argsort_2 = list(reversed(np.argsort(similarity_scores_2)))
    re_ranked_final_result_2 = results_from_pinecone[sim_scores_argsort_2[0]]
    
    similarity_scores_3 = cross_encoder_3.predict(sentence_combinations)
    sim_scores_argsort_3 = list(reversed(np.argsort(similarity_scores_3)))
    re_ranked_final_result_3 = results_from_pinecone[sim_scores_argsort_3[0]]
    
    similarity_scores_4 = cross_encoder_4.predict(sentence_combinations)
    sim_scores_argsort_4 = list(reversed(np.argsort(similarity_scores_4)))
    re_ranked_final_result_4 = results_from_pinecone[sim_scores_argsort_4[0]]
    
    return results_from_pinecone[0]['id'], re_ranked_final_result_1['id'], re_ranked_final_result_2['id'], re_ranked_final_result_3['id'], re_ranked_final_result_4['id']


In [None]:
# val_sample = dataset['validation'][:700]
# val_sample = dataset_ko['validation'][:]
val_sample = combined_dataset[:]

In [None]:
len(val_sample)

In [None]:
from random import sample

query = sample(dataset['validation']['question'], 1)[0]
print(query)

In [None]:
# q_to_hash = {data['question']: my_hash(data['passage']) for data in dataset['validation']}
q_to_hash = {data['question']: my_hash(data['passage']) for data in combined_dataset}

q_to_hash[query]

## 6.2 text-embedding-ada-002 영어 성능

In [None]:
ENGINE = 'text-embedding-ada-002'
# ENGINE_2 = 'text-embedding-ada-002'
# ENGINE_3_S = 'text-embedding-3-small'  # OpenAI의 3세대 embedding 모델 (소형)
# ENGINE_3_L = 'text-embedding-3-large'  # OpenAI의 3세대 embedding 모델 (대형)

In [None]:
pinecone.delete_index(INDEX_NAME)  # delete the index

pinecone.create_index(
    INDEX_NAME, # 인덱스 이름
    dimension=1536, # 벡터의 치수, text-embedding-ada-002, text-embedding-3-small
    # dimension=3072, # 벡터의 치수, text-embedding-3-large
    
    metric='cosine', # 인덱스를 검색할 때 사용할 유사성 메트릭
    spec=PodSpec(
      environment="gcp-starter"
    )
    # pod_type="p1" # 파인콘 파드의 유형
)

# 인덱스를 변수로 저장
index = pinecone.Index(INDEX_NAME)

In [None]:
for idx in tqdm(range(0, len(dataset['train']), 128)):
    data_sample = dataset['train'][idx:idx + 128]

    passages = data_sample['passage']
    upload_texts_to_pinecone(passages, engine=ENGINE)

for idx in tqdm(range(0, len(dataset['validation']), 128)):
    data_sample = dataset['validation'][idx:idx + 128]

    passages = data_sample['passage']
    upload_texts_to_pinecone(passages, engine=ENGINE)

In [None]:
i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4 = eval_ranking_4(question, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash_1 == correct_hash, reranked_hash_2 == correct_hash, reranked_hash_3 == correct_hash, reranked_hash_4 == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
        reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
        reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
        reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking                            : {raw_accuracy}')
        print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
        print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')


In [None]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

print('text-embedding-ada-002')
print(f'Accuracy without re-ranking                            : {raw_accuracy}')
print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')

## 6.3 text-embedding-3-small 영어 성능

In [None]:
ENGINE = 'text-embedding-3-small'
# ENGINE_2 = 'text-embedding-ada-002'
# ENGINE_3_S = 'text-embedding-3-small'  # OpenAI의 3세대 embedding 모델 (소형)
# ENGINE_3_L = 'text-embedding-3-large'  # OpenAI의 3세대 embedding 모델 (대형)

In [None]:
pinecone.delete_index(INDEX_NAME)  # delete the index

pinecone.create_index(
    INDEX_NAME, # 인덱스 이름
    dimension=1536, # 벡터의 치수, text-embedding-ada-002, text-embedding-3-small
    # dimension=3072, # 벡터의 치수, text-embedding-3-large
    
    metric='cosine', # 인덱스를 검색할 때 사용할 유사성 메트릭
    spec=PodSpec(
      environment="gcp-starter"
    )
    # pod_type="p1" # 파인콘 파드의 유형
)

# 인덱스를 변수로 저장
index = pinecone.Index(INDEX_NAME)

In [None]:
for idx in tqdm(range(0, len(dataset['train']), 128)):
    data_sample = dataset['train'][idx:idx + 128]

    passages = data_sample['passage']
    upload_texts_to_pinecone(passages, engine=ENGINE)

for idx in tqdm(range(0, len(dataset['validation']), 128)):
    data_sample = dataset['validation'][idx:idx + 128]

    passages = data_sample['passage']
    upload_texts_to_pinecone(passages, engine=ENGINE)

In [None]:
i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4 = eval_ranking_4(question, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash_1 == correct_hash, reranked_hash_2 == correct_hash, reranked_hash_3 == correct_hash, reranked_hash_4 == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
        reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
        reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
        reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking                            : {raw_accuracy}')
        print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
        print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')


In [None]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

print('text-embedding-3-small')
print(f'Accuracy without re-ranking                            : {raw_accuracy}')
print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')

## 6.4 text-embedding-3-large 영어 성능

In [None]:
ENGINE = 'text-embedding-3-large'
# ENGINE_2 = 'text-embedding-ada-002'
# ENGINE_3_S = 'text-embedding-3-small'  # OpenAI의 3세대 embedding 모델 (소형)
# ENGINE_3_L = 'text-embedding-3-large'  # OpenAI의 3세대 embedding 모델 (대형)

In [None]:
pinecone.delete_index(INDEX_NAME)  # delete the index

pinecone.create_index(
    INDEX_NAME, # 인덱스 이름
    # dimension=1536, # 벡터의 치수, text-embedding-ada-002, text-embedding-3-small
    dimension=3072, # 벡터의 치수, text-embedding-3-large
    
    metric='cosine', # 인덱스를 검색할 때 사용할 유사성 메트릭
    spec=PodSpec(
      environment="gcp-starter"
    )
    # pod_type="p1" # 파인콘 파드의 유형
)

# 인덱스를 변수로 저장
index = pinecone.Index(INDEX_NAME)

In [None]:
for idx in tqdm(range(0, len(dataset['train']), 128)):
    data_sample = dataset['train'][idx:idx + 128]

    passages = data_sample['passage']
    upload_texts_to_pinecone(passages, engine=ENGINE)

for idx in tqdm(range(0, len(dataset['validation']), 128)):
    data_sample = dataset['validation'][idx:idx + 128]

    passages = data_sample['passage']
    upload_texts_to_pinecone(passages, engine=ENGINE)

In [None]:
i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4 = eval_ranking_4(question, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash_1 == correct_hash, reranked_hash_2 == correct_hash, reranked_hash_3 == correct_hash, reranked_hash_4 == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
        reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
        reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
        reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking                            : {raw_accuracy}')
        print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
        print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')


In [None]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

print('text-embedding-3-large')
print(f'Accuracy without re-ranking                            : {raw_accuracy}')
print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')

## 6.5 오픈소스 임베딩 영어 성능 (multi-qa-mpnet-base-cos-v1)

In [None]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('sentence-transformers/multi-qa-mpnet-base-cos-v1')

In [None]:
validation_paragraphs = dataset['validation'].remove_columns(['question', 'answer'])
train_paragraphs = dataset['train'].remove_columns(['question', 'answer'])
combined_paragraphs = concatenate_datasets([validation_paragraphs, train_paragraphs])

In [None]:
# 문서 임베딩
# docs = dataset['validation']['passage']
docs = combined_paragraphs['passage']
doc_emb = model.encode(docs, batch_size=32, show_progress_bar=True)

In [None]:
from random import sample

query = sample(dataset['validation']['question'], 1)[0]
print(query)
final_results = get_results_from_pinecone(query, top_k=3, re_rank=True)

In [None]:
from sentence_transformers import util
query_emb = model.encode(query)
print(query)

#쿼리와 모든 문서 임베딩 사이의 도트 점수를 계산합니다.
scores = util.dot_score(query_emb, doc_emb)[0].cpu().tolist()

#문서와 점수 결합
doc_score_pairs = list(zip(docs, scores))

#점수에 따른 내림차순 정렬
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)

#구절 및 점수 출력
for doc, score in doc_score_pairs[:30]:
    print(score, doc)

In [None]:
logger.setLevel(logging.CRITICAL)  # 일부 로그만 출력

def eval_ranking_4_open_source_en(query, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3):
    query_emb = model.encode(query)

    #쿼리와 모든 문서 임베딩 사이의 도트 점수를 계산합니다.
    scores = util.dot_score(query_emb, doc_emb)[0].cpu().tolist()

    #문서와 점수 결합
    doc_score_pairs = list(zip(docs, scores))

    #점수에 따른 내림차순 정렬
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)[:top_k]

    retrieved_hash = my_hash(doc_score_pairs[0][0])
    sentence_combinations = [[query, doc_score_pair[0]] for doc_score_pair in doc_score_pairs]

    similarity_scores_1 = cross_encoder_1.predict(sentence_combinations)
    sim_scores_argsort_1 = list(reversed(np.argsort(similarity_scores_1)))
    reranked_hash_1 = my_hash(doc_score_pairs[sim_scores_argsort_1[0]][0])

    similarity_scores_2 = cross_encoder_2.predict(sentence_combinations)
    sim_scores_argsort_2 = list(reversed(np.argsort(similarity_scores_2)))
    reranked_hash_2 = my_hash(doc_score_pairs[sim_scores_argsort_2[0]][0])

    similarity_scores_3 = cross_encoder_3.predict(sentence_combinations)
    sim_scores_argsort_3 = list(reversed(np.argsort(similarity_scores_3)))
    reranked_hash_3 = my_hash(doc_score_pairs[sim_scores_argsort_3[0]][0])

    similarity_scores_4 = cross_encoder_4.predict(sentence_combinations)
    sim_scores_argsort_4 = list(reversed(np.argsort(similarity_scores_4)))
    reranked_hash_4 = my_hash(doc_score_pairs[sim_scores_argsort_4[0]][0])

    return retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4


In [None]:
logger.setLevel(logging.CRITICAL)

i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4 = eval_ranking_4_open_source_en(question, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash_1 == correct_hash, reranked_hash_2 == correct_hash, reranked_hash_3 == correct_hash, reranked_hash_4 == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
        reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
        reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
        reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking                            : {raw_accuracy}')
        print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
        print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')


In [None]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

print('Opensource embedding model(sentence-transformers/multi-qa-mpnet-base-cos-v1)')
print(f'Accuracy without re-ranking                            : {raw_accuracy}')
print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')

## 6.6 오픈소스 임베딩 영어 성능 (BAAI/bge-m3)

In [None]:
!pip install FlagEmbedding

In [None]:
from FlagEmbedding import BGEM3FlagModel
model = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation

In [None]:
# 문서 임베딩
docs = combined_paragraphs['passage']
# docs = dataset['validation']['passage']
doc_emb_m3 = model.encode(docs, batch_size=32, max_length=8192)['dense_vecs']

In [None]:
from sentence_transformers import util
query_emb = model.encode(query, batch_size=32, max_length=8192)['dense_vecs']
print(query)

#쿼리와 모든 문서 임베딩 사이의 도트 점수를 계산합니다.
scores = util.dot_score(query_emb, doc_emb_m3)[0].cpu().tolist()
#문서와 점수 결합
doc_score_pairs = list(zip(docs, scores))

#점수에 따른 내림차순 정렬
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)

#구절 및 점수 출력
for doc, score in doc_score_pairs[:3]:
    print(score, doc)

In [None]:
logger.setLevel(logging.CRITICAL)  # 일부 로그만 출력

def eval_ranking_4_open_source_m3_en(query, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3):
    query_emb = model.encode(query, batch_size=32, max_length=8192)['dense_vecs']

    #쿼리와 모든 문서 임베딩 사이의 도트 점수를 계산합니다.
    scores = util.dot_score(query_emb, doc_emb_m3)[0].cpu().tolist()

    #문서와 점수 결합
    doc_score_pairs = list(zip(docs, scores))

    #점수에 따른 내림차순 정렬
    doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)[:top_k]

    retrieved_hash = my_hash(doc_score_pairs[0][0])
    sentence_combinations = [[query, doc_score_pair[0]] for doc_score_pair in doc_score_pairs]

    similarity_scores_1 = cross_encoder_1.predict(sentence_combinations)
    sim_scores_argsort_1 = list(reversed(np.argsort(similarity_scores_1)))
    reranked_hash_1 = my_hash(doc_score_pairs[sim_scores_argsort_1[0]][0])

    similarity_scores_2 = cross_encoder_2.predict(sentence_combinations)
    sim_scores_argsort_2 = list(reversed(np.argsort(similarity_scores_2)))
    reranked_hash_2 = my_hash(doc_score_pairs[sim_scores_argsort_2[0]][0])

    similarity_scores_3 = cross_encoder_3.predict(sentence_combinations)
    sim_scores_argsort_3 = list(reversed(np.argsort(similarity_scores_3)))
    reranked_hash_3 = my_hash(doc_score_pairs[sim_scores_argsort_3[0]][0])

    similarity_scores_4 = cross_encoder_4.predict(sentence_combinations)
    sim_scores_argsort_4 = list(reversed(np.argsort(similarity_scores_4)))
    reranked_hash_4 = my_hash(doc_score_pairs[sim_scores_argsort_4[0]][0])

    return retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4


In [None]:
logger.setLevel(logging.CRITICAL)

i = 0
print_every = 50
predictions = []
for question in tqdm(val_sample['question']):
    retrieved_hash, reranked_hash_1, reranked_hash_2, reranked_hash_3, reranked_hash_4 = eval_ranking_4_open_source_m3_en(question, cross_encoder_1, cross_encoder_2, cross_encoder_3, cross_encoder_4, top_k=3)
    correct_hash = q_to_hash[question]
    predictions.append((retrieved_hash == correct_hash, reranked_hash_1 == correct_hash, reranked_hash_2 == correct_hash, reranked_hash_3 == correct_hash, reranked_hash_4 == correct_hash))
    i += 1
    if i % print_every == 0:
        print(f'Step {i}')
        raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
        reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
        reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
        reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
        reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

        print(f'Accuracy without re-ranking                            : {raw_accuracy}')
        print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
        print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
        print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')


In [None]:
raw_accuracy = sum([p[0] for p in predictions])/len(predictions)
reranked_accuracy_1 = sum([p[1] for p in predictions])/len(predictions)
reranked_accuracy_2 = sum([p[2] for p in predictions])/len(predictions)
reranked_accuracy_3 = sum([p[3] for p in predictions])/len(predictions)
reranked_accuracy_4 = sum([p[4] for p in predictions])/len(predictions)

print('Opensource embedding model(BAAI/bge-m3)')
print(f'Accuracy without re-ranking                            : {raw_accuracy}')
print(f'Accuracy with re-ranking(mmarco-mMiniLMv2-L12-H384-v1) : {reranked_accuracy_1}')
print(f'Accuracy with re-ranking(klue-cross-encoder-v1)        : {reranked_accuracy_2}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2)      : {reranked_accuracy_3}')
print(f'Accuracy with re-ranking(ms-marco-MiniLM-L-12-v2 tuning): {reranked_accuracy_4}')