In [56]:
from sentence_transformers import SentenceTransformer
import pandas as pd
import numpy as np

# Loaded models keyed by model name. Constructing a SentenceTransformer is
# expensive, so each model is built once and reused across calls.
_MODEL_CACHE = {}


def _get_model(model_name):
    """Return the SentenceTransformer for ``model_name``, building it on first use."""
    if model_name not in _MODEL_CACHE:
        _MODEL_CACHE[model_name] = SentenceTransformer(model_name)
    return _MODEL_CACHE[model_name]


def calculate_similarity(sentence1, sentence2, model_name="all-mpnet-base-v2"):
    """
    Compute the similarity of two sentences as the dot product of their embeddings.

    Args:
        sentence1 (str): First sentence.
        sentence2 (str): Second sentence.
        model_name (str): Name of the Sentence Transformer model to use
            (default: "all-mpnet-base-v2").

    Returns:
        The dot product of the two sentence embeddings, as returned by
        ``np.dot``. No normalization is applied here, so the value equals
        cosine similarity only if the model emits unit-length embeddings
        (NOTE(review): confirm for the chosen model).
    """
    # Reuse a previously loaded model instead of reloading it on every call.
    model = _get_model(model_name)

    # Embed each sentence separately.
    embedding1 = model.encode(sentence1)
    embedding2 = model.encode(sentence2)

    # Dot product of the two embedding vectors.
    similarity = np.dot(embedding1, embedding2)

    return similarity

In [74]:
# Read both result tables in one pass: df_1 from the Gemini results file,
# df_2 from the Qwen inference results file.
df_1, df_2 = map(
    pd.read_csv,
    ("results/gemini_results.csv", "results/qwen_inference_results.csv"),
)

In [75]:
# Exclude the image_23.webp row from the Gemini results and renumber the rows.
# NOTE(review): presumably the Qwen results have no matching row for this
# image — confirm, since later cells pair rows by position.
df_1 = df_1.loc[df_1["image"] != "image_23.webp"].reset_index(drop=True)

# Keep only turn-1 rows, split by Qwen model size (3B -> df_2_1, 7B -> df_2_2).
is_first_turn = df_2["turn"] == 1
df_2_1 = df_2.loc[
    (df_2["model"] == "Qwen/Qwen2.5-VL-3B-Instruct") & is_first_turn
].reset_index(drop=True)
df_2_2 = df_2.loc[
    (df_2["model"] == "Qwen/Qwen2.5-VL-7B-Instruct") & is_first_turn
].reset_index(drop=True)

In [112]:
# For every row of df_1, score its response against the response at the same
# row position in df_2_1 and df_2_2, storing each result as a two-element list.
# NOTE(review): rows are matched purely by position; verify the three frames
# are aligned row-for-row.
similarity_score = []

for i, reference in enumerate(df_1.response):
    similarity_score.append(
        [
            calculate_similarity(reference, df_2_1.response[i]),
            calculate_similarity(reference, df_2_2.response[i]),
        ]
    )

In [None]:
# Preview the first five score pairs (one [df_2_1, df_2_2] pair per df_1 row).
similarity_score[:5]