# Model fusion
$S_{\text{fusion}} = w_1 \cdot S_{\text{dense}} + w_2 \cdot S_{\text{sparse}}$

In [8]:
# Directories holding the ranking results produced by each retrieval model.
# The fusion cell reads one `<query_id>.json` file per query from the two
# model directories it is pointed at (presumably the BM25 directory follows
# the same layout -- confirm before fusing with it).
BM25_RESULT_DIR = '../data/evaluation/results/evaluation_queries/text_code/bm25'
MODEL1_RESULT_DIR = '../data/evaluation/results/evaluation_queries/text_code/model1'
MODEL2_RESULT_DIR = '../data/evaluation/results/evaluation_queries/text_code/model2'
# Output directory for the fused rankings. Exactly one FUSION_RESULT_DIR
# assignment should be active; the commented-out alternatives are kept for the
# other model pairings and must be switched together with the input
# directories selected in the fusion cell.
# FUSION_RESULT_DIR = '../data/evaluation/results/evaluation_queries/fusion/bm25_model2'
# FUSION_RESULT_DIR = '../data/evaluation/results/evaluation_queries/fusion/bm25_model1'
FUSION_RESULT_DIR = '../data/evaluation/results/evaluation_queries/fusion/model1_model2'


In [9]:
import os
import json

def _fuse_rankings(docs_a, docs_b, weight_a, weight_b):
    """Return the weighted-sum fusion of two ranked document lists.

    Args:
        docs_a: list of ``{'docid': ..., 'score': ...}`` dicts; drives the
            iteration, so only documents present here can appear in the result.
        docs_b: list of dicts with the same keys, used as a score lookup.
        weight_a: multiplier applied to scores from ``docs_a``.
        weight_b: multiplier applied to scores from ``docs_b``.

    Returns:
        A new list of ``{'docid', 'score'}`` dicts for the documents found in
        BOTH inputs, sorted by fused score in descending order (ties keep the
        order of ``docs_a`` because the sort is stable).
    """
    # Build the lookup once instead of rescanning docs_b for every document.
    # If a docid is repeated in docs_b the last occurrence wins.
    scores_b = {item['docid']: item['score'] for item in docs_b}

    fused = []
    for item in docs_a:
        docid = item['docid']
        score_a = item['score']
        # Looked up fresh for every document, so a document missing from
        # docs_b can never inherit the score of the previous document.
        score_b = scores_b.get(docid)
        # Test for presence rather than truthiness: a score of 0 is valid and
        # must not be dropped.
        if score_a is None or score_b is None:
            continue
        fused.append({
            'docid': docid,
            'score': weight_a * score_a + weight_b * score_b,
        })

    return sorted(fused, key=lambda doc: doc['score'], reverse=True)


# Directory paths containing the ranking results from each model
# NOTE(review): directory_model1 points at MODEL2_RESULT_DIR and
# directory_model2 at MODEL1_RESULT_DIR. With w1 == w2 the fused scores are
# unaffected, but w1 currently weights MODEL2's scores -- confirm the intended
# mapping before using unequal weights.
directory_model1 = MODEL2_RESULT_DIR
directory_model2 = MODEL1_RESULT_DIR
output_dir = FUSION_RESULT_DIR
os.makedirs(output_dir, exist_ok=True)

# Assign weights to each model's rankings
w1 = 1
w2 = 1

# Iterate through the JSON files of the first directory; the second directory
# is expected to contain a file with the same name for every query.
# Sorted so that runs process the queries in a reproducible order.
for filename in sorted(os.listdir(directory_model1)):
    if not filename.endswith('.json'):
        continue

    query_id = os.path.splitext(filename)[0]  # Extract the query ID from the filename

    # Load the ranking results from Model 1
    file_path_model1 = os.path.join(directory_model1, filename)
    with open(file_path_model1, 'r', encoding='utf-8') as file:
        ranking_model1 = json.load(file)

    # Load the corresponding JSON file from Model 2
    # (raises FileNotFoundError if the second model has no result for this query)
    file_path_model2 = os.path.join(directory_model2, filename)
    with open(file_path_model2, 'r', encoding='utf-8') as file:
        ranking_model2 = json.load(file)

    # Extract the document lists from both rankings
    docs_model1 = ranking_model1['docs']
    docs_model2 = ranking_model2['docs']

    # Fuse the ranking results for the query; already sorted by fused score
    # in descending order.
    fused_docs = _fuse_rankings(docs_model1, docs_model2, w1, w2)

    fused_ranking = {
        'qid': query_id,
        'docs': fused_docs
    }

    # Output the fused ranking results to a JSON file
    output_path = os.path.join(output_dir, f'{query_id}.json')

    with open(output_path, 'w') as output_file:
        json.dump(fused_ranking, output_file, indent=4)
