In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Locations of the three topic collections
touche_file = '../dataset/Touche.csv'
manual_file = '../dataset/manual_topics.csv'
random_file = '../dataset/random.csv'

# Read each collection and tag every row with the collection it came from
touche_df = pd.read_csv(touche_file).assign(source_file='Touche')
manual_df = pd.read_csv(manual_file).assign(source_file='manual')
random_df = pd.read_csv(random_file).assign(source_file='random')

# Stack the three collections on top of each other under a fresh 0..n-1 index
topic_frames = [touche_df, manual_df, random_df]
combined_df = pd.concat(topic_frames, ignore_index=True)

# Optional: persist the stacked frame for reuse outside this notebook
# combined_df.to_csv('../dataset/combined_files.csv', index=False)

# Last expression of the cell, so the notebook renders the stacked frame
combined_df

Unnamed: 0,Topic,Sector,source_file
0,Are gas prices too high?,Economy,Touche
1,Should the government allow illegal immigrants...,Politics,Touche
2,Are social media platforms doing enough to pre...,Technology,Touche
3,Is a college education worth it?,Education,Touche
4,Should abortion be legal?,Abortion,Touche
...,...,...,...
85,Do Russians regret the breakup of the Soviet U...,Politics,random
86,Should newspapers shift entirely to digital fo...,Media,random
87,Is anti-Semitism on the rise globally?,Politics,random
88,Is US's economic influence declining globally?,Economy,random


In [2]:
import os
import json
import pandas as pd

# Folder holding the annotation JSON files (one JSON object per file)
folder_path = '../dataset/annotations/'  # Update this path if needed

# Keys pulled out of every annotation file. The nesting order (cut-off outer,
# model inner) fixes the column order of the resulting frame and CSV:
# all @1 columns first, then @2, then @3.
ndcg_models = ["BM25-llava", "BM25", "Clip", "Mistral-llava", "Mistral", "Qwen2"]
ndcg_cutoffs = (1, 2, 3)
metric_keys = [
    f"{model}_documents_ndcg@{cutoff}"
    for cutoff in ndcg_cutoffs
    for model in ndcg_models
]

# One record (dict) per annotation file
data = []

# os.listdir() returns names in arbitrary order; sorting keeps the row order
# of df (and of the saved CSV) identical from run to run and across machines.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.json'):
        continue
    file_path = os.path.join(folder_path, filename)

    # Explicit encoding so parsing does not depend on the platform default
    with open(file_path, 'r', encoding='utf-8') as file:
        json_data = json.load(file)

    # .get() leaves a missing key as None instead of raising
    record = {"query": json_data.get("query")}
    record.update((key, json_data.get(key)) for key in metric_keys)
    data.append(record)

# Fixing the columns up front keeps the schema (and the merge below) valid
# even when the folder contains no JSON files.
df = pd.DataFrame(data, columns=["query", *metric_keys])

# Attach the originating collection ('source_file') to every query by matching
# the query text against combined_df['Topic']. how='left' keeps queries that
# have no matching topic; their source_file is NaN.
# NOTE(review): a Topic that occurs more than once in combined_df would
# duplicate the matching query row here -- confirm topics are unique.
df = pd.merge(df, combined_df[['Topic', 'source_file']],
              left_on='query', right_on='Topic', how='left')
df = df.drop(columns=['Topic'])

# Save the per-query table to CSV, creating the results folder if needed
file_path = '../dataset/results/mean_ndcg.csv'  # You can modify the file path as needed
os.makedirs(os.path.dirname(file_path), exist_ok=True)
df.to_csv(file_path, index=False)

In [3]:
import pandas as pd

def aggregate_ndcg_values(df):
    """
    Collapse per-query NDCG scores into one mean per model and cut-off.

    Parameters:
        df (pd.DataFrame): Frame with one "<model>_documents_ndcg@<k>" column
            for each of the six models and each k in 1, 2, 3.

    Returns:
        pd.DataFrame: One row per model, in the order BM25, Clip, Mistral,
            Mistral-llava, Qwen2, BM25-llava. Column "Row Labels" holds the
            model name; "_ndcg@1", "_ndcg@2" and "_ndcg@3" hold the column
            means computed with Series.mean().
    """
    # Row order of the summary table
    model_names = ["BM25", "Clip", "Mistral", "Mistral-llava", "Qwen2", "BM25-llava"]

    summary = {"Row Labels": model_names}
    for cutoff in (1, 2, 3):
        suffix = f"_ndcg@{cutoff}"
        column_means = []
        for name in model_names:
            # e.g. "BM25" + "_documents" + "_ndcg@1"
            column_means.append(df[name + "_documents" + suffix].mean())
        summary[suffix] = column_means

    # A frame rather than a dict so the result prints as a table
    return pd.DataFrame(summary)


# Mean NDCG per model over every row of df, regardless of source collection
df_aggregated = aggregate_ndcg_values(df)
# sep='\n' puts the heading and the table on separate lines
print('Mean NDCG values for different methods on entire queries:', df_aggregated, sep='\n')


Mean NDCG values for different methods on entire queries:
      Row Labels   _ndcg@1   _ndcg@2   _ndcg@3
0           BM25  0.507778  0.502874  0.491567
1           Clip  0.380278  0.365012  0.363469
2        Mistral  0.710000  0.718859  0.743631
3  Mistral-llava  0.538889  0.545364  0.529887
4          Qwen2  0.822500  0.834709  0.851063
5     BM25-llava  0.270556  0.267077  0.268633


In [132]:
# Repeat the aggregation once per topic collection. Rows are selected by a
# case-insensitive substring match on source_file; rows whose source_file is
# missing are excluded from every subset (na=False).
per_source_rows = {}
per_source_means = {}
for position, label in enumerate(("Touche", "manual", "random")):
    if position > 0:
        # Blank-line separator between consecutive tables
        print('\n')
    in_source = df['source_file'].str.contains(label, case=False, na=False)
    per_source_rows[label] = df[in_source]
    per_source_means[label] = aggregate_ndcg_values(per_source_rows[label])
    print(f'Mean NDCG values for different methods on {label} queries:')
    print(per_source_means[label])

# Expose each subset and its summary under its own name for later cells
df_touche = per_source_rows["Touche"]
df_manual = per_source_rows["manual"]
df_random = per_source_rows["random"]
df_aggregated_touche = per_source_means["Touche"]
df_aggregated_manual = per_source_means["manual"]
df_aggregated_random = per_source_means["random"]

Mean NDCG values for different methods on Touche queries:
      Row Labels   _ndcg@1   _ndcg@2   _ndcg@3
0           BM25  0.477500  0.428623  0.429513
1           Clip  0.373333  0.362238  0.370031
2        Mistral  0.667500  0.624582  0.661656
3  Mistral-llava  0.502500  0.469321  0.476005
4          Qwen2  0.769167  0.775849  0.808715
5     BM25-llava  0.182500  0.190192  0.207037


Mean NDCG values for different methods on manual queries:
      Row Labels   _ndcg@1   _ndcg@2   _ndcg@3
0           BM25  0.507500  0.540951  0.524422
1           Clip  0.360000  0.361867  0.362885
2        Mistral  0.674167  0.749101  0.798852
3  Mistral-llava  0.510000  0.520264  0.510694
4          Qwen2  0.842500  0.864265  0.873174
5     BM25-llava  0.297500  0.288317  0.264908


Mean NDCG values for different methods on random queries:
      Row Labels   _ndcg@1   _ndcg@2   _ndcg@3
0           BM25  0.538333  0.539050  0.520765
1           Clip  0.407500  0.370931  0.357489
2        Mistral  0.788

In [135]:
import pandas as pd

def calculate_ndcg_frequencies(df):
    """
    Count how often each model achieves the best score at NDCG@1, @2 and @3.

    For every row and every cut-off, the model(s) with the highest score share
    one unit of credit equally (a two-way tie gives 0.5 each). Rows in which
    every score for a cut-off is missing contribute nothing for that cut-off.

    Parameters:
        df (pd.DataFrame): Frame with one "<model>_documents_ndcg@<k>" column
            for each of the six models and each k in 1, 2, 3. Other columns
            are ignored.

    Returns:
        pd.DataFrame: One row per model, in the order BM25-llava, BM25, Clip,
            Mistral-llava, Mistral, Qwen2, with columns "Model",
            "NDCG@1 Frequency", "NDCG@2 Frequency" and "NDCG@3 Frequency".

    Raises:
        KeyError: If any of the expected score columns is missing from df.
    """
    # Single source of truth for the row order and the score-column names
    models = ["BM25-llava", "BM25", "Clip", "Mistral-llava", "Mistral", "Qwen2"]
    cutoffs = (1, 2, 3)

    frequencies = {}
    for cutoff in cutoffs:
        columns = [f"{model}_documents_ndcg@{cutoff}" for model in models]

        # Positional index so the row-wise alignment below cannot be confused
        # by duplicate or non-default index labels in the caller's frame.
        scores = df[columns].astype(float).reset_index(drop=True)

        best = scores.max(axis=1)              # NaN when a row has no score
        is_top = scores.eq(best, axis=0)       # NaN never compares equal
        n_top = is_top.sum(axis=1)

        # Split one unit of credit among the tied winners. Rows with no
        # winner get a NaN divisor and are skipped by sum() below.
        credit = is_top.astype(float).div(n_top.where(n_top > 0), axis=0)
        frequencies[f"NDCG@{cutoff}"] = credit.sum(axis=0).to_numpy()

    # Models become the index, then reset_index turns them into the first column
    df_frequencies = pd.DataFrame(frequencies, index=models).reset_index()

    # Rename the columns for clarity
    df_frequencies.columns = ["Model", "NDCG@1 Frequency", "NDCG@2 Frequency", "NDCG@3 Frequency"]

    return df_frequencies


In [136]:
# Tally, over every row of df, how often each model holds the top score at
# each NDCG cut-off (ties share the credit), then print the resulting table.
df_frequencies = calculate_ndcg_frequencies(df)  # where df is your dataset
print(df_frequencies)

           Model  NDCG@1 Frequency  NDCG@2 Frequency  NDCG@3 Frequency
0     BM25-llava          3.400000          2.200000              0.00
1           BM25          6.100000          7.616667              4.75
2           Clip          6.866667          3.416667              2.00
3  Mistral-llava          9.100000          5.866667              3.25
4        Mistral         13.100000         10.700000             13.75
5          Qwen2         21.433333         30.200000             36.25


In [139]:
# Top-model frequencies per topic collection. df must carry a 'source_file'
# column; rows are selected by a case-insensitive substring match and rows
# with a missing source_file are excluded from every subset (na=False).
frequency_rows = {}
frequency_tables = {}
for label in ("Touche", "manual", "random"):
    is_first = not frequency_rows
    if not is_first:
        # Blank-line separator between consecutive tables
        print('\n')
    in_source = df['source_file'].str.contains(label, case=False, na=False)
    frequency_rows[label] = df[in_source]
    frequency_tables[label] = calculate_ndcg_frequencies(frequency_rows[label])
    heading = f"Frequencies for {label} queries:"
    # Every heading after the first is preceded by one extra newline
    print(heading if is_first else "\n" + heading)
    print(frequency_tables[label])

# Expose each subset and its frequency table under its own name
df_touche = frequency_rows["Touche"]
df_manual = frequency_rows["manual"]
df_random = frequency_rows["random"]
df_frequencies_touche = frequency_tables["Touche"]
df_frequencies_manual = frequency_tables["manual"]
df_frequencies_random = frequency_tables["random"]


Frequencies for Touche queries:
           Model  NDCG@1 Frequency  NDCG@2 Frequency  NDCG@3 Frequency
0     BM25-llava          0.700000          0.666667               0.0
1           BM25          2.533333          3.000000               1.5
2           Clip          1.916667          0.750000               0.0
3  Mistral-llava          2.783333          2.416667               2.0
4        Mistral          4.783333          1.916667               3.5
5          Qwen2          7.283333         11.250000              13.0



Frequencies for manual queries:
           Model  NDCG@1 Frequency  NDCG@2 Frequency  NDCG@3 Frequency
0     BM25-llava          1.033333          0.533333              0.00
1           BM25          1.866667          3.533333              3.25
2           Clip          2.666667          0.666667              0.00
3  Mistral-llava          3.533333          1.700000              0.25
4        Mistral          2.366667          4.700000              5.75
5         