In [None]:
import pandas as pd
import csv

In [None]:
# Raw survey responses. Later cells filter this frame on `question_type`
# and read its `question` and `selection` columns.
data = pd.read_csv("Woosung.csv")

In [None]:
# Count how often DDS was preferred over each model it was compared against
def compute_vote_rates(processed_data, ground_truth, selection_control, model_names, audio_type_map):
    """Tally DDS wins and comparison counts against each competing model.

    Every response row is mapped to a comparison pair with
    ``selection_control[question % len(selection_control)]``. If that pair
    contains DDS (audio type id 0), each non-"dds" model in ``model_names``
    whose id appears in the pair has its total incremented; its vote count
    is also incremented when the row's ``selection`` equals the position of
    DDS inside the pair.

    Args:
        processed_data: DataFrame with ``question`` and ``selection`` columns.
            Other columns are ignored, so their dtypes cannot affect the
            question index.
        ground_truth: Per-question labels (the original author's note says
            0 = DDS first, 1 = DDS second). Only ``len(ground_truth)`` is
            used, to reject question indices that are out of range.
            NOTE(review): the labels themselves are never compared; the DDS
            position is taken from the order of ids inside the pair. Confirm
            which of the two is authoritative.
        selection_control: Sequence of pairs of audio type ids, cycled over
            by question index.
        model_names: Model names to report on; "dds" itself is never counted.
        audio_type_map: Mapping from model name to audio type id.

    Returns:
        ``(votes, totals)``: two dicts keyed by every name in ``model_names``.
        ``totals[m]`` is the number of rows where DDS was compared with ``m``;
        ``votes[m]`` is how many of those rows selected DDS.

    Raises:
        KeyError: if a non-"dds" name in ``model_names`` has no entry in
            ``audio_type_map`` and a DDS pair is evaluated.
        TypeError, ValueError: if a ``question`` value cannot be converted
            to ``int`` (for example NaN).
    """
    dds_id = 0  # audio type id that identifies DDS inside a comparison pair

    votes = {name: 0 for name in model_names}
    totals = {name: 0 for name in model_names}

    # Nothing to attribute without rows or without comparison pairs; this also
    # avoids a modulo-by-zero on an empty selection_control.
    if len(processed_data) == 0 or len(selection_control) == 0:
        return votes, totals

    n_questions = len(ground_truth)
    n_pairs = len(selection_control)

    # Iterate the two needed columns directly. DataFrame.iterrows() builds one
    # Series per row and upcasts integers to float when any other column is
    # float, which would make the question value unusable as a list index.
    for raw_question, selected in zip(processed_data["question"], processed_data["selection"]):
        q_index = int(raw_question)

        # Skip non-integral, negative, or too-large question numbers. A
        # negative value would otherwise wrap around silently.
        if q_index != raw_question or not 0 <= q_index < n_questions:
            continue

        model_pair = selection_control[q_index % n_pairs]
        if dds_id not in model_pair:
            continue

        dds_position = model_pair.index(dds_id)  # 0 = first, 1 = second
        dds_won = selected == dds_position

        for model in model_names:
            if model != "dds" and audio_type_map[model] in model_pair:
                totals[model] += 1
                if dds_won:
                    votes[model] += 1

    return votes, totals

# Print the DDS win rate against each competing model for one question set
def analyze_question_set(processed_data, ground_truth, selection_control, model_names, audio_type_map):
    """Print one "DDS vs. <Model>" summary line per non-"dds" model.

    All arguments are forwarded unchanged to ``compute_vote_rates``. For each
    name in ``model_names`` other than "dds", the printed line shows the share
    of comparisons in which DDS was selected, followed by the raw
    ``votes/total`` counts. A model with no comparisons is reported as 0.

    Returns:
        None. The results are only printed.
    """
    win_counts, comparison_counts = compute_vote_rates(
        processed_data, ground_truth, selection_control, model_names, audio_type_map
    )

    rivals = [name for name in model_names if name != "dds"]
    for rival in rivals:
        wins = win_counts[rival]
        shown = comparison_counts[rival]
        # Guard against division by zero when the model never faced DDS.
        if shown > 0:
            rate = wins / shown
        else:
            rate = 0
        print(f"DDS vs. {rival.capitalize()}: {rate:.2%} ({wins}/{shown})")


In [None]:
# --- First question set: responses with question_type == "test1" ---
# Keep the unique test1 rows, order them by question number, and cast the
# `question` and `selection` columns to int (the original note says they
# arrive as floats).
processed_data = (
    data[data["question_type"] == "test1"]
    .drop_duplicates()
    .sort_values(by=["question"])
    .astype({"question": int, "selection": int})
)

# Per-question labels for the first set: 20 entries alternating 0, 1.
# compute_vote_rates describes them as 0 = DDS first, 1 = DDS second.
ground_truth_1 = [0, 1] * 10

# Comparison pairs, cycled over by question index. The ids refer to
# audio_type_map below.
selection_control_1 = [
    [0, 2],  # DDS vs. MusicMagus
    [4, 0],  # Zeta vs. DDS
]

# Models to report on in this set ("dds" is the reference system).
model_names = ["dds", "musicmagus", "zeta"]

# Model name -> audio type id as used inside the comparison pairs.
audio_type_map = {
    "dds": 0,
    "ddim": 1,
    "musicmagus": 2,
    "sdedit": 3,
    "zeta": 4,
}

analyze_question_set(processed_data, ground_truth_1, selection_control_1, model_names, audio_type_map)


In [None]:
# --- Second question set: responses with question_type == "test2" ---
# Keep the unique test2 rows, order them by question number, and cast the
# `question` and `selection` columns to int (the original note says they
# arrive as floats).
processed_data = (
    data[data["question_type"] == "test2"]
    .drop_duplicates()
    .sort_values(by=["question"])
    .astype({"question": int, "selection": int})
)

# Per-question labels for the second set: 20 entries alternating 1, 0.
# NOTE(review): with 0 = DDS first / 1 = DDS second, these labels disagree
# with the pair order in selection_control_2 on every question (question 0 is
# labelled 1, yet its pair [0, 2] lists DDS first). compute_vote_rates takes
# the DDS position from the pair order and never compares these labels, so
# confirm which one reflects the actual survey layout.
ground_truth_2 = [1, 0] * 10

# Comparison pairs, cycled over by question index. The ids refer to
# audio_type_map below.
selection_control_2 = [
    [0, 2],  # DDS vs. DreamSound
    [1, 0],  # Textinv vs. DDS
]

# Models to report on in this set ("dds" is the reference system).
model_names = ["dds", "textinv", "dreamsound"]

# Model name -> audio type id as used inside the comparison pairs.
audio_type_map = {
    "dds": 0,
    "textinv": 1,
    "dreamsound": 2,
}

# Report the DDS win rates for the second question set.
analyze_question_set(processed_data, ground_truth_2, selection_control_2, model_names, audio_type_map)
