In [4]:
import pandas as pd
import subprocess
from pathlib import Path
import os

In [5]:
def get_git_root():
    """Return the top-level directory of the enclosing git repository.

    Runs ``git rev-parse --show-toplevel`` and wraps the path it prints in a
    ``Path``. When the command exits with a non-zero status, or the ``git``
    executable cannot be launched at all, a warning is printed and the
    current working directory is returned instead.

    Returns:
        Path: the repository root, or ``Path.cwd()`` as a fallback.
    """
    try:
        # stderr is discarded so that diagnostics git may emit on an otherwise
        # successful run cannot be mixed into the returned path.
        git_root = subprocess.check_output(
            ['git', 'rev-parse', '--show-toplevel'],
            stderr=subprocess.DEVNULL,
        ).decode().strip()
    except subprocess.CalledProcessError:
        # git ran but reported a failure.
        print("Warning: Not in a git repository. Using current working directory.")
        return Path.cwd()
    except OSError:
        # git could not be started (e.g. FileNotFoundError when it is not installed).
        print("Warning: Could not run git. Using current working directory.")
        return Path.cwd()
    return Path(git_root)

In [7]:
# Resolve the repository root once: get_git_root() spawns a git subprocess and
# may print a warning, so its result is reused for both paths below.
repo_root = get_git_root()

filename = 'postselection_meme_ratings.csv'
output_dir = repo_root / 'user_study_effort'

# Make sure the output directory exists and report which case applied.
if output_dir.exists():
    print(f"Directory already exists: {output_dir}")
else:
    # exist_ok covers the directory appearing between the check and the creation.
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"Created directory: {output_dir}")

# Load the ratings table that the following cells analyse.
df = pd.read_csv(repo_root / 'llm_selection' / 'data' / 'postselection_meme_ratings_processed.csv')
print(f'Number of answers pre filtering: {len(df)}')

Directory already exists: /Users/sergiopinto/Desktop/MemeFact/user_study_effort
Number of answers pre filtering: 110


In [None]:
# For every claim, compute each meme's average rating (the mean of coherence, clarity, hilarity, persuasiveness and template appropriateness), then print the meme numbers of the three worst-rated and the three best-rated memes.

In [8]:
NUM_CLAIMS = 12
NUM_MEMES = 8

# Column-name suffixes of the five sub-ratings looked up for every meme.
RATING_DIMENSIONS = ('coherence', 'clarity', 'hilarity',
                     'persuasiveness', 'template_conveyance')

for claim_i in range(1, NUM_CLAIMS + 1):
    # (meme_number, average_score) tuples for the memes of this claim
    meme_scores = []

    for meme_i in range(1, NUM_MEMES + 1):
        rating_cols = [f"claim{claim_i}_meme{meme_i}_{dim}" for dim in RATING_DIMENSIONS]

        # Skip memes for which any sub-rating column is absent from df.
        if any(col not in df.columns for col in rating_cols):
            continue

        # Average the five sub-ratings per participant (row), then average
        # those per-participant values over all rows. pandas ignores NaN
        # entries in both steps by default.
        meme_avg_score = df[rating_cols].mean(axis=1).mean()

        # A meme without a single rating yields NaN. NaN compares False with
        # everything, which would leave the sort below in an unreliable
        # order, so such memes are left out of the ranking.
        if pd.isna(meme_avg_score):
            continue

        meme_scores.append((meme_i, meme_avg_score))

    # Ascending by average rating: lowest first, highest last.
    meme_scores.sort(key=lambda pair: pair[1])

    # Note: with fewer than six ranked memes these two slices overlap.
    worst_3 = meme_scores[:3]
    best_3 = meme_scores[-3:]

    print(f"=== Claim {claim_i} ===")
    print("Worst 3 memes by average rating:")
    for meme_num, score in worst_3:
        print(f"  Meme {meme_num}: {score:.2f}")

    print("Best 3 memes by average rating:")
    for meme_num, score in best_3:
        print(f"  Meme {meme_num}: {score:.2f}")

    print()  # blank line between claims


=== Claim 1 ===
Worst 3 memes by average rating:
  Meme 5: 3.40
  Meme 4: 3.43
  Meme 6: 3.46
Best 3 memes by average rating:
  Meme 3: 4.06
  Meme 1: 4.08
  Meme 7: 4.30

=== Claim 2 ===
Worst 3 memes by average rating:
  Meme 6: 3.20
  Meme 5: 3.27
  Meme 2: 3.54
Best 3 memes by average rating:
  Meme 4: 3.82
  Meme 7: 3.90
  Meme 3: 4.00

=== Claim 3 ===
Worst 3 memes by average rating:
  Meme 7: 1.20
  Meme 6: 3.00
  Meme 4: 3.20
Best 3 memes by average rating:
  Meme 1: 3.60
  Meme 5: 3.84
  Meme 8: 3.89

=== Claim 4 ===
Worst 3 memes by average rating:
  Meme 4: 2.10
  Meme 3: 3.40
  Meme 5: 3.50
Best 3 memes by average rating:
  Meme 7: 3.87
  Meme 2: 3.93
  Meme 8: 3.97

=== Claim 5 ===
Worst 3 memes by average rating:
  Meme 7: 2.70
  Meme 2: 3.10
  Meme 3: 3.28
Best 3 memes by average rating:
  Meme 8: 3.71
  Meme 5: 3.71
  Meme 1: 3.73

=== Claim 6 ===
Worst 3 memes by average rating:
  Meme 5: 2.47
  Meme 3: 3.58
  Meme 7: 3.75
Best 3 memes by average rating:
  Meme 8: 3.89

In [9]:
import pandas as pd

# Assuming your raw data is already in a DataFrame named df

NUM_CLAIMS = 12
NUM_MEMES = 8

# Column-name suffixes of the five sub-ratings looked up for every meme.
RATING_DIMENSIONS = ('coherence', 'clarity', 'hilarity',
                     'persuasiveness', 'template_conveyance')

# Column order of the CSV; passed explicitly so the header is written even
# when no row is produced.
RESULT_COLUMNS = ['claim', 'meme', 'average_score', 'evaluation_type']

results = []  # one dict per selected meme; these become the CSV rows

for claim_i in range(1, NUM_CLAIMS + 1):
    # (meme_number, average_score) tuples for the memes of this claim
    meme_scores = []

    for meme_i in range(1, NUM_MEMES + 1):
        rating_cols = [f"claim{claim_i}_meme{meme_i}_{dim}" for dim in RATING_DIMENSIONS]

        # Skip memes for which any sub-rating column is absent from df.
        if not all(col in df.columns for col in rating_cols):
            continue

        # Mean of the five sub-ratings per participant (row), then the mean
        # of those values over all participants. pandas ignores NaN entries
        # in both steps by default.
        meme_avg_score = df[rating_cols].mean(axis=1).mean()

        # A meme without a single rating yields NaN. NaN compares False with
        # everything, which would leave the sort below in an unreliable
        # order, so such memes are left out of the ranking.
        if pd.isna(meme_avg_score):
            continue

        meme_scores.append((meme_i, meme_avg_score))

    # Ascending by average rating: lowest first, highest last.
    meme_scores.sort(key=lambda pair: pair[1])

    # Note: with fewer than six ranked memes these two slices overlap.
    worst_3 = meme_scores[:3]
    best_3 = meme_scores[-3:]

    # Worst rows first, then best rows, each in ascending score order.
    for evaluation_type, selection in (('worst', worst_3), ('best', best_3)):
        for meme_num, score in selection:
            results.append({
                'claim': claim_i,
                'meme': meme_num,
                'average_score': score,
                'evaluation_type': evaluation_type
            })

# Convert to a DataFrame and save as CSV (relative to the working directory).
df_results = pd.DataFrame(results, columns=RESULT_COLUMNS)
df_results.to_csv('worst_best_memes.csv', index=False)

print("CSV saved: 'worst_best_memes.csv'")


CSV saved: 'worst_best_memes.csv'


In [10]:
import pandas as pd
import numpy as np

# Assuming your raw data is already in a DataFrame named 'df'
NUM_CLAIMS = 12
NUM_MEMES = 8

# Maps each rating-column suffix in df to the name of the CSV column that
# holds the across-participant mean of that dimension.
DIMENSION_AVG_KEYS = {
    'coherence': 'coherence_avg',
    'clarity': 'clarity_avg',
    'hilarity': 'hilarity_avg',
    'persuasiveness': 'persuasiveness_avg',
    'template_conveyance': 'template_avg',
}

# Column order of the CSV; passed explicitly so the header is written even
# when no row is produced.
RESULT_COLUMNS = ['claim', 'meme', *DIMENSION_AVG_KEYS.values(),
                  'overall_avg', 'evaluation_type']

results = []  # one dict per selected meme; these become the CSV rows

for claim_i in range(1, NUM_CLAIMS + 1):
    meme_records = []

    for meme_i in range(1, NUM_MEMES + 1):
        # CSV key -> df column holding that dimension for this claim+meme
        dimension_cols = {
            avg_key: f"claim{claim_i}_meme{meme_i}_{suffix}"
            for suffix, avg_key in DIMENSION_AVG_KEYS.items()
        }

        # Skip memes for which any dimension column is absent from df.
        if not all(col in df.columns for col in dimension_cols.values()):
            continue

        # Mean rating across participants for each of the five dimensions.
        dimension_avgs = {
            avg_key: df[col].mean() for avg_key, col in dimension_cols.items()
        }

        # Overall average for this meme is the mean of the five dimension means.
        overall_avg = np.mean(list(dimension_avgs.values()))

        # If any dimension has no ratings its mean is NaN, and so is the
        # overall value. NaN compares False with everything, which would
        # leave the sort below in an unreliable order, so such memes are
        # left out of the ranking.
        if pd.isna(overall_avg):
            continue

        meme_records.append({
            'meme': meme_i,
            'overall_avg': overall_avg,
            **dimension_avgs
        })

    # Ascending by overall average: lowest first, highest last.
    meme_records.sort(key=lambda record: record['overall_avg'])

    # Note: with fewer than six ranked memes these two slices overlap.
    worst_3 = meme_records[:3]
    best_3 = meme_records[-3:]

    # Build the result rows: claim, meme, dimension-level averages, overall
    # average and the "worst"/"best" label. Worst rows are emitted first.
    for evaluation_type, selection in (('worst', worst_3), ('best', best_3)):
        for item in selection:
            results.append({
                'claim': claim_i,
                'meme': item['meme'],
                **{avg_key: item[avg_key] for avg_key in DIMENSION_AVG_KEYS.values()},
                'overall_avg': item['overall_avg'],
                'evaluation_type': evaluation_type
            })

# Convert to a DataFrame and save as CSV (relative to the working directory).
df_results = pd.DataFrame(results, columns=RESULT_COLUMNS)
df_results.to_csv('worst_best_memes_with_dimensions.csv', index=False)

print("CSV saved as 'worst_best_memes_with_dimensions.csv'")


CSV saved as 'worst_best_memes_with_dimensions.csv'


In [11]:
import pandas as pd
import numpy as np

# Assume your raw data is in a DataFrame named 'df'
NUM_CLAIMS = 12
NUM_MEMES = 8

# Maps each rating-column suffix in df to the column name used in the CSV.
DIMENSION_OUTPUT_NAMES = {
    'coherence': 'coherence',
    'clarity': 'clarity',
    'hilarity': 'hilarity',
    'persuasiveness': 'persuasiveness',
    'template_conveyance': 'template_appropriateness',
}

# Column order of the CSV; passed explicitly so the header is written even
# when no row is produced.
RESULT_COLUMNS = ['claim_number', 'meme_number', 'overall_average',
                  'evaluation_type', *DIMENSION_OUTPUT_NAMES.values()]

results = []  # one dict per selected meme; these become the CSV rows

for claim_i in range(1, NUM_CLAIMS + 1):
    meme_stats = []

    for meme_i in range(1, NUM_MEMES + 1):
        # CSV column name -> df column holding that dimension for this (claim, meme)
        dimension_cols = {
            out_name: f"claim{claim_i}_meme{meme_i}_{suffix}"
            for suffix, out_name in DIMENSION_OUTPUT_NAMES.items()
        }

        # Skip memes for which any dimension column is absent from df.
        if not all(col in df.columns for col in dimension_cols.values()):
            continue

        # Overall average = mean of the five across-participant dimension means.
        overall_avg = np.mean([df[col].mean() for col in dimension_cols.values()])

        # If any dimension has no ratings its mean is NaN, and so is the
        # overall value. NaN compares False with everything, which would
        # leave the sort below in an unreliable order, so such memes are
        # left out of the ranking.
        if pd.isna(overall_avg):
            continue

        meme_stats.append({
            'meme_number': meme_i,
            'overall_avg': overall_avg,
            'dimension_cols': dimension_cols
        })

    # Ascending by overall average: lowest first, highest last.
    meme_stats.sort(key=lambda stats: stats['overall_avg'])

    # Note: with fewer than six ranked memes these two slices overlap.
    worst_3 = meme_stats[:3]
    best_3 = meme_stats[-3:]

    # For the "worst" group each dimension reports the minimum rating given
    # by any participant; for the "best" group it reports the maximum.
    # Worst rows are emitted before best rows.
    for evaluation_type, selection, extreme in (
        ('worst', worst_3, pd.Series.min),
        ('best', best_3, pd.Series.max),
    ):
        for item in selection:
            row = {
                'claim_number': claim_i,
                'meme_number': item['meme_number'],
                'overall_average': item['overall_avg'],
                'evaluation_type': evaluation_type
            }
            for out_name, col in item['dimension_cols'].items():
                row[out_name] = extreme(df[col])
            results.append(row)

# Convert to a DataFrame and save as CSV (relative to the working directory).
df_results = pd.DataFrame(results, columns=RESULT_COLUMNS)
df_results.to_csv('worst_best_memes_min_max.csv', index=False)

print("CSV saved: 'worst_best_memes_min_max.csv'")


CSV saved: 'worst_best_memes_min_max.csv'
