In [2]:
import pandas as pd
import os


In [3]:

# df.columns = Index(['Unnamed: 0', 'group1', 'group2', 'num_group1', 'num_group2', 'preference_group1', 'preference_group2', 'preference_diff'],


def determine_winner(file_name: str):
    df = pd.read_csv(file_name)

    pref_2 = (df['preference_diff'] > 0).sum() # group 2 is preferred 
    pref_1 = (df['preference_diff'] < 0).sum() # group 1 is preferred 

    group1_name = df['group1'].iloc[0]
    group2_name = df['group2'].iloc[0]
    
    print(f"Number of times {group1_name} (group 1) preferred in preference_diff: {pref_1}")
    print(f"Number of times {group2_name} (group 2) preferred in preference_diff: {pref_2}")
    
    group1_larger = (df['num_group1'] > df['num_group2']).sum() 
    group2_larger = (df['num_group1'] < df['num_group2']).sum()
    equal_groups = (df['num_group1'] == df['num_group2']).sum()

    print(f"\nNumber of times {group1_name} count > {group2_name} count: {group1_larger}")
    print(f"Number of times {group1_name} count < {group2_name} count: {group2_larger}")
    print(f"Number of times {group1_name} count = {group2_name} count: {equal_groups}")

    group_1_score = pref_1 - group1_larger
    group_2_score = pref_2 - group2_larger

    if group_1_score > group_2_score:
        print(f"\n{group1_name} (group 1) wins with score: {group_1_score} vs {group_2_score}")
        return 1
    elif group_2_score > group_1_score:
        print(f"\n{group2_name} (group 2) wins with score: {group_2_score} vs {group_1_score}")
        return 2
    else:
        print(f"\nTie between {group1_name} and {group2_name} with scores: {group_1_score}")
        return 0


In [5]:
determine_winner("outputs/voting/nigerians_mexicans_voting.csv")

Number of times nigerians (group 1) preferred in preference_diff: 4
Number of times mexicans (group 2) preferred in preference_diff: 46

Number of times nigerians count > mexicans count: 25
Number of times nigerians count < mexicans count: 25
Number of times nigerians count = mexicans count: 0

mexicans (group 2) wins with score: 21 vs -21


2

In [6]:

def calculate_elo_ratings(k_factor=32, initial_rating=1000, directory="trolley"):
    """
    Calculate ELO ratings for nationalities based on pairwise comparisons.
    
    Args:
        k_factor: ELO k-factor (how much ratings change per game)
        initial_rating: Starting rating for all nationalities
    
    Returns:
        dict: Nationality -> ELO rating
    """
    import re
    
    # Initialize ELO ratings dictionary
    elo_ratings = {}
    
    # Get all trolley files except the combined one
    csv_files = [f for f in os.listdir(f"outputs/{directory}") if f.endswith(f'{directory}.csv') and f != f"all_data_{directory}.csv"]
    
    # Extract nationality pairs and initialize ratings
    for file in csv_files:
        # Extract nationalities from filename (assuming format like "americans_nigerians_trolley.csv")
        match = re.match(rf'([^_]+)_([^_]+)_{directory}\.csv', file)
        if match:
            nat1, nat2 = match.groups()
            if nat1 not in elo_ratings:
                elo_ratings[nat1] = initial_rating
            if nat2 not in elo_ratings:
                elo_ratings[nat2] = initial_rating
    print("Initial ELO ratings:")
    for nat, rating in sorted(elo_ratings.items()):
        print(f"{nat}: {rating}")
    print()
    
    for file in csv_files:
        match = re.match(rf'([^_]+)_([^_]+)_{directory}\.csv', file)
        if match:
            nat1, nat2 = match.groups()
            
            print(f"Processing {file}...")
            winner = determine_winner(file)
            
            # Get current ratings
            rating1 = elo_ratings[nat1]
            rating2 = elo_ratings[nat2]
            
            # Calculate expected scores
            expected1 = 1 / (1 + 10**((rating2 - rating1) / 400))
            expected2 = 1 / (1 + 10**((rating1 - rating2) / 400))
            
            # Determine actual scores based on winner
            if winner == 1:  # nat1 wins
                actual1, actual2 = 1, 0
            elif winner == 2:  # nat2 wins
                actual1, actual2 = 0, 1
            else:  # tie
                actual1, actual2 = 0.5, 0.5
            
            # Update ratings
            new_rating1 = rating1 + k_factor * (actual1 - expected1)
            new_rating2 = rating2 + k_factor * (actual2 - expected2)
            
            elo_ratings[nat1] = new_rating1
            elo_ratings[nat2] = new_rating2
            
            print(f"  {nat1}: {rating1:.1f} -> {new_rating1:.1f}")
            print(f"  {nat2}: {rating2:.1f} -> {new_rating2:.1f}")
            print()
    

    csv_data = []
    for nationality, rating in elo_ratings.items():
        csv_data.append({
            'group': nationality,
            'rating': rating,
            'scenario': directory
        })
    
    # Create DataFrame and save to CSV
    df = pd.DataFrame(csv_data)
    csv_filename = f'{directory}.csv'
    df.to_csv(csv_filename, index=False)
    print(f"ELO ratings saved to {csv_filename}")
    return elo_ratings


In [7]:

# Calculate ELO ratings
final_elo_ratings = calculate_elo_ratings()


Initial ELO ratings:
americans: 1000
brits: 1000
canadians: 1000
chinese: 1000
filipinos: 1000
french: 1000
germans: 1000
indians: 1000
indonesians: 1000
iranians: 1000
israelis: 1000
japanese: 1000
mexicans: 1000
nigerians: 1000
palestinians: 1000
russians: 1000
ukrainians: 1000

Processing americans_brits_trolley.csv...


FileNotFoundError: [Errno 2] No such file or directory: 'americans_brits_trolley.csv'

In [9]:
final_elo_ratings

{'americans': 1120.5551762676712,
 'brits': 976.8080274823873,
 'canadians': 1031.2093603780886,
 'chinese': 876.8902560267295,
 'french': 945.9616575564396,
 'germans': 1099.0683862070473,
 'indians': 1045.6006322117275,
 'japanese': 956.5154450330125,
 'mexicans': 919.0649144520585,
 'nigerians': 1028.3261443848378}

In [5]:
print(determine_winner("americans_nigerians_trolley.csv"))

Number of times americans (group 1) preferred in preference_diff: 24
Number of times nigerians (group 2) preferred in preference_diff: 26

Number of times americans count > nigerians count: 26
Number of times americans count < nigerians count: 24
Number of times americans count = nigerians count: 0

nigerians (group 2) wins with score: 2 vs -2
2


In [6]:

trolley_files = [f for f in os.listdir() if f.endswith('trolley.csv')]
print("Files ending with 'trolley.csv':")
for file in trolley_files:
    
    print(f"- {file}")


Files ending with 'trolley.csv':
- all_data_trolley.csv
- americans_brits_trolley.csv
- americans_canadians_trolley.csv
- americans_chinese_trolley.csv
- americans_french_trolley.csv
- americans_germans_trolley.csv
- americans_indians_trolley.csv
- americans_japanese_trolley.csv
- americans_mexicans_trolley.csv
- americans_nigerians_trolley.csv
- brits_canadians_trolley.csv
- brits_mexicans_trolley.csv
- canadians_mexicans_trolley.csv
- chinese_brits_trolley.csv
- chinese_canadians_trolley.csv
- chinese_french_trolley.csv
- chinese_germans_trolley.csv
- chinese_indians_trolley.csv
- chinese_japanese_trolley.csv
- chinese_mexicans_trolley.csv
- french_brits_trolley.csv
- french_canadians_trolley.csv
- french_germans_trolley.csv
- french_mexicans_trolley.csv
- germans_brits_trolley.csv
- germans_canadians_trolley.csv
- germans_mexicans_trolley.csv
- indians_brits_trolley.csv
- indians_canadians_trolley.csv
- indians_french_trolley.csv
- indians_germans_trolley.csv
- indians_japanese_trol