## Variance analysis
Matthew Sun, 11/18/2021

This notebook investigates the outcomes that occur when there is significant variance between users, in attributes _and_ arrival times. We then examine variance in outcomes - namely, MSE. 

Todo:
- Modify user attribute sampling strategy 
- Extend outcome metrics to look at other metrics (such utility, homogenization, a la Chaney)

In [2]:
import trecs
import numpy as np
from scipy.spatial.distance import pdist
from trecs.models import ContentFiltering, PopularityRecommender, ImplicitMF, SocialFiltering
from trecs.components import Users, Items, Creators, ActualUserScores
from trecs.metrics import InteractionSimilarity, Measurement, MSEMeasurement
from trecs.random import Generator
from collections import defaultdict
import os
import warnings
warnings.simplefilter("ignore")

In [3]:
arg_dict = {
    "num_attrs" : 10,
    "latent_factors" : 5,
    "maj_size" : 250, # 250 members of majority group
    "min_size" : 50, # 50 members of minority group
    "num_users" : 300,
    "num_items" : 200,
    "num_sims" : 10,
    "startup_iters" : 50,
    "sim_iters" : 100,
    "num_items_per_iter" : 10,
    "random_items_per_iter" : 3,
    "repeated_training" : True, # observing the results when systems are repeatedly trained
}
rng = Generator(seed=1234)

In [8]:
def sample_users_and_items(rng, num_maj_users, num_min_users, num_items, num_attrs, num_sims):
    # hardcoded distributions for majority / minority group users
    item_params = rng.dirichlet(np.ones(num_attrs) * 100, size=num_sims) * 0.1
    
    # each element in users is the users vector in one simulation
    users, items = [], []
    for sim_index in range(num_sims):
        # generate user preferences and item attributes
        maj_user_rep = rng.normal(1, 2, size=(num_maj_users, num_attrs))
        min_user_rep = rng.normal(0.5, 1.25, size=(num_min_users, num_attrs))
        actual_user_representation = np.vstack((maj_user_rep, min_user_rep))
        # remember, item attributes are (num_attr x num_items) in dimension
        item_attrs = rng.dirichlet(item_params[sim_index, :], size=num_items).T
        
        # add all synthetic data to list
        users.append(actual_user_representation)
        items.append(item_attrs)

    return users, items

def init_sim_state(arg_dict):
    # simpler way to pass common arguments to simulations
    init_params = {
        "num_items_per_iter": arg_dict["num_items_per_iter"],
    }

    run_params = {
        "random_items_per_iter": arg_dict["random_items_per_iter"],
    }

    return init_params, run_params

def run_content_sim(item_attrs, user_attrs, args, rng):
    init_params, run_params = init_sim_state(args)
    content = ContentFiltering(
        num_attributes=args["num_attrs"],
        actual_item_representation=item_attrs,
        actual_user_representation=user_attrs)
    metrics = [
        MSEMeasurement(diagnostics=True)
    ]
    content.add_metrics(*metrics)
    content.startup_and_train(timesteps=args["startup_iters"]) # update user representations, but only serve random items
    content.run(timesteps=args["sim_iters"], train_between_steps=args["repeated_training"], **run_params)
    content.close() # end logging
    return content

def run_mf_sim(item_attrs, user_attrs, args, rng):
    init_params, run_params = init_sim_state(args)
    mf = ImplicitMF(
        actual_item_representation=item_attrs,
        actual_user_representation=user_attrs,
        num_latent_factors=args["latent_factors"],
        **init_params
    )
    metrics = [
        MSEMeasurement(diagnostics=True)
    ]
    mf.add_metrics(*metrics)
    mf.startup_and_train(timesteps=args["startup_iters"], no_new_items=False) # update user representations, but only serve random items
    mf.run(timesteps=args["sim_iters"], train_between_steps=args["repeated_training"], reset_interactions=False, **run_params)
    mf.close() # end logging
    return mf

def run_pop_sim(item_attrs, user_attrs, args, rng):
    init_params, run_params = init_sim_state(args)
    p = PopularityRecommender(
        actual_item_representation=item_attrs,
        actual_user_representation=user_attrs,
        **init_params
    )
    metrics = [
        MSEMeasurement(diagnostics=True)
    ]
    p.add_metrics(*metrics)
    p.startup_and_train(timesteps=args["startup_iters"])
    p.run(timesteps=args["sim_iters"], train_between_steps=args["repeated_training"], **run_params)
    p.close() # end logging
    return p

In [5]:
# print majority / minority outcome stats
def majority_minority_outcomes(metric, split_index):
    maj_mean = metric.last_observation[:split_index].mean()
    maj_std = metric.last_observation[:split_index].std()

    min_mean = metric.last_observation[split_index:].mean()
    min_std = metric.last_observation[split_index:].std()

    print("Majority group statistics: ", maj_mean, "(mean), ", maj_std, "(std)")
    print("Minority group statistics: ", min_mean, "(mean), ", min_std, "(std)")
    print()


In [6]:
users, items = sample_users_and_items(
    rng, 
    arg_dict["maj_size"], 
    arg_dict["min_size"], 
    arg_dict["num_items"], 
    arg_dict["num_attrs"], 
    arg_dict["num_sims"]
)

for i in range(arg_dict["num_sims"]):
    filtering = run_content_sim(items[i], users[i], arg_dict, rng)
    mf = run_mf_sim(items[i], users[i], arg_dict, rng)
    pop = run_pop_sim(items[i], users[i], arg_dict, rng)
    
    print("-------------------------------------")
    print(f"TRIAL {i}")
    print("Content filtering:")
    
    majority_minority_outcomes(filtering.metrics[0], arg_dict["maj_size"])
    print("\nMF:")
    majority_minority_outcomes(mf.metrics[0], arg_dict["maj_size"])
    print("\nPopularity:")
    majority_minority_outcomes(pop.metrics[0], arg_dict["maj_size"])
    # run training procedures
    # measure mean + var of MSE for majority vs minority users
    # make some plots? with 1 sim should be doable
    # graph MSE for minority vs majority users?

100%|██████████| 50/50 [00:00<00:00, 207.53it/s]
100%|██████████| 100/100 [00:02<00:00, 44.71it/s]
100%|██████████| 50/50 [00:00<00:00, 173.50it/s]
Numba is using threading layer omp - consider TBB
BLAS using multiple threads - can cause oversubscription
found 2 potential runtime problems - see https://boi.st/lkpy-perf
100%|██████████| 100/100 [00:01<00:00, 50.67it/s]
100%|██████████| 50/50 [00:00<00:00, 224.01it/s]
100%|██████████| 100/100 [00:02<00:00, 49.58it/s]


-------------------------------------
TRIAL 0
Content filtering:
Majority group statistics:  0.7900147880258338 (mean),  0.23339412425317374 (std)
Minority group statistics:  0.843277501818771 (mean),  0.22019283054146258 (std)


MF:
Majority group statistics:  0.008373919387676848 (mean),  0.01117854803861663 (std)
Minority group statistics:  0.0065899323533620815 (mean),  0.007525811745618825 (std)


Popularity:
Majority group statistics:  5605.012910606906 (mean),  12.50233381731303 (std)
Minority group statistics:  5606.52799736373 (mean),  12.008710066620234 (std)



100%|██████████| 50/50 [00:00<00:00, 191.26it/s]
100%|██████████| 100/100 [00:02<00:00, 45.20it/s]
100%|██████████| 50/50 [00:00<00:00, 163.79it/s]
100%|██████████| 100/100 [00:02<00:00, 49.90it/s]
100%|██████████| 50/50 [00:00<00:00, 213.45it/s]
100%|██████████| 100/100 [00:01<00:00, 53.09it/s]


-------------------------------------
TRIAL 1
Content filtering:
Majority group statistics:  0.7862630343337955 (mean),  0.24053756977799115 (std)
Minority group statistics:  0.8071193043792507 (mean),  0.2732070553920995 (std)


MF:
Majority group statistics:  0.009143718815290126 (mean),  0.01258750208913354 (std)
Minority group statistics:  0.007308410520345365 (mean),  0.009862826805312936 (std)


Popularity:
Majority group statistics:  5604.6461093824655 (mean),  12.966189816114815 (std)
Minority group statistics:  5607.643695723745 (mean),  11.960820906679547 (std)



100%|██████████| 50/50 [00:00<00:00, 195.02it/s]
100%|██████████| 100/100 [00:02<00:00, 45.87it/s]
100%|██████████| 50/50 [00:00<00:00, 166.55it/s]
100%|██████████| 100/100 [00:02<00:00, 49.42it/s]
100%|██████████| 50/50 [00:00<00:00, 205.60it/s]
100%|██████████| 100/100 [00:01<00:00, 50.71it/s]


-------------------------------------
TRIAL 2
Content filtering:
Majority group statistics:  0.8144026363988576 (mean),  0.26117418353095473 (std)
Minority group statistics:  0.7595951962309779 (mean),  0.24927430894029193 (std)


MF:
Majority group statistics:  0.008519733083041837 (mean),  0.011320926515071954 (std)
Minority group statistics:  0.006204205746219996 (mean),  0.006406831252619985 (std)


Popularity:
Majority group statistics:  5605.625590536637 (mean),  13.044184047843256 (std)
Minority group statistics:  5607.251400545618 (mean),  10.917415377578287 (std)



100%|██████████| 50/50 [00:00<00:00, 187.32it/s]
100%|██████████| 100/100 [00:02<00:00, 44.23it/s]
100%|██████████| 50/50 [00:00<00:00, 175.83it/s]
100%|██████████| 100/100 [00:01<00:00, 50.28it/s]
100%|██████████| 50/50 [00:00<00:00, 198.77it/s]
100%|██████████| 100/100 [00:01<00:00, 51.14it/s]


-------------------------------------
TRIAL 3
Content filtering:
Majority group statistics:  0.7843489565403767 (mean),  0.2191847777846608 (std)
Minority group statistics:  0.861572680648605 (mean),  0.2630793670722092 (std)


MF:
Majority group statistics:  0.007244766784201685 (mean),  0.008317909615721585 (std)
Minority group statistics:  0.008249604886922248 (mean),  0.012669690070981314 (std)


Popularity:
Majority group statistics:  5603.686032956633 (mean),  11.476881522690343 (std)
Minority group statistics:  5611.306212294627 (mean),  12.183368230067996 (std)



100%|██████████| 50/50 [00:00<00:00, 174.56it/s]
100%|██████████| 100/100 [00:02<00:00, 45.39it/s]
100%|██████████| 50/50 [00:00<00:00, 167.04it/s]
100%|██████████| 100/100 [00:02<00:00, 49.67it/s]
100%|██████████| 50/50 [00:00<00:00, 209.96it/s]
100%|██████████| 100/100 [00:01<00:00, 50.63it/s]


-------------------------------------
TRIAL 4
Content filtering:
Majority group statistics:  0.8094015081427557 (mean),  0.28119342753069143 (std)
Minority group statistics:  0.8350186988901485 (mean),  0.2796573645389291 (std)


MF:
Majority group statistics:  0.0074493530010879055 (mean),  0.009649199218443491 (std)
Minority group statistics:  0.006678929997079412 (mean),  0.009876028737004864 (std)


Popularity:
Majority group statistics:  5604.725020619289 (mean),  12.127737699274194 (std)
Minority group statistics:  5607.859049234426 (mean),  11.313870582988077 (std)



100%|██████████| 50/50 [00:00<00:00, 185.40it/s]
100%|██████████| 100/100 [00:02<00:00, 45.64it/s]
100%|██████████| 50/50 [00:00<00:00, 164.52it/s]
100%|██████████| 100/100 [00:02<00:00, 48.28it/s]
100%|██████████| 50/50 [00:00<00:00, 214.54it/s]
100%|██████████| 100/100 [00:01<00:00, 50.52it/s]


-------------------------------------
TRIAL 5
Content filtering:
Majority group statistics:  0.7823034468384632 (mean),  0.24940065033263656 (std)
Minority group statistics:  0.8466018028668038 (mean),  0.30504492754688717 (std)


MF:
Majority group statistics:  0.007895322419812579 (mean),  0.01102359941439595 (std)
Minority group statistics:  0.008022724440383733 (mean),  0.011883179264104128 (std)


Popularity:
Majority group statistics:  5603.850841521601 (mean),  12.151262949952475 (std)
Minority group statistics:  5606.400255420362 (mean),  12.828172085605349 (std)



100%|██████████| 50/50 [00:00<00:00, 200.00it/s]
100%|██████████| 100/100 [00:02<00:00, 46.77it/s]
100%|██████████| 50/50 [00:00<00:00, 174.88it/s]
100%|██████████| 100/100 [00:02<00:00, 49.40it/s]
100%|██████████| 50/50 [00:00<00:00, 216.28it/s]
100%|██████████| 100/100 [00:01<00:00, 51.50it/s]


-------------------------------------
TRIAL 6
Content filtering:
Majority group statistics:  0.7856772991002965 (mean),  0.253044638024484 (std)
Minority group statistics:  0.7976929932724779 (mean),  0.23516361176202874 (std)


MF:
Majority group statistics:  0.009043801744694283 (mean),  0.010798378234229527 (std)
Minority group statistics:  0.007350791852268627 (mean),  0.009514399984462673 (std)


Popularity:
Majority group statistics:  5604.627582196374 (mean),  13.386285381666914 (std)
Minority group statistics:  5606.315312900324 (mean),  13.118540869056364 (std)



100%|██████████| 50/50 [00:00<00:00, 199.38it/s]
100%|██████████| 100/100 [00:02<00:00, 46.21it/s]
100%|██████████| 50/50 [00:00<00:00, 172.74it/s]
100%|██████████| 100/100 [00:02<00:00, 49.88it/s]
100%|██████████| 50/50 [00:00<00:00, 217.58it/s]
100%|██████████| 100/100 [00:01<00:00, 51.38it/s]


-------------------------------------
TRIAL 7
Content filtering:
Majority group statistics:  0.739421726939416 (mean),  0.23574937563153542 (std)
Minority group statistics:  0.7786128072192601 (mean),  0.26368704394561565 (std)


MF:
Majority group statistics:  0.009001280639115037 (mean),  0.010842769253355868 (std)
Minority group statistics:  0.011270347623671583 (mean),  0.012336311995395158 (std)


Popularity:
Majority group statistics:  5604.686205976731 (mean),  12.586617704178645 (std)
Minority group statistics:  5608.590629206986 (mean),  15.37286649508708 (std)



100%|██████████| 50/50 [00:00<00:00, 200.88it/s]
100%|██████████| 100/100 [00:02<00:00, 45.68it/s]
100%|██████████| 50/50 [00:00<00:00, 170.11it/s]
100%|██████████| 100/100 [00:01<00:00, 51.70it/s]
100%|██████████| 50/50 [00:00<00:00, 215.86it/s]
100%|██████████| 100/100 [00:01<00:00, 50.64it/s]


-------------------------------------
TRIAL 8
Content filtering:
Majority group statistics:  0.7427611512949214 (mean),  0.2562225872092572 (std)
Minority group statistics:  0.7878136149318345 (mean),  0.2423727290803879 (std)


MF:
Majority group statistics:  0.00937908037410072 (mean),  0.011914733179889767 (std)
Minority group statistics:  0.007437706560809934 (mean),  0.010293272210230986 (std)


Popularity:
Majority group statistics:  5603.236150570335 (mean),  13.024339627003242 (std)
Minority group statistics:  5610.007676196124 (mean),  12.122823321560343 (std)



100%|██████████| 50/50 [00:00<00:00, 196.71it/s]
100%|██████████| 100/100 [00:02<00:00, 42.44it/s]
100%|██████████| 50/50 [00:00<00:00, 159.23it/s]
100%|██████████| 100/100 [00:01<00:00, 51.55it/s]
100%|██████████| 50/50 [00:00<00:00, 210.83it/s]
100%|██████████| 100/100 [00:02<00:00, 49.39it/s]

-------------------------------------
TRIAL 9
Content filtering:
Majority group statistics:  0.8127406728241661 (mean),  0.23834359645572115 (std)
Minority group statistics:  0.8095781744322736 (mean),  0.288588057100509 (std)


MF:
Majority group statistics:  0.008148663743303967 (mean),  0.009239749228529192 (std)
Minority group statistics:  0.009516974549105689 (mean),  0.010317912822821988 (std)


Popularity:
Majority group statistics:  5603.715183434306 (mean),  11.856706098822631 (std)
Minority group statistics:  5604.633019114637 (mean),  13.061358997132862 (std)






## Variance in arrival times
Core idea: have two groups of users, one which arrives first in the system and is the majority group, and the latter of which comprises the minority group.

Procedure: startup and train with the earlier group for the cold start, then run for X iterations (training between steps), still with only the earlier group. Then, we add the latter minority, and run for X iterations (training between steps as well).

In [11]:
def run_content_sim(item_attrs, user_attrs, args, rng):
    init_params, run_params = init_sim_state(args)
    init_users = user_attrs[:args["maj_size"], :]
    later_users = user_attrs[args["maj_size"]:, :]
    content = ContentFiltering(
        num_attributes=args["num_attrs"],
        actual_item_representation=item_attrs,
        actual_user_representation=init_users)
    metrics = [
        MSEMeasurement(diagnostics=True)
    ]
    content.add_metrics(*metrics)
    content.startup_and_train(timesteps=args["startup_iters"]) # update user representations, but only serve random items
    content.run(timesteps=args["sim_iters"], train_between_steps=args["repeated_training"], **run_params)
    content.add_users(later_users)
    content.run(timesteps=args["sim_iters"], train_between_steps=args["repeated_training"], **run_params)
    content.close() # end logging
    return content

def run_mf_sim(item_attrs, user_attrs, args, rng):
    init_params, run_params = init_sim_state(args)
    init_users = user_attrs[:args["maj_size"], :]
    later_users = user_attrs[args["maj_size"]:, :]
    mf = ImplicitMF(
        actual_item_representation=item_attrs,
        actual_user_representation=init_users,
        num_latent_factors=args["latent_factors"],
        **init_params
    )
    metrics = [
        MSEMeasurement(diagnostics=True)
    ]
    mf.add_metrics(*metrics)
    mf.startup_and_train(timesteps=args["startup_iters"], no_new_items=False) # update user representations, but only serve random items
    mf.run(timesteps=args["sim_iters"], train_between_steps=args["repeated_training"], reset_interactions=False, **run_params)
    mf.add_users(later_users)
    mf.run(timesteps=args["sim_iters"], train_between_steps=args["repeated_training"], **run_params)
    mf.close() # end logging
    return mf

def run_pop_sim(item_attrs, user_attrs, args, rng):
    init_params, run_params = init_sim_state(args)
    init_users = user_attrs[:args["maj_size"], :]
    later_users = user_attrs[args["maj_size"]:, :]
    p = PopularityRecommender(
        actual_item_representation=item_attrs,
        actual_user_representation=init_users,
        **init_params
    )
    metrics = [
        MSEMeasurement(diagnostics=True)
    ]
    p.add_metrics(*metrics)
    p.startup_and_train(timesteps=args["startup_iters"])
    p.run(timesteps=args["sim_iters"], train_between_steps=args["repeated_training"], **run_params)
    p.add_users(later_users)
    p.run(timesteps=args["sim_iters"], train_between_steps=args["repeated_training"], **run_params)
    p.close() # end logging
    return p

In [12]:
users, items = sample_users_and_items(
    rng, 
    arg_dict["maj_size"], 
    arg_dict["min_size"], 
    arg_dict["num_items"], 
    arg_dict["num_attrs"], 
    arg_dict["num_sims"]
)

for i in range(arg_dict["num_sims"]):
    filtering = run_content_sim(items[i], users[i], arg_dict, rng)
    mf = run_mf_sim(items[i], users[i], arg_dict, rng)
    pop = run_pop_sim(items[i], users[i], arg_dict, rng)
    
    print("-------------------------------------")
    print(f"TRIAL {i}")
    print("Content filtering:")
    
    majority_minority_outcomes(filtering.metrics[0], arg_dict["maj_size"])
    print("\nMF:")
    majority_minority_outcomes(mf.metrics[0], arg_dict["maj_size"])
    print("\nPopularity:")
    majority_minority_outcomes(pop.metrics[0], arg_dict["maj_size"])
    # run training procedures
    # measure mean + var of MSE for majority vs minority users
    # make some plots? with 1 sim should be doable
    # graph MSE for minority vs majority users?

100%|██████████| 50/50 [00:00<00:00, 217.19it/s]
100%|██████████| 100/100 [00:01<00:00, 58.56it/s]
100%|██████████| 100/100 [00:02<00:00, 49.91it/s]
100%|██████████| 50/50 [00:00<00:00, 190.00it/s]
100%|██████████| 100/100 [00:01<00:00, 66.95it/s]
100%|██████████| 100/100 [00:01<00:00, 55.76it/s]
100%|██████████| 50/50 [00:00<00:00, 243.22it/s]
100%|██████████| 100/100 [00:01<00:00, 64.38it/s]
100%|██████████| 100/100 [00:01<00:00, 54.93it/s]


-------------------------------------
TRIAL 0
Content filtering:
Majority group statistics:  0.8175693134456963 (mean),  0.26986566897037184 (std)
Minority group statistics:  0.018752377010023323 (mean),  0.016077061845363037 (std)


MF:
Majority group statistics:  0.010894684579750116 (mean),  0.015443909024596024 (std)
Minority group statistics:  0.02577378981467936 (mean),  0.026996673386064925 (std)


Popularity:
Majority group statistics:  3888.6785564409743 (mean),  11.789507107873117 (std)
Minority group statistics:  35115.947594676 (mean),  31.787034291323767 (std)



100%|██████████| 50/50 [00:00<00:00, 225.34it/s]
100%|██████████| 100/100 [00:01<00:00, 56.71it/s]
100%|██████████| 100/100 [00:02<00:00, 49.57it/s]
100%|██████████| 50/50 [00:00<00:00, 196.00it/s]
100%|██████████| 100/100 [00:01<00:00, 64.73it/s]
100%|██████████| 100/100 [00:01<00:00, 56.15it/s]
100%|██████████| 50/50 [00:00<00:00, 219.08it/s]
100%|██████████| 100/100 [00:01<00:00, 64.25it/s]
100%|██████████| 100/100 [00:01<00:00, 52.42it/s]


-------------------------------------
TRIAL 1
Content filtering:
Majority group statistics:  0.7389389366683707 (mean),  0.24765237618338268 (std)
Minority group statistics:  0.022561653560975566 (mean),  0.01505374538116429 (std)


MF:
Majority group statistics:  0.0079625756485988 (mean),  0.009409944359327542 (std)
Minority group statistics:  0.0271092259786845 (mean),  0.02235644739650145 (std)


Popularity:
Majority group statistics:  3888.9622457363976 (mean),  10.451142152937907 (std)
Minority group statistics:  35105.152872453706 (mean),  23.640037737209013 (std)



100%|██████████| 50/50 [00:00<00:00, 221.60it/s]
100%|██████████| 100/100 [00:01<00:00, 57.37it/s]
100%|██████████| 100/100 [00:02<00:00, 48.73it/s]
100%|██████████| 50/50 [00:00<00:00, 201.99it/s]
100%|██████████| 100/100 [00:01<00:00, 66.44it/s]
100%|██████████| 100/100 [00:01<00:00, 57.77it/s]
100%|██████████| 50/50 [00:00<00:00, 258.74it/s]
100%|██████████| 100/100 [00:01<00:00, 61.87it/s]
100%|██████████| 100/100 [00:01<00:00, 52.90it/s]


-------------------------------------
TRIAL 2
Content filtering:
Majority group statistics:  0.7593375781769005 (mean),  0.24671997599808979 (std)
Minority group statistics:  0.018080584646758713 (mean),  0.017727809119181915 (std)


MF:
Majority group statistics:  0.008327759070673362 (mean),  0.012576834917553703 (std)
Minority group statistics:  0.019269078381531364 (mean),  0.01931254484905855 (std)


Popularity:
Majority group statistics:  3888.4076857462583 (mean),  10.173917916651337 (std)
Minority group statistics:  35120.923155223456 (mean),  35.947931945486225 (std)



100%|██████████| 50/50 [00:00<00:00, 207.16it/s]
100%|██████████| 100/100 [00:01<00:00, 58.38it/s]
100%|██████████| 100/100 [00:02<00:00, 47.48it/s]
100%|██████████| 50/50 [00:00<00:00, 195.56it/s]
100%|██████████| 100/100 [00:01<00:00, 66.07it/s]
100%|██████████| 100/100 [00:01<00:00, 56.59it/s]
100%|██████████| 50/50 [00:00<00:00, 252.36it/s]
100%|██████████| 100/100 [00:01<00:00, 64.85it/s]
100%|██████████| 100/100 [00:01<00:00, 52.58it/s]


-------------------------------------
TRIAL 3
Content filtering:
Majority group statistics:  0.7498697113706865 (mean),  0.2492107724584685 (std)
Minority group statistics:  0.023393546584559454 (mean),  0.020269689239596825 (std)


MF:
Majority group statistics:  0.00837979100778913 (mean),  0.009375555596475692 (std)
Minority group statistics:  0.02792024890177236 (mean),  0.030271873065931182 (std)


Popularity:
Majority group statistics:  3889.65530277342 (mean),  10.627936172771541 (std)
Minority group statistics:  35111.27166201169 (mean),  35.54257896122973 (std)



100%|██████████| 50/50 [00:00<00:00, 226.37it/s]
100%|██████████| 100/100 [00:01<00:00, 58.49it/s]
100%|██████████| 100/100 [00:02<00:00, 47.49it/s]
100%|██████████| 50/50 [00:00<00:00, 192.00it/s]
100%|██████████| 100/100 [00:01<00:00, 63.51it/s]
100%|██████████| 100/100 [00:01<00:00, 56.57it/s]
100%|██████████| 50/50 [00:00<00:00, 226.04it/s]
100%|██████████| 100/100 [00:01<00:00, 60.78it/s]
100%|██████████| 100/100 [00:01<00:00, 53.07it/s]


-------------------------------------
TRIAL 4
Content filtering:
Majority group statistics:  0.7996625536767835 (mean),  0.23567960523299825 (std)
Minority group statistics:  0.02151404718371934 (mean),  0.016985629657049604 (std)


MF:
Majority group statistics:  0.007773635617146819 (mean),  0.011302531657983181 (std)
Minority group statistics:  0.02719361926327977 (mean),  0.026959252946831505 (std)


Popularity:
Majority group statistics:  3889.2593919840724 (mean),  10.496888506989784 (std)
Minority group statistics:  35112.847611441364 (mean),  33.74685011444293 (std)



100%|██████████| 50/50 [00:00<00:00, 222.83it/s]
100%|██████████| 100/100 [00:01<00:00, 55.75it/s]
100%|██████████| 100/100 [00:02<00:00, 46.74it/s]
100%|██████████| 50/50 [00:00<00:00, 191.52it/s]
100%|██████████| 100/100 [00:01<00:00, 61.87it/s]
100%|██████████| 100/100 [00:01<00:00, 52.77it/s]
100%|██████████| 50/50 [00:00<00:00, 234.39it/s]
100%|██████████| 100/100 [00:01<00:00, 62.28it/s]
100%|██████████| 100/100 [00:01<00:00, 54.81it/s]


-------------------------------------
TRIAL 5
Content filtering:
Majority group statistics:  0.7717340105466213 (mean),  0.245759155103303 (std)
Minority group statistics:  0.019924584322905937 (mean),  0.017461629700908223 (std)


MF:
Majority group statistics:  0.009026328307299858 (mean),  0.01164850713503942 (std)
Minority group statistics:  0.022020332318015992 (mean),  0.026266193197887762 (std)


Popularity:
Majority group statistics:  3889.368003937659 (mean),  11.125610290713052 (std)
Minority group statistics:  35120.271098834164 (mean),  38.7956864694449 (std)



100%|██████████| 50/50 [00:00<00:00, 195.22it/s]
100%|██████████| 100/100 [00:01<00:00, 57.32it/s]
100%|██████████| 100/100 [00:02<00:00, 47.28it/s]
100%|██████████| 50/50 [00:00<00:00, 193.26it/s]
100%|██████████| 100/100 [00:01<00:00, 64.28it/s]
100%|██████████| 100/100 [00:01<00:00, 57.00it/s]
100%|██████████| 50/50 [00:00<00:00, 248.24it/s]
100%|██████████| 100/100 [00:01<00:00, 60.29it/s]
100%|██████████| 100/100 [00:01<00:00, 53.41it/s]


-------------------------------------
TRIAL 6
Content filtering:
Majority group statistics:  0.8266503087513033 (mean),  0.23568682781191314 (std)
Minority group statistics:  0.018836427539406276 (mean),  0.015255494469627442 (std)


MF:
Majority group statistics:  0.0066502834447969734 (mean),  0.00781512774847951 (std)
Minority group statistics:  0.021966598821302558 (mean),  0.021516453693802187 (std)


Popularity:
Majority group statistics:  3888.953086755097 (mean),  9.4077437350147 (std)
Minority group statistics:  35116.82429794373 (mean),  33.04793435114072 (std)



100%|██████████| 50/50 [00:00<00:00, 212.44it/s]
100%|██████████| 100/100 [00:01<00:00, 54.99it/s]
100%|██████████| 100/100 [00:02<00:00, 48.02it/s]
100%|██████████| 50/50 [00:00<00:00, 185.40it/s]
100%|██████████| 100/100 [00:01<00:00, 61.81it/s]
100%|██████████| 100/100 [00:01<00:00, 54.97it/s]
100%|██████████| 50/50 [00:00<00:00, 237.80it/s]
100%|██████████| 100/100 [00:01<00:00, 60.82it/s]
100%|██████████| 100/100 [00:01<00:00, 54.45it/s]


-------------------------------------
TRIAL 7
Content filtering:
Majority group statistics:  0.765469548212102 (mean),  0.24911540357436135 (std)
Minority group statistics:  0.022566620732384565 (mean),  0.018590495791931304 (std)


MF:
Majority group statistics:  0.009324526276256049 (mean),  0.011858002043561439 (std)
Minority group statistics:  0.024344459525835065 (mean),  0.024923526244326217 (std)


Popularity:
Majority group statistics:  3889.4099403263003 (mean),  11.035373224807104 (std)
Minority group statistics:  35113.260977089594 (mean),  36.36548578374866 (std)



100%|██████████| 50/50 [00:00<00:00, 220.34it/s]
100%|██████████| 100/100 [00:01<00:00, 57.43it/s]
100%|██████████| 100/100 [00:02<00:00, 49.37it/s]
100%|██████████| 50/50 [00:00<00:00, 195.01it/s]
100%|██████████| 100/100 [00:01<00:00, 64.77it/s]
100%|██████████| 100/100 [00:01<00:00, 56.00it/s]
100%|██████████| 50/50 [00:00<00:00, 243.09it/s]
100%|██████████| 100/100 [00:01<00:00, 62.90it/s]
100%|██████████| 100/100 [00:01<00:00, 54.08it/s]


-------------------------------------
TRIAL 8
Content filtering:
Majority group statistics:  0.8425072663141834 (mean),  0.2538256198209568 (std)
Minority group statistics:  0.02210696284019035 (mean),  0.0196541716673939 (std)


MF:
Majority group statistics:  0.007364931801430612 (mean),  0.008962286998283782 (std)
Minority group statistics:  0.02327550594207846 (mean),  0.027121868697399406 (std)


Popularity:
Majority group statistics:  3889.300218006038 (mean),  9.953986350492675 (std)
Minority group statistics:  35115.276252070995 (mean),  37.774927496511445 (std)



100%|██████████| 50/50 [00:00<00:00, 222.83it/s]
100%|██████████| 100/100 [00:01<00:00, 57.08it/s]
100%|██████████| 100/100 [00:02<00:00, 47.80it/s]
100%|██████████| 50/50 [00:00<00:00, 196.14it/s]
100%|██████████| 100/100 [00:01<00:00, 62.42it/s]
100%|██████████| 100/100 [00:01<00:00, 53.55it/s]
100%|██████████| 50/50 [00:00<00:00, 240.34it/s]
100%|██████████| 100/100 [00:01<00:00, 57.88it/s]
100%|██████████| 100/100 [00:02<00:00, 48.22it/s]

-------------------------------------
TRIAL 9
Content filtering:
Majority group statistics:  0.7405658557879112 (mean),  0.2521275207747069 (std)
Minority group statistics:  0.02358580877292491 (mean),  0.020274298000252208 (std)


MF:
Majority group statistics:  0.007807476818130836 (mean),  0.010408111861186575 (std)
Minority group statistics:  0.027087816940364412 (mean),  0.030979792374611965 (std)


Popularity:
Majority group statistics:  3888.8491023323454 (mean),  10.499330832584945 (std)
Minority group statistics:  35112.66418224835 (mean),  37.604501069805316 (std)




