In [22]:
import pandas
from tqdm import tqdm
from itertools import permutations
from collections import defaultdict

# Frequencies

In [35]:
# Registers tqdm's progress_* methods (e.g. progress_apply) on pandas objects.
# NOTE(review): no progress_* call is visible in this section of the notebook —
# confirm this registration is still needed.
tqdm.pandas()

In [2]:
# NOTE(review): `df` is not referenced by any later cell visible here. The 50x1
# table read further down (./frequencies/50x1/orderings.csv) was presumably
# produced by a process(df, "50x1") call that is no longer in the notebook — confirm.
df = pandas.read_csv("../ensembles/prec50/results.csv")

In [51]:
# Population columns selected from the ensemble frames (each is compared against TOTPOP).
groups = ["HISP", "NH_WHITE", "NH_BLACK", "NH_ASIAN"]
# One-letter codes, positionally parallel to `groups` (abbreviations[i] stands for groups[i]):
# ranking codes are built by concatenating ranks in `groups` column order and are
# decoded through a table keyed in `abbreviations` order, so the two lists must stay aligned.
abbreviations = ["H", "W", "B", "A"]

In [32]:
def number_over_threshold(data, threshold):
    """Count, per row, how many group columns exceed ``threshold`` as a share of TOTPOP.

    Each column listed in ``groups`` is divided row-wise by ``data["TOTPOP"]``;
    the result is a Series holding the number of groups whose share is strictly
    greater than ``threshold``.
    """
    shares = data[groups].div(data["TOTPOP"], axis=0)
    return shares.gt(threshold).sum(axis=1)

In [69]:
# Lookup from a rank code to the ordering it denotes.
# Key: for each abbreviation (in `abbreviations` order) its 1-based position in
# the ordering, concatenated; value: the ordering itself as a string.
# Example: ordering W,H,B,A -> H is 2nd, W 1st, B 3rd, A 4th -> "2134": "WHBA".
mapping = dict(
    (
        "".join(str(1 + order.index(abbr)) for abbr in abbreviations),
        "".join(order),
    )
    for order in permutations(abbreviations)
)

In [74]:
def encoded_rankings(df):
    """Return, per row, the ordering of the groups from largest to smallest.

    The columns in ``groups`` are ranked within each row (rank 1 = largest),
    the ranks are concatenated into a code such as "2134", and the code is
    translated through ``mapping`` into an ordering string such as "WHBA".

    Ties are broken by column order (``method="first"``). With the default
    average method, tied columns receive fractional ranks (e.g. 1.5 and 1.5)
    that truncate to a duplicate digit; the resulting code is absent from
    ``mapping``, so the row's ranking became NaN and the row silently vanished
    from every downstream count.
    """
    ranked = df[groups].rank(ascending=False, axis=1, method="first")
    # The int/str round-trip leaves a code like "1234" unchanged; presumably it
    # guards against the row-wise sum coming back as a number — TODO confirm.
    encoded = ranked.astype(int).astype(str).sum(axis=1).astype(int).astype(str)
    return encoded.map(mapping)

In [124]:
# Every ordering string, in the order the lookup table was built; used to give
# the frequency tables a fixed, complete set of rows.
all_orderings = [*mapping.values()]

In [123]:
def case(df, m_or_p, number_over, threshold):
    """Count rows per ranking for one (majority/plurality, groups-over-threshold) case.

    Keeps the rows whose ``maj_or_plur`` equals ``m_or_p`` and whose
    ``threshold`` column equals ``number_over``, then returns the number of
    such rows for each value of ``ranking``.
    """
    kind_matches = df["maj_or_plur"].eq(m_or_p)
    count_matches = df[threshold].eq(number_over)
    selected = df.loc[kind_matches & count_matches]
    return selected.groupby("ranking").size()

In [125]:
def frequencies(df, threshold):
    """Build the ranking-by-case frequency table for one threshold column.

    Columns are "1M", "1P", "2M", "2P", "3M", "3P" (number of groups over the
    threshold, then majority/plurality flag). Rows are reindexed to
    ``all_orderings`` so every ordering appears, with missing counts set to 0.
    """
    columns = {}
    for count in (1, 2, 3):
        for kind in ("M", "P"):
            columns[f"{count}{kind}"] = case(df, kind, count, threshold)
    table = pandas.DataFrame(columns)
    return table.reindex(all_orderings).fillna(0)

In [136]:
def process(df, ensemble_name):
    """Annotate ``df`` and write its frequency tables for one ensemble.

    Adds these columns to ``df`` in place:
      * over_quarter / over_sixth: number of groups whose share of TOTPOP
        exceeds 0.25 and 1/6 respectively
      * majority: True when at least one group's share exceeds 0.5
      * maj_or_plur: "M" where majority is True, otherwise "P"
      * ranking: ordering string from ``encoded_rankings``

    Writes orderings.csv, over_sixth.csv and over_quarter.csv under
    ./frequencies/{ensemble_name}/, creating that directory if necessary.
    """
    import os

    df["over_quarter"] = number_over_threshold(df, 0.25)
    df["over_sixth"] = number_over_threshold(df, 1/6)
    df["majority"] = number_over_threshold(df, 0.5) > 0
    df["maj_or_plur"] = df["majority"].map({True: "M", False: "P"})
    df["ranking"] = encoded_rankings(df)

    # to_csv does not create missing directories, so a new ensemble name
    # would otherwise fail on the first write.
    output_dir = f"./frequencies/{ensemble_name}"
    os.makedirs(output_dir, exist_ok=True)

    df["ranking"].value_counts().to_csv(f"{output_dir}/orderings.csv", header=True)
    frequencies(df, "over_sixth").to_csv(f"{output_dir}/over_sixth.csv", header=True)
    frequencies(df, "over_quarter").to_csv(f"{output_dir}/over_quarter.csv", header=True)

In [137]:
# Ensemble results later written out under the name "10xM".
df_10xM = pandas.read_csv("../ensembles/prec10/results.csv")

In [138]:
# Ensemble results later written out under the name "10xM_CA".
df_10xM_CA = pandas.read_csv("../ensembles/ca10/results.csv")

In [139]:
# Adds the derived columns to df_10xM in place and writes orderings.csv,
# over_sixth.csv and over_quarter.csv under ./frequencies/10xM/.
process(df_10xM, "10xM")


In [140]:
# Adds the derived columns to df_10xM_CA in place and writes orderings.csv,
# over_sixth.csv and over_quarter.csv under ./frequencies/10xM_CA/.
process(df_10xM_CA, "10xM_CA")

## How often is each group 1st, 2nd, 3rd or 4th?

In [87]:
# This cell reads `orderings`, which the notebook otherwise only defines further
# down (in the Expectations section), so a top-to-bottom run failed here with a
# NameError. Load the table here so the cell runs on a fresh kernel.
orderings = pandas.read_csv("./frequencies/50x1/orderings.csv")
orderings.columns = ["ordering", "number"]

# Loop-invariant denominator: total count over all orderings.
total_count = orderings["number"].sum()

for group in ["H", "W", "B", "A"]:
    print("\n{}".format(group))
    for position in [0, 1, 2, 3]:
        # Share of all counted orderings that have `group` at this position.
        in_position = orderings["ordering"].str[position] == group
        pct = orderings.loc[in_position, "number"].sum() / total_count
        print("Position {}: {}".format(position + 1, pct))


H
Position 1: 0.2834178
Position 2: 0.4706782
Position 3: 0.1432792
Position 4: 0.1026248

W
Position 1: 0.404036
Position 2: 0.2480614
Position 3: 0.3471596
Position 4: 0.000743

B
Position 1: 0.3064708
Position 2: 0.208257
Position 3: 0.2073134
Position 4: 0.2779588

A
Position 1: 0.0060754
Position 2: 0.0730034
Position 3: 0.3022478
Position 4: 0.6186734


---

# Expectations


In [3]:
# Per-ordering counts for the 50x1 ensemble.
# NOTE(review): no process(..., "50x1") call is visible in this notebook, so this
# file is presumably produced elsewhere or by a cell that was removed — confirm.
orderings = pandas.read_csv("./frequencies/50x1/orderings.csv")

In [5]:
# Give the two columns the names the expectation code accesses
# (row.ordering, row.number, orderings.number).
orderings.columns = ["ordering", "number"]

In [34]:
def first_wins(ordering):
    """Allocation rule: the top-ranked group takes the single win.

    Yields one ``(group, wins)`` pair: the first entry of ``ordering`` with 1 win.
    """
    winner = ordering[0]
    yield winner, 1

In [35]:
def top_two_equal(ordering):
    """Allocation rule: the two top-ranked groups split one win evenly.

    Yields ``(group, 1/2)`` for the first entry of ``ordering`` and then for
    the second.
    """
    leader, runner_up, *_ = ordering
    for group in (leader, runner_up):
        yield (group, 1/2)

In [36]:
def top_three_equal(ordering):
    """Allocation rule: the three top-ranked groups split one win evenly.

    Yields ``(group, 1/3)`` for the first, second and third entries of
    ``ordering``, in that order.
    """
    share = 1/3
    gold, silver, bronze, *_ = ordering
    yield from ((group, share) for group in (gold, silver, bronze))

In [48]:
def expected_wins(orderings, compute_wins=first_wins):
    """Average wins per group under an allocation rule, weighted by ordering counts.

    ``orderings`` needs an ``ordering`` column and a ``number`` column.
    ``compute_wins`` is called with each ordering and must yield
    ``(group, wins)`` pairs. Each group's wins are weighted by the row's
    ``number`` and the totals are divided by the sum of ``number``.

    Returns a dict keyed 'A', 'H', 'B', 'W'.
    """
    tally = dict.fromkeys(('A', 'H', 'B', 'W'), 0)

    for ordering, count in zip(orderings["ordering"], orderings["number"]):
        for group, wins in compute_wins(ordering):
            tally[group] += wins * count

    ward_count = orderings["number"].sum()
    return {group: won / ward_count for group, won in tally.items()}

In [73]:
# One column per allocation rule, one row per group, computed from `orderings`.
results = pandas.DataFrame({
    label: expected_wins(orderings, compute_wins=rule)
    for label, rule in (
        ("top_one_wins", first_wins),
        ("top_two_equal", top_two_equal),
        ("top_three_equal", top_three_equal),
    )
})

In [44]:
# 10x5 cases

def top_one_wins_five(ordering):
    """Allocation rule: the top-ranked group takes all 5 wins."""
    top_group = ordering[0]
    yield top_group, 5

def three_two(ordering):
    """Allocation rule: 3 wins to the top-ranked group, 2 to the second."""
    for rank, seats in enumerate((3, 2)):
        yield (ordering[rank], seats)

def two_two_one(ordering):
    """Allocation rule: 2 wins each to the top two groups, 1 to the third."""
    for rank, seats in enumerate((2, 2, 1)):
        yield (ordering[rank], seats)
    
# 10x3 cases

def top_one_wins_three(ordering):
    """Allocation rule: the top-ranked group takes all 3 wins."""
    top_group = ordering[0]
    yield top_group, 3

def two_one(ordering):
    """Allocation rule: 2 wins to the top-ranked group, 1 to the second."""
    for rank, seats in ((0, 2), (1, 1)):
        yield ordering[rank], seats

def one_one_one(ordering):
    """Allocation rule: 1 win each to the three top-ranked groups."""
    for rank in range(3):
        yield (ordering[rank], 1)
    

In [58]:
# Per-ordering counts written for the "10xM" ensemble, with the column names the
# expectation code accesses.
orderings10xM = pandas.read_csv("./frequencies/10xM/orderings.csv")
orderings10xM.columns = ['ordering', 'number']

# One column per 5-win allocation rule, one row per group.
results10x5 = pandas.DataFrame({
    label: expected_wins(orderings10xM, compute_wins=rule)
    for label, rule in (
        ("top_one_wins_five", top_one_wins_five),
        ("three_two", three_two),
        ("two_two_one", two_two_one),
    )
})


In [59]:
# One column per 3-win allocation rule, one row per group, from the 10xM orderings.
results10x3 = pandas.DataFrame({
    label: expected_wins(orderings10xM, compute_wins=rule)
    for label, rule in (
        ("top_one_wins_three", top_one_wins_three),
        ("two_one", two_one),
        ("one_one_one", one_one_one),
    )
})


In [74]:
# expected_wins returns wins averaged over the ordering counts; scale each table
# up to ensemble totals (presumably 50 wards for 50x1 and 10 wards for the 10xM
# tables — confirm). The saved 10x5 and 10x3 tables sum to 50 and 30 per column,
# so they were produced with the x10 scaling applied; with those two lines
# commented out, a fresh run wrote unscaled CSVs that disagreed with them.
# These are in-place updates: run this cell exactly once after the results
# tables are (re)computed, otherwise the factors compound.
results *= 50
results10x3 *= 10
results10x5 *= 10

In [76]:
# Persist the three expectation tables next to the notebook; the group letters
# (the frame index) are written as the first CSV column.
results10x3.to_csv("./expectations_10x3.csv")
results10x5.to_csv("./expectations_10x5.csv")
results.to_csv("./expectations_50x1.csv")

In [75]:
# Display the 50x1 expectations (rows: group letter, columns: allocation rule).
results

Unnamed: 0,top_one_wins,top_two_equal,top_three_equal
A,0.30377,1.97697,6.355443
B,15.32354,12.868195,12.03402
H,14.17089,18.8524,14.956253
W,20.2018,16.302435,16.654283


In [70]:
# Display the 10x5 expectations (rows: group letter, columns: allocation rule).
results10x5

Unnamed: 0,top_one_wins_five,three_two,two_two_one
A,0.0,0.42184,2.64339
B,17.58505,14.04715,13.30003
H,12.9633,18.36052,17.42098
W,19.45165,17.17049,16.6356


In [71]:
# Display the 10x3 expectations (rows: group letter, columns: allocation rule).
results10x3

Unnamed: 0,top_one_wins_three,two_one,one_one_one
A,0.0,0.21092,2.43247
B,10.55103,8.78208,8.03496
H,7.77798,10.47659,9.53705
W,11.67099,10.53041,9.99552
