In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier

In [2]:

num_nominees = 9

# A random ordering of the nominee indices 0..num_nominees-1, each used once.
np.random.permutation(num_nominees)

array([3, 5, 1, 0, 8, 4, 2, 6, 7])

In [4]:
# Load the processed results table. Later cells read its 'year', 'film' and
# 'Oscar_win' columns plus the columns listed in `full_predictors`.
# The path is relative to the notebook's working directory.
full_table = pd.read_csv('./data/processed_results/osc_df')

# Setting Up our DataFrame

In [5]:
min_year = 1995

# Fit on ceremonies strictly after `min_year` and strictly before 2019;
# the 2019 nominees are held out as the slate to predict.
train = full_table[(full_table['year'] > min_year) & (full_table['year'] < 2019)]
test_2019 = full_table[full_table['year'] == 2019]

print('training set contains:', len(train), 'movies')
print('Prediciting on:', len(test_2019), 'movies')

training set contains: 154 movies
Prediciting on: 9 movies


In [6]:
# Candidate predictor columns a simulated voter may draw from.
# (The response column, 'Oscar_win', is read inside simulate_a_vote.)
full_predictors = [
    'year',
    'nom_gg_drama', 'winner_gg_drama',
    'nom_gg_comedy', 'winner_gg_comedy',
    'nom_pga', 'winner_pga',
    'nom_bafta', 'winner_bafta',
    'nom_dga', 'winner_dga',
    'nom_sag', 'winner_sag',
    'nom_cannes', 'winner_cannes',
    'Nominations',
]

# Simulating a Voter using a Decision Tree

In [7]:
# One simulated voter: a depth-limited decision tree with random split points.
# The fixed random_state pins the tree's own randomness; the per-voter
# variation comes from simulate_a_vote.
voter1 = DecisionTreeClassifier(
    random_state=92,
    splitter='random',
    max_depth=3,
    min_samples_leaf=3,
)

In [140]:
def simulate_a_vote(model, train_df, to_predict_df, full_predictors, rng=None):
    """Simulate one voter's ranked ballot over the films in ``to_predict_df``.

    The voter is modelled by fitting ``model`` on a random subset of the
    predictor columns plus a freshly generated noise column, then ranking the
    films to predict by the predicted probability of the positive class.

    Parameters
    ----------
    model : estimator exposing ``fit(x, y)`` and ``predict_proba(x)``
        Re-fitted on every call; column 1 of ``predict_proba`` is used as the
        win probability.
    train_df : pandas.DataFrame
        Training rows; must contain every column in ``full_predictors`` and an
        ``'Oscar_win'`` column. Not modified (a copy is used).
    to_predict_df : pandas.DataFrame
        Rows to rank; must contain every column in ``full_predictors``.
        Not modified (a copy is used).
    full_predictors : sequence of str
        Column names the voter may draw features from.
    rng : object with ``rand`` and ``choice`` methods, optional
        Source of randomness, e.g. ``numpy.random.RandomState(seed)``. Defaults
        to NumPy's global generator (``np.random``), which is what this
        function always used before the parameter existed.

    Returns
    -------
    numpy.ndarray
        Integer ranks, one per row of ``to_predict_df`` in row order, forming a
        permutation of ``1..n`` where 1 marks the highest predicted probability
        (the voter's first choice).
    """
    if rng is None:
        rng = np.random

    train = train_df.copy()
    test = to_predict_df.copy()

    # A noise column, regenerated on every call, represents the voter's bias.
    train.loc[:, 'Noise'] = rng.rand(train_df.shape[0])
    test.loc[:, 'Noise'] = rng.rand(to_predict_df.shape[0])

    # The voter looks at a random number of predictors, drawn with replacement
    # (similar to bootstrapping), reflecting how closely they followed the
    # season. The count is drawn from 0 .. int(len/1.7) - 1.
    max_features = int(len(full_predictors) / 1.7)
    if max_features > 0:
        num_features = rng.choice(max_features)
        chosen = list(rng.choice(full_predictors, num_features))
    else:
        # Too few predictors to draw a count from (choice(0) would raise):
        # the voter falls back to the noise column alone.
        chosen = []
    voter_features = chosen + ['Noise']

    x = np.array(train[voter_features])
    y = np.array(train['Oscar_win'])
    model.fit(x, y)

    # Predicted probability of the positive class drives the ranking.
    win_proba = model.predict_proba(np.array(test[voter_features]))[:, 1]
    # Tiny jitter (< 1e-4) breaks ties between equal probabilities.
    ballot = win_proba + rng.rand(len(win_proba)) / 10000

    # argsort is ascending, so hand out ranks n, n-1, ..., 1 in that order:
    # the largest probability ends up with rank 1.
    order = ballot.argsort()
    ranks = np.empty_like(order)
    ranks[order] = np.arange(len(ballot), 0, -1)
    return ranks

In [131]:
vote = simulate_a_vote(
    model=voter1,
    train_df=train,
    to_predict_df=test_2019,
    full_predictors=full_predictors,
)
print("This voter's ballot looks like:", vote)
# Rank 1 is the favourite, so the smallest entry marks the first choice.
print("This means their first choice is:", test_2019['film'].iloc[vote.argmin()])

This voter's ballot looks like: [4 2 5 6 3 9 8 7 1]
This means their first choice is: Parasite (2019 film)


# Simulating the Entire Academy

In [98]:
def simulate_voting_body(num_voters, model, train_df, to_predict_df, full_predictors):
    """Collect ``num_voters`` simulated ballots into one array.

    Each row is the result of one ``simulate_a_vote`` call with the given
    arguments; each column corresponds to a row of ``to_predict_df``.
    Returns a float array of shape ``(num_voters, to_predict_df.shape[0])``.
    """
    ballots = np.zeros((num_voters, to_predict_df.shape[0]))
    # Iterating a 2-D array yields row views, so writing into the view fills
    # the matching row of `ballots`.
    for ballot_row in ballots:
        ballot_row[:] = simulate_a_vote(model, train_df, to_predict_df, full_predictors)
    return ballots

In [99]:
# Five sample ballots: one row per voter, one column per 2019 nominee.
simulate_voting_body(
    num_voters=5,
    model=voter1,
    train_df=train,
    to_predict_df=test_2019,
    full_predictors=full_predictors,
)

array([[5., 8., 2., 4., 6., 7., 1., 3., 9.],
       [4., 6., 2., 5., 8., 7., 9., 3., 1.],
       [8., 4., 6., 1., 2., 9., 7., 3., 5.],
       [3., 4., 8., 6., 7., 5., 1., 9., 2.],
       [7., 5., 4., 8., 6., 9., 2., 3., 1.]])

In [108]:
def tally_votes(voting_body, list_of_nominees):
    """Count first-place votes per nominee.

    ``voting_body`` is a 2-D array of ranks (rows are ballots, columns are
    nominees); ``list_of_nominees`` must be in the same order as the columns.
    Returns a DataFrame indexed by nominee with a single ``'Votes'`` column,
    sorted from most to fewest first-place votes.
    """
    first_place_counts = np.asarray(voting_body == 1).sum(axis=0)
    standings = pd.DataFrame({'Votes': first_place_counts}, index=list_of_nominees)
    return standings.sort_values('Votes', ascending=False)

In [134]:
this_academy = simulate_voting_body(
    num_voters=1000,
    model=voter1,
    train_df=train,
    to_predict_df=test_2019,
    full_predictors=full_predictors,
)
# First-place standings before any elimination.
tally_votes(this_academy, list(test_2019['film']))

Unnamed: 0,Votes
1917 (2019 film),390
Parasite (2019 film),209
Once Upon a Time in Hollywood,136
The Irishman,89
Joker (2019 film),49
Jojo Rabbit,43
Little Women (2019 film),32
Ford v Ferrari,27
Marriage Story,25


# Tiered Voting Changes
We eliminate the film with the fewest first-place votes from every ballot, then re-rank the remaining films on each ballot.

In [110]:
def remove_least(voting_body, list_of_nominees):
    """Eliminate the nominee with the fewest first-place votes.

    Parameters
    ----------
    voting_body : numpy.ndarray
        2-D array of ranks; rows are ballots, columns are nominees, and a
        value of 1 marks a ballot's first choice.
    list_of_nominees : sequence
        Nominee names in the same order as the columns of ``voting_body``.
        The caller's sequence is left untouched.

    Returns
    -------
    (numpy.ndarray, list)
        A copy of ``voting_body`` without the eliminated column, and a new
        list of the remaining nominees. Remaining ranks are NOT renumbered,
        so ballots may contain gaps afterwards.

    Raises
    ------
    ValueError
        If ``voting_body`` has no nominee columns.

    Notes
    -----
    When several nominees tie for fewest first-place votes, the one with the
    lowest column index is eliminated (``np.argmin`` behaviour).
    """
    first_place_counts = np.sum(voting_body == 1, axis=0)
    if first_place_counts.size == 0:
        raise ValueError('voting_body has no nominees left to eliminate')

    least_votes_index = int(np.argmin(first_place_counts))
    trimmed_body = np.delete(voting_body, least_votes_index, axis=1)

    # Work on a copy and delete by position: removing by value would mutate the
    # caller's list and drop the wrong entry when two nominees share a name.
    remaining_nominees = list(list_of_nominees)
    del remaining_nominees[least_votes_index]
    return trimmed_body, remaining_nominees

In [111]:
def re_rank_ballots(voting_body):
    """
    Renumber every ballot (row) of a 2-D numpy array so that its ranks run
    from 1 to the number of columns, keeping each ballot's relative order.
    Returns a new float array of the same shape.
    """
    compacted = np.zeros(voting_body.shape)
    for row_index, ballot in enumerate(voting_body):
        order = ballot.argsort()
        # Scatter 0..k-1 back through the sort order to get each entry's position.
        position = np.empty_like(order)
        position[order] = np.arange(len(ballot))
        compacted[row_index, :] = position + 1
    return compacted

In [106]:
def run_one_round_of_eliminations(voting_body, list_of_nominees):
    """Run one elimination round.

    Removes the nominee with the fewest first-place votes (``remove_least``)
    and renumbers every ballot from 1 (``re_rank_ballots``).
    Returns the updated voting body and the remaining nominees.
    """
    trimmed_ballots, surviving_nominees = remove_least(voting_body, list_of_nominees)
    return re_rank_ballots(trimmed_ballots), surviving_nominees

In [139]:
new_votes, new_noms = run_one_round_of_eliminations(
    voting_body=this_academy,
    list_of_nominees=list(test_2019['film']),
)

print(len(new_noms), 'films remaining')
print('\nNew Standings:')
# Standings after the weakest film's ballots were redistributed.
tally_votes(new_votes, new_noms)

8 films remaining

New Standings:


Unnamed: 0,Votes
1917 (2019 film),392
Parasite (2019 film),212
Once Upon a Time in Hollywood,138
The Irishman,97
Joker (2019 film),52
Jojo Rabbit,45
Little Women (2019 film),33
Ford v Ferrari,31


## Currently have to figure out how to re-rank every ballot

In [168]:
def run_preferential_voting(voting_body,list_of_nominees, show_steps = False):
    """Run elimination rounds until the leader holds at least half the first-place votes.

    Parameters
    ----------
    voting_body : numpy.ndarray
        2-D array of ranks; rows are ballots, columns are nominees, and a
        value of 1 marks a ballot's first choice.
    list_of_nominees : sequence
        Nominee names in the same order as the columns of ``voting_body``.
        The caller's sequence is not modified.
    show_steps : bool, default False
        When True, print the first-place standings after every elimination.

    Returns
    -------
    (numpy.ndarray, list)
        The voting body and nominee list as they stand once the leading film's
        share of first-place votes is no longer below 0.5. If that already
        holds on entry, ``voting_body`` is returned unchanged.

    Notes
    -----
    The loop also stops when the share is undefined (no first-place votes at
    all), matching the previous behaviour where the NaN comparison was False.
    """
    def _leader_share(ballots):
        # Share of first-place votes held by the current front-runner,
        # computed once per round straight from the ballots.
        first_place_counts = np.sum(ballots == 1, axis=0)
        total = first_place_counts.sum()
        if first_place_counts.size == 0 or total == 0:
            return float('nan')
        return first_place_counts.max() / total

    # Work on a private copy so the caller's list is never mutated.
    list_of_nominees = list(list_of_nominees)

    while _leader_share(voting_body) < 0.5:
        voting_body, list_of_nominees = run_one_round_of_eliminations(voting_body, list_of_nominees)
        if show_steps:
            print(tally_votes(voting_body, list_of_nominees),'\n')
    return voting_body, list_of_nominees

In [145]:
# Leader's share of first-place votes after one elimination round.
# The 'Votes' column is selected by label (positional `[0]` lookups on a
# label-indexed Series are deprecated) and the tally is built only once.
tally_votes(new_votes, new_noms)['Votes'].pipe(lambda votes: votes.max() / votes.sum())

0.392

In [146]:
final_ballot, final_films = run_preferential_voting(
    voting_body=this_academy,
    list_of_nominees=list(test_2019['film']),
    show_steps=True,
)
# Standings once the eliminations have stopped.
tally_votes(final_ballot, final_films)

                               Votes
1917 (2019 film)                 392
Parasite (2019 film)             212
Once Upon a Time in Hollywood    138
The Irishman                      97
Joker (2019 film)                 52
Jojo Rabbit                       45
Little Women (2019 film)          33
Ford v Ferrari                    31 

                               Votes
1917 (2019 film)                 398
Parasite (2019 film)             216
Once Upon a Time in Hollywood    141
The Irishman                     100
Joker (2019 film)                 56
Jojo Rabbit                       50
Little Women (2019 film)          39 

                               Votes
1917 (2019 film)                 400
Parasite (2019 film)             225
Once Upon a Time in Hollywood    149
The Irishman                     105
Joker (2019 film)                 66
Jojo Rabbit                       55 

                               Votes
1917 (2019 film)                 415
Parasite (2019 film)            

Unnamed: 0,Votes
1917 (2019 film),591
Parasite (2019 film),409


# Let's Simulate the Oscars!

In [170]:
min_year = 1995

# Fit on ceremonies strictly after `min_year` and strictly before 2019;
# the 2019 nominees are the slate being predicted.
train = full_table[(full_table['year'] > min_year) & (full_table['year'] < 2019)]
test_2019 = full_table[full_table['year'] == 2019]

print('training set contains:', len(train), 'movies')
print('Prediciting on:', len(test_2019), 'movies')

# Candidate predictor columns a simulated voter may draw from.
full_predictors = [
    'year',
    'nom_gg_drama', 'winner_gg_drama',
    'nom_gg_comedy', 'winner_gg_comedy',
    'nom_pga', 'winner_pga',
    'nom_bafta', 'winner_bafta',
    'nom_dga', 'winner_dga',
    'nom_sag', 'winner_sag',
    'nom_cannes', 'winner_cannes',
    'Nominations',
]

# Same voter configuration as voter1, under a separate name for the full run.
voter2 = DecisionTreeClassifier(
    random_state=92,
    splitter='random',
    max_depth=3,
    min_samples_leaf=3,
)

# Simulate 7000 ballots over the 2019 nominees.
academy_sim = simulate_voting_body(
    num_voters=7000,
    model=voter2,
    train_df=train,
    to_predict_df=test_2019,
    full_predictors=full_predictors,
)

training set contains: 154 movies
Prediciting on: 9 movies


In [171]:
print('Initial Rankings:')
print(tally_votes(academy_sim, list(test_2019['film'])), '\n')
# Eliminate films round by round, printing the standings after each round.
final_ballot, final_films = run_preferential_voting(
    voting_body=academy_sim,
    list_of_nominees=list(test_2019['film']),
    show_steps=True,
)

Initial Rankings:
                               Votes
1917 (2019 film)                3051
Parasite (2019 film)            1409
Once Upon a Time in Hollywood    772
The Irishman                     577
Joker (2019 film)                377
Jojo Rabbit                      304
Marriage Story                   191
Little Women (2019 film)         164
Ford v Ferrari                   155 

                               Votes
1917 (2019 film)                3068
Parasite (2019 film)            1426
Once Upon a Time in Hollywood    793
The Irishman                     599
Joker (2019 film)                397
Jojo Rabbit                      321
Marriage Story                   214
Little Women (2019 film)         182 

                               Votes
1917 (2019 film)                3090
Parasite (2019 film)            1449
Once Upon a Time in Hollywood    819
The Irishman                     626
Joker (2019 film)                431
Jojo Rabbit                      349
Marriage Story  