In [1]:
import numpy as np
import csv
import pandas as pd
import math

# Euclidean Distance Correlation Matrix

Generate a 53x53 matrix that stores the distances between each state for correlation
Generate a 1x53 vector containing the widths of the kernel based on the k closest states
Generate a 53x53 matrix representing the correlation between any two states

In [2]:
def gen_distances(state_demographics):
    dist_array = np.empty([53,53])
    for i in range(len(state_demographics)):
        for j in range(len(state_demographics)):
            dist_array[i,j]= np.linalg.norm(np.array(state_demographics[i])-np.array(state_demographics[j]))
    return dist_array

In [3]:
def gamma(dist_array,k):
    gamma = []
    for state in dist_array:
        k_shortest = np.argpartition(state, k)
        gamma.append(-1/(2*(np.linalg.norm(state[k_shortest[1:k+1]])))**2)    
    return gamma

In [4]:
def rbf_kernel(dist_array, gamma):
    correlation_matrix = np.empty([53,53])
    for i in range(len(dist_array)):
        for j in range(len(dist_array)):
            if i == j:
                correlation_matrix[i,j] = 0
            else:
                if gamma[i] == -math.inf:
                    correlation_matrix[i,j] = 0
                else:
                    correlation_matrix[i,j] = np.exp(gamma[i]*(dist_array[i,j]**2))
    return correlation_matrix


# Seeding State Shifts, but BETTER!
generates arbitrary number of seeds

In [5]:
def run_seeding_state_shifts(state_info, correlation_matrix, num_simulations, num_seeds):

    electoral_college_outcomes = [0 for x in range(539)]
    for sim in range(num_simulations):
        seed = []
        for i in range(num_seeds):
            seed.append(np.random.randint(53))

        seeded_value = [(np.random.normal(state_info[seed[x]][1],(state_info[seed[x]][2])**0.5,1)-state_info[seed[x]][1]) for x in range(num_seeds)]
        state_outcomes = [0 for x in range(53)]
        electoral_counts = [0,0]
        
        
        for i in range(53):
            shift = 0
            weight_sum = 0
            for x in range(num_seeds):
                shift += correlation_matrix[seed[x]][i]*(seeded_value[x])
                if num_seeds > 1:
                    weight_sum += correlation_matrix[seed[x]][i]
                else:
                    weight_sum = 1
            shift /= weight_sum
            
            state_outcomes[i] = np.random.normal(state_info[i][1]+shift,(state_info[i][2])**0.5,1)

        for i in range(len(state_outcomes)):
            if state_outcomes[i]>0.5:
                electoral_counts[0] += state_info[i][3]
                state_info[i][4]+=1

            elif state_outcomes[i]<0.5:
                electoral_counts[1] += state_info[i][3]
                state_info[i][5]+=1

        if electoral_counts[0]>=270:
            outcome_counts[0]+=1
            electoral_college_outcomes[electoral_counts[0]]+=1
        elif electoral_counts[1]>=270:
            outcome_counts[1]+=1
            electoral_college_outcomes[electoral_counts[0]]+=1
        else:
            outcome_counts[2]+=1
            electoral_college_outcomes[electoral_counts[0]]+=1
  

    for i in state_info:
        i[6] = float(i[4])/num_simulations
        i[7] = float(i[5])/num_simulations
 

In [23]:
def run_seeding_fixed_results_with_variation(state_info, correlation_matrix, num_simulations, seed,outcome):
    num_seeds = len(seed)
    electoral_college_outcomes = [0 for x in range(539)]
    for sim in range(num_simulations):
        
        seeded_value = [(np.random.normal(outcome[x],state_info[seed[x]][2]**0.5,1) - outcome[x]) for x in range(num_seeds)]
        state_outcomes = [0 for x in range(53)]
        electoral_counts = [0,0]
        
        
        for i in range(53):
            shift = 0
            weight_sum = 0
            for x in range(num_seeds):
                shift += correlation_matrix[seed[x]][i]*(seeded_value[x])
                if num_seeds > 1:
                    weight_sum += correlation_matrix[seed[x]][i]
                else:
                    weight_sum = 1
            shift /= weight_sum
            
            state_outcomes[i] = np.random.normal(state_info[i][1]+shift,(state_info[i][2])**0.5,1)

        for i in range(len(state_outcomes)):
            if state_outcomes[i]>0.5:
                electoral_counts[0] += state_info[i][3]
                state_info[i][4]+=1

            elif state_outcomes[i]<0.5:
                electoral_counts[1] += state_info[i][3]
                state_info[i][5]+=1

        if electoral_counts[0]>=270:
            outcome_counts[0]+=1
            electoral_college_outcomes[electoral_counts[0]]+=1
        elif electoral_counts[1]>=270:
            outcome_counts[1]+=1
            electoral_college_outcomes[electoral_counts[0]]+=1
        else:
            outcome_counts[2]+=1
            electoral_college_outcomes[electoral_counts[0]]+=1
  

    for i in state_info:
        i[6] = float(i[4])/num_simulations
        i[7] = float(i[5])/num_simulations
 

In [27]:
def run_seeding_fixed_results(state_info, correlation_matrix, num_simulations, seed,outcome):
    num_seeds = len(seed)
    electoral_college_outcomes = [0 for x in range(539)]
    for sim in range(num_simulations):
        
        seeded_value = [(outcome[x] - state_info[seed[x]][1]) for x in range(num_seeds)]
        state_outcomes = [0 for x in range(53)]
        electoral_counts = [0,0]
        
        
        for i in range(53):
            shift = 0
            weight_sum = 0
            for x in range(num_seeds):
                shift += correlation_matrix[seed[x]][i]*(seeded_value[x])
                if num_seeds > 1:
                    weight_sum += correlation_matrix[seed[x]][i]
                else:
                    weight_sum = 1
            shift /= weight_sum
            
            state_outcomes[i] = np.random.normal(state_info[i][1]+shift,(state_info[i][2])**0.5,1)

        for i in range(len(state_outcomes)):
            if state_outcomes[i]>0.5:
                electoral_counts[0] += state_info[i][3]
                state_info[i][4]+=1

            elif state_outcomes[i]<0.5:
                electoral_counts[1] += state_info[i][3]
                state_info[i][5]+=1

        if electoral_counts[0]>=270:
            outcome_counts[0]+=1
            electoral_college_outcomes[electoral_counts[0]]+=1
        elif electoral_counts[1]>=270:
            outcome_counts[1]+=1
            electoral_college_outcomes[electoral_counts[0]]+=1
        else:
            outcome_counts[2]+=1
            electoral_college_outcomes[electoral_counts[0]]+=1
  

    for i in state_info:
        i[6] = float(i[4])/num_simulations
        i[7] = float(i[5])/num_simulations

In [6]:
def run_adding_after_the_fact(state_info, correlation_matrix, num_simulations):
    for sim in range(num_simulations):
        distance = []
        state_outcomes = [0 for x in range(53)]
        electoral_outcome = [0,0]
        for i in range(53):
            state_outcomes[i] = np.random.normal(state_info[i][1], (state_info[i][2])**0.5,1)
            distance.append((float(state_outcomes[i])-state_info[i][1]))

        shifts = correlation_matrix.dot(distance)

        for i in range(53):

            if state_outcomes[i]+(shifts[i]/np.sum(correlation_matrix[i]))>0.5:
                electoral_outcome[0]+=state_info[i][3]
                state_info[i][4]+=1

            else:
                electoral_outcome[1]+=state_info[i][3]
                state_info[i][5]+=1

        if electoral_outcome[0]>=270:
            outcome_counts[0]+=1
            electoral_college_outcomes[electoral_outcome[0]]+=1
        elif electoral_outcome[1]>=270:
            outcome_counts[1]+=1
            electoral_college_outcomes[electoral_outcome[0]]+=1
        else:
            outcome_counts[2]+=1
            electoral_college_outcomes[electoral_outcome[0]]+=1


    for i in state_info:

        i[6] = float(i[4])/num_simulations
        i[7] = float(i[5])/num_simulations

        print(i)

In [6]:
df=pd.read_csv('AveragedPolls - 2020-10-21.csv',float_precision='round_trip')
pd.set_option("display.precision",15)
demdf = pd.read_csv('Standardized_State_priors.csv',float_precision='round_trip')
pd.set_option("display.precision",15)

In [34]:
demographics = []
state_info = []
outcome_counts = [0,0,0]
electoral_college_outcomes = [x for x in range(539)]
for n in range(53): 
    demographics.append([demdf.iloc[n,demdf.columns.get_loc('whitenonHispanicPercent')],demdf.iloc[n,demdf.columns.get_loc('BlackPercent')],demdf.iloc[n,demdf.columns.get_loc('HispanicPercent')],demdf.iloc[n,demdf.columns.get_loc('nonreligiousPercent')],demdf.iloc[n,demdf.columns.get_loc('medianage')],demdf.iloc[n,demdf.columns.get_loc('Urbanicity')],demdf.iloc[n,demdf.columns.get_loc('Percentcollegedegree')]])
    state_info.append([df.iloc[n,df.columns.get_loc('StateName')], df.iloc[n,df.columns.get_loc('StateMean')], df.iloc[n,df.columns.get_loc('Variance')], df.iloc[n,df.columns.get_loc('ElectoralVotes')], 0,0, 0, 0])
    print(n, end = ". ")
    print(state_info[n])

   

    
    

0. ['Alabama', 0.619323510705329, 0.0053533205785766, 9, 0, 0, 0, 0]
1. ['Alaska', 0.520787233901613, 0.00401347051032681, 3, 0, 0, 0, 0]
2. ['Arizona', 0.481754368867197, 0.0047620412357882, 11, 0, 0, 0, 0]
3. ['Arkansas', 0.66909680263556, 0.00506979446206795, 6, 0, 0, 0, 0]
4. ['California', 0.34176597626492, 0.00238142673823776, 55, 0, 0, 0, 0]
5. ['Colorado', 0.441508063802729, 0.00178425013473564, 9, 0, 0, 0, 0]
6. ['Connecticut', 0.41484746012372, 0.00343173142983656, 7, 0, 0, 0, 0]
7. ['Delaware', 0.443293312927267, 0.00400173142983656, 3, 0, 0, 0, 0]
8. ['District of Columbia', 0.110265808256422, 0.00394173142983656, 3, 0, 0, 0, 0]
9. ['Florida', 0.483662379886877, 0.00456192493440979, 29, 0, 0, 0, 0]
10. ['Georgia', 0.497072195657572, 0.00475020285371373, 16, 0, 0, 0, 0]
11. ['Hawaii', 0.349475442176937, 0.00385221895995009, 4, 0, 0, 0, 0]
12. ['Idaho', 0.608204342235883, 0.00394165735576249, 4, 0, 0, 0, 0]
13. ['Illinois', 0.430976627030397, 0.00329014466368014, 20, 0, 0, 0,

In [35]:

distances = gen_distances(demographics)
gammas = gamma(distances, 2)
correlation_matrix = rbf_kernel(distances, gammas)


In [36]:
num_simulations = 10000
num_seeds = 1
states = [45]
fixed_result = [0.49]
#run_seeding_state_shifts(state_info, correlation_matrix, num_simulations, num_seeds)
run_seeding_fixed_results_with_variation(state_info, correlation_matrix, num_simulations, states, fixed_result)

In [37]:
for i in state_info:
    print(i)
print("\nModel Predictions:\n")

candidate_1_win_percentage = outcome_counts[0]/num_simulations * 100
candidate_2_win_percentage = outcome_counts[1]/num_simulations * 100

print("Candidate 1 Win%: " + str(candidate_1_win_percentage))   
print("Candidate 2 Win%: " + str(candidate_2_win_percentage))

['Alabama', 0.619323510705329, 0.0053533205785766, 9, 9406, 594, 0.9406, 0.0594]
['Alaska', 0.520787233901613, 0.00401347051032681, 3, 6175, 3825, 0.6175, 0.3825]
['Arizona', 0.481754368867197, 0.0047620412357882, 11, 4139, 5861, 0.4139, 0.5861]
['Arkansas', 0.66909680263556, 0.00506979446206795, 6, 9897, 103, 0.9897, 0.0103]
['California', 0.34176597626492, 0.00238142673823776, 55, 252, 9748, 0.0252, 0.9748]
['Colorado', 0.441508063802729, 0.00178425013473564, 9, 1555, 8445, 0.1555, 0.8445]
['Connecticut', 0.41484746012372, 0.00343173142983656, 7, 1147, 8853, 0.1147, 0.8853]
['Delaware', 0.443293312927267, 0.00400173142983656, 3, 2083, 7917, 0.2083, 0.7917]
['District of Columbia', 0.110265808256422, 0.00394173142983656, 3, 0, 10000, 0.0, 1.0]
['Florida', 0.483662379886877, 0.00456192493440979, 29, 4249, 5751, 0.4249, 0.5751]
['Georgia', 0.497072195657572, 0.00475020285371373, 16, 4897, 5103, 0.4897, 0.5103]
['Hawaii', 0.349475442176937, 0.00385221895995009, 4, 168, 9832, 0.0168, 0.98