# Voter participation in 1844 election

Solution for the voter participation exercise.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Read the election data set from disk.
voter_df = pd.read_csv('data/election_1844.csv')

# Pull the two columns of interest out as plain NumPy arrays.
participation, spread = (
    np.array(voter_df[column]) for column in ('Participation', 'Spread')
)

# Random number generator, used for shuffling in the permutation test.
rnd = np.random.default_rng()

In [None]:
# Correlation matrix of the two variables; the off-diagonal entry is the
# correlation between participation and spread.  It's -0.425.
corr_matrix = np.corrcoef(participation, spread)
actual_r = corr_matrix[0, 1]
# Display the observed correlation.
actual_r

In [None]:
# Permutation test: how often does a random pairing of participation and
# spread give a correlation at least as negative as the observed one?
n_trials = 10_000
results = np.zeros(n_trials)

for i in range(n_trials):
    # Shuffle the participation rates; this breaks any real pairing with
    # spread.  `permuted` returns a shuffled copy, so the original array
    # is left untouched for the next trial.
    shuffled = rnd.permuted(participation)
    # Compute re-sampled correlation.
    fake_r = np.corrcoef(shuffled, spread)[0, 1]
    # Keep the value in the results.
    results[i] = fake_r

# Show the distribution of correlations from the shuffled data.
plt.hist(results, bins=25)
plt.title('Distribution of shuffled correlations')
plt.xlabel('Correlation with shuffled participation')

# Count the trials when result <= observed (one-tailed, lower tail).
k = np.sum(results <= actual_r)
# Compute the proportion of such trials.
kk = k / n_trials

print('Proportion of shuffled r <= observed:', np.round(kk, 2))