# Association of athletic and IQ scores with $r$

We use random pairings of the athletic and IQ scores to find the
null-world distribution of the correlation coefficients of these scores.

In [None]:
# Load the Numpy library for arrays.
import numpy as np
# Load the Pandas library for loading and selecting data.
import pandas as pd
# Plotting library.
import matplotlib.pyplot as plt

# Number of random re-pairings to simulate for the null world.
n_trials = 10_000

# Random number generator used for all of the shuffling below.
rnd = np.random.default_rng()

# Load the table of athletic and IQ scores from disk.
ath_iq_df = pd.read_csv('data/athletic_iq.csv')

# Pull each score column out as its own (copied) Numpy array.
ath = ath_iq_df['athletic_score'].to_numpy(copy=True)
iq = ath_iq_df['iq_score'].to_numpy(copy=True)

# np.corrcoef gives the correlation matrix for the two sets of scores;
# the [0, 1] entry is the correlation between athletic and IQ scores.
r_matrix = np.corrcoef(ath, iq)
actual_r = r_matrix[0, 1]

# Run the null-world simulation.  Each trial pairs the athletic scores
# with a freshly shuffled copy of the IQ scores, and records the
# correlation coefficient for that random pairing.
results = np.array([
    np.corrcoef(ath, rnd.permuted(iq))[0, 1]
    for _ in range(n_trials)
])

# Show the distribution of the null-world r values as a histogram.
plt.hist(results, bins=25)
plt.title('Random correlation coefficients')
plt.xlabel('Observed r values from random pairing')

# Flag the trials where the random r value was at least as large as
# the r value we actually observed, then count them.
at_least_observed = results >= actual_r
k = at_least_observed.sum()
# Express the count as a proportion of all trials.
kk = k / n_trials
# Report the proportion.
print('Proportion of random pairings giving r >= observed:', kk)