<a href="https://colab.research.google.com/github/mikexcohen/ANTS_youtube_videos/blob/main/stats_ch16_permutation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sadistics
## Mike X Cohen (sincxpress.com)
### https://www.amazon.com/etc
#### Code for chapter 16

In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt


# The code below defines global figure properties used for publication.
import matplotlib_inline.backend_inline

# display inline figures in vector (SVG) format
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# settings applied to every figure created after this point
figure_defaults = {
    'font.size': 14,               # font size
    'savefig.dpi': 300,            # output resolution
    'axes.titlelocation': 'left',  # title location
    'axes.spines.right': False,    # remove axis bounding box
    'axes.spines.top': False,      # remove axis bounding box
}
plt.rcParams.update(figure_defaults)

# Example in t-test

In [None]:
# number of 'trials' in each condition (note the trial imbalance!)
n1 = 50
n2 = 70

# simulate both conditions as column vectors of normally distributed noise;
# the second condition gets a mean offset of .3
data1 = np.random.randn(n1,1)
data2 = .3 + np.random.randn(n2,1)

# step 1: pool the data by stacking the two conditions along the rows
alldata = np.concatenate([data1, data2], axis=0)

# corresponding condition labels: n1 ones followed by n2 twos
truelabels = np.repeat([1., 2.], [n1, n2])

# observed condition difference (mean of condition 1 minus mean of condition 2)
true_conddif = alldata[truelabels==1].mean() - alldata[truelabels==2].mean()

# step 2: shuffle the labels once
shuflabels = np.random.permutation(truelabels)

# step 3: same mean difference, but grouping the data by the shuffled labels
shufconddif = alldata[shuflabels==1].mean() - alldata[shuflabels==2].mean()

# build a null-hypothesis (H0) distribution of condition differences

# number of iterations for permutation testing
nIterations = 1000

# one shuffled mean difference is stored per iteration
permvals = np.zeros(nIterations)

for permi in range(nIterations):
    # step 2 from above: draw a fresh random assignment of the labels
    shuflabels = np.random.permutation(truelabels)

    # step 3 from above: mean difference under the shuffled grouping
    in_cond1 = shuflabels==1
    in_cond2 = shuflabels==2
    permvals[permi] = alldata[in_cond1].mean() - alldata[in_cond2].mean()





# visualizations: real data, one example shuffle, and the H0 distribution
_,axs = plt.subplots(1,3,figsize=(12,4))

# panel A: the real data, one row per condition (data1 at y=0, data2 at y=1).
# Each dashed mean line spans the row of the data it summarizes.
axs[0].plot(data1,np.zeros(n1),'ko',markersize=12,markerfacecolor=(.4,.4,.4),alpha=.5)
axs[0].plot(data2,np.ones(n2),'ks',markersize=12,markerfacecolor=(.8,.8,.8),alpha=.5)
axs[0].plot([np.mean(data1),np.mean(data1)],[-.3,.3],'k--',linewidth=3)
axs[0].plot([np.mean(data2),np.mean(data2)],[.7,1.3],'k--',linewidth=3)
axs[0].set(ylim=[-1,2],ylabel='Data series',yticks=[0,1],xlabel='Data value')
axs[0].set_title(r'$\bf{A})$  Real data')

# panel B: one example of shuffled data, grouped by the current shuflabels
# (label 1 at y=0, label 2 at y=1), with mean lines on the matching rows
shufgroup1 = alldata[shuflabels==1]
shufgroup2 = alldata[shuflabels==2]
axs[1].plot(shufgroup1,np.zeros(n1),'ko',markersize=12,markerfacecolor=(.4,.4,.4),alpha=.5)
axs[1].plot(shufgroup2,np.ones(n2),'ks',markersize=12,markerfacecolor=(.8,.8,.8),alpha=.5)
axs[1].plot([np.mean(shufgroup1),np.mean(shufgroup1)],[-.3,.3],'k--',linewidth=3)
axs[1].plot([np.mean(shufgroup2),np.mean(shufgroup2)],[.7,1.3],'k--',linewidth=3)
axs[1].set(ylim=[-1,2],ylabel='Data series',yticks=[0,1],yticklabels=['"0"','"1"'],xlabel='Data value')
axs[1].set_title(r'$\bf{B})$  Shuffled data')

# panel C: distribution of shuffled mean differences with the observed value.
# Keep the artist handles so each legend entry is tied to its artist
# explicitly instead of relying on the order matplotlib collects them in.
_,_,permbars = axs[2].hist(permvals,bins=40,color=[.7,.7,.7])
trueline = axs[2].axvline(x=true_conddif,color='k',linestyle='--',linewidth=3)
axs[2].legend([trueline, permbars],['True', 'Shuffled'])
axs[2].set(xlim=[-1,1],xlabel='Mean value',ylabel='Count')
axs[2].set_title(r'$\bf{C})$  True and permuted means')

plt.tight_layout()
plt.savefig('permute_ttestIllustrated.png')
plt.show()