# 10 minutes to Pingouin

## 1. T-test

In [1]:
# Simulate two correlated random variables: n draws from a bivariate
# normal distribution with means 4 and 5, unit variances and covariance 0.6.
import numpy as np
np.random.seed(123)  # fixed seed so the output shown below is reproducible
mean = [4, 5]
cov = [(1, .6), (.6, 1)]
n = 30
# The draw has one row per observation; transposing yields one row per variable.
x, y = np.transpose(np.random.multivariate_normal(mean, cov, size=n))

# T-test between x and y
from pingouin import ttest
ttest(x, y)

Unnamed: 0,T-val,p-val,dof,tail,cohen-d,power,BF10
T-test,-3.401,0.001222,58,two-sided,0.878,0.917,26.155


## 2. Pearson's correlation

In [2]:
# Correlation between the x and y samples generated in section 1.
# No method is passed here; the output row below is labelled "pearson".
from pingouin import corr
corr(x, y)

Unnamed: 0,r,CI95%,r2,adj_r2,p-val,BF10
pearson,0.595,"[0.3, 0.79]",0.354,0.306,0.000527,54.222


## 3. Robust correlation

In [3]:
# Introduce an outlier: overwrite the sixth sample of x in place.
# x was drawn with mean 4 and unit variance, so 18 is an extreme value.
# NOTE: this mutates the x array created in section 1; every later use of x
# sees the outlier until x is regenerated.
x[5] = 18
# Use the robust Shepherd's pi correlation
# (corr was imported in the previous cell).
corr(x, y, method="shepherd")

Unnamed: 0,r,CI95%,r2,adj_r2,p-val
shepherd,0.561,"[0.25, 0.77]",0.315,0.264,0.001543


## 4. Test the normality of the data

In [4]:
from pingouin import test_normality
# For each input array, return a boolean (True if normal) and the associated p-value.
# x still contains the outlier set in section 3 when this cell runs in order.
test_normality(x, y)

(array([False,  True]), array([0.   , 0.552]))

## 5. One-way ANOVA using a pandas DataFrame

In [5]:
# Simulate three groups of n observations each: x and y are correlated
# (bivariate normal), z is an independent normal sample centred on 4.
import pandas as pd
np.random.seed(123)  # fixed seed so the output shown below is reproducible
mean = [4, 6]
cov = [(1, .6), (.6, 1)]
n = 10
x, y = np.transpose(np.random.multivariate_normal(mean, cov, size=n))
z = np.random.normal(loc=4, size=n)

# Long-format DataFrame, one row per observation.
# DV = dependent variable / Group = between-subject factor
df = pd.DataFrame({'Group': np.repeat(['A', 'B', 'C'], 10),
                   'DV': np.concatenate([x, y, z])})

# One-way ANOVA of DV across the levels of Group
from pingouin import anova
stats = anova(dv='DV', between='Group', data=df, detailed=True)
stats

Unnamed: 0,Source,SS,DF,MS,F,p-unc,np2
0,Group,28.995,2,14.498,8.929,0.00105551,0.398
1,Within,43.837,27,1.624,-,-,-


## 6. One-way non-parametric ANOVA (Kruskal-Wallis)

In [6]:
# Kruskal-Wallis test on the long-format DataFrame built in section 5:
# DV is compared across the levels of Group.
from pingouin import kruskal
# Rebinds `stats`, replacing the ANOVA table from the previous section.
stats = kruskal(data=df, dv='DV', between='Group')
stats

Unnamed: 0,Source,ddof1,H,p-unc
Kruskal,Group,2,10.622,0.004937


## 7. Post-hoc tests corrected for multiple comparisons

In [7]:
from pingouin import pairwise_ttests, print_table

# FDR-corrected post hocs with Hedges' g effect size.
# Uses the long-format DataFrame from section 5; the output below has one row
# per pair of Group levels (A-B, A-C, B-C).
posthoc = pairwise_ttests(data=df, dv='DV', between='Group', padjust='fdr_bh',
                          effsize='hedges')

# Pretty printing of table (plain-text output rather than a rendered DataFrame)
print_table(posthoc)


POST HOC TESTS

Type     A    B    Paired      T-val  tail         p-unc    p-corr  p-adjust      BF10    efsize  eftype
-------  ---  ---  --------  -------  ---------  -------  --------  ----------  ------  --------  --------
between  A    B    False      -3.472  two-sided    0.003     0.004  fdr_bh      13.734    -1.487  hedges
between  A    C    False      -0.096  two-sided    0.925     0.925  fdr_bh       0.399    -0.041  hedges
between  B    C    False       3.851  two-sided    0.001     0.004  fdr_bh      26.509     1.650  hedges



## 8. Two-way mixed ANOVA

In [8]:
# Add a "Time" column: within each group of 10 rows, the first five are
# labelled 'Pre' and the last five 'Post'; the pattern repeats for all 3 groups.
time_labels = np.repeat(['Pre', 'Post'], 5)
df['Time'] = np.tile(time_labels, 3)
# Add a subject identifier column: five subjects per group (0-4, 5-9, 10-14),
# each listed twice so that the 'Pre' and 'Post' rows share the same subject.
df['Subject'] = np.concatenate([np.tile(np.arange(first, first + 5), 2)
                                for first in (0, 5, 10)])

# Two-way mixed ANOVA (Group between subjects, Time within subjects);
# the result is also exported to a .csv file.
from pingouin import mixed_anova
stats = mixed_anova(dv='DV', within='Time', between='Group', subject='Subject',
                    data=df, correction=False,
                    export_filename='mixed_anova.csv')
stats

Unnamed: 0,Source,SS,DF1,DF2,MS,F,p-unc,np2,eps
0,Group,28.995,2,12,14.498,8.623,0.004773,0.59,-
1,Time,6.839,1,12,6.839,4.995,0.045204,0.294,1
2,Interaction,0.391,2,12,0.196,0.143,0.868286,0.023,-


## 9. Pairwise correlations between columns of a dataframe

In [9]:
# Wide-format DataFrame with one column per variable, built from the x, y and z
# samples generated in section 5 (10 observations each).
# NOTE: this rebinds `df`, replacing the long-format frame used in sections 5-8;
# re-run section 5 (and the column additions in section 8) before re-executing those cells.
df = pd.DataFrame({'X': x, 'Y': y, 'Z': z})
from pingouin import pairwise_corr
# The output below has one row per pair of the listed columns (X-Y, X-Z, Y-Z).
pairwise_corr(df, columns=['X', 'Y', 'Z'])

Unnamed: 0,X,Y,method,tail,r,CI95%,r2,adj_r2,z,p-unc,BF10
0,X,Y,pearson,two-sided,0.707,"[0.14, 0.92]",0.5,0.357,0.881,0.022187,3.227
1,X,Z,pearson,two-sided,0.283,"[-0.42, 0.77]",0.08,-0.183,0.291,0.42764,0.321
2,Y,Z,pearson,two-sided,0.105,"[-0.56, 0.69]",0.011,-0.271,0.105,0.77223,0.243


## 10. Convert between effect sizes

In [10]:
from pingouin import convert_effsize
# Convert from Cohen's d to Hedges' g
# Arguments: the input effect size (0.4), its type, and the target type.
# nx and ny are presumably the sample sizes of the two groups (confirm in the
# pingouin documentation); the value shown below is consistent with
# 0.4 * (1 - 3 / (4 * (nx + ny) - 9)).
convert_effsize(0.4, 'cohen', 'hedges', nx=10, ny=12)

0.38481012658227853