---

<!-- <a href="https://github.com/rraadd88/roux/blob/master/examples/roux_stat_sets.ipynb"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>
 -->
 
## ⬤⬤ Comparison between sets.

In [1]:
# install extra requirements
# !pip install roux[stat]
# loading non-roux requirements
import pandas as pd

**Demo data**

In [2]:
## random state
import numpy as np
# Seed NumPy's legacy global RNG so the randomly generated demo data below
# is the same on every Restart & Run All.
np.random.seed(seed=1)

In [3]:
## sets
# Draw 20 set sizes in [20, 100); each size is also used as that set's label.
set_sizes = np.random.randint(low=20, high=100, size=20)
# One Series of random items (values in [0, 100)) per size.
# NOTE: the dict is keyed by size, so a repeated size replaces the earlier
# set and fewer than 20 sets may remain.
sets = {
    n_items: pd.Series(
        np.random.randint(low=0, high=100, size=n_items),
        name='item',
    )
    for n_items in set_sizes
}
# Stack all sets into one long table; the dict keys become the 'set' column.
df0 = pd.concat(sets, axis=0, names=['set']).reset_index(level=0)
print(f"{df0['set'].nunique()} reference sets containing {df0['item'].nunique()} unique items.")
df0.head(1)

Unnamed: 0,set,item
0,57,14


In [4]:
## tests
test_sizes=np.random.randint(low=20,high=30,size=10)
tests={}
for i in test_sizes:
    tests[i]=pd.Series(np.random.randint(low=0,high=100,size=i),name='item')
df1=pd.concat(tests,axis=0,names=['test']).reset_index(0)
print(f"{df1['test'].nunique()} test sets containing {df1['item'].nunique()} unique items.")
df1.head(1)

Unnamed: 0,test,item
0,25,77


## Enrichments

In [5]:
from roux.stat.sets import get_enrichment
get_enrichment(
    # table containing the items to test
    df1=df1,
    # table containing the items in the sets
    df2=df0,
    # column with the tests
    coltest='test',
    # column with the sets
    colset='set',
    # column with the item ids
    colid='item',
    # background size, i.e. the maximum possible number of unique items in
    # the sets (the demo items are drawn from the 100 integers in [0, 100))
    background=100,
)

Unnamed: 0,test,set,overlap size,overlap %,overlap items,contingency table,Odds ratio,fold change,overlap/test %,set/background,P (hypergeom. test),P (Fisher's exact),Q (hypergeom. test),Q (Fisher's exact)
60,25,21,7,21.875000,"[46, 63, 64, 78, 80, 92, 99]","[[7, 15], [10, 68]]",3.173333,1.871658,31.818182,0.17,0.043079,0.052511,0.255714,0.262554
62,25,26,9,25.000000,"[9, 20, 21, 23, 33, 46, 50, 71, 90]","[[9, 13], [14, 64]]",3.164835,1.778656,40.909091,0.23,0.027676,0.041601,0.255714,0.262554
77,25,95,18,27.272727,"[8, 9, 20, 21, 23, 46, 50, 51, 63, 64, 67, 71,...","[[18, 4], [44, 34]]",3.477273,1.319648,81.818182,0.62,0.024525,0.045219,0.255714,0.262554
65,25,32,10,24.390244,"[6, 8, 21, 23, 46, 71, 77, 85, 91, 92]","[[10, 12], [19, 59]]",2.587719,1.567398,45.454545,0.29,0.051143,0.065957,0.255714,0.263826
83,27,29,11,28.205128,"[28, 29, 31, 32, 35, 55, 63, 66, 67, 78, 93]","[[11, 13], [15, 61]]",3.441026,1.762821,45.833333,0.26,0.013436,0.016344,0.268720,0.163442
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,25,29,5,11.627907,"[33, 46, 63, 67, 78]","[[5, 17], [21, 57]]",0.798319,0.874126,22.727273,0.26,0.743907,0.788800,0.998899,0.901769
70,25,48,7,13.725490,"[46, 50, 63, 64, 90, 92, 99]","[[7, 15], [29, 49]]",0.788506,0.883838,31.818182,0.36,0.759972,0.802494,0.998899,0.901769
73,25,70,11,18.965517,"[20, 21, 50, 51, 64, 71, 77, 80, 90, 92, 99]","[[11, 11], [36, 42]]",1.166667,1.063830,50.000000,0.47,0.468140,0.811592,0.998899,0.901769
72,25,57,10,17.857143,"[8, 9, 21, 23, 50, 63, 64, 67, 71, 80]","[[10, 12], [34, 44]]",1.078431,1.033058,45.454545,0.44,0.532585,1.000000,0.998899,1.000000


## Documentation
[`roux.stat.sets`](https://github.com/rraadd88/roux#module-rouxstatsets)