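This notebook demonstrates how to use the `catcoocc` library to collect co-occurrences from a small mushroom dataset, score them with the methods provided by `CatScorer`, and inspect the results as a table and as heatmaps.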

In [24]:
# Install the `catcoocc` package locally from a fresh clone of its repository.
#
# NOTE: in most cases, especially in the playground, Google Colab requires the
# runtime to be restarted (CTRL+M) before it picks up the newly installed
# package. Workflow: run this cell, restart the runtime, then run the
# remaining cells.

# Remove any clone left over from a previous run before cloning again
!rm -rf catcoocc
!git clone https://github.com/tresoldi/catcoocc.git
# Editable (-e) install of the clone; reinstall even if a copy is already
# present (--force-reinstall) and leave dependencies untouched (--no-deps)
!pip3 install --force-reinstall --no-deps -e catcoocc/

Cloning into 'catcoocc'...
remote: Enumerating objects: 151, done.[K
remote: Counting objects: 100% (151/151), done.[K
remote: Compressing objects: 100% (111/111), done.[K
remote: Total 151 (delta 62), reused 120 (delta 35), pack-reused 0[K
Receiving objects: 100% (151/151), 38.60 MiB | 33.96 MiB/s, done.
Resolving deltas: 100% (62/62), done.
Obtaining file:///content/catcoocc/catcoocc
Installing collected packages: catcoocc
  Found existing installation: catcoocc 0.1
    Can't uninstall 'catcoocc'. No files were found to uninstall.
  Running setup.py develop for catcoocc
Successfully installed catcoocc


In [0]:
# Import 3rd party libraries
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import tabulate

# Import own libraries
import catcoocc
from catcoocc.scorer import CatScorer

def plot_scorer(scorer, alpha_x, alpha_y, title=None, font_scale=2, figsize=(25, 25)):
    """
    Draw an annotated seaborn heatmap for a matrix of scores.

    Parameters
    ----------
    scorer
        Data passed as-is to `pd.DataFrame` to build the matrix to plot
        (presumably one of the matrices returned by `scorer2matrices` --
        verify against the caller).
    alpha_x
        Column labels of the matrix.
    alpha_y
        Row labels (index) of the matrix.
    title
        Title of the plot; any falsy value (including the default `None`)
        results in an empty title.
    font_scale
        Font scale handed to `sns.set`; also used, multiplied by the figure
        width, as the font size of the title.
    figsize
        Figure size passed to `plt.figure`.

    The function returns nothing: the plot is drawn on a newly created
    matplotlib figure.
    """
    # Fall back to an empty title when none (or a falsy one) was given
    heading = title or ""

    # Label the rows with `alpha_y` and the columns with `alpha_x`
    frame = pd.DataFrame(scorer, index=alpha_y, columns=alpha_x)

    # NOTE: `sns.set` changes seaborn's settings for subsequent plots too,
    # not only for this figure
    sns.set(font_scale=font_scale, font="FreeMono")

    # New figure with a single subplot that will hold the heatmap
    plt.figure(figsize=figsize)
    axes = plt.subplot(111)

    heatmap = sns.heatmap(
        frame,
        annot=True,
        fmt='.2f',
        linewidths=.5,
        center=0,
        ax=axes,
    )
    # The title size grows with both the font scale and the figure width
    heatmap.set_title(heading, fontsize=font_scale * figsize[0])

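The next cell reads the small mushroom dataset, collects its co-occurrences, and computes each of the scoring methods offered by `CatScorer`. It then prints a table with the two values (`_0` and `_1`) that every method returns for each observed pair.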

In [5]:
# Read the sample data, collect its co-occurrences and build a scorer on them
mushroom_data = catcoocc.read_sequences("catcoocc/docs/mushroom-small.tsv")
mushroom_cooccs = catcoocc.collect_cooccs(mushroom_data)
scorer = catcoocc.scorer.CatScorer(mushroom_cooccs)

# Compute all scoring methods; each result is indexed by pair and holds two
# values per pair (reported below as the `_0` and `_1` columns).
# The boolean arguments select variants of a method: `pmi(True)` is stored as
# `npmi` and the `False` calls as `*_ns` (see the catcoocc documentation for
# the exact meaning of these flags).
mle = scorer.mle()
pmi = scorer.pmi()
npmi = scorer.pmi(True)
chi2 = scorer.chi2()
chi2_ns = scorer.chi2(False)
cramersv = scorer.cramers_v()
cramersv_ns = scorer.cramers_v(False)
fisher = scorer.fisher()
theil_u = scorer.theil_u()
catcoocc_i = scorer.catcoocc_i()
catcoocc_ii = scorer.catcoocc_ii()

# Single source of truth for the table columns: (column label, scores), in
# display order. Headers and rows are both derived from this list so that
# they cannot get out of sync.
labelled_scores = [
    ('mle',        mle),
    ('pmi',        pmi),
    ('npmi',       npmi),
    ('chi2',       chi2),
    ('chi2ns',     chi2_ns),
    ('cramersv',   cramersv),      # label was misspelled 'cremersv'
    ('cramersvns', cramersv_ns),   # label was misspelled 'cremersvns'
    ('fisher',     fisher),
    ('theilu',     theil_u),
    ('catcoocci',  catcoocc_i),
    ('catcooccii', catcoocc_ii),
]

# One `_0` and one `_1` column per scoring method, after the pair itself
headers = ['pair']
for label, _ in labelled_scores:
    headers += [label + '_0', label + '_1']

# One row per observed pair, with every score formatted to four decimals
table = []
for pair in sorted(scorer.obs):
    buf = [pair]
    for _, scores in labelled_scores:
        buf += ["%0.4f" % scores[pair][0], "%0.4f" % scores[pair][1]]
    table.append(buf)

print(tabulate.tabulate(table, headers=headers, tablefmt="github"))

# Convert the `catcoocc_ii` scores into the two matrices and the two lists of
# labels expected by `plot_scorer`
xy, yx, alpha_x, alpha_y = catcoocc.scorer.scorer2matrices(catcoocc_ii)


| pair                    |   mle_0 |   mle_1 |   pmi_0 |   pmi_1 |   npmi_0 |   npmi_1 |   chi2_0 |   chi2_1 |   chi2ns_0 |   chi2ns_1 |   cremersv_0 |   cremersv_1 |   cremersvns_0 |   cremersvns_1 |   fisher_0 |   fisher_1 |   theilu_0 |   theilu_1 |   catcoocci_0 |   catcoocci_1 |   catcooccii_0 |   catcooccii_1 |
|-------------------------|---------|---------|---------|---------|----------|----------|----------|----------|------------|------------|--------------|--------------|----------------|----------------|------------|------------|------------|------------|---------------|---------------|----------------|----------------|
| ('edible', 'bell')      |  0.3846 |  1      |  0.4308 |  0.4308 |   0.3107 |   0.3107 |   1.8315 |   1.8315 |     3.5897 |     3.5897 |       0.2027 |       0.2027 |         0.1987 |         0.1987 |        inf |        inf |     1      |     0.3985 |        0.4308 |        0.1717 |         0.789  |         0.789  |
| ('edible', 'convex')    |  0.4615 |  0