In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as spstats
from skbio import DistanceMatrix
import json
from glob import glob
import rpy2
import pandas as pd

%matplotlib inline
%load_ext rpy2.ipython
# Load the contents of sample-run.json (presumably per-sample run metadata --
# TODO confirm; `samples` is not referenced in the cells visible here).
# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's locale-dependent default encoding.
with open("sample-run.json", encoding="utf-8") as fh:
    samples = json.load(fh)

In [None]:
def make_trumat():
    """Build the 96 x 96 expected ("true") distance matrix.

    The 96 indices are split into 2 halves of 48 and into 16 consecutive
    groups of 6. Entries are:

    * 0 on the diagonal,
    * 1 between two different indices of the same group of 6,
    * 2 between indices of different groups within the same half,
    * 4 between indices that lie in different halves.

    Returns:
        numpy.ndarray: float64 array of shape (96, 96).
    """
    index = np.arange(96)
    half = index // 48   # which block of 48 each index belongs to
    group = index // 6   # which block of 6 each index belongs to

    same_half = half[:, None] == half[None, :]
    same_group = group[:, None] == group[None, :]

    # Coarsest level first, then overwrite with the finer levels.
    mat = np.where(same_half, 2.0, 4.0)
    mat[same_group] = 1.0
    np.fill_diagonal(mat, 0.0)
    return mat

In [None]:
# Build the expected distance matrix once and reuse it for both the heatmap
# and the reference vector, instead of constructing it twice.
trumat = make_trumat()
plt.imshow(trumat, interpolation='none')
# Condensed form of the expected matrix; this is the reference each computed
# distance matrix is correlated against.
truth = DistanceMatrix(trumat).condensed_form()

In [None]:
scores = []

WIP_SUFFIX = "_wip.dist"
IP_SUFFIX = "_ip.dist"

# Pair every kwip/*_wip.dist file with the kwip/*_ip.dist file that shares its
# prefix. Zipping two independently sorted globs is fragile: the two families
# can sort differently (e.g. prefixes "a" and "a_j"), and zip() silently drops
# the tail when the counts differ. Deriving the IP path from the WIP path
# guarantees that both matrices in a pair come from the same run.
wip_files = sorted(glob("kwip/*" + WIP_SUFFIX))
ip_files = set(glob("kwip/*" + IP_SUFFIX))

pairs = []
for wipf in wip_files:
    ipf = wipf[:-len(WIP_SUFFIX)] + IP_SUFFIX
    if ipf not in ip_files:
        raise ValueError(
            "no matching IP matrix for {!r} (expected {!r})".format(wipf, ipf))
    pairs.append((wipf, ipf))

# Fail loudly on IP matrices that have no WIP counterpart as well.
unpaired_ip = sorted(ip_files.difference(ipf for _, ipf in pairs))
if unpaired_ip:
    raise ValueError(
        "IP matrices without a matching WIP matrix: {}".format(unpaired_ip))

# For each run, correlate both computed distance matrices with the expected
# one. Each row of `scores` is (Pearson WIP, Pearson IP, Spearman WIP,
# Spearman IP); only the correlation coefficient is kept, the p-value is dropped.
for wipf, ipf in pairs:
    wip = DistanceMatrix.read(wipf).condensed_form()
    ip = DistanceMatrix.read(ipf).condensed_form()
    wipr, _ = spstats.pearsonr(truth, wip)
    ipr, _ = spstats.pearsonr(truth, ip)
    wips, _ = spstats.spearmanr(truth, wip)
    ips, _ = spstats.spearmanr(truth, ip)
    scores.append((wipr, ipr, wips, ips))
    

In [None]:
# One row per (WIP, IP) file pair. Column order follows the tuples appended to
# `scores`: Pearson r for WIP and IP, then Spearman rho for WIP and IP.
score_values = np.array(scores)
scoremat = pd.DataFrame(score_values, columns=("WIP", "IP", "WIPs", "IPs"))

In [None]:
%%R -i scoremat
# ggplot2 is needed for the plot in the following R cell, reshape2 for melt().
library(ggplot2)
library(reshape2)

# The %%R magic only auto-prints the value of the final expression in a cell,
# so intermediate summaries must be printed explicitly to show up in the output.
print(summary(scoremat))

# Long format of the two Pearson columns: one row per (Metric, r) observation.
# Columns are selected by name so the reshape does not depend on column order.
sm.melt = melt(scoremat[, c("WIP", "IP")], value.name="r", variable.name='Metric')
print(summary(sm.melt))

# NOTE(review): each row of scoremat holds the WIP and IP score for the same
# file pair, so a paired test (paired=TRUE) may be more appropriate than this
# default Welch two-sample test -- confirm the intended analysis.
t.test(scoremat$WIP, scoremat$IP)

In [None]:
%%R

# Distribution of the Pearson correlation with the expected matrix, one violin
# per metric. Fill is mapped to the discrete Metric: mapping it to the
# continuous y value `r` is not constant within a violin, so it cannot colour
# the shape meaningfully.
# NOTE(review): ylim() removes observations outside [0, 1] before the density
# is estimated; use coord_cartesian(ylim = c(0, 1)) if negative correlations
# can occur and should still contribute -- confirm.
ggplot(sm.melt, aes(x=Metric, y=r)) +
    geom_violin(aes(fill=Metric)) +
    ylim(0, 1) +
    theme_bw()