In [1]:
import pickle

import pandas as pd
import numpy as np
import networkx as nx
from scipy.stats import beta
import itertools

import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import rankdata, kendalltau, spearmanr

import plotly.graph_objects as go
from tqdm.notebook import tqdm
from pathlib import Path

# Global plot styling: seaborn "paper" context with the "white" style.
sns.set_theme("paper", "white")

In [2]:
# User table; the first CSV column is used as the frame's index.
df_users = pd.read_csv("./DSock/Active_Users.csv", index_col=0)
display(df_users.head())

# Six UTC timestamps at pandas' default (daily) frequency, starting from the literal below.
# NOTE(review): presumably the survey-round boundaries; `timeline` is not referenced
# in the cells visible here — confirm it is still needed.
timeline = pd.date_range("2020-09-03-03:59:59", periods=6, tz="utc")

Unnamed: 0,MTurk_ID,username,id,isMod,isObserver,isPuppet,n_reports,puppet,report,issued_posts,...,Q17,Q18,Q19,Q20,Q21,Q22,Q23,Q24,Q25,Strategy
1,A10IUXCZBNYUXC,Jacob_1907,2658,f,f,f,17,0,1,25,...,2.0,2.0,2.0,1.0,1.0,2.0,1.0,4.0,2.0,
2,A15F2JTVAJXITI,Luke_3376,2297,f,t,f,0,0,0,0,...,1.0,3.0,1.0,3.0,1.0,3.0,1.0,4.0,1.0,
3,A173A97OFDAX9F,Claire_679,2601,f,f,f,0,0,0,4,...,1.0,2.0,2.0,1.0,1.0,2.0,1.0,3.0,3.0,
4,A18XVBWB2A270I,Katherine_2323,2338,f,t,f,0,0,0,0,...,1.0,2.0,4.0,2.0,2.0,2.0,1.0,1.0,3.0,
5,A1AIMKA54UBYIQ,Asia_4906,2638,f,f,f,1,0,1,9,...,3.0,3.0,2.0,1.0,2.0,2.0,2.0,4.0,3.0,


Unnamed: 0,MTurk_ID,username,id,Role,survey,mentions,per_rank
0,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,1,0,0.0
1,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,2,0,0.0
2,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,3,0,0.0
3,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,4,0,0.0
4,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,5,0,0.0


In [3]:
# Raw interaction tables. Comments, views and likes drop rows that lack the
# user/post ids used as keys when the interaction graph is built.
df_posts = pd.read_csv("./DSock/direct_influence/posts_with_senti_and_topic.csv", parse_dates=["createdAt"])
df_comts = pd.read_csv("./DSock/direct_influence/comments_with_senti_and_topic.csv", delimiter=",", parse_dates=["createdAt"]).dropna(subset=["CommenterId", "PostId"])
df_views = pd.read_csv("./DSock/postViews.csv", delimiter="|", parse_dates=["createdAt"]).dropna(subset=["UserId", "PostId"])
df_likes = pd.read_csv("./DSock/Likes.csv", delimiter="|", parse_dates=["createdAt"]).dropna(subset=["UserId", "PostId"])
df_repts = pd.read_csv("./DSock/ReportUsers.csv", delimiter="|", parse_dates=["createdAt"])

print(f"posts: {df_posts.shape}, comments: {df_comts.shape}, views: {df_views.shape}, likes: {df_likes.shape}, reports: {df_repts.shape}")

# With the missing keys removed, the id columns can be cast to int. Node names are
# built as f"u{id}" / f"p{id}", so a float id would render as "u123.0" and silently
# fail every membership test against the graph.
df_likes["UserId"] = df_likes["UserId"].astype(int)
df_likes["PostId"] = df_likes["PostId"].astype(int)

df_comts["CommenterId"] = df_comts["CommenterId"].astype(int)
df_comts["PostId"] = df_comts["PostId"].astype(int)

# Views go through the same dropna and feed the same node-name lookups, so they get
# the same cast as likes and comments (a no-op when the columns are already integers).
df_views["UserId"] = df_views["UserId"].astype(int)
df_views["PostId"] = df_views["PostId"].astype(int)

# Attach the majority annotation labels. merge() defaults to an inner join, so
# posts/comments without an annotation row are dropped here.
df_annot_posts = pd.read_csv("./DSock/posts_anno_labels.csv")
df_posts = df_posts.merge(df_annot_posts[["post_id", "majority_topic_label", "majority_sent_label"]], left_on="PostID", right_on="post_id")

df_annot_comts = pd.read_csv("./DSock/comments_anno_labels.csv")
df_comts = df_comts.merge(df_annot_comts[["comment_id", "majority_topic_label", "majority_sent_label"]], left_on="id", right_on="comment_id")

posts: (1057, 11), comments: (1615, 11), views: (234178, 10), likes: (1034, 6), reports: (46, 6)


In [4]:
# Build the directed interaction graph. Node names carry a one-letter prefix:
# "u" user, "p" post, "c" comment, "l" like, "v" view.
G = nx.DiGraph()

# User nodes. The CSV flags are the strings "t"/"f"; "color" is a role label in which
# observer takes precedence over sockpuppet, and everyone else is a participant.
for u, uname, obsr, sock in df_users[["id", "username", "isObserver", "isPuppet"]].values:
    G.add_node(f"u{u}", id=u, kind="user", name=uname, observer=(obsr == "t"), sock=(sock == "t"))
    if obsr == "t":
        G.nodes[f"u{u}"]["color"] = "obsr"
    elif sock == "t":
        G.nodes[f"u{u}"]["color"] = "sock"
    else:
        G.nodes[f"u{u}"]["color"] = "part"

# Post nodes, only for posts whose author is a known user. No user<->post edge is added here.
for p, u, t, polar, score, topic in df_posts[["PostID", "AuthorId", "createdAt", "majority_sent_label", "scores", "majority_topic_label"]].values:
    if f"u{u}" in G.nodes:
        G.add_node(f"p{p}", id=p, kind="post", time=t, user=u, polar=polar, score=score, topic=topic)

# Comment nodes, only when both the commenter and the commented post are in the graph.
# Comment and post are linked in both directions with "known" edges.
for c, u, p, t, polar, score, topic in df_comts[["id", "CommenterId", "PostId", "createdAt", "majority_sent_label", "scores", "majority_topic_label"]].values:
    if f"u{u}" in G.nodes and f"p{p}" in G.nodes:
        G.add_node(f"c{c}", id=c, kind="comt", time=t, user=u, polar=polar, score=score, topic=topic)
        G.add_edge(f"c{c}", f"p{p}", kind="known", time=t, weight=1)
        G.add_edge(f"p{p}", f"c{c}", kind="known", time=t, weight=1)

# Like nodes: user -> like ("infer") -> post ("known"). Topic and polarity are copied
# from the liked post and the score is fixed at 1.
for l, u, p, t in df_likes[["id", "UserId", "PostId", "createdAt"]].values:
    if f"u{u}" in G.nodes and f"p{p}" in G.nodes:
        G.add_node(f"l{l}", user=f"u{u}", post=f"p{p}", time=t, id=l, kind="like",
                   topic=G.nodes[f"p{p}"]["topic"], polar=G.nodes[f"p{p}"]["polar"], score=1)
        G.add_edge(f"u{u}", f"l{l}", time=t, weight=1, kind="infer")
        G.add_edge(f"l{l}", f"p{p}", time=t, weight=1, kind="known")

# View nodes, wired exactly like likes.
# Note: like/view nodes store "user" as the prefixed node name, whereas post/comment
# nodes store the raw id.
for u, p, t, v in df_views[["UserId", "PostId", "createdAt", "id"]].values:
    if f"u{u}" in G.nodes and f"p{p}" in G.nodes:
        G.add_node(f"v{v}", user=f"u{u}", post=f"p{p}", time=t, id=v, kind="view",
                   topic=G.nodes[f"p{p}"]["topic"], polar=G.nodes[f"p{p}"]["polar"], score=1)
        G.add_edge(f"u{u}", f"v{v}", time=t, weight=1, kind="infer")
        G.add_edge(f"v{v}", f"p{p}", time=t, weight=1, kind="known")

# Index every user's view nodes (second pass over df_views with the same filter as above).
user_view = {u: set() for u in G if u[0] == "u"}
for u, p, t, v in df_views[["UserId", "PostId", "createdAt", "id"]].values:
    if f"u{u}" in G.nodes and f"p{p}" in G.nodes:
        user_view[f"u{u}"].add(f"v{v}")

# For each post, add an "infer" edge from the post to every view its author made at or
# before the post's creation time. Only edges between existing nodes are added, so the
# node set being iterated does not change.
for p in tqdm(G):
    if p[0] == "p":
        u = f"u{G.nodes[p]['user']}"
        for v in user_view[u]:
            if G.nodes[v]["time"] <= G.nodes[p]["time"]:
                G.add_edge(p, v, time=G.nodes[v]["time"], weight=1, kind="infer")

HBox(children=(FloatProgress(value=0.0, max=218011.0), HTML(value='')))




In [5]:
# Load the stored results for one parameter setting (file name 0.9-0.5-0.5). Later cells
# index `d` with (node_name, t, topic, polar, pr_alpha, beta_a, beta_b) tuples.
# NOTE(review): pickle.load can execute arbitrary code — only open files produced locally.
with open("res/pagerank/0.9-0.5-0.5.pkl", "rb") as fp:
    d = pickle.load(fp)

In [6]:
# Spot check: show the row of the user whose id is 2340.
df_users[df_users["id"] == 2340]

Unnamed: 0,MTurk_ID,username,id,isMod,isObserver,isPuppet,n_reports,puppet,report,issued_posts,...,Q17,Q18,Q19,Q20,Q21,Q22,Q23,Q24,Q25,Strategy
7,A1CGW3DQ5JRSJH,Asia_3576,2340,f,t,f,0,0,0,0,...,1.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,3.0,


In [7]:
# Ids of every user in the table (observers included); this fixes the order of the rank vectors.
user_list = df_users["id"].tolist()
# Disabled alternative: rank non-observers only.
# user_list = df_users[df_users["isObserver"] == "f"]["id"].tolist()

In [8]:
# Graph node names ("u<id>") of all observer accounts.
obsr_list = ["u{}".format(uid) for uid in df_users.loc[df_users["isObserver"] == "t", "id"]]

# Parameter setting whose stored results are inspected below.
# NOTE(review): presumably the PageRank damping factor and the two Beta parameters — confirm.
pr_alpha, beta_a, beta_b = 0.9, 0.5, 0.5

# One result key per (observer, t in 1..5, topic in 0..7, polarity in {0, 2}) combination,
# each extended with the parameter setting. The observer varies slowest, polarity fastest.
keyq = [
    (*combo, pr_alpha, beta_a, beta_b)
    for combo in itertools.product(obsr_list, range(1, 6), range(8), [0, 2])
]

In [9]:
# Per-key entries stored under "full_graph" and "exps_graph" in the loaded results.
# NOTE(review): presumably dicts of graph statistics (nodes, edges, density, ...) — confirm.
property_full = {k: d[k]["full_graph"] for k in tqdm(keyq)}
property_exps = {k: d[k]["exps_graph"] for k in tqdm(keyq)}

# One row per result key.
df_full = pd.DataFrame.from_dict(property_full, orient="index")
df_exps = pd.DataFrame.from_dict(property_exps, orient="index")

HBox(children=(FloatProgress(value=0.0, max=2720.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2720.0), HTML(value='')))




In [10]:
# Shared seaborn "Set1" palette, indexed by position in the plots; the bare name displays it.
use_color = sns.color_palette("Set1")
use_color

In [1]:
def plot_hist(name="nodes", xlabel="", ylabel="", title=""):
    """Overlay the distribution of one graph statistic for both graph variants.

    Reads the module-level frames ``df_full`` and ``df_exps`` and the palette
    ``use_color``.

    Args:
        name: Column to plot; must be present in both frames.
        xlabel: X-axis label. Falls back to ``name`` when empty.
        ylabel: Y-axis label. seaborn's own label is kept when empty.
        title: Axes title. When empty, a mean/std summary of both columns is used.

    Returns:
        The ``(fig, ax)`` pair of the new 4x4 inch figure.
    """
    fig = plt.figure(figsize=(4, 4))
    ax = fig.add_subplot()

    full_values = df_full[name]
    exps_values = df_exps[name]

    # The two histograms share bin edges so they are comparable. The edges must span
    # BOTH samples: values outside the edges passed to histplot are left out of the
    # plot, which happened whenever df_exps exceeded df_full's range. NaNs are dropped
    # only for the edge computation because np.histogram cannot derive a range from them.
    pooled = pd.concat([full_values, exps_values]).dropna()
    _, bins = np.histogram(pooled, bins=10)

    sns.histplot(full_values, bins=bins, ax=ax, stat="probability", label="Full Influence Graph", color=use_color[1], alpha=0.7, kde=True)
    sns.histplot(exps_values, bins=bins, ax=ax, stat="probability", label="Express only", color=use_color[4], alpha=0.7, kde=True)

    default_title = f"FIG mean {full_values.mean():.2f} std {full_values.std():.2f}, Expression mean {exps_values.mean():.2f} std {exps_values.std():.2f}"
    # Honour the caller-supplied labels; previously these arguments were ignored.
    ax.set_title(title or default_title)
    ax.set_xlabel(xlabel or name)
    if ylabel:
        ax.set_ylabel(ylabel)
    ax.legend()

    return fig, ax

# One histogram per graph statistic. plot_hist returns a (fig, ax) pair, so it is
# unpacked here instead of binding the whole tuple to `fig`.
for metric in ("nodes", "edges", "density", "diameter", "degree"):
    fig, ax = plot_hist(name=metric)

NameError: name 'plt' is not defined

In [30]:
# Column-wise mean and standard deviation of every graph statistic for both graph
# variants, transposed so that each summary is one row.
pd.DataFrame.from_dict({
    "FIG mean": df_full.mean(axis=0),
    "FIG std": df_full.std(axis=0),
    "Expression only mean": df_exps.mean(axis=0),
    "Expression only std": df_exps.std(axis=0),
}).T

Unnamed: 0,nodes,edges,components,density,diameter,degree
FIG mean,192.05,544.962868,1.0,0.025622,3.313971,1.172803
FIG std,499.987779,1606.806172,0.0,0.052713,3.292299,1.153502
Expression only mean,16.660294,18.180882,7.569853,0.048922,0.0,0.603399
Expression only std,27.073192,30.929591,12.384141,0.073348,0.0,0.592636


In [31]:
# Parameter grid: every (p, a, b) combination, p varying slowest and b fastest.
# The value lists are deliberately not sorted — their order determines the order of `pab`.
pab = list(itertools.product(
    [0.9, 0.7, 0.5, 0.3, 0.1],
    [0.5, 0.9, 0.7, 0.3, 0.1],
    [0.5, 0.9, 0.7, 0.3, 0.1],
))
# Graph node names ("u<id>") of all observer accounts.
# NOTE(review): same expression as the earlier obsr_list definition in this notebook.
obsr_list = [f"u{u}" for u in df_users[df_users["isObserver"] == "t"]["id"]]

In [32]:
def pr2rank(pr_value):
    """Aggregate per-node scores into a per-user ranking (rank 1 = highest total).

    Scores of post ("p...") and comment ("c...") nodes are summed onto the user
    stored in the node's "user" attribute of the module-level graph ``G``; all
    other nodes are ignored. Users are taken from the module-level ``user_list``.

    Args:
        pr_value: Mapping of graph node name -> score.

    Returns:
        dict of user id -> rank. Ties share the largest rank of their group
        (``rankdata`` with method "max"), so users with no score all get the
        worst rank.
    """
    totals = dict.fromkeys(user_list, 0)
    for node in pr_value:
        if node[0] != "p" and node[0] != "c":
            continue
        author = G.nodes[node]["user"]
        totals[author] += pr_value[node]

    # rankdata ranks ascending, so negate to put the largest total first.
    negated = [-totals[uid] for uid in user_list]
    ranks = rankdata(negated, method="max")
    return {uid: rank for uid, rank in zip(user_list, ranks)}

def get_params(p, a, b):
    """Load the stored results of one parameter setting and rank the users for every key.

    Args:
        p, a, b: Parameter values. They select the file ``res/pagerank/{p}-{a}-{b}.pkl``
            and form the last three components of every result key.

    Returns:
        List of ``(key, ranking)`` pairs, one per (observer, t in 1..5, topic in 0..7,
        polarity in {0, 2}) combination, where ``ranking`` is ``pr2rank`` applied to
        the key's "pr" entry; ``None`` when the result file does not exist.
    """
    result_file = Path(f"res/pagerank/{p}-{a}-{b}.pkl")
    if result_file.exists():
        # NOTE(review): pickle.load can execute arbitrary code — trusted files only.
        with result_file.open("rb") as handle:
            results = pickle.load(handle)

        ranked = []
        for observer in obsr_list:
            for t in range(1, 6):
                for topic in range(8):
                    for polar in (0, 2):
                        key = (observer, t, topic, polar, p, a, b)
                        ranked.append((key, pr2rank(results[key]["pr"])))
        return ranked
    return None

# Rank the users for every setting in the grid; settings without a result file yield None.
ret = [get_params(*tup) for tup in tqdm(pab)]

HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))




In [51]:
# Flatten the per-setting results (skipping settings that had no result file) into
# parallel key/ranking lists and a single key -> ranking lookup.
ranked_pairs = [pair for r in ret if r is not None for pair in r]
keys = [key for key, _ in ranked_pairs]
values = [ranking for _, ranking in ranked_pairs]

rank_dict = dict(ranked_pairs)

In [39]:
# example = pr2rank(d[('u2337', 1, 4, 0, 0.9, 0.5, 0.5)]["pr"])

# rank_dict = {k: pr2rank(d[k]["pr"]) for k in tqdm(keyq)}

HBox(children=(FloatProgress(value=0.0, max=2720.0), HTML(value='')))




In [52]:
# One row per result key (the key tuple becomes the row index), one column per user id;
# each cell is that user's rank for that key.
df_inf = pd.DataFrame.from_dict(rank_dict, orient="index")
display(df_inf.head())




Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,2658,2297,2601,2338,2638,2701,2340,2719,2667,2342,...,2636,2734,2657,2441,2714,2313,2337,2350,2703,2749
u2297,1,0,0,0.9,0.5,0.5,116,116,116,116,116,116,116,116,116,116,...,116,116,116,116,116,116,116,116,116,116
u2297,1,0,2,0.9,0.5,0.5,116,116,116,116,116,2,116,6,116,116,...,4,116,116,116,116,116,116,116,116,116
u2297,1,1,0,0.9,0.5,0.5,116,116,116,116,116,116,116,116,116,116,...,116,116,116,116,116,116,116,116,116,116
u2297,1,1,2,0.9,0.5,0.5,116,116,116,116,2,3,116,4,116,116,...,116,116,116,116,116,116,116,116,116,116
u2297,1,2,0,0.9,0.5,0.5,116,116,116,116,116,116,116,116,116,116,...,116,116,116,116,116,116,116,116,116,116


In [54]:
# Average every user's rank over observers, topics and polarities by grouping on index
# levels 1 (t, the survey round) and 4-6 (the parameter setting).
# DataFrame.mean(level=...) was deprecated in pandas 1.3 and removed in 2.0;
# groupby(level=...).mean() is the documented replacement. sort=False keeps the groups
# in first-appearance order, which is what the level= form produced.
avg_inf = df_inf.groupby(level=[1, 4, 5, 6], sort=False).mean()
display(avg_inf)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,2658,2297,2601,2338,2638,2701,2340,2719,2667,2342,...,2636,2734,2657,2441,2714,2313,2337,2350,2703,2749
1,0.9,0.5,0.5,115.586397,116.0,115.794118,116.0,105.522059,105.573529,116.0,95.658088,110.761029,116.0,...,110.869485,116.000000,105.430147,116.000000,116.000000,116.0,116.0,116.0,116.000000,116.0
2,0.9,0.5,0.5,103.716912,116.0,111.257353,116.0,107.847426,96.628676,116.0,97.391544,104.808824,116.0,...,110.279412,116.000000,105.220588,110.906250,112.606618,116.0,116.0,116.0,110.461397,116.0
3,0.9,0.5,0.5,103.810662,116.0,106.261029,116.0,109.505515,94.106618,116.0,100.913603,91.935662,116.0,...,110.656250,116.000000,107.970588,110.726103,111.398897,116.0,116.0,116.0,110.676471,116.0
4,0.9,0.5,0.5,106.762868,116.0,95.659926,116.0,104.268382,93.275735,116.0,96.922794,89.104779,116.0,...,110.264706,101.744485,106.698529,110.343750,110.957721,116.0,116.0,116.0,105.419118,116.0
5,0.9,0.5,0.5,105.373162,116.0,91.669118,116.0,103.643382,86.884191,116.0,96.125000,92.049632,116.0,...,109.847426,100.610294,106.698529,112.163603,110.882353,116.0,116.0,116.0,99.702206,116.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,0.1,0.1,0.1,115.582721,116.0,115.792279,116.0,105.512868,105.625000,116.0,95.580882,110.806985,116.0,...,110.889706,116.000000,105.468750,116.000000,116.000000,116.0,116.0,116.0,116.000000,116.0
2,0.1,0.1,0.1,103.755515,116.0,111.266544,116.0,107.814338,96.762868,116.0,97.330882,104.808824,116.0,...,110.306985,116.000000,105.349265,110.841912,112.580882,116.0,116.0,116.0,110.696691,116.0
3,0.1,0.1,0.1,103.851103,116.0,106.544118,116.0,109.564338,94.051471,116.0,100.937500,91.933824,116.0,...,110.707721,116.000000,108.130515,110.645221,110.926471,116.0,116.0,116.0,111.064338,116.0
4,0.1,0.1,0.1,106.841912,116.0,95.343750,116.0,104.360294,93.363971,116.0,97.246324,89.108456,116.0,...,110.345588,101.395221,106.904412,110.235294,110.575368,116.0,116.0,116.0,105.705882,116.0


In [42]:
# Parameter settings that actually produced results (last three components of each
# result key), listed in the grid order of `pab`.
finished_settings = {key[4:] for key in keys}
pab_done = [tup for tup in pab if tup in finished_settings]

In [109]:
# Survey-based influence table, one row per (user, survey); used below via its
# "id", "survey" and "per_rank" columns.
df_user_express = pd.read_csv("./DSock/User_Expressed_Influence_Rank.csv")
display(df_user_express.head())

# Same layout for the active-user file; used below via "id", "survey" and "mentions".
df_active_user_express = pd.read_csv("./DSock/Active_User_Expressed_Influence_Rank.csv")
display(df_active_user_express.head())

Unnamed: 0,MTurk_ID,username,id,Role,survey,mentions,per_rank
0,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,1,0,0.0
1,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,2,0,0.0
2,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,3,0,0.0
3,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,4,0,0.0
4,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,5,0,0.0


Unnamed: 0,MTurk_ID,username,id,Role,survey,mentions
0,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,1,0
1,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,2,0
2,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,3,0
3,APRZ7BR8C0ZMQ,Allison_2018,2389,sockpuppet,4,0
4,AKK6H2YXK4RF6,Allison_2019,2627,participant,1,5


In [57]:
# For every finished parameter setting, look up the averaged model rank of user `n` in
# survey round `t`, following the row order of df_user_express.
df_cor = {tup: [avg_inf.loc[(t, *tup), n] for n, t in df_user_express[["id", "survey"]].values] for tup in tqdm(pab_done)}

# One column per (p, a, b) setting; rows line up positionally with df_user_express.
# Note that the name df_cor is rebound from a dict to a DataFrame here.
df_cor = pd.DataFrame.from_dict(df_cor)

HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))




In [59]:
def jaccard(col1, col2, num=10):
    """Jaccard similarity between the top-``num`` entries of two Series.

    The "top" entries are the index labels of the ``num`` largest values of each
    Series, so both inputs must share an index for the overlap to be meaningful.

    Args:
        col1: First pandas Series of scores.
        col2: Second pandas Series of scores.
        num: Number of largest values taken from each Series.

    Returns:
        ``|A & B| / |A | B|`` as a float. Returns 0.0 when both top sets are empty
        (empty Series or ``num`` of 0) instead of raising ``ZeroDivisionError``.
    """
    top1 = set(col1.nlargest(num).index)
    top2 = set(col2.nlargest(num).index)
    union = top1 | top2
    if not union:
        # Nothing selected on either side: report no overlap instead of dividing by zero.
        return 0.0
    return len(top1 & top2) / len(union)

def rank_metrics(col1, col2):
    """Compare two score columns with rank correlations and top-N overlaps.

    Args:
        col1: First sequence/Series of scores.
        col2: Second sequence/Series of scores, aligned with ``col1``.

    Returns:
        dict keyed by metric name; every value is a dict with "correlation" and
        "pvalue". "spearman" and "kendaltau" come from scipy; "jaccard-N"
        (N in 10, 20, 30, 40, 50, 100) holds the top-N Jaccard overlap with a
        ``None`` p-value.
    """
    metrics = {}

    for label, stat_fn in (("spearman", spearmanr), ("kendaltau", kendalltau)):
        metrics[label] = dict(zip(["correlation", "pvalue"], stat_fn(col1, col2)))

    for top_n in (10, 20, 30, 40, 50, 100):
        metrics[f"jaccard-{top_n}"] = {"correlation": jaccard(col1, col2, top_n), "pvalue": None}

    return metrics

# Example for the (0.9, 0.5, 0.5) setting.
# Top-10 overlap: the model's averaged ranks are converted to percentiles with
# ascending=False, so the smallest averaged rank receives the largest percentile.
print(jaccard(df_user_express["per_rank"], df_cor[(0.9, 0.5,0.5)].rank(ascending=False, pct=True, method="max")))
# All metrics on the raw averaged ranks; per_rank is negated here instead.
# NOTE(review): presumably so both columns are ordered in the same direction — confirm.
print(rank_metrics(-df_user_express["per_rank"], df_cor[(0.9, 0.5, 0.5)]))

0.05263157894736842
{'spearman': {'correlation': 0.45650526161253324, 'pvalue': 3.366528726313494e-31}, 'kendaltau': {'correlation': 0.38717253932724155, 'pvalue': 9.74497386056816e-29}, 'jaccard-10': {'correlation': 0.05263157894736842, 'pvalue': None}, 'jaccard-20': {'correlation': 0.02564102564102564, 'pvalue': None}, 'jaccard-30': {'correlation': 0.1111111111111111, 'pvalue': None}, 'jaccard-40': {'correlation': 0.19402985074626866, 'pvalue': None}, 'jaccard-50': {'correlation': 0.28205128205128205, 'pvalue': None}, 'jaccard-100': {'correlation': 0.26582278481012656, 'pvalue': None}}


In [60]:
# Agreement between the surveyed per_rank and the model's percentile ranks, for every
# finished parameter setting.
res_all = {tup: rank_metrics(df_user_express["per_rank"], df_cor[tup].rank(ascending=False, pct=True, method="max")) for tup in tqdm(pab_done)}

# Stack the per-setting results: rows are (p, a, b, "correlation" | "pvalue"),
# columns are the metric names.
df_res = pd.concat([pd.DataFrame(res_all[tup]) for tup in tqdm(pab_done)], keys=pab_done)

# Best value of each metric over all settings (only the "correlation" rows).
print(df_res.loc[(slice(None), slice(None), slice(None), "correlation")].max())

display(df_res)

HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


spearman       0.459230
kendaltau      0.389651
jaccard-10     0.052632
jaccard-20     0.081081
jaccard-30     0.153846
jaccard-40     0.230769
jaccard-50     0.282051
jaccard-100    0.290323
dtype: float64


Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,spearman,kendaltau,jaccard-10,jaccard-20,jaccard-30,jaccard-40,jaccard-50,jaccard-100
0.9,0.5,0.5,correlation,4.565053e-01,3.871725e-01,0.052632,0.081081,0.132075,0.230769,0.265823,0.290323
0.9,0.5,0.5,pvalue,3.366529e-31,9.744974e-29,,,,,,
0.9,0.5,0.9,correlation,4.565174e-01,3.871873e-01,0.052632,0.081081,0.132075,0.230769,0.265823,0.290323
0.9,0.5,0.9,pvalue,3.352847e-31,9.707810e-29,,,,,,
0.9,0.5,0.7,correlation,4.565593e-01,3.872199e-01,0.052632,0.081081,0.132075,0.230769,0.265823,0.290323
...,...,...,...,...,...,...,...,...,...,...,...
0.1,0.1,0.7,pvalue,1.382394e-31,4.480158e-29,,,,,,
0.1,0.1,0.3,correlation,4.591878e-01,3.896135e-01,0.052632,0.081081,0.153846,0.230769,0.282051,0.290323
0.1,0.1,0.3,pvalue,1.363346e-31,4.428855e-29,,,,,,
0.1,0.1,0.1,correlation,4.592031e-01,3.896268e-01,0.052632,0.081081,0.153846,0.230769,0.282051,0.290323


In [65]:
# Parameter setting that attains the maximum of each metric (first one on ties).
idxmax = df_res.loc[(slice(None), slice(None), slice(None), "correlation")].idxmax()
print(idxmax)

spearman       (0.1, 0.9, 0.3)
kendaltau      (0.1, 0.9, 0.3)
jaccard-10     (0.9, 0.5, 0.5)
jaccard-20     (0.9, 0.5, 0.5)
jaccard-30     (0.1, 0.5, 0.5)
jaccard-40     (0.9, 0.5, 0.5)
jaccard-50     (0.7, 0.5, 0.5)
jaccard-100    (0.9, 0.5, 0.5)
dtype: object


In [104]:
# Full result rows (correlation and p-value) for each distinct best setting; set()
# removes settings that win on several metrics.
pd.concat([df_res.loc[(*ind, slice(None)), :] for ind in set(idxmax)], axis=0)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,spearman,kendaltau,jaccard-10,jaccard-20,jaccard-30,jaccard-40,jaccard-50,jaccard-100
0.1,0.9,0.3,correlation,0.4592302,0.3896505,0.052632,0.081081,0.153846,0.230769,0.282051,0.290323
0.1,0.9,0.3,pvalue,1.34394e-31,4.378114e-29,,,,,,
0.1,0.5,0.5,correlation,0.4591889,0.3896194,0.052632,0.081081,0.153846,0.230769,0.282051,0.290323
0.1,0.5,0.5,pvalue,1.3628480000000001e-31,4.4288140000000004e-29,,,,,,
0.9,0.5,0.5,correlation,0.4565053,0.3871725,0.052632,0.081081,0.132075,0.230769,0.265823,0.290323
0.9,0.5,0.5,pvalue,3.366529e-31,9.744974000000001e-29,,,,,,
0.7,0.5,0.5,correlation,0.4571133,0.3876835,0.052632,0.081081,0.132075,0.230769,0.282051,0.290323
0.7,0.5,0.5,pvalue,2.744736e-31,8.269218e-29,,,,,,


In [None]:
# Show three decimals in displayed frames. The fully qualified key is required: option
# names are matched as patterns, and the short form "precision" matches more than one
# option in pandas >= 1.4 (display.precision and styler.format.precision), which raises
# OptionError.
pd.set_option("display.precision", 3)

In [119]:
# Same evaluation as above, repeated for the active-user table with the "mentions"
# column as the reference signal. This rebinds df_cor, res_all, df_res and idxmax.
# NOTE(review): this cell duplicates the earlier evaluation steps; consider a shared helper.
df_cor = {tup: [avg_inf.loc[(t, *tup), n] for n, t in df_active_user_express[["id", "survey"]].values] for tup in tqdm(pab_done)}

# One column per (p, a, b) setting; rows line up positionally with df_active_user_express.
df_cor = pd.DataFrame.from_dict(df_cor)

# Mentions compared with the model's percentile ranks (smallest averaged rank -> largest percentile).
res_all = {tup: rank_metrics(df_active_user_express["mentions"], df_cor[tup].rank(ascending=False, pct=True, method="max")) for tup in tqdm(pab_done)}

# Rows are (p, a, b, "correlation" | "pvalue"); columns are the metric names.
df_res = pd.concat([pd.DataFrame(res_all[tup]) for tup in tqdm(pab_done)], keys=pab_done)

# Best value of each metric over all settings.
print(df_res.loc[(slice(None), slice(None), slice(None), "correlation")].max())

display(df_res)

# Setting that attains each maximum (first one on ties).
idxmax = df_res.loc[(slice(None), slice(None), slice(None), "correlation")].idxmax()
print(idxmax)

# Full rows for each distinct best setting.
display(pd.concat([df_res.loc[(*ind, slice(None)), :] for ind in set(idxmax)], axis=0))

HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=125.0), HTML(value='')))


spearman       0.483964
kendaltau      0.412216
jaccard-10     0.052632
jaccard-20     0.176471
jaccard-30     0.250000
jaccard-40     0.311475
jaccard-50     0.265823
jaccard-100    0.351351
dtype: float64


Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,spearman,kendaltau,jaccard-10,jaccard-20,jaccard-30,jaccard-40,jaccard-50,jaccard-100
0.9,0.5,0.5,correlation,4.801218e-01,4.091248e-01,0.052632,0.142857,0.25,0.311475,0.265823,0.342282
0.9,0.5,0.5,pvalue,1.430763e-24,1.256218e-22,,,,,,
0.9,0.5,0.9,correlation,4.801423e-01,4.091536e-01,0.052632,0.142857,0.25,0.311475,0.265823,0.342282
0.9,0.5,0.9,pvalue,1.423407e-24,1.249522e-22,,,,,,
0.9,0.5,0.7,correlation,4.802175e-01,4.092150e-01,0.052632,0.142857,0.25,0.311475,0.265823,0.342282
...,...,...,...,...,...,...,...,...,...,...,...
0.1,0.1,0.7,pvalue,5.619062e-25,6.230646e-23,,,,,,
0.1,0.1,0.3,correlation,4.838854e-01,4.121455e-01,0.052632,0.176471,0.25,0.290323,0.265823,0.351351
0.1,0.1,0.3,pvalue,5.517314e-25,6.131071e-23,,,,,,
0.1,0.1,0.1,correlation,4.839158e-01,4.121712e-01,0.052632,0.176471,0.25,0.290323,0.265823,0.351351


spearman       (0.1, 0.9, 0.3)
kendaltau      (0.1, 0.9, 0.3)
jaccard-10     (0.9, 0.5, 0.5)
jaccard-20     (0.1, 0.5, 0.5)
jaccard-30     (0.9, 0.5, 0.5)
jaccard-40     (0.9, 0.5, 0.5)
jaccard-50     (0.9, 0.5, 0.5)
jaccard-100    (0.1, 0.5, 0.5)
dtype: object


Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,spearman,kendaltau,jaccard-10,jaccard-20,jaccard-30,jaccard-40,jaccard-50,jaccard-100
0.1,0.9,0.3,correlation,0.4839637,0.4122163,0.052632,0.176471,0.25,0.290323,0.265823,0.351351
0.1,0.9,0.3,pvalue,5.408446e-25,6.033007000000001e-23,,,,,,
0.1,0.5,0.5,correlation,0.4838885,0.4121581,0.052632,0.176471,0.25,0.290323,0.265823,0.351351
0.1,0.5,0.5,pvalue,5.513035e-25,6.130940000000001e-23,,,,,,
0.9,0.5,0.5,correlation,0.4801218,0.4091248,0.052632,0.142857,0.25,0.311475,0.265823,0.342282
0.9,0.5,0.5,pvalue,1.430763e-24,1.2562180000000001e-22,,,,,,
