In [73]:
import pickle 
import pandas as pd 
import wandb

# Shared Weights & Biases client used for every query in this notebook.
api = wandb.Api()

# Read the pickled Hazumi1911 feature bundle. pickle can execute arbitrary
# code while loading, so this file must come from a trusted source.
with open('../../data/Hazumi_features/Hazumi1911_features.pkl', mode='rb') as features_file:
    hazumi_features = pickle.load(features_file, encoding='utf-8')

# The bundle holds nine objects. Later cells iterate `vid` for ids and index
# `SP` by those ids; the meaning of the remaining names is not shown in this
# file -- presumably labels and per-modality features, verify against the
# dataset's preprocessing script.
SS, TS, SP, TP, text, audio, visual, bio, vid = hazumi_features

def get_wandb_result(project_name, entity="r-yanagimoto", client=None):
    """Collect the summary, config and name of every run in a W&B project.

    Args:
        project_name: Name of the Weights & Biases project to query.
        entity: User or team that owns the project. Defaults to the account
            this notebook was originally written for.
        client: Object exposing ``runs(path)`` (e.g. a ``wandb.Api``
            instance). When ``None`` the module-level ``api`` client is used.

    Returns:
        pandas.DataFrame with one row per run and three columns:
        ``summary`` (dict of logged summary values), ``config`` (dict of
        hyperparameters without the keys starting with ``_``) and ``name``
        (the human-readable run name).
    """
    if client is None:
        # Fall back to the notebook-wide client created next to the imports.
        client = api

    records = []
    for run in client.runs(f"{entity}/{project_name}"):
        records.append({
            # .summary contains the output keys/values for metrics like
            # accuracy; ._json_dict is used to omit large files.
            "summary": run.summary._json_dict,
            # .config contains the hyperparameters; special values whose
            # key starts with "_" are removed.
            "config": {k: v for k, v in run.config.items()
                       if not k.startswith('_')},
            # .name is the human-readable name of the run.
            "name": run.name,
        })

    # Passing the column list keeps the three columns present (and in the
    # same order) even when the project has no runs.
    return pd.DataFrame(records, columns=["summary", "config", "name"])

In [89]:
# Accuracy per run name, kept in one table per modality.
text_acc = {}
audio_acc = {}
visual_acc = {}

runs_df = get_wandb_result("third sentiment")
for run_name, run_config, run_summary in zip(
        runs_df["name"], runs_df["config"], runs_df["summary"]):
    modal = run_config["modal"]
    # Runs that never logged an accuracy are ignored.
    if "acc" not in run_summary:
        continue
    # 't' -> text, 'a' -> audio; every other modal value is stored as visual.
    bucket = (
        text_acc if modal == 't'
        else audio_acc if modal == 'a'
        else visual_acc
    )
    bucket[run_name] = round(run_summary["acc"], 3)

# One column per modality, indexed by run name.
acc = {"text": text_acc, "audio": audio_acc, "visual": visual_acc}
result_df = pd.DataFrame(acc)

# SP entry for every id in vid, laid out as the five columns E/A/C/N/O.
personality = {uid: SP[uid] for uid in vid}
personality_df = pd.DataFrame.from_dict(
    personality,
    orient="index",
    columns=["E", "A", "C", "N", "O"],
)

# Align accuracies and personality columns side by side on the shared index.
df = pd.concat([result_df, personality_df], axis=1)

In [90]:
import itertools # 効率的なループ実行のためのイテレータ生成関数のインポート
import matplotlib.pyplot as plt # グラフ描画ライブラリ
import seaborn as sns # データの可視化を行うライブラリ
sns.set() # デフォルトスタイルにseabornを適用
from scipy.stats import pearsonr # 科学技術計算ライブラリ
import numpy as np # 数値計算を効率的に行うための拡張モジュール

# Print every pair of columns in `df` whose Pearson correlation has p < 0.10.
for i, j in itertools.combinations(df, 2):  # every unordered pair of column labels
    # `df` is built by an index-aligned concat, so ids present in only one of
    # the inputs carry NaN. pearsonr would then return NaN and the pair would
    # be skipped silently, so only rows where both columns are present are used.
    pair = df[[i, j]].dropna()
    if len(pair) < 2:
        continue  # pearsonr needs at least two observations
    x = pair[i].to_numpy()
    y = pair[j].to_numpy()
    a, b = pearsonr(x, y)  # a: correlation coefficient, b: p-value
    # Chained comparison: (0 != rounded p) and (rounded p < 0.10).
    # NOTE(review): the first half also hides pairs whose p-value rounds to 0
    # at 10 decimals, i.e. the strongest correlations -- confirm this is intended.
    if 0 != b.round(10) < 0.10:
        print("=" * 45)
        print(i + "----" + j)
        print("相関係数:", a.round(4))
        print("p値:", b.round(4))

text----audio
相関係数: 0.7407
p値: 0.0
text----visual
相関係数: 0.5213
p値: 0.0063
text----E
相関係数: 0.3683
p値: 0.0641
audio----visual
相関係数: 0.6605
p値: 0.0002
audio----N
相関係数: -0.3311
p値: 0.0985
A----O
相関係数: -0.6577
p値: 0.0003
C----N
相関係数: -0.3376
p値: 0.0917
