# 性格特性と心象の関係

In [None]:
import os 
import glob
import pickle 
import statistics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy.stats import pearsonr

# Load the pre-extracted Hazumi1911 features. The pickle holds a 10-element
# sequence that is unpacked positionally below.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# from a trusted source.
with open('../../data/Hazumi_features/Hazumi1911_features.pkl', mode='rb') as f:
    (
        SS_ternary,
        TS_ternary,
        sentiment,
        third_sentiment,
        persona,
        third_persona,
        text,
        audio,
        visual,
        vid,
    ) = pickle.load(f, encoding='utf-8')

def calc_p(df): 
    """
    p値の算出

    Args:
        df(DataFrame):説明

    """
    for i in range(5):
        x = df.iloc[:, 0].values 
        y = df.iloc[:, i+1].values 
        a, b = pearsonr(np.ravel(x), np.ravel(y))
        if b <= 0.05:
            print("=" * 45)
            print(f"--{df.columns[i+1]}--")
            print("相関係数:", a.round(4))
            print("p値:", b.round(4))
            print("=" * 45)



def calc_appearance_rate(X):
    """
    Compute, per user, the appearance rate of the low, mid and high groups.

    A label equal to 0.0 counts as low, a label equal to 1.0 counts as mid,
    and every other label counts as high.

    Args:
        X (dict): Mapping from a user key to an iterable of labels.

    Returns:
        negative (list): Rate of low-group labels, one entry per user.
        neutral (list): Rate of mid-group labels, one entry per user.
        positive (list): Rate of high-group labels, one entry per user.

        The three lists follow the iteration order of ``X``. A user with no
        labels gets NaN in all three lists, because the rate is undefined,
        instead of raising ZeroDivisionError.
    """
    negative = []
    neutral = []
    positive = []

    for labels in X.values():
        total = 0
        low = 0
        mid = 0
        for label in labels:
            total += 1
            if label == 0.0:
                low += 1
            elif label == 1.0:
                mid += 1

        if total == 0:
            # No labels for this user: keep the lists aligned with the other
            # per-user lists by recording an undefined rate.
            undefined = float("nan")
            negative.append(undefined)
            neutral.append(undefined)
            positive.append(undefined)
            continue

        # Whatever is neither low nor mid is counted as high.
        high = total - low - mid
        negative.append(low / total)
        neutral.append(mid / total)
        positive.append(high / total)

    return negative, neutral, positive
    

def calc_persona(X):
    """
    Split per-user personality-trait scores into one list per trait.

    Args:
        X (dict): Mapping from a user key to an indexable sequence whose
            elements 0 to 4 are that user's five trait scores.

    Returns:
        ext (list): Element 0 of every user's scores.
        agr (list): Element 1 of every user's scores.
        con (list): Element 2 of every user's scores.
        neu (list): Element 3 of every user's scores.
        ope (list): Element 4 of every user's scores.

        Each list follows the iteration order of ``X``.
    """
    per_user = list(X.values())

    ext = [scores[0] for scores in per_user]
    agr = [scores[1] for scores in per_user]
    con = [scores[2] for scores in per_user]
    neu = [scores[3] for scores in per_user]
    ope = [scores[4] for scores in per_user]

    return ext, agr, con, neu, ope

def calc_sentiment(X):
    """
    Reduce each user's sentiment scores to their arithmetic mean.

    Args:
        X (dict): Mapping from a user key to a non-empty collection of
            numeric sentiment scores.

    Returns:
        res (list): ``statistics.mean`` of each user's scores, in the
            iteration order of ``X``.
    """
    return [statistics.mean(user_scores) for user_scores in X.values()]

In [None]:
# --- Self annotation ---
# Sentiment (discrete): per-user appearance rate of each group
negative, neutral, positive = calc_appearance_rate(SS_ternary)
# Sentiment (continuous): per-user mean score.
# NOTE: this rebinds `sentiment` from the loaded mapping to a list, so the
# cell cannot be re-run without reloading the data first.
sentiment = calc_sentiment(sentiment)
# Personality-trait scores (continuous)
(
    extraversion,
    agreauleness,
    conscientiousness,
    neuroticism,
    openness,
) = calc_persona(persona)

# --- Third-party annotation ---
# Sentiment (discrete): per-user appearance rate of each group
third_neg, third_neu, third_pos = calc_appearance_rate(TS_ternary)
# Sentiment (continuous): per-user mean score (same rebinding caveat as above)
third_sentiment = calc_sentiment(third_sentiment)
# Personality-trait scores (continuous)
(
    third_extr,
    third_agre,
    third_cons,
    third_neur,
    third_open,
) = calc_persona(third_persona)

### 心象（本人）と性格特性（本人）の相関

In [None]:
# The five self-annotated personality-trait columns are identical in every
# table of this cell, so they are declared once and appended after the
# sentiment column (dict order fixes the column order).
_self_traits = {
    "extraversion": extraversion,
    "agreauleness": agreauleness,
    "conscientiousness": conscientiousness,
    "neuroticism": neuroticism,
    "openness": openness,
}

# Correlation between the low-group appearance rate and the personality traits
df1 = pd.DataFrame({"negative": negative, **_self_traits})
print('----negative----')
print(df1.corr()['negative'])
calc_p(df1)

# Correlation between the mid group and the personality traits
df2 = pd.DataFrame({"neutral": neutral, **_self_traits})
print('----neutral----')
print(df2.corr()['neutral'])
calc_p(df2)

# Correlation between the high group and the personality traits
df3 = pd.DataFrame({"positive": positive, **_self_traits})
print('----positive----')
print(df3.corr()['positive'])
calc_p(df3)

# Correlation between the mean sentiment score over the whole conversation
# and the personality traits
df4 = pd.DataFrame({"sentiment": sentiment, **_self_traits})
print('----sentiment----')
print(df4.corr()['sentiment'])
calc_p(df4)

### 心象（本人）と性格特性（第三者）の相関

In [None]:
# The five third-party personality-trait columns are identical in every table
# of this cell, so they are declared once and appended after the sentiment
# column (dict order fixes the column order).
_third_traits = {
    "third_extr": third_extr,
    "third_agre": third_agre,
    "third_cons": third_cons,
    "third_neur": third_neur,
    "third_open": third_open,
}

# Correlation between the low-group appearance rate and the personality traits
df1 = pd.DataFrame({"negative": negative, **_third_traits})
print('----negative----')
print(df1.corr()['negative'])
calc_p(df1)

# Correlation between the mid-group appearance rate and the personality traits
df2 = pd.DataFrame({"neutral": neutral, **_third_traits})
print('----neutral----')
print(df2.corr()['neutral'])
calc_p(df2)

# Correlation between the high-group appearance rate and the personality traits
df3 = pd.DataFrame({"positive": positive, **_third_traits})
print('----positive----')
print(df3.corr()['positive'])
calc_p(df3)

# Correlation between the mean sentiment score over the whole conversation
# and the personality traits
df4 = pd.DataFrame({"sentiment": sentiment, **_third_traits})
print('----sentiment----')
print(df4.corr()['sentiment'])
calc_p(df4)


### 心象（第三者）と性格特性（本人）の相関

In [None]:
# The five self-annotated personality-trait columns are identical in every
# table of this cell, so they are declared once and appended after the
# sentiment column (dict order fixes the column order).
_self_traits = {
    "extraversion": extraversion,
    "agreauleness": agreauleness,
    "conscientiousness": conscientiousness,
    "neuroticism": neuroticism,
    "openness": openness,
}

# Correlation between the low-group appearance rate and the personality traits
df1 = pd.DataFrame({"third_neg": third_neg, **_self_traits})
print('----third_neg----')
print(df1.corr()['third_neg'])
calc_p(df1)

# Correlation between the mid group and the personality traits
df2 = pd.DataFrame({"third_neu": third_neu, **_self_traits})
print('----third_neu----')
print(df2.corr()['third_neu'])
calc_p(df2)

# Correlation between the high group and the personality traits
df3 = pd.DataFrame({"third_pos": third_pos, **_self_traits})
print('----third_pos----')
print(df3.corr()['third_pos'])
calc_p(df3)

# Correlation between the mean sentiment score over the whole conversation
# and the personality traits
df4 = pd.DataFrame({"third_sentiment": third_sentiment, **_self_traits})
print('----third_sentiment----')
print(df4.corr()['third_sentiment'])
calc_p(df4)

### 心象（第三者）と性格特性（第三者）の相関

In [10]:
# The five third-party personality-trait columns are identical in every table
# of this cell, so they are declared once and appended after the sentiment
# column (dict order fixes the column order).
_third_traits = {
    "third_extr": third_extr,
    "third_agre": third_agre,
    "third_cons": third_cons,
    "third_neur": third_neur,
    "third_open": third_open,
}

# Correlation between the low-group appearance rate and the personality traits
df1 = pd.DataFrame({"third_neg": third_neg, **_third_traits})
print('----third_neg----')
print(df1.corr()['third_neg'])
calc_p(df1)

# Correlation between the mid-group appearance rate and the personality traits
df2 = pd.DataFrame({"third_neu": third_neu, **_third_traits})
print('----third_neu----')
print(df2.corr()['third_neu'])
calc_p(df2)

# Correlation between the high-group appearance rate and the personality traits
df3 = pd.DataFrame({"third_pos": third_pos, **_third_traits})
print('----third_pos----')
print(df3.corr()['third_pos'])
calc_p(df3)

# Correlation between the mean sentiment score over the whole conversation
# and the personality traits
df4 = pd.DataFrame({"third_sentiment": third_sentiment, **_third_traits})
print('----third_sentiment----')
print(df4.corr()['third_sentiment'])
calc_p(df4)


----third_neg----
third_neg     1.000000
third_extr   -0.299452
third_agre   -0.379445
third_cons   -0.276074
third_neur    0.226542
third_open   -0.606646
Name: third_neg, dtype: float64
--third_open--
相関係数: -0.6066
p値: 0.001
----third_neu----
third_neu     1.000000
third_extr   -0.333940
third_agre   -0.116704
third_cons   -0.167545
third_neur    0.010674
third_open   -0.193808
Name: third_neu, dtype: float64
----third_pos----
third_pos     1.000000
third_extr    0.402641
third_agre    0.333159
third_cons    0.290137
third_neur   -0.164821
third_open    0.536819
Name: third_pos, dtype: float64
--third_extr--
相関係数: 0.4026
p値: 0.0414
--third_open--
相関係数: 0.5368
p値: 0.0047
----third_sentiment----
third_sentiment    1.000000
third_extr         0.396840
third_agre         0.397825
third_cons         0.324348
third_neur        -0.231814
third_open         0.478654
Name: third_sentiment, dtype: float64
--third_extr--
相関係数: 0.3968
p値: 0.0447
--third_agre--
相関係数: 0.3978
p値: 0.0441
--third_ope