In [1]:
import numpy as np
import pandas as pd

True

## hypothesis
Vocal pitch influences how dominant a speaker is perceived to be in a conversation. Specifically, individuals with lower vocal pitch are more likely to be perceived as dominant. This hypothesis is based on the findings of Tsantani et al. (2016) in their study ‘Low Vocal Pitch Preference Drives First Impressions Irrespective of Context in Male Voices but Not in Female Voices’, which indicate that low-pitched voices, particularly in males, are associated with dominance and trustworthiness.

In [11]:
# Run the external script parallel_pitch.py, which creates a dataframe containing the
# annotated dominance and fundamental frequency for each participant.
# NOTE(review): presumably this script also writes the results pickle that a later cell
# loads — confirm in parallel_pitch.py.
!python parallel_pitch.py

true:  ['02', '04', '08', '10', '11', '13', '17', '18', '19', '20', '22', '23']
maybe:  ['07', '09']
false:  ['05', '14', '21']


# results

In [8]:
import pickle

# Unpickle the stored pitch results; later cells use the loaded object as a DataFrame.
# NOTE(review): pickle.load can execute arbitrary code, so only open files from a trusted source.
df = pd.DataFrame()  # placeholder; rebound to the unpickled object just below
with open("data/results/non_verbal_multi/pitch_results.pkl", "rb") as results_file:
    df = pickle.load(results_file)
    

In [13]:
# a comparison between pitch influence vs topical perplexity
# Hard-coded perplexity values, one (P1, P2) pair per row. The columns are later copied
# into `df` by index, so the row order here has to match the row order of `df`.
ppxl = pd.DataFrame(
    [
        (1.639401, 1.472807),
        (1.607766, 1.417201),
        (1.326069, 1.472962),
        (1.454787, 1.359300),
        (1.421077, 1.265324),
        (1.613264, 1.320702),
        (1.498680, 1.830283),
        (1.510881, 1.512922),
        (1.353567, 1.443114),
        (1.452495, 1.514535),
        (1.339124, 1.214433),
        (1.415388, 1.657944),
        (1.375989, 1.516017),
        (1.634085, 1.214660),
        (1.756079, 1.506281),
        (np.nan, np.nan),  # no perplexity value available for this row
        (1.296187, 1.321923),
    ],
    columns=["ppxl_P1", "ppxl_P2"],
)

In [18]:
# Attach the perplexity columns to the results table (aligned on the row index).
df["ppxl_P1"] = ppxl["ppxl_P1"]
df["ppxl_P2"] = ppxl["ppxl_P2"]

# hypo_ppxl is True when the participant with the strictly higher perplexity is also the
# one with the strictly higher dominance score. Ties and NaN values compare as False on
# both sides, so such rows end up False.
p1_higher_ppxl = df["ppxl_P1"] > df["ppxl_P2"]
p1_more_dominant = df["dominance_P1"] > df["dominance_P2"]
p2_higher_ppxl = df["ppxl_P1"] < df["ppxl_P2"]
p2_more_dominant = df["dominance_P1"] < df["dominance_P2"]
df["hypo_ppxl"] = (p1_higher_ppxl & p1_more_dominant) | (p2_higher_ppxl & p2_more_dominant)

In [19]:
# Render the merged table (pitch, dominance, perplexity and both hypothesis flags) for inspection.
display(df)

Unnamed: 0,session,f0_P1,f0_P2,dominance_P1,dominance_P2,hypo,ppxl_P1,ppxl_P2,hypo_ppxl
0,2,95.991426,95.111992,2.0,3.4,true,1.639401,1.472807,False
1,4,96.021414,97.92961,2.0,1.6,true,1.607766,1.417201,True
2,5,91.887388,92.526656,2.0,3.8,false,1.326069,1.472962,True
3,7,74.769208,84.26931,3.4,3.4,maybe,1.454787,1.3593,False
4,8,94.41349,107.539095,2.8,2.4,true,1.421077,1.265324,True
5,9,70.34084,84.9398,3.6,3.6,maybe,1.613264,1.320702,False
6,10,99.151697,67.455212,2.6,2.8,true,1.49868,1.830283,True
7,11,69.548342,72.579713,3.6,1.4,true,1.510881,1.512922,False
8,13,118.198081,68.264357,1.4,2.4,true,1.353567,1.443114,True
9,14,89.32804,69.769168,3.6,3.4,false,1.452495,1.514535,False


In [6]:
import scipy
import plotly.express as px

# NOTE(review): presumably a plotly colour-scale name; no code visible in this notebook
# excerpt reads it (the heatmaps hard-code their own scales) — confirm before removing.
cmp = 'algae'
def correlation_heatmap(y_cols, x_cols, full_data):
    '''
    Compute Spearman correlations between two groups of columns and build
    annotated plotly heatmaps of the results.

    Uses scipy.stats.spearmanr with nan_policy='omit' (NaN values are left out
    of the computation).

    Params:
    y_cols, x_cols: ordered collections (e.g. lists) of column titles (strings)
    full_data: pandas dataframe that includes all columns listed in y_cols, x_cols
    Returns (in this order):
    corr: Spearman correlation coefficient matrix (y_cols = rows, x_cols = cols of matrix)
    fig_corr: annotated plotly heatmap of coefficients
    p: Spearman p-value matrix (same layout as corr)
    fig_p: annotated plotly heatmap of p-values
    fig_r2: annotated plotly heatmap of the squared coefficients (corr**2)
    '''
    # Accept any ordered collection of names, not only lists (tuples, Index, ...).
    y_cols = list(y_cols)
    x_cols = list(x_cols)
    cols = y_cols + x_cols
    all_correlations = scipy.stats.spearmanr(full_data[cols], nan_policy='omit')

    statistic = np.asarray(all_correlations.statistic, dtype=float)
    pvalue = np.asarray(all_correlations.pvalue, dtype=float)
    if statistic.ndim == 0:
        # With exactly two input columns spearmanr returns scalars instead of
        # 2x2 matrices; rebuild the matrices so the slicing below still works.
        # The diagonal (a column against itself) is r = 1 with p = 0, matching
        # what spearmanr reports on the diagonal for larger inputs.
        statistic = np.array([[1.0, float(statistic)], [float(statistic), 1.0]])
        pvalue = np.array([[0.0, float(pvalue)], [float(pvalue), 0.0]])

    # Rows of the full matrix follow `cols`: the first len(y_cols) entries are
    # the y columns, the rest are the x columns. Keep the y-vs-x quadrant only.
    n_y = len(y_cols)
    corr = pd.DataFrame(statistic[:n_y, n_y:], index=y_cols, columns=x_cols)
    p = pd.DataFrame(pvalue[:n_y, n_y:], index=y_cols, columns=x_cols)

    fig_corr = px.imshow(corr, text_auto=True, aspect='auto', color_continuous_scale='RdBu')
    fig_r2 = px.imshow(corr**2, text_auto=True, aspect='auto', color_continuous_scale='RdBu')
    fig_p = px.imshow(p, text_auto=True, aspect='auto', color_continuous_scale='gray_r')

    return corr, fig_corr, p, fig_p, fig_r2

def correlation(df, y_cols, x_cols):
    '''
    Run correlation_heatmap on df and immediately show its three figures.

    Params:
    df: pandas dataframe holding every column named in y_cols and x_cols
    y_cols, x_cols: column titles forwarded unchanged to correlation_heatmap
    Returns:
    The same five values correlation_heatmap returns, in the same order:
    corr, fig_corr, p, fig_p, fig_r2
    '''
    results = correlation_heatmap(y_cols, x_cols, df)
    corr, fig_corr, p, fig_p, fig_r2 = results
    # Display order: coefficients, then p-values, then squared coefficients.
    for figure in (fig_corr, fig_p, fig_r2):
        figure.show()
    return corr, fig_corr, p, fig_p, fig_r2




In [7]:
# Using corrected annotation
# Correlate every dominance / pitch (f0) / perplexity column against every other one.
analysis_columns = ["dominance_P1", "dominance_P2", "f0_P1", "f0_P2", "ppxl_P1", "ppxl_P2"]
y_s = df[analysis_columns]
X = df[analysis_columns]
x_cols = list(X.columns)
y_cols = list(y_s.columns)
# correlation() is declared as (df, y_cols, x_cols); the two lists are passed in the
# opposite order here, which is harmless only because they are identical.
corr, fig_corr, p, fig_p, fig_r2 = correlation(df, x_cols, y_cols)