In [1]:
import pandas as pd
import math
import numpy as np
import statistics
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy import stats
import researchpy as rp
import matplotlib.pyplot as plt


# Load the responses recorded for each conversation type and tag every row
# with its condition so the two sheets can be told apart once stacked.
neutral_df = pd.read_excel('./neutral_com.xlsx').assign(Type="Neutral")
inflam_df = pd.read_excel('./inflam_com.xlsx').assign(Type="Inflam")

# Stack both conditions into one long-format frame used by the analysis cells.
# ignore_index=True renumbers the rows 0..n-1; without it each sheet keeps its
# own 0-based labels, the combined index contains duplicates, and label-based
# lookups such as df.loc[0] return one row per sheet.
df = pd.concat([neutral_df, inflam_df], ignore_index=True)

In [2]:
# Per-question response columns for the neutral conversation.
neutral_stress = neutral_df['Stress']
neutral_speech_concern = neutral_df['Speech_concern']
neutral_perception_concern = neutral_df['Perception_concern']
neutral_comfort = neutral_df['Comfort']
neutral_open_convo = neutral_df['Openness']
neutral_divulge = neutral_df['Divulgence']

# Split the neutral responses by communication group.
neutral_anon_df = neutral_df.query("Group in ['Anon']")
neutral_direct_df = neutral_df.query("Group in ['Direct']")

# A notebook cell only renders its final expression, so the per-group
# summaries are passed to display() (the IPython built-in available in every
# notebook kernel). As bare statements they were computed and then discarded.
display(neutral_anon_df.describe(), neutral_direct_df.describe())

# Final expression: show the raw neutral responses.
neutral_df

Unnamed: 0,Timestamp,What is your study ID?,Group,Stress,Speech_concern,Perception_concern,Comfort,Openness,Divulgence,Type
0,2022-10-28 11:56:47.530,1,Direct,8,4,6,5,10,10,Neutral
1,2022-10-28 14:59:24.191,2,Direct,3,5,5,7,9,8,Neutral
2,2022-11-04 15:00:40.126,4,Direct,2,4,7,5,8,8,Neutral
3,2022-11-11 15:18:17.641,5,Anon,1,1,1,8,7,5,Neutral
4,2022-12-02 12:03:19.311,6,Anon,2,5,1,9,9,10,Neutral
5,2022-12-02 15:18:03.167,7,Anon,4,6,7,5,6,4,Neutral


In [6]:
# Per-question response columns for the inflammatory conversation.
# These are read from inflam_df: taking them from neutral_df would make every
# inflam_* Series a silent copy of its neutral_* counterpart.
inflam_stress = inflam_df['Stress']
inflam_speech_concern = inflam_df['Speech_concern']
inflam_perception_concern = inflam_df['Perception_concern']
inflam_comfort = inflam_df['Comfort']
inflam_open_convo = inflam_df['Openness']
inflam_divulge = inflam_df['Divulgence']

# Split the inflammatory responses by communication group.
inflam_anon_df = inflam_df.query("Group in ['Anon']")
inflam_direct_df = inflam_df.query("Group in ['Direct']")

# A notebook cell only renders its final expression, so the per-group
# summaries are passed to display() (the IPython built-in available in every
# notebook kernel). As bare statements they were computed and then discarded.
display(inflam_anon_df.describe(), inflam_direct_df.describe())

# Final expression: show the combined (neutral + inflammatory) frame.
df

Unnamed: 0,Timestamp,What is your study ID?,Group,Stress,Speech_concern,Perception_concern,Comfort,Openness,Divulgence,Type
0,2022-10-28 11:56:47.530,1,Direct,8,4,6,5,10,10,Neutral
1,2022-10-28 14:59:24.191,2,Direct,3,5,5,7,9,8,Neutral
2,2022-11-04 15:00:40.126,4,Direct,2,4,7,5,8,8,Neutral
3,2022-11-11 15:18:17.641,5,Anon,1,1,1,8,7,5,Neutral
4,2022-12-02 12:03:19.311,6,Anon,2,5,1,9,9,10,Neutral
5,2022-12-02 15:18:03.167,7,Anon,4,6,7,5,6,4,Neutral
0,2022-10-28 12:04:15.057,1,Direct,6,4,7,10,10,10,Inflam
1,2022-10-28 15:05:59.517,2,Direct,5,8,9,4,9,9,Inflam
2,2022-11-04 15:07:58.020,4,Direct,2,3,5,8,8,7,Inflam
3,2022-11-11 15:25:10.793,5,Anon,1,1,1,7,7,5,Inflam


In [None]:
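# A two-way ANOVA is used to analyse the conversation data.
# Reference: https://www.statology.org/two-way-anova-python/

# Factors:
#   Group = Direct or Anon
#   Type  = Neutral or Inflam

# NOTE(review): the same study IDs appear under both Type levels, so Type looks
# like a within-subject factor -- confirm that an independent-samples two-way
# ANOVA is the intended model.

# Example (two factors plus their interaction term):
# model = ols('height ~ C(water) + C(sun) + C(water):C(sun)', data=df).fit()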

In [30]:
# Questionnaire score columns to compare against each other.
columns = [
    'Comfort',
    'Stress',
    'Openness',
    'Speech_concern',
    'Perception_concern',
    'Divulgence',
]

# Pairwise correlation matrix of the selected columns (pandas' default method).
df.loc[:, columns].corr()

Unnamed: 0,Comfort,Stress,Openness,Speech_concern,Perception_concern,Divulgence
Comfort,1.0,-0.432025,0.413743,-0.350368,-0.522649,0.322299
Stress,-0.432025,1.0,0.20164,0.439555,0.652922,0.288815
Openness,0.413743,0.20164,1.0,0.127959,0.07679,0.933817
Speech_concern,-0.350368,0.439555,0.127959,1.0,0.72619,0.32196
Perception_concern,-0.522649,0.652922,0.07679,0.72619,1.0,0.185095
Divulgence,0.322299,0.288815,0.933817,0.32196,0.185095,1.0


In [5]:
# Two-way ANOVA on Stress: main effects of Group and Type plus their interaction.
model = ols(
    formula="Stress ~ C(Group) + C(Type) + C(Group):C(Type)",
    data=df,
).fit()

# typ=2 requests the Type II sums-of-squares table.
sm.stats.anova_lm(model, typ=2)

Unnamed: 0,sum_sq,df,F,PR(>F)
C(Group),10.083333,1.0,1.592105,0.24257
C(Type),0.083333,1.0,0.013158,0.911503
C(Group):C(Type),0.083333,1.0,0.013158,0.911503
Residual,50.666667,8.0,,


In [9]:
# Impact of Type and Group on the participants' concern over how their comments
# were perceived (the Perception_concern column).
# NOTE(review): wording inferred from the column name -- confirm against the
# survey question.
model = ols(
    formula="Perception_concern ~ C(Group) + C(Type) + C(Group):C(Type)",
    data=df,
).fit()

# typ=2 requests the Type II sums-of-squares table.
sm.stats.anova_lm(model, typ=2)

Unnamed: 0,sum_sq,df,F,PR(>F)
C(Group),21.333333,1.0,2.909091,0.126475
C(Type),5.333333,1.0,0.727273,0.418567
C(Group):C(Type),0.333333,1.0,0.045455,0.836503
Residual,58.666667,8.0,,


In [None]:
# Impact of Type and Group on the participants' concern over what they said
# (the Speech_concern column).
# NOTE(review): wording inferred from the column name -- confirm against the
# survey question.
model = ols(
    formula="Speech_concern ~ C(Group) + C(Type) + C(Group):C(Type)",
    data=df,
).fit()

# typ=2 requests the Type II sums-of-squares table.
sm.stats.anova_lm(model, typ=2)

In [11]:
# Two-way ANOVA on Openness (how open the participants felt the conversation
# was): main effects of Group and Type plus their interaction.
model = ols(
    formula="Openness ~ C(Group) + C(Type) + C(Group):C(Type)",
    data=df,
).fit()

# typ=2 requests the Type II sums-of-squares table.
sm.stats.anova_lm(model, typ=2)

Unnamed: 0,sum_sq,df,F,PR(>F)
C(Group),6.75,1.0,3.115385,0.115555
C(Type),0.083333,1.0,0.038462,0.849409
C(Group):C(Type),0.083333,1.0,0.038462,0.849409
Residual,17.333333,8.0,,


In [12]:
# Two-way ANOVA on Comfort (how comfortable the participants felt the
# conversation was): main effects of Group and Type plus their interaction.
model = ols(
    formula="Comfort ~ C(Group) + C(Type) + C(Group):C(Type)",
    data=df,
).fit()

# typ=2 requests the Type II sums-of-squares table.
sm.stats.anova_lm(model, typ=2)

Unnamed: 0,sum_sq,df,F,PR(>F)
C(Group),2.083333,1.0,0.390625,0.549378
C(Type),2.083333,1.0,0.390625,0.549378
C(Group):C(Type),2.083333,1.0,0.390625,0.549378
Residual,42.666667,8.0,,


In [23]:
# Two-way ANOVA on Divulgence (the participants' willingness to divulge
# information): main effects of Group and Type plus their interaction.

# Print the non-null count of every column first, as a check on how many
# observations feed the model.
print(df.count())

model = ols(
    formula="Divulgence ~ C(Group) + C(Type) + C(Group):C(Type)",
    data=df,
).fit()

# typ=2 requests the Type II sums-of-squares table.
sm.stats.anova_lm(model, typ=2)

Timestamp                 12
What is your study ID?    12
Group                     12
Stress                    12
Speech_concern            12
Perception_concern        12
Comfort                   12
Openness                  12
Divulgence                12
Type                      12
dtype: int64


Unnamed: 0,sum_sq,df,F,PR(>F)
C(Group),12.0,1.0,2.285714,0.16902
C(Type),0.333333,1.0,0.063492,0.807412
C(Group):C(Type),0.333333,1.0,0.063492,0.807412
Residual,42.0,8.0,,


In [21]:
# Sanity check: run the same two-way ANOVA recipe on a small toy data set
# (2 watering levels x 3 sun levels x 5 observations = 30 rows).
# The code is live again so that re-running the cell reproduces the output
# recorded beneath it; as commented-out code the cell produced nothing.
test_df = pd.DataFrame({'water': np.repeat(['daily', 'weekly'], 15),
                        'sun': np.tile(np.repeat(['low', 'med', 'high'], 5), 2),
                        'height': [6, 6, 6, 5, 6, 5, 5, 6, 4, 5,
                                   6, 6, 7, 8, 7, 3, 4, 4, 4, 5,
                                   4, 4, 4, 4, 4, 5, 6, 6, 7, 8]})

# Each column should report 30 non-null values.
print(test_df.count())

# Fitted under its own name so the notebook's `model` (the last survey ANOVA)
# is not overwritten by the toy example.
sanity_model = ols('height ~ C(water) + C(sun) + C(water):C(sun)', data=test_df).fit()
sm.stats.anova_lm(sanity_model, typ=2)

water     30
sun       30
height    30
dtype: int64


Unnamed: 0,sum_sq,df,F,PR(>F)
C(water),8.533333,1.0,16.0,0.000527
C(sun),24.866667,2.0,23.3125,2e-06
C(water):C(sun),2.466667,2.0,2.3125,0.120667
Residual,12.8,24.0,,
