In [4]:
import pandas as pd
import statistics as st
import scipy
from scipy.stats import chi2_contingency
import statsmodels.api as sm
import numpy as np
import scipy.stats as stats
from scipy.stats import mannwhitneyu
from scipy.stats import ranksums
import plotly.graph_objects as go

In [5]:
def rank(group1, group2, alt = 'two-sided', norm = False, dist = False):
    if dist: 
        print ('group_1 value counts:', group1.value_counts(normalize = norm).sort_index(),sep='\n' ) 
        print ('---')
        print ('group_2 value counts:', group2.value_counts(normalize = norm).sort_index(),sep='\n'  ) 
        print ('---')
    # Perform the Mann-Whitney U test
    statistic, p_value = mannwhitneyu(group1, group2, alternative = alt)
    print ('Mann-Whitney U statistic:', statistic, ' ----- p_value:', p_value)
    return statistic, p_value

## Chapter 1 Read DATA and clean

In [8]:
df = pd.read_excel('profiles-2_July+16,+2023_20.32.xlsx')

In [9]:
df

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,RecipientLastName,...,z5,z6,z7,z8,z9,z10,z11,z12,z13,participant
0,Start Date,End Date,Response Type,IP Address,Progress,Duration (in seconds),Finished,Recorded Date,Response ID,Recipient Last Name,...,z5,z6,z7,z8,z9,z10,z11,z12,z13,participant
1,2023-05-24 10:12:36,2023-05-24 10:30:24,0,194.27.149.203,100,1067,1,2023-05-24 10:30:24.993000,R_1nUuR1KqCGsWQVj,,...,a,d,m,b,i,h,j,f,l,
2,2023-05-24 10:20:11,2023-05-24 10:42:30,0,194.27.149.203,100,1339,1,2023-05-24 10:42:31.946000,R_TpJ4vkJAJQeTHJ7,,...,b,c,h,k,a,l,g,e,i,
3,2023-05-24 10:31:54,2023-05-24 10:48:38,0,194.27.149.203,100,1004,1,2023-05-24 10:48:39.617000,R_1Q4wPzBTdeQj4Zw,,...,i,b,g,l,a,h,c,f,e,
4,2023-05-24 10:18:16,2023-05-24 11:06:15,0,194.27.149.203,100,2878,1,2023-05-24 11:06:15.802000,R_svgMcUe2Vjv8XND,,...,j,i,a,e,h,d,c,g,k,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324,2023-07-15 13:04:24,2023-07-15 13:15:11,0,46.221.48.14,100,647,1,2023-07-15 13:15:12.597000,R_23eDxb0jEix0Qxy,,...,a,g,l,h,f,k,m,e,d,846803
325,2023-07-15 16:06:13,2023-07-15 16:24:28,0,78.174.202.98,100,1095,1,2023-07-15 16:24:29.727000,R_2PweJFSgVfKxp66,,...,g,f,c,d,e,h,l,m,j,663633
326,2023-07-15 17:14:56,2023-07-15 17:24:02,0,88.233.134.40,100,545,1,2023-07-15 17:24:02.749000,R_2zqDdqtyi2DNCI9,,...,e,a,j,b,l,k,h,i,d,199129
327,2023-07-15 18:33:40,2023-07-15 18:48:23,0,78.135.74.147,100,882,1,2023-07-15 18:48:23.864000,R_1ptjp4iz2Ym2p0k,,...,i,b,l,h,a,c,f,k,e,766787


In [10]:
columns = df.columns.to_list()

In [11]:
df = df.drop([0],axis=0)
df.head(5)

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,RecipientLastName,...,z5,z6,z7,z8,z9,z10,z11,z12,z13,participant
1,2023-05-24 10:12:36,2023-05-24 10:30:24,0,194.27.149.203,100,1067,1,2023-05-24 10:30:24.993000,R_1nUuR1KqCGsWQVj,,...,a,d,m,b,i,h,j,f,l,
2,2023-05-24 10:20:11,2023-05-24 10:42:30,0,194.27.149.203,100,1339,1,2023-05-24 10:42:31.946000,R_TpJ4vkJAJQeTHJ7,,...,b,c,h,k,a,l,g,e,i,
3,2023-05-24 10:31:54,2023-05-24 10:48:38,0,194.27.149.203,100,1004,1,2023-05-24 10:48:39.617000,R_1Q4wPzBTdeQj4Zw,,...,i,b,g,l,a,h,c,f,e,
4,2023-05-24 10:18:16,2023-05-24 11:06:15,0,194.27.149.203,100,2878,1,2023-05-24 11:06:15.802000,R_svgMcUe2Vjv8XND,,...,j,i,a,e,h,d,c,g,k,
5,2023-05-24 10:52:49,2023-05-24 11:30:33,0,194.27.149.203,100,2263,1,2023-05-24 11:30:34.639000,R_elAJ89aUk8bc2pr,,...,h,g,i,l,f,e,d,k,b,


In [12]:
# Participants randomly saw Q1 or Q1a where Q1a is the alternative of Q1 where order of ranking of two person changes.
df['Q1'] = df['Q1'].fillna(df['Q1A'])
df['Q2'] = df['Q2'].fillna(df['Q2A'])
df['Q3'] = df['Q3'].fillna(df['Q3A'])
df['Q4'] = df['Q4'].fillna(df['Q4A'])
df['Q5'] = df['Q5'].fillna(df['Q5A'])
df['Q6'] = df['Q6'].fillna(df['Q6A'])

In [None]:
for q in [ 'Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6']:
    df[q] = df.apply(lambda x: 0 if x[q] == 2 else x[q], axis="columns")
# Now 1 corresponds to Fallback Bargaining winner and 0 corresponds to minimum spread winner.

In [54]:
for i in ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6']:
    print (df.value_counts(i))
    print (df.value_counts(i, normalize = True))
    print ('---')

Q1
1    255
0     73
dtype: int64
Q1
1    0.777439
0    0.222561
dtype: float64
---
Q2
1    238
0     90
dtype: int64
Q2
1    0.72561
0    0.27439
dtype: float64
---
Q3
1    271
0     57
dtype: int64
Q3
1    0.82622
0    0.17378
dtype: float64
---
Q4
1    273
0     55
dtype: int64
Q4
1    0.832317
0    0.167683
dtype: float64
---
Q5
1    275
0     53
dtype: int64
Q5
1    0.838415
0    0.161585
dtype: float64
---
Q6
1    266
0     62
dtype: int64
Q6
1    0.810976
0    0.189024
dtype: float64
---


In [15]:
#Qtotal corresponds to total number of seelcted Fallback Bargaining
df ['Qtotal'] = df['Q1'] + df['Q2'] + df['Q3'] + df['Q4'] + df['Q5'] + df['Q6']
df ['Qtotal'].value_counts()

6    173
5     56
4     33
3     25
2     22
0     10
1      9
Name: Qtotal, dtype: int64

In [16]:
df["kadin"] = df.apply(lambda x: 1 if x["gender"] == 3 else 0, axis="columns")
df["kadin"].value_counts()

1    183
0    145
Name: kadin, dtype: int64

## Chapter 2 Consistency

### Chapter 2.1 Consistency with test question (1 out of 6 questions repeated randomly to check consistency)

In [17]:
for q in [ 'Q1t', 'Q2t', 'Q3t', 'Q4t', 'Q5t', 'Q6t']:
    df[q] = df.apply(lambda x: 0 if x[q] == 2 else x[q], axis="columns")

In [43]:
for i in ['Q1t', 'Q2t', 'Q3t', 'Q4t', 'Q5t', 'Q6t']:
    print (df.value_counts(i, normalize = True))
df['Q1notcons'] = df['Q1t'] - df['Q1']
df['Q2notcons'] = df['Q2t'] - df['Q2']
df['Q3notcons'] = df['Q3t'] - df['Q3']
df['Q4notcons'] = df['Q4t'] - df['Q4']
df['Q5notcons'] = df['Q5t'] - df['Q5']
df['Q6notcons'] = df['Q6t'] - df['Q6']

Q1t
1.0    0.807018
0.0    0.192982
dtype: float64
Q2t
1.0    0.702128
0.0    0.297872
dtype: float64
Q3t
1.0    0.791667
0.0    0.208333
dtype: float64
Q4t
1.0    0.791045
0.0    0.208955
dtype: float64
Q5t
1.0    0.8
0.0    0.2
dtype: float64
Q6t
1.0    0.814815
0.0    0.185185
dtype: float64


In [44]:
df['Q2notcons'].value_counts()

 0.0    38
-1.0     6
 1.0     3
Name: Q2notcons, dtype: int64

In [32]:
df[['Q2t', 'Q2']].value_counts()

Q2t  Q2
1.0  1     30
0.0  0      8
     1      6
1.0  0      3
dtype: int64

In [24]:
df['cons'] = np.where((df['Q1notcons'] == 0) |  (df['Q2notcons'] == 0) |  (df['Q3notcons'] == 0) |  (df['Q4notcons'] == 0)
                      |  (df['Q5notcons'] == 0) | (df['Q6notcons'] == 0)  , 1, 0)

In [25]:
df['cons'].value_counts()

1    270
0     58
Name: cons, dtype: int64

In [26]:
df['FBcons'] = ( ((df['Q1notcons'] == 0) & (df['Q1'] == 1) )| ( (df['Q2notcons'] == 0) & (df['Q2'] == 1) ) | 
                ((df['Q3notcons'] == 0) & (df['Q3'] == 1) )| ( (df['Q4notcons'] == 0) & (df['Q4'] == 1) )  |
                ((df['Q5notcons'] == 0) & (df['Q5'] == 1) )| ( (df['Q6notcons'] == 0) & (df['Q6'] == 1) )
                )
for i in ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6']:
    print (df[df['FBcons'] == 1 ] [i].value_counts())

1    207
0     27
Name: Q1, dtype: int64
1    205
0     29
Name: Q2, dtype: int64
1    220
0     14
Name: Q3, dtype: int64
1    219
0     15
Name: Q4, dtype: int64
1    223
0     11
Name: Q5, dtype: int64
1    210
0     24
Name: Q6, dtype: int64


In [27]:
df['MScons'] = ( ((df['Q1notcons'] == 0) & (df['Q1'] == 0) )| ( (df['Q2notcons'] == 0) & (df['Q2'] == 0) ) | 
                ((df['Q3notcons'] == 0) & (df['Q3'] == 0) )| ( (df['Q4notcons'] == 0) & (df['Q4'] == 0) )  |
                ((df['Q5notcons'] == 0) & (df['Q5'] == 0) )| ( (df['Q6notcons'] == 0) & (df['Q6'] == 0) )
                )
for i in ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6']:
    print (df[df['MScons'] == 1 ] [i].value_counts())

0    20
1    16
Name: Q1, dtype: int64
0    28
1     8
Name: Q2, dtype: int64
0    23
1    13
Name: Q3, dtype: int64
0    22
1    14
Name: Q4, dtype: int64
0    23
1    13
Name: Q5, dtype: int64
0    27
1     9
Name: Q6, dtype: int64


In [34]:
df['MScons'].value_counts()

False    292
True      36
Name: MScons, dtype: int64

In [36]:
df['MSnotcons'] = ( ((df['Q1notcons'] == 1) & (df['Q1'] == 0) ) | ((df['Q1notcons'] == 1) & (df['Q1t'] == 0) ) | 
                   ( (df['Q2notcons'] == 1) & (df['Q2'] == 0) ) | ( (df['Q2notcons'] == 1) & (df['Q2t'] == 0) ) |
                   ((df['Q3notcons'] == 1) & (df['Q3'] == 0) ) | ((df['Q3notcons'] == 1) & (df['Q3t'] == 0) ) |
                   ( (df['Q4notcons'] == 1) & (df['Q4'] == 0) ) | ( (df['Q4notcons'] == 1) & (df['Q4t'] == 0) ) |
                   ((df['Q5notcons'] == 1) & (df['Q5'] == 0) ) | ((df['Q5notcons'] == 1) & (df['Q5t'] == 0) ) |
                   ( (df['Q6notcons'] == 1) & (df['Q6'] == 0) ) | ( (df['Q6notcons'] == 1) & (df['Q6t'] == 0) )
                   
                )
df['MSnotcons'].value_counts()

False    299
True      29
Name: MSnotcons, dtype: int64

In [44]:
#df[df['FBcons'] == 1 ] ['gender'].value_counts()
#df[df['MScons'] == 1 ] ['gender'].value_counts()
df[(df['cons'] == 1)] ['gender'].value_counts()

3    142
2    123
4      5
Name: gender, dtype: int64

In [37]:
group1 = df [df ['cons'] == 1]['kadin']
group2 = df [df ['cons'] == 0]['kadin']
(statistic, p_value) = rank(group1, group2)

Mann-Whitney U statistic: 6413.0  ----- p_value: 0.011966508923872667


In [38]:
consistency_rank = {}

In [39]:
consistency_rank['Test Question Consistency across GENDER'] = {'Statistic': statistic, 'p-value': p_value}

In [40]:
consistency_rank['Test Question Consistency across GENDER']['#consistent male'] = group1.value_counts().to_dict()[0]
consistency_rank['Test Question Consistency across GENDER']['#consistent female'] = group1.value_counts().to_dict()[1]
consistency_rank['Test Question Consistency across GENDER']['#inconsistent male'] = group2.value_counts().to_dict()[0]
consistency_rank['Test Question Consistency across GENDER']['#inconsistent female'] = group2.value_counts().to_dict()[1]


In [41]:
consistency_rank

{'Test Question Consistency across GENDER': {'Statistic': 6413.0,
  'p-value': 0.011966508923872667,
  '#consistent male': 128,
  '#consistent female': 142,
  '#inconsistent male': 17,
  '#inconsistent female': 41}}

In [51]:
df_cons = df[ df['cons'] == 1]

### Chapter 2.2 Consistency on a choice of a rule - FB or MS

In [18]:
df[((df['Qtotal'] == 6) | (df['Qtotal'] == 0) )] ['kadin'].value_counts()

0    94
1    89
Name: kadin, dtype: int64

In [19]:
df [ 'Extremeconsistence' ] =   ((df['Qtotal'] == 6) | (df['Qtotal'] == 0) ).astype(int)
df [ 'Extremeconsistence' ].value_counts()

1    183
0    145
Name: Extremeconsistence, dtype: int64

In [30]:
df[(df['Extremeconsistence'] == 1) & (df['cons'] == 1)]['gender'].value_counts()

2    89
3    84
4     4
Name: gender, dtype: int64

In [282]:
group1 = df [df [ 'Extremeconsistence' ] == 1]['kadin']
group2 = df [df [ 'Extremeconsistence' ] == 0]['kadin']
(statistic, p_value) = rank(group1, group2)

group_1 value counts:
0    94
1    89
Name: kadin, dtype: int64
---
group_2 value counts:
0    51
1    94
Name: kadin, dtype: int64
---
Mann-Whitney U statistic: 11119.0  ----- p_value: 0.003415350018971655


In [283]:
consistency_rank['Extreme Consistency (choosing only FB or MS) across GENDER'] = {'Statistic': statistic, 'p-value': p_value}

In [284]:
consistency_rank['Extreme Consistency (choosing only FB or MS) across GENDER'] ['#consistent male'] = group1.value_counts().to_dict()[0]
consistency_rank['Extreme Consistency (choosing only FB or MS) across GENDER'] ['#consistent female'] = group1.value_counts().to_dict()[1]
consistency_rank['Extreme Consistency (choosing only FB or MS) across GENDER'] ['#inconsistent male'] = group2.value_counts().to_dict()[0]
consistency_rank['Extreme Consistency (choosing only FB or MS) across GENDER'] ['#inconsistent female'] = group2.value_counts().to_dict()[1]


In [24]:
df [ 'mildconsistency' ] =   ((df['Qtotal'] >= 5) | (df['Qtotal'] <= 1) )
df [ 'mildconsistency' ] = df [ 'mildconsistency' ].astype(int)
group1 = df [df [ 'mildconsistency' ] == 1]['kadin']
group2 = df [df [ 'mildconsistency' ] == 0]['kadin']
(statistic, p_value) = rank(group1, group2)

Mann-Whitney U statistic: 8876.0  ----- p_value: 0.1000088369718129


In [33]:
df[(df['mildconsistency'] == 1) & (df['cons'] == 1)]['gender'].value_counts()

3    111
2    104
4      5
Name: gender, dtype: int64

In [462]:
df [df['cons'] == 1]['gender'].value_counts()

3    142
2    123
4      5
Name: gender, dtype: int64

In [461]:
df [((df [ 'MScons' ] == 1) & (df[ 'Extremeconsistence' ] == 1))]['gender'].value_counts()

3    6
2    2
4    1
Name: gender, dtype: int64

In [289]:
consistency_rank['Mild Consistency (choosing FB or MS at min 5/6 questions) across GENDER'] = {'Statistic': statistic, 'p-value': p_value}

In [290]:
consistency_rank['Mild Consistency (choosing FB or MS at min 5/6 questions) across GENDER'] ['#consistent male'] = group1.value_counts().to_dict()[0]
consistency_rank['Mild Consistency (choosing FB or MS at min 5/6 questions) across GENDER'] ['#consistent female'] = group1.value_counts().to_dict()[1]
consistency_rank['Mild Consistency (choosing FB or MS at min 5/6 questions) across GENDER'] ['#inconsistent male'] = group2.value_counts().to_dict()[0]
consistency_rank['Mild Consistency (choosing FB or MS at min 5/6 questions) across GENDER'] ['#inconsistent female'] = group2.value_counts().to_dict()[1]


### Chapter 2.3 Full Consistency - always choosing FB

In [202]:
df[df['Qtotal'] == 6 ] ['kadin'].value_counts()

0    91
1    82
Name: kadin, dtype: int64

In [293]:
group1 = df [df ['Qtotal'] == 6]['kadin']
group2 = df [df ['Qtotal'] < 6]['kadin']
(statistic, p_value) = rank(group1, group2)

group_1 value counts:
0    91
1    82
Name: kadin, dtype: int64
---
group_2 value counts:
0     54
1    101
Name: kadin, dtype: int64
---
Mann-Whitney U statistic: 11026.0  ----- p_value: 0.0012455764317700795


In [294]:
consistency_rank['Full Consistency (always choosing FB) across GENDER'] = {'Statistic': statistic, 'p-value': p_value}

In [295]:
consistency_rank['Full Consistency (always choosing FB) across GENDER'] ['#consistent male'] = group1.value_counts().to_dict()[0]
consistency_rank['Full Consistency (always choosing FB) across GENDER'] ['#consistent female'] = group1.value_counts().to_dict()[1]
consistency_rank['Full Consistency (always choosing FB) across GENDER'] ['#inconsistent male'] = group2.value_counts().to_dict()[0]
consistency_rank['Full Consistency (always choosing FB) across GENDER'] ['#inconsistent female'] = group2.value_counts().to_dict()[1]


In [296]:
pd.DataFrame.from_dict({(i): consistency_rank[i] 
                           for i in consistency_rank.keys() 
                           },
                       orient='index')

Unnamed: 0,Statistic,p-value,#consistent male,#consistent female,#inconsistent male,#inconsistent female
Test Question Consistency across GENDER,6413.0,0.011967,128,142,17,41
Extreme Consistency (choosing only FB or MS) across GENDER,11119.0,0.003415,94,89,51,94
Mild Consistency (choosing FB or MS at min 5/6 questions) across GENDER,8876.0,0.100009,116,132,29,51
Full Consistency (always choosing FB) across GENDER,11026.0,0.001246,91,82,54,101


## Chapter 3 Preliminary Ranksum tests to compare selection of FB and MS across questions

In [49]:
# GENERAL
# two sided
# Initialize a dictionary to store the test results

pairwise_results = {}

for i in [1,2,3,4,5,6]:
    for j in range(i + 1, 7):
        group1 = df['Q'+str(i)]
        group2 = df['Q'+str(j)]
        
        # Perform the Mann-Whitney U test
        statistic, p_value = mannwhitneyu(group1, group2, alternative='two-sided')
    
        
        # Store the results in the pairwise_results dictionary
        key = f"{group1.name} vs {group2.name}"
        pairwise_results[key] = {'statistic': statistic, 'p-value': p_value.round(3)}
pairwise_results
#for key, value in pairwise_results.items():
#    p_value = '{:.20f}'.format(float(value['p-value']))
#    value['p-value'] = p_value
#    print(key, value)
#    print()


{'Q1 vs Q2': {'statistic': 56580.0, 'p-value': 0.125},
 'Q1 vs Q3': {'statistic': 51168.0, 'p-value': 0.117},
 'Q1 vs Q4': {'statistic': 50840.0, 'p-value': 0.076},
 'Q1 vs Q5': {'statistic': 50512.0, 'p-value': 0.048},
 'Q1 vs Q6': {'statistic': 51988.0, 'p-value': 0.289},
 'Q2 vs Q3': {'statistic': 48380.0, 'p-value': 0.002},
 'Q2 vs Q4': {'statistic': 48052.0, 'p-value': 0.001},
 'Q2 vs Q5': {'statistic': 47724.0, 'p-value': 0.0},
 'Q2 vs Q6': {'statistic': 49200.0, 'p-value': 0.01},
 'Q3 vs Q4': {'statistic': 53464.0, 'p-value': 0.836},
 'Q3 vs Q5': {'statistic': 53136.0, 'p-value': 0.676},
 'Q3 vs Q6': {'statistic': 54612.0, 'p-value': 0.613},
 'Q4 vs Q5': {'statistic': 53464.0, 'p-value': 0.834},
 'Q4 vs Q6': {'statistic': 54940.0, 'p-value': 0.476},
 'Q5 vs Q6': {'statistic': 55268.0, 'p-value': 0.356}}

In [52]:
#CONSISTENT ONES (test question consistency)
# two sided
# Initialize a dictionary to store the test results

pairwise_results = {}


for i in [1,2,3,4,5,6]:
    for j in range(i + 1, 7):
        group1 = df_cons['Q'+str(i)]
        group2 = df_cons['Q'+str(j)]
        
        # Perform the Mann-Whitney U test
        statistic, p_value = mannwhitneyu(group1, group2, alternative='two-sided')
        
        # Store the results in the pairwise_results dictionary
        key = f"{group1.name} vs {group2.name}"
        pairwise_results[key] = {'Statistic': statistic, 'p-value': p_value.round(3)}

pairwise_results

{'Q1 vs Q2': {'Statistic': 37800.0, 'p-value': 0.276},
 'Q1 vs Q3': {'Statistic': 35100.0, 'p-value': 0.236},
 'Q1 vs Q4': {'Statistic': 35100.0, 'p-value': 0.236},
 'Q1 vs Q5': {'Statistic': 34695.0, 'p-value': 0.118},
 'Q1 vs Q6': {'Statistic': 36990.0, 'p-value': 0.656},
 'Q2 vs Q3': {'Statistic': 33750.0, 'p-value': 0.023},
 'Q2 vs Q4': {'Statistic': 33750.0, 'p-value': 0.023},
 'Q2 vs Q5': {'Statistic': 33345.0, 'p-value': 0.008},
 'Q2 vs Q6': {'Statistic': 35640.0, 'p-value': 0.519},
 'Q3 vs Q4': {'Statistic': 36450.0, 'p-value': 1.0},
 'Q3 vs Q5': {'Statistic': 36045.0, 'p-value': 0.703},
 'Q3 vs Q6': {'Statistic': 38340.0, 'p-value': 0.103},
 'Q4 vs Q5': {'Statistic': 36045.0, 'p-value': 0.703},
 'Q4 vs Q6': {'Statistic': 38340.0, 'p-value': 0.103},
 'Q5 vs Q6': {'Statistic': 38745.0, 'p-value': 0.045}}

In [211]:
pd.DataFrame.from_dict({(i): pairwise_results[i] 
                           for i in pairwise_results.keys() 
                           },
                       orient='index')

Unnamed: 0,Statistic,p-value
Q1 vs Q2,37800.0,0.275772
Q1 vs Q3,35100.0,0.235701
Q1 vs Q4,35100.0,0.235701
Q1 vs Q5,34695.0,0.117624
Q1 vs Q6,36990.0,0.655751
Q2 vs Q3,33750.0,0.023371
Q2 vs Q4,33750.0,0.023371
Q2 vs Q5,33345.0,0.00826
Q2 vs Q6,35640.0,0.51925
Q3 vs Q4,36450.0,1.0


## Chapter 4: previously posted questions

### Chapter 4.1: Effect of delta on MS

#delta is defined as follows: difference of position of (FB winner) in two profiles -  difference of position of (MS winner) in two profiles
deltas:
Q1: 1
Q2: 4
Q3: 1
Q4: 1
Q5: 1
Q6: 1

FB vs Borda
Q1: FB = Borda (Example 1: a is both Borda and FB winner)
Q2: FB = Borda (Example 31: a is both Borda and FB winner)
Q3: FB is in Borda (Example 61: a and b are Borda; b is FB winner)
Q4: FB = Borda (Example 91: d is both Borda and FB winner)
Q5: FB != Borda (Example 121: a Borda and d is FB winner)
Q6: FB = Borda (Example 151: a is both Borda and FB winner)

In [394]:
# GENERAL
# two sided
group1 = df['Q2']
group2 = df['Q1']
group2 = group2.append([df['Q3'], df['Q4'], df['Q5'],df['Q6']])
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2)

group_1 value counts:
0     90
1    238
Name: Q2, dtype: int64
---
group_2 value counts:
0     300
1    1340
dtype: int64
---
Mann-Whitney U statistic: 244360.0  ----- p_value: 0.00014920007591068303


  group2 = group2.append([df['Q3'], df['Q4'], df['Q5'],df['Q6']])


In [395]:
hypotheses = {}

In [396]:
hypotheses['Does high δ favor MS? Q2 vs others'] = {'Statistic': statistic, 'p-value': p_value}

In [399]:
# CONSISTENT
# two sided
group1 = df_cons['Q2']
group2 = df_cons['Q1']
group2 = group2.append([df_cons['Q3'], df_cons['Q4'], df_cons['Q5'],df_cons['Q6']])
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2)
hypotheses['Does high δ favor MS? Q2 vs others. CONSISTENT'] = {'Statistic': statistic, 'p-value': p_value}

group_1 value counts:
0     57
1    213
Name: Q2, dtype: int64
---
group_2 value counts:
0     206
1    1144
dtype: int64
---
Mann-Whitney U statistic: 171585.0  ----- p_value: 0.017338296351637906


  group2 = group2.append([df_cons['Q3'], df_cons['Q4'], df_cons['Q5'],df_cons['Q6']])


In [400]:
# Q2 vs Borda winner FB's (keep Q5 out)
# two sided
group1 = df['Q2']
group2 = df['Q1']
group2 = group2.append([df['Q3'], df['Q4'], df['Q6']])
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2)
hypotheses['Does high δ favor MS? Q2 vs Borda winner FB others (keep Q5 out)'] = {'Statistic': statistic, 'p-value': p_value}

group_1 value counts:
0     90
1    238
Name: Q2, dtype: int64
---
group_2 value counts:
0     247
1    1065
dtype: int64
---
Mann-Whitney U statistic: 196636.0  ----- p_value: 0.0005569681909813311


  group2 = group2.append([df['Q3'], df['Q4'], df['Q6']])


In [401]:
# Q2 vs Borda winner FB's (keep Q5 out)
#CONSISTENT
# two sided
group1 = df_cons['Q2']
group2 = df_cons['Q1']
group2 = group2.append([df_cons['Q3'], df_cons['Q4'], df_cons['Q6']])
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2)
hypotheses['Does high δ favor MS? Q2 vs Borda winner FB others (keep Q5 out). CONSISTENT'] = {'Statistic': statistic, 'p-value': p_value}

group_1 value counts:
0     57
1    213
Name: Q2, dtype: int64
---
group_2 value counts:
0    172
1    908
dtype: int64
---
Mann-Whitney U statistic: 138240.0  ----- p_value: 0.04239517997991249


  group2 = group2.append([df_cons['Q3'], df_cons['Q4'], df_cons['Q6']])


In [403]:
# Q2 vs non-Borda winner FB's (only  Q5)
# two sided
group1 = df['Q2']
group2 = df['Q5']
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2)
hypotheses['Does high δ favor MS? Q2 vs nonBorda winner FBs (only Q5))'] = {'Statistic': statistic, 'p-value': p_value}

group_1 value counts:
0     90
1    238
Name: Q2, dtype: int64
---
group_2 value counts:
0     53
1    275
Name: Q5, dtype: int64
---
Mann-Whitney U statistic: 47724.0  ----- p_value: 0.0004724531206650899


In [404]:
# Q2 vs non-Borda winner FB's (only  Q5)
# CONSISTENT
# two sided
group1 = df_cons['Q2']
group2 = df_cons['Q5']
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2)
hypotheses['Does high δ favor MS? Q2 vs nonBorda winner FBs (only Q5)). CONSISTENT'] = {'Statistic': statistic, 'p-value': p_value}

group_1 value counts:
0     57
1    213
Name: Q2, dtype: int64
---
group_2 value counts:
0     34
1    236
Name: Q5, dtype: int64
---
Mann-Whitney U statistic: 33345.0  ----- p_value: 0.008260237199382596


### Chapter 4.1: Top ranked FBS

In [406]:
# FB is top at Q1 Q2 Q6 vs others
group1 = df['Q1']
group1 = group1.append([df['Q2'], df['Q6']])
group2 = df['Q3']
group2 = group2.append([df['Q4'], df['Q5']])
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2, 'less')
hypotheses['FB winner is selected more if NOT top ranked (Q1 Q2 Q6 vs Q3 Q4 Q5)'] = {'Statistic': statistic, 'p-value': p_value}

group_1 value counts:
0    225
1    759
dtype: int64
---
group_2 value counts:
0    165
1    819
dtype: int64
---
Mann-Whitney U statistic: 454608.0  ----- p_value: 0.00034687841611440493


  group1 = group1.append([df['Q2'], df['Q6']])
  group2 = group2.append([df['Q4'], df['Q5']])


In [408]:
# FB is top at Q1 Q2 Q6 vs others
#CONSISTENT
group1 = df_cons['Q1']
group1 = group1.append([df_cons['Q2'], df_cons['Q6']])
group2 = df_cons['Q3']
group2 = group2.append([df_cons['Q4'], df_cons['Q5']])
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2, 'less')
hypotheses['FB winner is selected more if NOT top ranked (Q1 Q2 Q6 vs Q3 Q4 Q5). CONSISTENT'] = {'Statistic': statistic, 'p-value': p_value}

group_1 value counts:
0    155
1    655
dtype: int64
---
group_2 value counts:
0    108
1    702
dtype: int64
---
Mann-Whitney U statistic: 309015.0  ----- p_value: 0.0007740879939709764


  group1 = group1.append([df_cons['Q2'], df_cons['Q6']])
  group2 = group2.append([df_cons['Q4'], df_cons['Q5']])


In [410]:
pd.DataFrame.from_dict({(i): hypotheses[i] 
                           for i in hypotheses.keys() 
                           },
                       orient='index')

Unnamed: 0,Statistic,p-value
Does high δ favor MS? Q2 vs others,244360.0,0.000149
Does high δ favor MS? Q2 vs others. CONSISTENT,171585.0,0.017338
Does high δ favor MS? Q2 vs Borda winner FB others (keep Q5 out),196636.0,0.000557
Does high δ favor MS? Q2 vs Borda winner FB others (keep Q5 out). CONSISTENT,138240.0,0.042395
Does high δ favor MS? Q2 vs nonBorda winner FBs (only Q5)),47724.0,0.000472
Does high δ favor MS? Q2 vs nonBorda winner FBs (only Q5)). CONSISTENT,33345.0,0.00826
FB winner is selected more if NOT top ranked (Q1 Q2 Q6 vs Q3 Q4 Q5),454608.0,0.000347
FB winner is selected more if NOT top ranked (Q1 Q2 Q6 vs Q3 Q4 Q5). CONSISTENT,309015.0,0.000774


### Chapter 4.2: Does the best position of the FB winner matter?

Best positions of FB winner
Q1: 1
Q2: 1
Q3: 2
Q4: 4
Q5: 4
Q6: 1

In [416]:
# Q1 Q2 Q6 vs Q3 
# two sided
group1 = df['Q1']
group2 = df['Q3']
group1 = group1.append([df['Q2'], df['Q6']])
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2, 'less', True)
hypotheses['Does the best position of the FB winner matter? (Q1 Q2 Q6 choose more MS than Q3 )'] = {'Statistic': statistic, 'p-value': p_value}


group_1 value counts:
0    0.228659
1    0.771341
dtype: float64
---
group_2 value counts:
0    0.17378
1    0.82622
Name: Q3, dtype: float64
---
Mann-Whitney U statistic: 152520.0  ----- p_value: 0.018110644311367174


  group1 = group1.append([df['Q2'], df['Q6']])


In [419]:
# Q1 Q2 Q6 vs Q3 
# Consistent
# two sided
group1 = df_cons['Q1']
group2 = df_cons['Q3']
group1 = group1.append([df_cons['Q2'], df_cons['Q6']])
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2, 'less', True)
hypotheses['Does the best position of the FB winner matter? (Q1 Q2 Q6 choose more MS than Q3 ). Consistent'] = {'Statistic': statistic, 'p-value': p_value}


group_1 value counts:
0    0.191358
1    0.808642
dtype: float64
---
group_2 value counts:
0    0.137037
1    0.862963
Name: Q3, dtype: float64
---
Mann-Whitney U statistic: 103410.0  ----- p_value: 0.021653662031926486


  group1 = group1.append([df_cons['Q2'], df_cons['Q6']])


In [422]:
# Q1 Q2 Q6 vs Q4 Q5 
# two sided
group1 = df['Q1']
group2 = df['Q4']
group1 = group1.append([df['Q2'], df['Q6']])
group2 = group2.append([df['Q5']])
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2, 'less', True)
hypotheses['Does the best position of the FB winner matter? (Q1 Q2 Q6 choose more MS than Q4 Q5 )'] = {'Statistic': statistic, 'p-value': p_value}


group_1 value counts:
0    0.228659
1    0.771341
dtype: float64
---
group_2 value counts:
0    0.164634
1    0.835366
dtype: float64
---
Mann-Whitney U statistic: 302088.0  ----- p_value: 0.0007982116160387253


  group1 = group1.append([df['Q2'], df['Q6']])
  group2 = group2.append([df['Q5']])


In [424]:
# Q1 Q2 Q6 vs Q4 Q5 
# Consistent
# two sided
group1 = df_cons['Q1']
group2 = df_cons['Q4']
group1 = group1.append([df_cons['Q2'], df_cons['Q6']])
group2 = group2.append([df_cons['Q5']])
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2, 'less', True)
hypotheses['Does the best position of the FB winner matter? (Q1 Q2 Q6 choose more MS than Q4 Q5 ) CONSISTENT'] = {'Statistic': statistic, 'p-value': p_value}


group_1 value counts:
0    0.191358
1    0.808642
dtype: float64
---
group_2 value counts:
0    0.131481
1    0.868519
dtype: float64
---
Mann-Whitney U statistic: 205605.0  ----- p_value: 0.0019528470390977094


  group1 = group1.append([df_cons['Q2'], df_cons['Q6']])
  group2 = group2.append([df_cons['Q5']])


In [427]:
# Q3 vs Q4 Q5 
# two sided
group1 = df['Q3']
group2 = df['Q4']
group2 = group2.append([df['Q5']])
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2, 'less', True)
hypotheses['Does the best position of the FB winner matter? (Q3 choose more MS than Q4 Q5 )'] = {'Statistic': statistic, 'p-value': p_value}


group_1 value counts:
0    0.17378
1    0.82622
Name: Q3, dtype: float64
---
group_2 value counts:
0    0.164634
1    0.835366
dtype: float64
---
Mann-Whitney U statistic: 106600.0  ----- p_value: 0.3588006098398875


  group2 = group2.append([df['Q5']])


In [428]:
# Q3 vs Q4 Q5 
#CONSISTENT
# two sided
group1 = df_cons['Q3']
group2 = df_cons['Q4']
group2 = group2.append([df_cons['Q5']])
# Perform the Mann-Whitney U test
(statistic, p_value) = rank(group1, group2, 'less', True)
hypotheses['Does the best position of the FB winner matter? (Q3 choose more MS than Q4 Q5 ) CONSISTENT'] = {'Statistic': statistic, 'p-value': p_value}


group_1 value counts:
0    0.137037
1    0.862963
Name: Q3, dtype: float64
---
group_2 value counts:
0    0.131481
1    0.868519
dtype: float64
---
Mann-Whitney U statistic: 72495.0  ----- p_value: 0.4133801044366974


  group2 = group2.append([df_cons['Q5']])


In [430]:
pd.DataFrame.from_dict({(i): hypotheses[i] 
                           for i in hypotheses.keys() 
                           },
                       orient='index')

Unnamed: 0,Statistic,p-value
Does high δ favor MS? Q2 vs others,244360.0,0.000149
Does high δ favor MS? Q2 vs others. CONSISTENT,171585.0,0.017338
Does high δ favor MS? Q2 vs Borda winner FB others (keep Q5 out),196636.0,0.000557
Does high δ favor MS? Q2 vs Borda winner FB others (keep Q5 out). CONSISTENT,138240.0,0.042395
Does high δ favor MS? Q2 vs nonBorda winner FBs (only Q5)),47724.0,0.000472
Does high δ favor MS? Q2 vs nonBorda winner FBs (only Q5)). CONSISTENT,33345.0,0.00826
FB winner is selected more if NOT top ranked (Q1 Q2 Q6 vs Q3 Q4 Q5),454608.0,0.000347
FB winner is selected more if NOT top ranked (Q1 Q2 Q6 vs Q3 Q4 Q5). CONSISTENT,309015.0,0.000774
Does the best position of the FB winner matter? (Q1 Q2 Q6 choose more MS than Q3 ),152520.0,0.018111
Does the best position of the FB winner matter? (Q1 Q2 Q6 choose more MS than Q3 ). Consistent,103410.0,0.021654


### Chapter 4.3: Does showing profiles in favor of MS first favor MS choices in other profiles? 

If MS > FB profile is shown first then Qxa is not nan for all Qx's (x being 1 2 3 4 5 6) 

In [433]:
df[~df['Q1A'].isnull()]

Unnamed: 0,StartDate,EndDate,Status,IPAddress,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,RecipientLastName,...,Q3notcons,Q4notcons,Q5notcons,Q6notcons,cons,FBcons,MScons,kadin,Extremeconsistence,mildconsistency
3,2023-05-24 10:31:54,2023-05-24 10:48:38,0,194.27.149.203,100,1004,1,2023-05-24 10:48:39.617000,R_1Q4wPzBTdeQj4Zw,,...,0.0,,,,1,True,False,0,0,1
4,2023-05-24 10:18:16,2023-05-24 11:06:15,0,194.27.149.203,100,2878,1,2023-05-24 11:06:15.802000,R_svgMcUe2Vjv8XND,,...,,,,,1,True,False,1,0,0
5,2023-05-24 10:52:49,2023-05-24 11:30:33,0,194.27.149.203,100,2263,1,2023-05-24 11:30:34.639000,R_elAJ89aUk8bc2pr,,...,,0.0,,,1,True,False,0,1,1
8,2023-05-24 10:20:20,2023-05-24 11:40:46,0,194.27.149.203,100,4825,1,2023-05-24 11:40:47.219000,R_3adibz2rx1YskYF,,...,,,,,1,True,False,0,1,1
16,2023-05-24 11:34:43,2023-05-24 12:33:34,0,194.27.149.203,100,3531,1,2023-05-24 12:33:35.571000,R_2Yupl7qcAMDZwx4,,...,,,,,1,True,False,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
314,2023-07-14 20:35:49,2023-07-14 20:44:02,0,88.246.73.93,100,492,1,2023-07-14 20:44:02.727000,R_1kHzwkbWkkCsHES,,...,0.0,,,,1,True,False,1,0,1
315,2023-07-14 21:27:24,2023-07-14 21:38:55,0,78.180.62.248,100,691,1,2023-07-14 21:38:56.641000,R_2ruNqTwUibARVHo,,...,,,,,1,True,False,1,1,1
317,2023-07-14 22:29:55,2023-07-14 22:33:59,0,94.235.123.249,82,244,0,2023-07-14 23:34:08.785000,R_30bMGUzpCAifFtj,,...,,0.0,,,1,True,False,1,0,1
318,2023-07-14 22:22:35,2023-07-14 22:44:15,0,77.67.190.133,82,1299,0,2023-07-14 23:44:16.359000,R_rfMM5z2nCOKPqBr,,...,,,,0.0,1,True,False,1,1,1


In [441]:
for i in range( 1, 7):
    group1 = df[df['Q'+str(i) + 'A' ].isnull()] ['Q'+str(i)]
    group2 = df[~df['Q'+str(i) + 'A'].isnull()] ['Q'+str(i)]
    # Perform the Mann-Whitney U test
    print ('Q'+str(i))
    (statistic, p_value) = rank(group1, group2, 'two-sided', True)
    hypotheses['Does showing profiles in favor of MS first favor MS choices in other profiles? Q' + str(i)] = {'Statistic': statistic, 'p-value': p_value}


Q1
group_1 value counts:
0    0.254658
1    0.745342
Name: Q1, dtype: float64
---
group_2 value counts:
0    0.191617
1    0.808383
Name: Q1, dtype: float64
---
Mann-Whitney U statistic: 12596.0  ----- p_value: 0.17091932986115288
Q2
group_1 value counts:
0    0.305389
1    0.694611
Name: Q2, dtype: float64
---
group_2 value counts:
0    0.242236
1    0.757764
Name: Q2, dtype: float64
---
Mann-Whitney U statistic: 12594.5  ----- p_value: 0.2009963442942263
Q3
group_1 value counts:
0    0.144654
1    0.855346
Name: Q3, dtype: float64
---
group_2 value counts:
0    0.201183
1    0.798817
Name: Q3, dtype: float64
---
Mann-Whitney U statistic: 14195.0  ----- p_value: 0.17786630396849923
Q4
group_1 value counts:
0    0.174699
1    0.825301
Name: Q4, dtype: float64
---
group_2 value counts:
0    0.160494
1    0.839506
Name: Q4, dtype: float64
---
Mann-Whitney U statistic: 13255.0  ----- p_value: 0.7316991454068377
Q5
group_1 value counts:
0    0.162651
1    0.837349
Name: Q5, dtype: float64


In [443]:
pd.DataFrame.from_dict({(i): hypotheses[i] 
                           for i in hypotheses.keys() 
                           },
                       orient='index')

Unnamed: 0,Statistic,p-value
Does high δ favor MS? Q2 vs others,244360.0,0.000149
Does high δ favor MS? Q2 vs others. CONSISTENT,171585.0,0.017338
Does high δ favor MS? Q2 vs Borda winner FB others (keep Q5 out),196636.0,0.000557
Does high δ favor MS? Q2 vs Borda winner FB others (keep Q5 out). CONSISTENT,138240.0,0.042395
Does high δ favor MS? Q2 vs nonBorda winner FBs (only Q5)),47724.0,0.000472
Does high δ favor MS? Q2 vs nonBorda winner FBs (only Q5)). CONSISTENT,33345.0,0.00826
FB winner is selected more if NOT top ranked (Q1 Q2 Q6 vs Q3 Q4 Q5),454608.0,0.000347
FB winner is selected more if NOT top ranked (Q1 Q2 Q6 vs Q3 Q4 Q5). CONSISTENT,309015.0,0.000774
Does the best position of the FB winner matter? (Q1 Q2 Q6 choose more MS than Q3 ),152520.0,0.018111
Does the best position of the FB winner matter? (Q1 Q2 Q6 choose more MS than Q3 ). Consistent,103410.0,0.021654


## Chapter 5: Schwarz values

In [14]:
sch = ['Shwarz_1',
 'Shwarz_2',
 'Shwarz_3',
 'Shwarz_4',
 'Shwarz_5',
 'Shwarz_6',
 'Shwarz_7',
 'Shwarz_8',
 'Shwarz_9',
 'Shwarz_10',
 'Shwarz_11',
 'Shwarz_12',
 'Shwarz_13',
 'Shwarz_14',
 'Shwarz_15',
 'Shwarz_16',
 'Shwarz_17',
 'Shwarz_18',
 'Shwarz_19',
 'Shwarz_20']
for s in sch:
    print(df[s].value_counts())

5    113
4     93
2     42
6     38
3     27
1     15
Name: Shwarz_1, dtype: int64
1    116
4     86
2     49
5     40
3     30
6      7
Name: Shwarz_2, dtype: int64
5    165
6     89
4     58
3      9
2      5
1      2
Name: Shwarz_3, dtype: int64
6    132
5    102
4     49
3     23
2     15
1      7
Name: Shwarz_4, dtype: int64
5    127
6    102
4     66
3     21
1      6
2      6
Name: Shwarz_5, dtype: int64
4    111
5     86
6     52
3     48
2     20
1     11
Name: Shwarz_6, dtype: int64
5    106
6     98
4     86
3     26
2     10
1      2
Name: Shwarz_7, dtype: int64
6    119
5    108
4     62
3     32
2      6
1      1
Name: Shwarz_8, dtype: int64
6    96
5    93
4    87
3    32
2    15
1     5
Name: Shwarz_9, dtype: int64
6    96
5    86
4    75
3    39
2    23
1     9
Name: Shwarz_10, dtype: int64
4    83
5    78
6    69
3    51
2    30
1    17
Name: Shwarz_11, dtype: int64
2    87
4    80
3    76
1    55
5    22
6     8
Name: Shwarz_12, dtype: int64
5    152
6     95
4     6

In [23]:
df['schwarz_mean']= df[['Shwarz_1',
 'Shwarz_2',
 'Shwarz_3',
 'Shwarz_4',
 'Shwarz_5',
 'Shwarz_6',
 'Shwarz_7',
 'Shwarz_8',
 'Shwarz_9',
 'Shwarz_10',
 'Shwarz_11',
 'Shwarz_12',
 'Shwarz_13',
 'Shwarz_14',
 'Shwarz_15',
 'Shwarz_16',
 'Shwarz_17',
 'Shwarz_18',
 'Shwarz_19',
 'Shwarz_20']].sum(axis=1) / 20

In [24]:
df['schwarz_mean']

1      3.85
2      4.40
3      5.05
4      4.80
5      3.60
       ... 
324    3.90
325    4.55
326    5.30
327    4.40
328    4.70
Name: schwarz_mean, Length: 328, dtype: float64

In [25]:
df['comformity'] =  df[['Shwarz_1', 'Shwarz_11']].sum(axis=1) / 2
df['tradition'] =  df[['Shwarz_2', 'Shwarz_12']].sum(axis=1) / 2
df['benevolence'] =  df[['Shwarz_3', 'Shwarz_13']].sum(axis=1) / 2
df['universalism'] =  df[['Shwarz_4', 'Shwarz_14']].sum(axis=1) / 2
df['self_direction'] =  df[['Shwarz_5', 'Shwarz_15']].sum(axis=1) / 2
df['stimulation'] =  df[['Shwarz_6', 'Shwarz_16']].sum(axis=1) / 2
df['hedonism'] =  df[['Shwarz_7', 'Shwarz_17']].sum(axis=1) / 2
df['achievement'] =  df[['Shwarz_8', 'Shwarz_18']].sum(axis=1) / 2
df['power'] =  df[['Shwarz_9', 'Shwarz_19']].sum(axis=1) / 2
df['security'] =  df[['Shwarz_10', 'Shwarz_20']].sum(axis=1) / 2

In [28]:
sch_modules = ['comformity','tradition','benevolence','universalism','self_direction',
               'stimulation','hedonism','achievement','power','security'] 

In [51]:
sch_modules_n = ['comformity_n','tradition_n','benevolence_n','universalism_n','self_direction_n',
               'stimulation_n','hedonism_n','achievement_n','power_n','security_n'] 

In [30]:
for  i in sch_modules:
    df[i + '_n'] = df[i]/ df['schwarz_mean']

In [47]:
df_cons = df[ df['cons'] == 1]

In [54]:
for i in sch_modules_n:
    print ( i )
    group1 = df[ i ]
    group2 = df_cons[ i ]
    (statistic, p_value) = rank(group1, group2)

comformity_n
Mann-Whitney U statistic: 44917.0  ----- p_value: 0.762082256500941
tradition_n
Mann-Whitney U statistic: 44070.0  ----- p_value: 0.9206260260067763
benevolence_n
Mann-Whitney U statistic: 42870.0  ----- p_value: 0.5025748415122713
universalism_n
Mann-Whitney U statistic: 44266.5  ----- p_value: 0.9950663049317018
self_direction_n
Mann-Whitney U statistic: 43610.0  ----- p_value: 0.7501438235570844
stimulation_n
Mann-Whitney U statistic: 45028.0  ----- p_value: 0.7221757390506549
hedonism_n
Mann-Whitney U statistic: 43599.5  ----- p_value: 0.7463560443400614
achievement_n
Mann-Whitney U statistic: 44052.0  ----- p_value: 0.9138230915155081
power_n
Mann-Whitney U statistic: 44864.0  ----- p_value: 0.7813689729482873
security_n
Mann-Whitney U statistic: 45169.5  ----- p_value: 0.6724088797859469


In [55]:
df [ 'onlyFB' ] =   ((df['Qtotal'] == 6) ).astype(int)
df [ 'onlyFB' ].value_counts()

1    173
0    155
Name: onlyFB, dtype: int64

In [85]:
df_cons = df[ df['cons'] == 1]

In [112]:
for i in sch_modules_n:
    print ( i )
    group1 = df_cons [df_cons [ 'onlyFB' ] == 1][i]
    group2 = df_cons [df_cons [ 'onlyFB' ] == 0][i]
    (statistic, p_value) = rank(group1, group2, alt = 'greater')


comformity_n
Mann-Whitney U statistic: 8532.5  ----- p_value: 0.5230751787197565
tradition_n
Mann-Whitney U statistic: 8188.5  ----- p_value: 0.7293566298488641
benevolence_n
Mann-Whitney U statistic: 8725.5  ----- p_value: 0.4003657088136178
universalism_n
Mann-Whitney U statistic: 7469.0  ----- p_value: 0.9614340816139657
self_direction_n
Mann-Whitney U statistic: 8471.0  ----- p_value: 0.5622762851801916
stimulation_n
Mann-Whitney U statistic: 7781.5  ----- p_value: 0.8971010594823975
hedonism_n
Mann-Whitney U statistic: 8358.5  ----- p_value: 0.632169467744814
achievement_n
Mann-Whitney U statistic: 9977.0  ----- p_value: 0.01177251789650769
power_n
Mann-Whitney U statistic: 9868.0  ----- p_value: 0.018352568073023025
security_n
Mann-Whitney U statistic: 8208.5  ----- p_value: 0.7186131234788351


In [81]:
# Calculate mean and variance
mean_Qtotal = np.mean(df_cons['Qtotal'])
var_Qtotal = np.var(df_cons['Qtotal'])


# Observed and expected counts
observed_counts = np.bincount(df_cons['Qtotal'])
expected_counts = stats.poisson(mean_Qtotal).pmf(np.arange(len(observed_counts))) * len(df_cons['Qtotal'])

# Perform G-test (log-likelihood ratio test)
g, p_value, dof, expected = stats.chi2_contingency([observed_counts, expected_counts])

print(f"Mean: {mean_Qtotal}, Variance: {var_Qtotal}")
print(f"G-test - G: {g}, p-value: {p_value}")

Mean: 5.025925925925926, Variance: 2.558587105624139
G-test - G: 100.05622830085049, p-value: 2.442428627773659e-19


In [82]:
X = df_cons[['comformity_n','tradition_n','benevolence_n','universalism_n','self_direction_n',
          'stimulation_n','hedonism_n','achievement_n','power_n','security_n']]
y = df_cons['Qtotal']


# Fit the Negative Binomial regression model
nb_model = sm.GLM(y, X, family=sm.families.NegativeBinomial())
nb_results = nb_model.fit()

# Print the summary of the regression results
print(nb_results.summary())
print(result.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 Qtotal   No. Observations:                  270
Model:                            GLM   Df Residuals:                      260
Model Family:        NegativeBinomial   Df Model:                            9
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -730.72
Date:                Thu, 03 Aug 2023   Deviance:                       53.454
Time:                        20:51:03   Pearson chi2:                     22.9
No. Iterations:                     5   Pseudo R-squ. (CS):           0.003406
Covariance Type:            nonrobust                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
comformity_n         0.1138      0.341  

In [113]:
X = df_cons[['comformity_n','tradition_n','benevolence_n','universalism_n','self_direction_n',
          'stimulation_n','hedonism_n','achievement_n','power_n','security_n']]

y = df_cons ['onlyFB']

# Add a constant term to the independent variables (required for statsmodels)
# Fit the logistic regression model
logit_model = sm.Logit(y, X)
logit_results = logit_model.fit()

# Print the summary of the regression results
print(logit_results.summary())


Optimization terminated successfully.
         Current function value: 0.638208
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:                 onlyFB   No. Observations:                  270
Model:                          Logit   Df Residuals:                      260
Method:                           MLE   Df Model:                            9
Date:                Thu, 03 Aug 2023   Pseudo R-squ.:                 0.03734
Time:                        21:21:31   Log-Likelihood:                -172.32
converged:                       True   LL-Null:                       -179.00
Covariance Type:            nonrobust   LLR p-value:                    0.1466
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
comformity_n        -0.1961      0.666     -0.294      0.769      -1.502       1.110
tradition_n

In [87]:
df_cons ['onlyFB'].value_counts()

1    168
0    102
Name: onlyFB, dtype: int64

In [66]:
nan_freq = y.isna().sum()
print("Variables with NaN values and their NaN frequency:")
print(nan_freq[nan_freq > 0])

Variables with NaN values and their NaN frequency:
[]


In [101]:
fig = go.Figure()
for s in sch_modules_n:
    for i in [0, 1]:
        fig.add_trace(go.Box(name= s + ' ' +'ONLY FB = ' + str(i) , y=df_cons[(df_cons["onlyFB"] == i)][s]))
        
            
#fig.update_traces(boxpoints='all', boxmean=True)
fig.update_traces( boxmean=True)

fig.update_layout(title_text='<b>Schwarz values distributions <b>')
    

In [106]:
df_cons[df_cons["onlyFB"] == 1]['universalism_n'].describe()

count    168.000000
mean       1.054560
std        0.220020
min        0.561798
25%        0.897436
50%        1.089109
75%        1.191770
max        1.666667
Name: universalism_n, dtype: float64

In [107]:
df_cons[df_cons["onlyFB"] == 0]['universalism_n'].describe()

count    102.000000
mean       1.100284
std        0.176562
min        0.526316
25%        1.000000
50%        1.111111
75%        1.206593
max        1.527778
Name: universalism_n, dtype: float64