In [1]:
from scipy import stats

In [2]:
# Community sizes under comparison; the loading cells index this list by the
# line position in each results file.
comm_sizes = [1, 2, 4]
# Not referenced by the visible cells — presumably the random seeds of the
# individual runs (TODO confirm).
seeds = list(range(1, 6))

## Game Success

Read in the Results

In [3]:
# Load the game-success results. The line position in the file selects the
# community size from comm_sizes, and only the text before the first ';' is
# parsed (as comma-separated floats); anything after the ';' is ignored here.
sizes, accuracies = [], []

with open('./Data/Results/accuracies.txt', 'r') as results_file:
    for line_idx, raw_line in enumerate(results_file):
        values_field = raw_line.partition(';')[0]
        for token in values_field.split(','):
            accuracies.append(float(token))
            # Repeat the community size once per value so both lists stay aligned.
            sizes.append(comm_sizes[line_idx])

print('The comm sizes: ', sizes)
print('And their accuracies: ', accuracies)

The comm sizes:  [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4]
And their accuracies:  [0.93856, 0.97503, 0.92774, 0.96633, 0.89976, 0.9125, 0.897315, 0.9189275, 0.906615, 0.9286025, 0.90751375, 0.89303625, 0.898134375, 0.896629375, 0.919498125]


Compute the Correlation Coefficient and P-Value

In [4]:
# Pearson correlation between community size and game success over all values.
corr_coeff, p_value = stats.pearsonr(sizes, accuracies)
print('The correlation coefficient between the community size and the game success is: ', corr_coeff)
print('The respective p-value is: ', p_value)

# Repeat without community size 1. The subset is selected by the size value
# itself rather than by a hard-coded offset, so it stays correct if the number
# of values per community size changes.
sizes_without_1 = [size for size in sizes if size != 1]
accuracies_without_1 = [acc for size, acc in zip(sizes, accuracies) if size != 1]

print('\n \nWhen leaving out comm size 1:')
corr_coeff, p_value = stats.pearsonr(sizes_without_1, accuracies_without_1)
print('The correlation coefficient between the community size and the game success is: ', corr_coeff)
print('The respective p-value is: ', p_value)



The correlation coefficient between the community size and the game success is:  -0.606043532228729
The respective p-value is:  0.016630082857197206

 
When leaving out comm size 1:
The correlation coefficient between the community size and the game success is:  -0.4373192835663251
The respective p-value is:  0.20628101913403765


## Language Convergence

Read in the Results

In [5]:
# Load the language-convergence results. The line position in the file selects
# the community size from comm_sizes, and only the text before the first ';'
# is parsed (as comma-separated floats); anything after the ';' is ignored here.
sizes, lang_conv_scores = [], []

with open('./Data/Results/conv_scores.txt', 'r') as results_file:
    for line_idx, raw_line in enumerate(results_file):
        values_field = raw_line.partition(';')[0]
        for token in values_field.split(','):
            lang_conv_scores.append(float(token))
            # Repeat the community size once per value so both lists stay aligned.
            sizes.append(comm_sizes[line_idx])

print('The comm sizes: ', sizes)
print('And their language convergence scores: ', lang_conv_scores)

The comm sizes:  [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4]
And their language convergence scores:  [1.0, 1.0, 1.0, 1.0, 1.0, 0.41692799999999997, 0.05441199999999999, 0.3771880000000001, 0.18587, 0.19354199999999996, 0.129448, 0.17345866666666668, 0.26265999999999995, 0.3855236666666667, 0.29656899999999997]


Compute the Correlation Coefficient and P-Value

In [6]:
# Pearson correlation between community size and language convergence over
# all values.
corr_coeff, p_value = stats.pearsonr(sizes, lang_conv_scores)
print('The correlation coefficient between the community size and the language convergence is: ', corr_coeff)
print('The respective p-value is: ', p_value)

# Repeat without community size 1. The subset is selected by the size value
# itself rather than by a hard-coded offset, so it stays correct if the number
# of values per community size changes.
sizes_without_1 = [size for size in sizes if size != 1]
lang_conv_scores_without_1 = [
    score for size, score in zip(sizes, lang_conv_scores) if size != 1
]

print('\n \nWhen leaving out comm size 1 (for this metric a rather theoretical score):')
corr_coeff, p_value = stats.pearsonr(sizes_without_1, lang_conv_scores_without_1)
print('The correlation coefficient between the community size and the language convergence is: ', corr_coeff)
print('The respective p-value is: ', p_value)


The correlation coefficient between the community size and the language convergence is:  -0.7281784767533492
The respective p-value is:  0.002082537838360411

 
When leaving out comm size 1 (for this metric a rather theoretical score):
The correlation coefficient between the community size and the language convergence is:  0.01725363989501008
The respective p-value is:  0.9622688961672926


## Topographic Similarity

Read in the Results

In [7]:
# Load the topographic-similarity results. The line position in the file
# selects the community size from comm_sizes, and only the text before the
# first ';' is parsed (as comma-separated floats); anything after the ';' is
# ignored here.
sizes, topographic_similarities = [], []

with open('./Data/Results/comm_top_sims.txt', 'r') as results_file:
    for line_idx, raw_line in enumerate(results_file):
        values_field = raw_line.partition(';')[0]
        for token in values_field.split(','):
            topographic_similarities.append(float(token))
            # Repeat the community size once per value so both lists stay aligned.
            sizes.append(comm_sizes[line_idx])

print('The comm sizes: ', sizes)
print('And their topographic similarities: ', topographic_similarities)

The comm sizes:  [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4]
And their topographic similarities:  [0.2599734489144281, 0.2848150025627295, 0.25690847286098495, 0.2547811642475429, 0.26189206766281786, 0.1963237534172415, 0.22691631601627626, 0.25955827232986983, 0.26029275049657774, 0.21167701033513026, 0.28097985521931546, 0.24388595961050957, 0.2645339923138001, 0.2730743490582893, 0.25215947965321917]


Compute the Correlation Coefficient and P-Value

In [8]:
# Pearson correlation between community size and topographic similarity over
# all values.
corr_coeff, p_value = stats.pearsonr(sizes, topographic_similarities)
print('The correlation coefficient between the community size and the topographic similarity is: ', corr_coeff)
print('The respective p-value is: ', p_value)

# Repeat without community size 1. The subset is selected by the size value
# itself rather than by a hard-coded offset, so it stays correct if the number
# of values per community size changes.
sizes_without_1 = [size for size in sizes if size != 1]
topographic_similarities_without_1 = [
    top_sim for size, top_sim in zip(sizes, topographic_similarities) if size != 1
]

print('\n \nWhen leaving out comm size 1:')
corr_coeff, p_value = stats.pearsonr(sizes_without_1, topographic_similarities_without_1)
print('The correlation coefficient between the community size and the topographic similarity is: ', corr_coeff)
print('The respective p-value is: ', p_value)

The correlation coefficient between the community size and the topographic similarity is:  0.1100749804572744
The respective p-value is:  0.6961491513717756

 
When leaving out comm size 1:
The correlation coefficient between the community size and the topographic similarity is:  0.616154768312512
The respective p-value is:  0.057840284591382074


## Concept Encoding

Read in the Results

In [9]:
# Load the concept-encoding results. The line position in the file selects the
# community size from comm_sizes, and only the text before the first ';' is
# parsed (as comma-separated floats); anything after the ';' is ignored here.
sizes, concept_encoding_scores = [], []

with open('./Data/Results/concept_encoding_scores.txt', 'r') as results_file:
    for line_idx, raw_line in enumerate(results_file):
        values_field = raw_line.partition(';')[0]
        for token in values_field.split(','):
            concept_encoding_scores.append(float(token))
            # Repeat the community size once per value so both lists stay aligned.
            sizes.append(comm_sizes[line_idx])

print('The comm sizes: ', sizes)
print('And their concept encoding scores: ', concept_encoding_scores)

The comm sizes:  [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4]
And their concept encoding scores:  [0.007633333333333333, 0.0097, 0.004366666666666666, 0.0082, 0.004333333333333333, 0.0042833333333333334, 0.004366666666666666, 0.004583333333333333, 0.004016666666666667, 0.0043, 0.0044083333333333335, 0.004308333333333333, 0.004225, 0.00435, 0.0047083333333333335]


In [10]:
# Pearson correlation between community size and concept encoding over all
# values. The printed labels name the concept encoding metric; they previously
# said "topographic similarity", copied over from the preceding section.
corr_coeff, p_value = stats.pearsonr(sizes, concept_encoding_scores)
print('The correlation coefficient between the community size and the concept encoding is: ', corr_coeff)
print('The respective p-value is: ', p_value)

# Repeat without community size 1. The subset is selected by the size value
# itself rather than by a hard-coded offset, so it stays correct if the number
# of values per community size changes.
sizes_without_1 = [size for size in sizes if size != 1]
concept_encoding_scores_without_1 = [
    score for size, score in zip(sizes, concept_encoding_scores) if size != 1
]

print('\n \nWhen leaving out comm size 1:')
corr_coeff, p_value = stats.pearsonr(sizes_without_1, concept_encoding_scores_without_1)
print('The correlation coefficient between the community size and the concept encoding is: ', corr_coeff)
print('The respective p-value is: ', p_value)

The correlation coefficient between the community size and the topographic similarity is:  -0.5039463347413162
The respective p-value is:  0.055443031463381455

 
When leaving out comm size 1:
The correlation coefficient between the community size and the topographic similarity is:  0.25089426449705166
The respective p-value is:  0.4844313147705649
