In [1]:
import os
import csv
from scipy.stats import spearmanr

In [2]:
base_path = "../new-results/"
hyp_A_path = base_path + "hypothA/"
hyp_B_path = base_path + "hypothB/"
hyp_0_nice_path = base_path + "hypoth_0/Nice/"
hyp_0_hate_path = base_path + "hypoth_0/Hate/"

In [3]:
nice_subreddits = ["TheGirlSurvivalGuide", "ForeverAlone", "TwoXChromosomes", "depression", "Anxiety", "CasualConversation", "RandomKindness", "fountainpens", "knitting", "Buddhism", "ABraThatFits", "loseit", "history"]
hate_subreddits = ["Physical_Removal","dankmemes","PussyPass", "MensRights", "europeannationalism", "NationalSocialism", "WeissSturm", "Physical_Removal", "DebateAltRight", "WhiteRights", "TheDonald", "uncensorednews", "sjwhate"]

## Hypothesis 0

Let's look at how many times we get something significant, and also how many values we keep

In [13]:
can_retain = []
for file_set in [hyp_0_nice_path, hyp_0_hate_path]:
    print "FILE SET: ", file_set
    files = os.listdir(file_set)
    test_corr = []
    for fName in files:
        if ".csv" not in fName:
            continue
        itm = fName.split("_")
        subreddit = itm[0]
        feature = itm[1]

        with open(file_set+fName, "r") as csvfile:
            reader = csv.reader(csvfile)
            reader.next()
            for line in reader:
                subreddit = line[0]
                feature = line[1]
                accomm = float(line[2])
                p_value = float(line[4])
                retained = int(line[5]) - int(line[6])
        
                if p_value < 0.05 and retained > 500:
                    print subreddit, feature, retained   
                    test_corr.append([p_value, retained])
                    can_retain.append((subreddit, feature))
                
    #test to see if number retained correlated with accomm value     
    X = zip(*test_corr)[0]
    Y = zip(*test_corr)[1]
    print spearmanr(X, Y), len(X)
    print "\n \n \n"

FILE SET:  ../new-results/hypoth_0/Nice/
ABraThatFits ppron 559
ABraThatFits article 559
ABraThatFits conj 559
loseit pronoun 2609
loseit ppron 881
loseit prep 1423
loseit Dic 2609
ForeverAlone pronoun 2254
Buddhism pronoun 1310
Buddhism ppron 715
Buddhism article 1310
Buddhism prep 1091
TwoXChromosomes pronoun 3878
TwoXChromosomes ipron 2817
TwoXChromosomes prep 1604
TwoXChromosomes Dic 4043
CasualConversation ppron 620
CasualConversation Dic 2740
depression pronoun 673
depression ppron 673
depression Dic 673
history Dic 590
SpearmanrResult(correlation=-0.3250296152947949, pvalue=0.13995621497670302) 22

 
 

FILE SET:  ../new-results/hypoth_0/Hate/
DebateAltRight pronoun 2661
DebateAltRight ipron 604
DebateAltRight prep 707
The_Donald Dic 913
MensRights pronoun 864
MensRights ppron 798
MensRights prep 937
MensRights Dic 937
SpearmanrResult(correlation=0.16766767666785626, pvalue=0.69146535164673595) 8

 
 



## Hypothesis A

The amount of accommodation towards a user by users in general is not correlated with historical upvotes. 

Here we only use those occassions where we found that accommodation did exist, AND we retained enough user pairs per subreddit

In [5]:
files = os.listdir(hyp_A_path)
nice_feature_dict = {}
hate_feature_dict = {}

for fName in files:
    itm = fName.split("_")
    subreddit = itm[0]
    feature = itm[1]
    
    with open(hyp_A_path+fName, "rb") as csvfile:
        reader = csv.reader(csvfile)
        reader.next()
        values = list(reader)
        
    if subreddit in nice_subreddits:
        if (subreddit, feature) in can_retain:
            if feature not in nice_feature_dict:
                nice_feature_dict[feature] = []
            nice_feature_dict[feature].extend(values)
        
    elif subreddit in hate_subreddits:
        if (subreddit, feature) in can_retain:
            if feature not in hate_feature_dict:
                hate_feature_dict[feature] = []
            hate_feature_dict[feature].extend(values)
        
    else:
        print subreddit

### Hate

In [6]:
for feature in hate_feature_dict:
    X = zip(*hate_feature_dict[feature])[0]
    Y = zip(*hate_feature_dict[feature])[1]
    print feature, spearmanr(X, Y), len(X)

ppron SpearmanrResult(correlation=-0.019943564307792875, pvalue=0.62299628779880445) 610
differ SpearmanrResult(correlation=0.085931920708998535, pvalue=0.24611377864606962) 184
pronoun SpearmanrResult(correlation=-0.0059434308263881675, pvalue=0.84285736768067787) 1115
ipron SpearmanrResult(correlation=-0.074218815052095655, pvalue=0.13889721775875422) 399
Dic SpearmanrResult(correlation=0.0036764447294804488, pvalue=0.94033404130571707) 417
article SpearmanrResult(correlation=0.045410712540260537, pvalue=0.55770049042956149) 169
conj SpearmanrResult(correlation=0.045776229889798174, pvalue=0.55453093975365153) 169
prep SpearmanrResult(correlation=-0.059851015123887713, pvalue=0.11920979207336999) 679




### Nice

In [7]:
for feature in nice_feature_dict:
    X = zip(*nice_feature_dict[feature])[0]
    Y = zip(*nice_feature_dict[feature])[1]
    
    print feature, spearmanr(X, Y), len(X)

ppron SpearmanrResult(correlation=0.026146965145572954, pvalue=0.25463455190300244) 1900
pronoun SpearmanrResult(correlation=-0.032980465250315727, pvalue=0.028370641869235106) 4418
ipron SpearmanrResult(correlation=0.024244255241664948, pvalue=0.27609684825028663) 2020
Dic SpearmanrResult(correlation=-0.018112371062585658, pvalue=0.18976128913686305) 5243
article SpearmanrResult(correlation=0.078095741021916951, pvalue=0.088761320922540021) 476
conj SpearmanrResult(correlation=0.074662977427487079, pvalue=0.18626347998036952) 315
prep SpearmanrResult(correlation=-0.0025055022011575595, pvalue=0.90155324301987205) 2440


## Hypothesis B

The amount of accommodation between user pairs is not correlated to the pairs’ prior interactions

In [8]:
files = os.listdir(hyp_B_path)
nice_feature_dict = {}
hate_feature_dict = {}

for fName in files:
    if "Physical_Removal" in fName:
        itm = fName.split("_")
        subreddit = "Physical_Removal" 
        feature = itm[2]
    else:
        itm = fName.split("_")
        subreddit = itm[0]
        feature = itm[1]
    values = []
    with open(hyp_B_path+fName, "rb") as csvfile:
        reader = csv.reader(csvfile)
        reader.next()
        for line in reader:
            values.append(line[2:])
        
    if subreddit in nice_subreddits:
        if feature not in nice_feature_dict:
            nice_feature_dict[feature] = []
        nice_feature_dict[feature].extend(values)
        
    elif subreddit in hate_subreddits:
        if feature not in hate_feature_dict:
            hate_feature_dict[feature] = []
        hate_feature_dict[feature].extend(values)
        
    else:
        print subreddit
        

### Hate

In [9]:
for feature in hate_feature_dict:
    X = zip(*hate_feature_dict[feature])[0]
    Y = zip(*hate_feature_dict[feature])[1]
    print feature, spearmanr(X, Y), len(X)

ppron SpearmanrResult(correlation=-0.059045779835806445, pvalue=0.0078360828028494357) 2027
differ SpearmanrResult(correlation=0.042910844929661039, pvalue=0.24852159457285103) 725
tentat SpearmanrResult(correlation=0.05685771898341959, pvalue=0.2175802018823737) 472
pronoun SpearmanrResult(correlation=0.022101590886812853, pvalue=0.11803200669905906) 5003
i SpearmanrResult(correlation=-0.026858089173722448, pvalue=0.74091320460888488) 154
certain SpearmanrResult(correlation=0.10350312902653395, pvalue=0.19850832462886159) 156
discrep SpearmanrResult(correlation=0.060835872627910063, pvalue=0.49513798529782616) 128
we SpearmanrResult(correlation=-0.11627053425637858, pvalue=0.54808187246842932) 29
you SpearmanrResult(correlation=0.038892431789130016, pvalue=0.57333447537731375) 212
quant SpearmanrResult(correlation=-0.010918542177352423, pvalue=0.82959940413866007) 391
they SpearmanrResult(correlation=-0.093656097368635477, pvalue=0.45447557843948994) 66
ipron SpearmanrResult(correlati

### Nice

In [10]:
for feature in nice_feature_dict:
    X = zip(*nice_feature_dict[feature])[0]
    Y = zip(*nice_feature_dict[feature])[1]
    
    print feature, spearmanr(X, Y), len(X)

ppron SpearmanrResult(correlation=-0.03060893386244436, pvalue=0.034728258414860276) 4759
differ SpearmanrResult(correlation=0.059973736875852099, pvalue=0.081626962573456446) 844
tentat SpearmanrResult(correlation=-0.0020734303251507494, pvalue=0.94887010177534448) 959
pronoun SpearmanrResult(correlation=0.011413635694116988, pvalue=0.17697956873224946) 13994
i SpearmanrResult(correlation=-0.056199246082839632, pvalue=0.10015419494279018) 857
certain SpearmanrResult(correlation=-0.07823070042675867, pvalue=0.2511693607478267) 217
discrep SpearmanrResult(correlation=0.034641472104831705, pvalue=0.51175834594447855) 361
we SpearmanrResult(correlation=0.21962409342573727, pvalue=0.41376534557879097) 16
Dic SpearmanrResult(correlation=0.0095675370769240722, pvalue=0.20894995142952152) 17248
quant SpearmanrResult(correlation=-0.019624571883851527, pvalue=0.71922749743190473) 338
shehe SpearmanrResult(correlation=-0.10154882897828892, pvalue=0.65294926514858864) 22
they SpearmanrResult(corr