In [1]:
import csv
import collections

def classify_editor(bias, param):
    if bias > param:
        editor_bias = 'i'
    elif bias < param*-1:
        editor_bias = 'p'
    else:
        editor_bias = 'n'
        
    return editor_bias

In [2]:
# Build dictionary of article biases
sum_biases = {}
csvpath = '/home/michael/school/cprose_research/wp/wp_articles/ipc_article_biases.csv'

with open(csvpath, 'r') as csvfile:
    reader = csv.reader(csvfile)
    next(reader) # Skip first row
    for row in reader:
        username = row[1]
        avg_bias = float(row[2])
        if not username in sum_biases:
            sum_biases[username] = avg_bias
        else:
            sum_biases[username] += avg_bias
            
len(sum_biases.keys())

167119

In [4]:
# Build interactions dictionary
interactions = collections.defaultdict(list) # form editor:[(thread_starter, pos_neg), (previous_editor, pos_neg)]
threshold = 50

# Make sure arranged by article, in chronological order
talk_csvpath = '/home/michael/school/cprose_research/wp/wp-talk/talkpages_articles.csv'

with open(talk_csvpath, 'r') as talkcsv:
    reader = csv.reader(talkcsv)
    next(reader)
    prev_editor = None
    prev_editor_thread = None
    thread_starter = None
    talk_editors = []
    n_arcs = 0
    for row in reader:
        revision_id = row[0]
        thread = row[2]
        editor = row[3]
        if editor not in talk_editors:
            talk_editors.append(editor)
        if editor in sum_biases:
            bias = classify_editor(sum_biases[editor], threshold)
        else: # if an editor doesn't make any article edits default neutral
            bias = 'n'
        if prev_editor != editor:
            if prev_editor_thread == thread:
                
                # Relation to thread starter
                if editor != thread_starter:
                    if bias == thread_starter_bias or bias == 'n' or thread_starter_bias == 'n':
                        interactions[editor].append((thread_starter, 1))
                    else:
                        interactions[editor].append((thread_starter, -1))
                    n_arcs += 1
                    
                # Relation to previous poster
                if bias == prev_editor_bias or bias == 'n' or prev_editor_bias == 'n':
                    interactions[editor].append((prev_editor, 1))
                else:
                    interactions[editor].append((prev_editor, -1))
                n_arcs += 1
                    
            else:
                thread_starter = editor
                thread_starter_bias = bias
                
        prev_editor = editor
        prev_editor_thread = thread
        prev_editor_bias = bias
    
    n_nodes = len(talk_editors)
        
print("Number of nodes: ", n_nodes)
print("Number of arcs: ", n_arcs)

Number of nodes:  5719
Number of arcs:  28401


In [8]:
# Trim the graph
min_connections = 15
trimmed_interactions = {}
n_nodes = 0
n_arcs = 0

for editor in interactions.keys():
    if len(interactions[editor]) >= min_connections:
        trimmed_interactions[editor] = interactions[editor]
        n_nodes += 1
        n_arcs += len(interactions[editor])
        
print("Number of nodes: ", n_nodes)
print("Number of arcs: ", n_arcs)

Number of nodes:  310
Number of arcs:  19717


In [9]:
# Create mappings from editor to editor ID

editor_ids = collections.OrderedDict()
ctr = 1
for editor in list(talk_editors):
    editor_ids[editor] = ctr
    ctr += 1
len(editor_ids)

6252

In [11]:
talk_editors[4]

'Lihaas'

In [17]:
list(editor_ids.keys())[6]

'SmokeyJoe'

In [18]:
interactions['SmokeyJoe']

[('Fuhghettaboutit', -1), ('Fuhghettaboutit', -1)]

In [20]:
# Build alignment matrix for Yu+ 2015 from interactions dictionary
alignment_matrix = []
num_editors = 100
for i in range(num_editors):
    editor = talk_editors[i]
    editor_vec = []
    editor_vec.append(editor_ids[editor]) # add editor id
    for j in range(num_editors):
        other_editor = talk_editors[j]
        if (other_editor, 1) in interactions[editor]: # Not weighting connections
            editor_vec.append(1)
        elif (other_editor, -1) in interactions[editor]: # Not weighting connections
            editor_vec.append(-1)
        else:
            editor_vec.append("")
    alignment_matrix.append(editor_vec)

len(alignment_matrix)

100

In [24]:
# Write input file for Yu+ 2015 cluster code
out_filepath = '/home/michael/school/cprose_research/yu+_2015_killer/wp-talk_threshold50/wp-talk_threshold50_100editors.txt'
with open(out_filepath, 'w') as out:
    for row in alignment_matrix:
        out.write('\t'.join([str(item) for item in row]) + '\n')

In [6]:
# Write undirected DOT file
dotpath = '/home/michael/school/cprose_research/wp/wp-talk/talkpages_undirected.dot'

with open(dotpath, 'w') as dotfile:
    dotfile.write('graph g {\n node [shape="point"];\n')
    for key in interactions.keys():
        dotfile.write('"' + key + '" -- { ')
        for editor, polarity in interactions[key]:
            dotfile.write('"' + editor + '"; ') # no polarity yet
        dotfile.write('}\n')
    dotfile.write('}')

In [5]:
# Write directed DOT file
dotpath = '/home/michael/school/cprose_research/wp/wp-talk/talkpages.dot'

with open(dotpath, 'w') as dotfile:
    dotfile.write('digraph g {\n node [shape="point"];\n')
    for key in interactions.keys():
        dotfile.write('"' + key + '" -> { ')
        for editor, polarity in interactions[key]:
            dotfile.write('"' + editor + '"; ') # no polarity yet
        dotfile.write('}\n')
    dotfile.write('}')

In [52]:
# Build alignment matrix for Yu+ 2015
alignment_matrix = []
for i in range(num_editors):
    editor = list(editor_biases)[i]
    editor_bias = classify_editor(editor_biases[editor], threshold)
    #print(editor, "bias: ", editor_bias)
    editor_vec = []
    #editor_vec.append(editor) # add editor name
    editor_vec.append(editor_ids[editor]) # add editor id
    for j in range(num_editors):
        other_editor = list(editor_biases)[j]
        other_editor_bias = classify_editor(editor_biases[other_editor], threshold)
        #print("\t", other_editor, "bias: ", other_editor_bias)
        if editor_bias == 'i':
            if other_editor_bias == 'i':
                editor_vec.append(1)
            elif other_editor_bias == 'p':
                editor_vec.append(-1)
            else:
                editor_vec.append('')
        elif editor_bias == 'p':
            if other_editor_bias == 'p':
                editor_vec.append(1)
            elif other_editor_bias == 'i':
                editor_vec.append(-1)
            else:
                editor_vec.append('')
        elif editor_bias == 'n':
            editor_vec.append('')
        
    # Add in useless vote
    #editor_vec.append('none')
    
    alignment_matrix.append(editor_vec)

alignment_matrix

[[1, 1, -1, 1, 1, 1, -1, -1, -1, 1, -1],
 [2, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1],
 [3, 1, -1, 1, 1, 1, -1, -1, -1, 1, -1],
 [4, 1, -1, 1, 1, 1, -1, -1, -1, 1, -1],
 [5, 1, -1, 1, 1, 1, -1, -1, -1, 1, -1],
 [6, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1],
 [7, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1],
 [8, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1],
 [9, 1, -1, 1, 1, 1, -1, -1, -1, 1, -1],
 [10, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1]]

In [54]:
# Create editor mappings file
editor_ids_filepath = '/home/michael/school/cprose_research/yu+_2015_killer/wp-talk_editor_mappings.txt'
with open(editor_ids_filepath, 'w') as f:
    editor_id_keys = list(editor_ids.keys())
    for i in range(len(editor_biases)):
        editor = editor_id_keys[i]
        editor_id = editor_ids[editor]
        editor_bias = editor_biases[editor]
        f.write(str(editor_id) + '\t' + editor + '\t' + str(editor_bias) + '\n')