In [1]:
import dask.dataframe as ddf
import pandas as pd
import json
import os
import numpy as np
import hashlib
import re
import pickle
import itertools

In [2]:
# Read the tab-separated, UTF-16 encoded revert table, then use its
# 'Unnamed: 0' column as the row index.
revert_graph_raw = pd.read_csv(
    'result/TCM-group-revert-graph.csv',
    sep="\t",
    encoding='utf-16',
)
group_rv = revert_graph_raw.set_index('Unnamed: 0')

In [3]:
# Convert every column label with int() so the labels can be compared with
# integer group ids in the lookups below.
group_rv.columns = [int(label) for label in group_rv.columns]

In [4]:
# Every group id that appears as a row label or a column label of group_rv.
# The ids are sorted on purpose: itertools.combinations emits pairs in input
# order, so a sorted input guarantees each pair is (smaller, larger). That is
# the same orientation switch_group() enforces before the two tables are
# merged on ['group1', 'group2']; a plain set gives no ordering guarantee and
# would let mis-oriented pairs silently drop out of that merge.
all_groups = sorted(set(group_rv.index).union(group_rv.columns))

In [5]:
# Every unordered pair of two different positions in all_groups, emitted in
# the same order itertools.combinations(all_groups, 2) would produce.
all_possible_pairs = [
    (left, right)
    for position, left in enumerate(all_groups)
    for right in all_groups[position + 1:]
]

In [6]:
# One (g, g) pair per group, i.e. each group paired with itself.
in_group = list(zip(all_groups, all_groups))

In [7]:
# Append the in-group pairs to the cross-group pairs.
# dict.fromkeys() removes repeated tuples while keeping first-seen order, so
# running this cell more than once does not append the in-group pairs again
# (duplicate pairs would become duplicate rows in `df` further down).
all_possible_pairs = list(dict.fromkeys(all_possible_pairs + in_group))

In [8]:
# One row per pair; the two tuple positions become 'group1' and 'group2'.
pair_columns = ['group1', 'group2']
df = pd.DataFrame(data=all_possible_pairs, columns=pair_columns)

In [9]:
def find_mutual_revert(d):
    """Sum the two directed `group_rv` entries for one pair of groups.

    Parameters
    ----------
    d : mapping or pandas.Series
        Must provide the keys 'group1' and 'group2'.

    Returns
    -------
    number
        ``group_rv.loc[g1, g2] + group_rv.loc[g2, g1]``, where an entry whose
        row or column label is absent from the module-level ``group_rv``
        table counts as 0. When both groups are the same, the single
        diagonal entry is counted once.
    """
    g1 = d['group1']
    g2 = d['group2']
    forward = 0
    backward = 0
    if g1 in group_rv.index and g2 in group_rv.columns:
        # Single .loc[row, col] lookup instead of chained indexing.
        forward = group_rv.loc[g1, g2]
    # For an in-group pair the "reverse" entry is the very same cell as the
    # forward one; reading it again would double-count the diagonal value.
    if g1 != g2 and g1 in group_rv.columns and g2 in group_rv.index:
        backward = group_rv.loc[g2, g1]
    return forward + backward

In [10]:
# Evaluate find_mutual_revert once per row and store the result as the
# 'mutual_revert' column.
df = df.assign(
    mutual_revert=lambda pairs: pairs.apply(find_mutual_revert, axis=1)
)

In [11]:
# Largest 'mutual_revert' values first.
df = df.sort_values(by='mutual_revert', ascending=False)

In [12]:
def find_min_revert(d):
    """Return the smaller of the two directed `group_rv` entries for a pair.

    `d` must provide the keys 'group1' and 'group2'. Each direction is looked
    up in the module-level ``group_rv`` table; a direction whose row or
    column label is missing from the table contributes 0.
    """
    first, second = d['group1'], d['group2']
    row_labels = group_rv.index
    col_labels = group_rv.columns

    # Entry at row `first`, column `second`.
    if first in row_labels and second in col_labels:
        forward = group_rv.loc[first, second]
    else:
        forward = 0

    # Entry at row `second`, column `first`.
    if first in col_labels and second in row_labels:
        backward = group_rv.loc[second, first]
    else:
        backward = 0

    return min(forward, backward)

In [13]:
# Evaluate find_min_revert once per row and store the result as the
# 'min_revert' column.
df = df.assign(
    min_revert=lambda pairs: pairs.apply(find_min_revert, axis=1)
)

In [14]:
# Keep only the pairs whose 'min_revert' is strictly positive.
has_positive_min = df['min_revert'].gt(0)
df_greater_than_0 = df.loc[has_positive_min]

In [15]:
# Largest 'min_revert' values first.
df_greater_than_0 = df_greater_than_0.sort_values(by='min_revert', ascending=False)

In [16]:
# Write the filtered pairs as a tab-separated, UTF-16 file without the index.
df_greater_than_0.to_csv(
    'result/TCM-mutual-and-min-revert.csv',
    sep="\t",
    encoding='utf-16',
    index=False,
)

# mutual revert for ranked articles

In [18]:
# Load the revert records from the parquet dataset.
revert_info_path = '../../../intermediate-result/TCM/TCM-revert-user-index-info'
revert_info = pd.read_parquet(revert_info_path)

In [19]:
# Read the tab-separated, UTF-16 clustering result and index it by
# 'contributor.username' so it can be joined through its index.
user_group_raw = pd.read_csv(
    'result/cluster-result-original-distance-0.2.csv',
    sep="\t",
    encoding='utf-16',
)
user_group = user_group_raw.set_index('contributor.username')

In [20]:
# Attach the user_group columns for 'user1' (matched against the user_group
# index) and expose the 'group' column as 'group1'.
# NOTE: revert_info is rebound from itself, so this cell is meant to run once
# per kernel session.
revert_info = pd.merge(
    revert_info,
    user_group,
    left_on='user1',
    right_index=True,
).rename(columns={'group': 'group1'})

In [21]:
# Attach the user_group columns for 'user2' (matched against the user_group
# index) and expose the 'group' column as 'group2'.
# NOTE: revert_info is rebound from itself, so this cell is meant to run once
# per kernel session.
revert_info = pd.merge(
    revert_info,
    user_group,
    left_on='user2',
    right_index=True,
).rename(columns={'group': 'group2'})

In [22]:
# Second name for the filtered pair table built above. This is a plain
# assignment, so both names refer to the same DataFrame object (no copy).
mutual_revert = df_greater_than_0

In [23]:
# Keep only the article title and the two group columns of each revert record.
pair_article_columns = ['page.title', 'group1', 'group2']
mutual_revert_info = revert_info.loc[:, pair_article_columns]

In [24]:
# Swap group1 and group2 when group1 is greater than group2, so that every
# row ends up with group1 <= group2.
def switch_group(d):
    """Return `d` with 'group1' and 'group2' ordered so group1 <= group2.

    Parameters
    ----------
    d : pandas.Series or dict
        Must provide the keys 'group1' and 'group2'.

    Returns
    -------
    Same type as `d`
        `d` itself when the two values are already in order; otherwise a
        copy of `d` with the two values exchanged. The argument is never
        modified: DataFrame.apply does not support callbacks that mutate
        the object they are given.
    """
    g1 = d['group1']
    g2 = d['group2']
    if g1 > g2:
        # Work on a copy so the caller's row object stays untouched.
        d = d.copy()
        d['group1'] = g2
        d['group2'] = g1
    return d

In [25]:
# Put every row into the order group1 <= group2 (the same rule as
# switch_group) with column-wise operations instead of a Python call per row:
# rows where group1 > group2 get the two values exchanged, all other rows are
# left as they are. Column dtypes and column order are preserved.
needs_swap = mutual_revert_info['group1'] > mutual_revert_info['group2']
mutual_revert_info = mutual_revert_info.assign(
    # Series.where keeps the value where the condition is True and takes the
    # replacement elsewhere, hence the negated mask.
    group1=mutual_revert_info['group1'].where(~needs_swap, mutual_revert_info['group2']),
    group2=mutual_revert_info['group2'].where(~needs_swap, mutual_revert_info['group1']),
)

In [26]:
# Inner join: one output row per revert record whose (group1, group2) pair
# is present in mutual_revert.
res = pd.merge(
    mutual_revert,
    mutual_revert_info,
    how='inner',
    on=['group1', 'group2'],
)

In [27]:
# Count the rows of `res` for each (group1, group2, page.title) combination.
# The result keeps the three keys as its index and has a single column,
# 'mutual_revert', holding the count.
pair_article_keys = ['group1', 'group2', 'page.title']
title_counts = res.groupby(pair_article_keys)['page.title'].count()
fin = title_counts.to_frame('mutual_revert')

In [28]:
# Move the index levels back into ordinary columns.
fin = fin.reset_index()

In [29]:
# Write the per-article counts as a tab-separated, UTF-16 file without the index.
fin.to_csv(
    'result/TCM-group-pair-article-mutual-revert-info.csv',
    sep="\t",
    encoding='utf-16',
    index=False,
)