In [1]:
import pandas as pd
import os
import matplotlib.pyplot as pyplt
import seaborn as sns
sns.set()
import math

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a scalar ``x``.

    The value is computed in a numerically stable way: for negative inputs
    the algebraically equivalent ``e**x / (1 + e**x)`` is used, so very
    negative ``x`` yields ``0.0`` instead of raising ``OverflowError`` from
    ``math.exp``.

    Args:
        x: A real number.

    Returns:
        A float in the closed interval [0.0, 1.0].
    """
    if x >= 0:
        # exp(-x) <= 1 here, so it cannot overflow.
        return 1 / (1 + math.exp(-x))
    # For x < 0, exp(x) < 1; it underflows harmlessly to 0.0 for very negative x.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

In [10]:
# Directory the network CSVs are read from; the result CSV is written to the same directory.
coord_dir = '../frenchdata'
# File name of the per-user coordination index table written at the end of the notebook.
output_file_name = 'coord_output_simple_multiply.csv'

In [11]:
# Network 1: hashtag network CSV (the file name suggests an agent-by-agent edge list).
nodeset_file_1 = 'French Hashtag-network-Agent x Agent.csv'
df_node_1 = pd.read_csv(os.path.join(coord_dir, nodeset_file_1))

In [12]:
# Network 2: mentions network CSV (the file name suggests an agent-by-agent edge list).
nodeset_file_2 = 'French Mentions-network-Agent x Agent.csv'
df_node_2 = pd.read_csv(os.path.join(coord_dir, nodeset_file_2))

In [13]:
# Network 3: URLs network CSV (the file name suggests an agent-by-agent edge list).
nodeset_file_3 = 'French URLs-network-Agent x Agent.csv'
df_node_3 = pd.read_csv(os.path.join(coord_dir, nodeset_file_3))

In [8]:
# Preview the first rows of the hashtag network frame to check its columns.
df_node_1.head()

Unnamed: 0,Source Node ID,Target Node ID,Link Value
0,rumbera1055,venezuelaadiar1,106.0
1,RumberaF,rumbera1055,118.0
2,RumberaF,venezuelaadiar1,90.0
3,RumberaF,RumberanetworkF,110.0
4,DoralNewsFL,rumbera1055,118.0


### Ranking Users

In [14]:
def get_link_total(df):
    """Return the sum of the ``'Link Value'`` column of ``df``.

    Args:
        df: DataFrame with a numeric ``'Link Value'`` column.

    Returns:
        The column total; zero when ``df`` has no rows.

    Raises:
        KeyError: If ``df`` has no ``'Link Value'`` column.
    """
    # Vectorized column sum instead of a Python-level iterrows() loop.
    # skipna=False keeps a missing link value visible as NaN in the total,
    # which is what plain accumulation over the rows produces.
    return df['Link Value'].sum(skipna=False)

def get_unique_nodes(df):
    """Collect every distinct node ID found in either endpoint column of ``df``.

    Args:
        df: DataFrame with ``'Source Node ID'`` and ``'Target Node ID'`` columns.

    Returns:
        A list of the distinct IDs appearing as a source or a target. The
        list is built from a set, so its order is arbitrary.
    """
    node_ids = set(df['Source Node ID'].unique().tolist())
    node_ids.update(df['Target Node ID'].unique().tolist())
    return list(node_ids)
    
def _link_totals_by_user(df):
    """Sum ``'Link Value'`` per user over every edge the user is an endpoint of (source or target)."""
    as_source = df.groupby('Source Node ID')['Link Value'].sum()
    as_target = df.groupby('Target Node ID')['Link Value'].sum()
    # Align on the union of users; a user seen on only one side contributes 0 for the other.
    return as_source.add(as_target, fill_value=0)


def get_user_counts(df_node_1, df_node_2, df_node_3):
    """Build a per-user table of link strength across three networks.

    For each network a user's link value is the sum of ``'Link Value'`` over
    all edges where the user is the source plus all edges where the user is
    the target.

    Args:
        df_node_1: Edge DataFrame with ``'Source Node ID'``,
            ``'Target Node ID'`` and ``'Link Value'`` columns.
        df_node_2: Second edge DataFrame with the same columns.
        df_node_3: Third edge DataFrame with the same columns.

    Returns:
        DataFrame with one row per user appearing in any network and the
        columns ``user``, ``link_value_1``, ``link_value_2``,
        ``link_value_3``, ``num_coordination`` (number of networks the user
        appears in) and ``total_link_value`` (sum of the three link values
        minus ``num_coordination``). Rows are ordered by the string form of
        the user ID so repeated runs produce the same file.
    """
    # One {user: total} mapping per network. The keys double as the set of
    # users taking part in that network, giving O(1) membership checks.
    per_network = [
        _link_totals_by_user(df).to_dict()
        for df in (df_node_1, df_node_2, df_node_3)
    ]
    all_unique_users = set().union(*per_network)

    user_rank_arr = []
    for user in sorted(all_unique_users, key=str):
        # Number of coordination types (networks) the user participates in.
        num_coordination = sum(user in totals for totals in per_network)

        # Strength of coordination per network; 0 when the user is absent.
        link_value_1, link_value_2, link_value_3 = (
            totals.get(user, 0) for totals in per_network
        )

        # NOTE(review): subtracting num_coordination is kept from the original
        # formula (labelled CSI-UserPair); its rationale is not visible here.
        total_link_value = link_value_1 + link_value_2 + link_value_3 - num_coordination  # CSI-UserPair

        user_rank_arr.append({
            'user': user,
            'link_value_1': link_value_1,
            'link_value_2': link_value_2,
            'link_value_3': link_value_3,
            'num_coordination': num_coordination,
            'total_link_value': total_link_value,
        })

    # Explicit columns keep the schema stable even when there are no users.
    return pd.DataFrame(
        user_rank_arr,
        columns=['user', 'link_value_1', 'link_value_2', 'link_value_3',
                 'num_coordination', 'total_link_value'],
    )

In [15]:
# Build the per-user link-value table from the hashtag, mentions and URLs networks.
user_rank_df = get_user_counts(df_node_1, df_node_2, df_node_3)

In [16]:
def normalize_column(df, col_name):
    """Add a min-max normalized copy of ``df[col_name]`` to ``df``.

    The new column is named ``col_name + 'normalized'`` (no separator) and
    holds ``(value - min) / (max - min)``. When every value in the column is
    the same the range is zero; the normalized column is then all zeros
    rather than the NaNs a 0/0 division would give.

    Args:
        df: DataFrame to modify. It is changed in place.
        col_name: Name of the numeric column to normalize.

    Returns:
        The same ``df`` object, with the normalized column added or
        overwritten.
    """
    col_name_normalized = col_name + 'normalized'
    col_min = df[col_name].min()
    offset_from_min = df[col_name] - col_min
    value_range = df[col_name].max() - col_min

    if value_range == 0:
        # Constant column: the offsets are already all zero, so skip the
        # division that would otherwise turn them into NaN.
        df[col_name_normalized] = offset_from_min.astype(float)
    else:
        df[col_name_normalized] = offset_from_min / value_range

    return df

### Coordination Index Simple Multiply

In [17]:
# Scale total_link_value to [0, 1], then shift it to [1, 2] so that a user
# with the minimum link value still keeps a non-zero index.
user_rank_df = normalize_column(user_rank_df, 'total_link_value')
user_rank_df['total_link_valuenormalized'] += 1

# Coordination index = shifted link strength x number of networks the user appears in.
user_rank_df['coord_index'] = user_rank_df['total_link_valuenormalized'].mul(
    user_rank_df['num_coordination']
)

# Persist the ranked table next to the input data.
user_rank_df.to_csv(
    os.path.join(coord_dir, output_file_name),
    index=False,
)