In [1]:
import os
import sys
import numpy as np
import pandas as pd

import sklearn
import networkx as nx

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
matplotlib.rc('text', usetex=True)

import time, datetime
import pickle

In [2]:
def load_data(data_name):
    """Load the rating network and the ground-truth labels of one dataset.

    Both CSV files are read without a header row from
    ``../../rev2data/<data_name>/`` (relative to the working directory).

    Args:
        data_name: one of 'alpha', 'amazon', 'epinions', 'otc'.

    Returns:
        A ``(network_df, gt_df)`` tuple. ``network_df`` has the columns
        ``src``, ``dest``, ``rating`` and ``timestamp`` (converted to
        datetimes); ``gt_df`` has the columns ``id`` and ``label``.

    Raises:
        AssertionError: if ``data_name`` is not one of the known datasets.
    """
    data_list = ['alpha', 'amazon', 'epinions', 'otc']
    assert data_name in data_list, 'unknown dataset: %r' % (data_name,)

    data_dir = '../../rev2data/%s' % data_name
    # The timestamp column is read raw and converted exactly once below.
    # Asking read_csv to parse it as a date string (parse_dates /
    # infer_datetime_format, the latter deprecated in pandas 2.x) and then
    # converting it again with unit='s' made the result depend on whether
    # the first parse happened to fail.
    network_df = pd.read_csv(
        '%s/%s_network.csv' % (data_dir, data_name),
        header=None,
        names=['src', 'dest', 'rating', 'timestamp'],
    )
    gt_df = pd.read_csv(
        '%s/%s_gt.csv' % (data_dir, data_name),
        header=None,
        names=['id', 'label'],
    )

    raw_timestamps = network_df['timestamp']
    if pd.api.types.is_numeric_dtype(raw_timestamps):
        # Numeric values are interpreted as seconds since the Unix epoch.
        network_df['timestamp'] = pd.to_datetime(raw_timestamps, unit='s')
    else:
        # Non-numeric values are parsed as date strings, which is what
        # parse_dates did for such columns before.
        network_df['timestamp'] = pd.to_datetime(raw_timestamps)
    return network_df, gt_df

In [3]:
def get_attacker_df(data_name='alpha'):
    """Return the network rows whose source is labelled -1 in the ground truth.

    Loads the dataset through ``load_data``, collects every ``id`` whose
    ``label`` equals -1 (treated here as the attacker accounts), prints how
    many there are together with the first ten ids, and keeps only the
    network rows whose ``src`` is one of those ids.

    Args:
        data_name: dataset name forwarded to ``load_data``.

    Returns:
        The subset of the network DataFrame written by the selected ids.
    """
    network_df, gt_df = load_data(data_name)

    is_attacker_label = gt_df['label'] == -1
    attacker_list = gt_df.loc[is_attacker_label, 'id'].tolist()
    print('attacker list {}: {}...'.format(len(attacker_list), attacker_list[:10]))

    from_attacker = network_df['src'].isin(attacker_list)
    return network_df.loc[from_attacker]

In [4]:
# For every dataset, keep the rows written by ground-truth attackers in
# `data_stats` and print the mean/std of the number of rows per source
# account and per destination.
data_stats = {}
for data_name in ['alpha', 'amazon', 'epinions', 'otc']:
    attacker_df = data_stats[data_name] = get_attacker_df(data_name)

    # Row counts grouped by the rating's source and by its destination.
    attacker_stats = attacker_df.groupby('src').size()
    defender_stats = attacker_df.groupby('dest').size()

    print(
        'data: {}\n reviews: mean {}, std {}\n accounts: mean {}, std {}'.format(
            data_name,
            attacker_stats.mean(),
            attacker_stats.std(),
            defender_stats.mean(),
            defender_stats.std(),
        )
    )

attacker list 102: [338, 211, 177, 7380, 7590, 257, 255, 7457, 7599, 7598]...
data: alpha
 attacker: mean 19.313253012048193, std 40.79542289930042
 defender: mean 1.8278221208665906, std 1.789428986067001
attacker list 241: ['A1FEP94TP15RG0', 'A3HXME9ZRYK5P', 'A2RME4353Y911C', 'A2A5RQRIUHGVOM', 'A31MZR41SM9JU5', 'A18RIOMAB7DJQ', 'ABK32ED0M15MR', 'A3L9NGPTBJBQEL', 'A3EH0O05YNABWG', 'A3US1QSEMTLW18']...
data: amazon
 attacker: mean 9.04564315352697, std 8.601477083741491
 defender: mean 1.6666666666666667, std 1.615406568968179
attacker list 1013: [213885, 219050, 431231, 305277, 3036450692, 5301702532, 363189, 297788, 13477973892, 499406]...
data: epinions
 attacker: mean 68.07185628742515, std 237.41069038354846
 defender: mean 3.629050279329609, std 9.985174682532275
attacker list 180: [2712, 5172, 5171, 5170, 5177, 2717, 2962, 2718, 2657, 2096]...
data: otc
 attacker: mean 13.939130434782609, std 37.63333883964695
 defender: mean 1.8112994350282485, std 2.334932199750521
