In [2]:
import pickle
from math import *
import matplotlib.pyplot as plt
import sys
sys.path.insert(0, '../../Utils')
from iohelper import *
from eval_helper import *
sys.path.insert(0, '../../SpEagle/featureExtractorPy')
from featureExtraction import *
import numpy as np
import copy

from SVD import *

# Dataset selection and input locations (all paths are relative to this
# notebook's working directory).
dataset_name = 'YelpChi'
prefix = '../Yelp_Dataset/' + dataset_name + '/'
evasive_prefix = '../../Attack/' + dataset_name + '2' + '/'

metadata_filename = prefix + 'metadata.gz'

evasion_filename = evasive_prefix + 'priors/' + 'Weak/' + 'new_spammer_new_business.pickle'


# Read the review graph from the metadata file. The second return value is
# bound to `prod_user_graph` because that is the name the feature extraction
# and edge-insertion steps further down read; binding it under a different
# name leaves those steps with an undefined (or stale, in a live notebook)
# variable.
user_product_graph, prod_user_graph = read_graph_data(metadata_filename)
# Keep the previously used name available for any other cell that relies on it.
product_user_graph = prod_user_graph

# Feature names used to compute the priors of each node type.
review_feature_list = ['RD', 'EXT', 'EXT', 'DEV', 'ETF', 'ISR']
user_feature_list = ['MNR', 'PR', 'NR', 'avgRD', 'BST', 'ERD', 'ETG']
product_feature_list = ['MNR', 'PR', 'NR', 'avgRD', 'ERD', 'ETG']

# Load the feature configuration shipped with SpEagle.
feature_suspicious_filename = 'feature_configuration.txt'
feature_config = load_feature_config('../../SpEagle/', feature_suspicious_filename)

# Log-domain 2x2 potential for user<->review edges: near-identity, with a
# tiny off-diagonal mass so the logarithm stays finite.
numerical_eps = 1e-5
user_review_potential = np.log(np.array([
    [1 - numerical_eps, numerical_eps],
    [numerical_eps, 1 - numerical_eps],
]))

# Log-domain 2x2 potential for review<->product edges, with a larger
# off-diagonal mass (eps).
eps = 0.1
review_product_potential = np.log(np.array([
    [1 - eps, eps],
    [eps, 1 - eps],
]))

# Edge potentials keyed by direction; both directions of an edge type share
# the same matrix object.
potentials = {
    'u_r': user_review_potential,
    'r_u': user_review_potential,
    'r_p': review_product_potential,
    'p_r': review_product_potential,
}

text_features = []
new_user_graph = {}
new_product_graph = {}

# Load the evasion attack description: element 0 holds the spammer ids,
# element 1 the target ids and element 2 the list of added edges.
with open(evasion_filename, 'rb') as f:
    evasions = pickle.load(f)

spammer_ids = evasions[0]
target_ids = evasions[1]
new_edges = evasions[2]

# Index the added edges from both sides. Every added review is a 5-star
# review with label -1 and a fixed posting date.
for added_edge in new_edges:
    added_account, target = added_edge[0], added_edge[1]
    # user side: a tuple of (product_id, rating, label, posting_date)
    new_user_graph.setdefault(added_account, []).append((target, 5, -1, '2012-06-01'))
    # product side: a tuple of (user_id, rating, label, posting_date)
    new_product_graph.setdefault(target, []).append((added_account, 5, -1, '2012-06-01'))

# Compute features on the original graph, then update them with the edges
# added by the evasion attack.
feature_extractor = FeatureExtractor()
UserFeatures, ProdFeatures, ReviewFeatures = feature_extractor.construct_all_features(
    user_product_graph, prod_user_graph)
UserFeatures, ProdFeatures, ReviewFeatures = feature_extractor.update_all_features(
    user_product_graph, new_user_graph,
    prod_user_graph, new_product_graph,
    UserFeatures, ProdFeatures, ReviewFeatures)

# Turn the updated features into priors for each node type; the shorter
# aliases are the names the rest of the cell uses.
new_upriors = feature_extractor.calculateNodePriors(user_feature_list, UserFeatures, feature_config)
user_priors = new_upriors

new_ppriors = feature_extractor.calculateNodePriors(product_feature_list, ProdFeatures, feature_config)
prod_priors = new_ppriors

new_rpriors = feature_extractor.calculateNodePriors(review_feature_list, ReviewFeatures, feature_config)
review_priors = new_rpriors

# Create the ground truth: group the added (spam) reviews by target product.
# Each entry is (user_id, rating, label, posting_date) with label 1.
evasive_spams = {}
for added_edge in new_edges:
    added_account = added_edge[0]
    target = added_edge[1]
    # setdefault creates the list the first time a target is seen, so each
    # added review is recorded exactly once (including the first one).
    evasive_spams.setdefault(target, []).append((added_account, 5, 1, '2012-06-01'))

user_ground_truth, review_ground_truth = create_evasion_ground_truth(user_product_graph, evasive_spams)

# Insert the attack edges into the original graph, keyed by the string form
# of each id. Both adjacency lists are indexed directly, so the user and the
# product must already have an entry in their graph.
for e in new_edges:
    u_id, p_id = str(e[0]), str(e[1])
    user_product_graph[u_id].append((p_id, 5, 1, '2012-06-01'))
    prod_user_graph[p_id].append((u_id, 5, 1, '2012-06-01'))

# Report the sizes of the graph and of the prior tables.
print('number of users = %d' % len(user_product_graph))
print('number of users = %d' % len(user_priors))
print('number of products = %d' % len(prod_priors))

# Value passed to both SVD.run and SVD.evaluate_SVD.
percent = 0.9

# Run SVD on the user-product graph.
model = SVD(user_product_graph, user_priors, prod_priors)
svd_output = model.run(percent)

# Evaluate the SVD-based detection (with an SVM, per the original note).
# result contains the [userid, pred_probas] pairs, predictions is the binary
# prediction result and y_true is the true label vector.
result, predictions, y_true = model.evaluate_SVD(
    svd_output,
    user_product_graph,
    user_priors,
    prod_priors,
    spammer_ids,
    percent,
)




number of users = 38063
number of products = 201
number of users = 38063
number of users = 38063
number of products = 201
[ 62.66578014   0.           0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.           0.           0.
   0.           0.           0.           0.           0.           0.           0.
   0.        ]
number of positive 7739
number of negative 30324
number of all users 38063
AUC = 0.497467


  'precision', 'predicted', average, warn_for)
