# Simulating Data Poisoning Attacks against Twitter Recommender

This tutorial details experiments designed to simulate attacks against social network recommendation mechanisms, and evaluate their effectiveness. All experiments documented here follow the procedure outlined below:

1. Load a dataset of anonymized retweet interactions collected from actual Twitter data.
2. Train a collaborative filtering model on the loaded data.
3. Select a target account to be "amplified" such that it is recommended to a set of users who have interacted with a separate, high-profile user also in the dataset. We select 20 such users as a "control" set.
4. Implement recommendation logic based on cosine similarity of the vector representations of the trained model, and observe recommendations for the control set.
5. Select a set of "amplifier accounts" that have not interacted with either the target account or the high-profile account, and are not members of the control set.
6. For a number of different proposed sets of amplifier accounts and parameter choices, create a new dataset containing additional interactions between each selected amplifier account and both the target account and the high-profile account. In practice, this process involves appending two new rows per amplifier account - one adding a retweet count for the target account and another adding a retweet count for the high-profile account.
7. Train a new model on the modified dataset.
8. Run both target-based and source-based recommendations for each member of the control group and record the number of times the target appeared in the top-n (3) recommendations.
9. Present and discuss the results.

In [None]:
!pip install -U fastai

In [None]:
# Note this is broken for pytorch 1.7.x, so please use pytorch 1.6
# pip install torch==1.6.0 torchvision==0.7.0

from sklearn.metrics.pairwise import cosine_similarity

from fastai.tabular.all import *
from fastai.collab import *

import networkx as nx
import community
import community.community_louvain as community_louvain

import pandas as pd
import numpy as np
import json
import os

import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

random.seed(1)

# Helper functions
def save_ratings(df, fn):
    """Write the Source/Target/Weight columns of `df` to the file `fn` as CSV.

    The file starts with a ``Source,Target,Weight`` header line, followed by
    one comma-separated line per row of `df`.
    """
    rows = zip(df["Source"], df["Target"], df["Weight"])
    with open(fn, "w") as out:
        out.write("Source,Target,Weight\n")
        out.writelines(f"{src},{tgt},{wgt}\n" for src, tgt, wgt in rows)

def key_with_max_value(d):
    """Return ``(key, value)`` for the largest value stored in `d`.

    When several keys share the largest value, the first one in the
    dictionary's iteration order is returned.
    """
    best_key = max(d, key=d.get)
    return best_key, d[best_key]

def key_with_min_value(d):
    """Return ``(key, value)`` for the smallest value stored in `d`.

    When several keys share the smallest value, the first one in the
    dictionary's iteration order is returned.
    """
    best_key = min(d, key=d.get)
    return best_key, d[best_key]

def get_most_similar(uid, matrix, max_matches):
    """Return the entries of row `uid` of `matrix` with the highest scores.

    Args:
        uid: Row index into `matrix`.
        matrix: Square similarity matrix (indexable as ``matrix[uid][other]``).
        max_matches: Maximum number of matches to return.

    Returns:
        ``None`` when `uid` is past the end of `matrix`; otherwise a list of
        ``[index, score]`` pairs sorted by descending score. The list holds at
        most `max_matches` pairs and never more than the row has entries.
    """
    if uid >= len(matrix):
        return None
    all_matches = matrix[uid]
    top_matches = np.flip(np.argsort(all_matches))
    # Clamp the request to the row length: asking for more matches than the
    # matrix has columns used to raise IndexError.
    limit = max(0, min(max_matches, len(top_matches)))
    return [[idx, all_matches[idx]] for idx in top_matches[:limit]]

def print_similar_to_targets(samples, t_matrix):
    """Print the ten targets most similar to each target id in `samples`.

    Uses the notebook globals `tid_name` (target id -> label) and
    `t_max_matches` (number of neighbours requested from get_most_similar).

    Args:
        samples: Iterable of target ids.
        t_matrix: Target-to-target similarity matrix.
    """
    for n in samples:
        print("Target: " + tid_name[n] + " similar to:")
        matches = get_most_similar(n, t_matrix, t_max_matches)
        if matches is None:
            # This id is not covered by the matrix. Move on to the next
            # sample; returning here used to drop all remaining samples.
            print()
            continue
        for tid, rating in matches[:10]:
            print(tid_name[tid] + " " + "%.4f" % rating)
        print()

def print_similar_to_sources(samples, s_matrix):
    """Print the ten sources most similar to each source id in `samples`.

    Uses the notebook globals `sid_name` (source id -> label) and
    `s_max_matches` (number of neighbours requested from get_most_similar).

    Args:
        samples: Iterable of source ids.
        s_matrix: Source-to-source similarity matrix.
    """
    for n in samples:
        print("User: " + sid_name[n] + " similar to:")
        matches = get_most_similar(n, s_matrix, s_max_matches)
        if matches is None:
            # This id is not covered by the matrix. Move on to the next
            # sample; returning here used to drop all remaining samples.
            print()
            continue
        for sid, rating in matches[:10]:
            print(sid_name[sid] + " " + "%.4f" % rating)
        print()
        
def print_target_similarity(t1, t2, t_matrix):
    """Print the similarity score that `t_matrix` holds for targets `t1` and `t2`.

    Labels are looked up in the notebook global `tid_name`.
    """
    score = "%.4f" % t_matrix[t1][t2]
    first, second = tid_name[t1], tid_name[t2]
    print(first + " similarity to " + second + ": " + score)

def make_nx_graph(inter):
    """Build an undirected weighted graph from a nested interaction mapping.

    Args:
        inter: Mapping of source -> {target: count}.

    Returns:
        ``(graph, names)`` where `graph` has one weighted edge per
        (source, target, count) triple and `names` is the set of every source
        and target encountered.
    """
    edges = [
        (src, dst, weight)
        for src, neighbours in inter.items()
        for dst, weight in neighbours.items()
    ]
    nodes = set(inter)
    nodes.update(dst for _, dst, _ in edges)
    graph = nx.Graph()
    graph.add_weighted_edges_from(edges)
    return graph, nodes

def get_median(var, source_list):
    """Return the median of ``var[s]`` over the items of `source_list` found in `var`.

    Items of `source_list` that are not keys of `var` are ignored.
    """
    present = [var[key] for key in source_list if key in var]
    return np.median(present)

def get_mean(var, source_list):
    """Return the mean of ``var[s]`` over the items of `source_list` found in `var`.

    Items of `source_list` that are not keys of `var` are ignored.
    """
    present = [var[key] for key in source_list if key in var]
    return np.mean(present)

def get_jaccard_median(inter, source_list, target):
    """Return the median Jaccard coefficient between `target` and known sources.

    A graph is built from `inter` with make_nx_graph; every item of
    `source_list` that appears in that graph is paired with `target`, and the
    median of the coefficients reported by ``nx.jaccard_coefficient`` is
    returned.
    """
    graph, known = make_nx_graph(inter)
    candidate_pairs = [(node, target) for node in source_list if node in known]
    scores = [coeff for _, _, coeff in nx.jaccard_coefficient(graph, candidate_pairs)]
    return np.median(scores)

def get_jaccard_mean(inter, source_list, target):
    """Return the mean Jaccard coefficient between `target` and known sources.

    A graph is built from `inter` with make_nx_graph; every item of
    `source_list` that appears in that graph is paired with `target`, and the
    mean of the coefficients reported by ``nx.jaccard_coefficient`` is
    returned.
    """
    graph, known = make_nx_graph(inter)
    candidate_pairs = [(node, target) for node in source_list if node in known]
    scores = [coeff for _, _, coeff in nx.jaccard_coefficient(graph, candidate_pairs)]
    return np.mean(scores)

def get_communities(inter):
    """Partition the interaction graph and group nodes by community label.

    Args:
        inter: Mapping of source -> {target: count}, as accepted by
            make_nx_graph.

    Returns:
        Dict mapping each label produced by
        ``community_louvain.best_partition`` to the list of nodes carrying it.
    """
    graph, _ = make_nx_graph(inter)
    partition = community_louvain.best_partition(graph)

    grouped = {}
    for member, label in partition.items():
        grouped.setdefault(label, []).append(member)
    return grouped

def get_mean_distance(inter, target, source_list):
    """Return the mean hop distance between `target` and the given nodes.

    Args:
        inter: Mapping of source -> {target: count}, as accepted by
            make_nx_graph.
        target: Node from which distances are measured.
        source_list: Nodes whose distance to `target` is averaged. Nodes that
            are absent from the graph, or not connected to `target`, are
            ignored.

    Returns:
        The mean of the unweighted shortest-path lengths (``np.mean``).
    """
    g, names = make_nx_graph(inter)

    # A single breadth-first search from the target yields the distance to
    # every reachable node. The previous per-node search repeated that work
    # for each source and raised as soon as one node was unreachable.
    hops = dict(nx.single_source_shortest_path_length(g, target))
    distance_vals = [hops[source] for source in source_list if source in hops]
    return np.mean(distance_vals)

# Return a new poisoned dataframe
def get_poisoned_dataset(ratings, amplifier_candidates, num_amplifiers, rating_val, save_path):
    """Return a copy of `ratings` with amplifier interactions appended.

    Uses the notebook globals `target_tid`, `high_profile_tid`, `sid_name`
    and `tid_name`.

    Args:
        ratings: DataFrame with "Source", "Target" and "Weight" columns.
        amplifier_candidates: Sequence of source ids to sample amplifiers from.
        num_amplifiers: Number of amplifiers to sample. A value below 1
            returns the unmodified base set.
        rating_val: Weight given to each injected interaction. A value below
            1 returns the unmodified base set.
        save_path: Where the labelled, aggregated poisoned dataset is written
            as CSV (for inspection or visualization in gephi).

    Returns:
        The poisoned DataFrame: the original rows followed by two new rows per
        sampled amplifier, one for the target and one for the high-profile
        account.
    """
    ratings2 = pd.DataFrame(ratings)
    # For base set measurements
    if num_amplifiers < 1 or rating_val < 1:
        return ratings2
    amplifiers = random.sample(amplifier_candidates, num_amplifiers)

    new_data = []
    for uid in amplifiers:
        new_data.append([uid, target_tid, rating_val])
        new_data.append([uid, high_profile_tid, rating_val])
    new_ratings_df = pd.DataFrame(new_data, columns=['Source', 'Target', 'Weight'])
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # equivalent and also leaves the input frame untouched.
    ratings2 = pd.concat([ratings2, new_ratings_df], ignore_index=True)

    # Save poisoned dataset for further inspection or visualization in gephi
    interactions2 = {}
    for s, t, r in zip(ratings2['Source'], ratings2['Target'], ratings2['Weight']):
        sid_label = sid_name[s]
        tid_label = tid_name[t]
        if sid_label not in interactions2:
            interactions2[sid_label] = Counter()
        interactions2[sid_label][tid_label] += r
    with open(save_path, "w") as f:
        f.write("Source,Target,Weight\n")
        for s, tw in interactions2.items():
            for t, w in tw.items():
                f.write(str(s) + "," + str(t) + "," + str(w) + "\n")
    return ratings2

## Load and process dataset

In [None]:
!wget -q --show-progress https://github.com/r0zetta/collaborative_filtering/raw/main/US2020/anonymized_interactions.csv



In [None]:
# Load and prepare raw data
raw = pd.read_csv("anonymized_interactions.csv")

# Source ids (accounts that retweeted)
# Each distinct name gets the next integer id in order of first appearance;
# sid_name (id -> name) and name_sid (name -> id) are inverse mappings.
sid_name = {}
name_sid = {}
sid = 0
for name in raw['Source']:
    if name not in name_sid:
        name_sid[name] = sid
        sid_name[sid] = name
        sid += 1

# Target ids (accounts that received retweets)
# Numbered independently of the source ids, so the same account name can have
# a different sid and tid.
tid_name = {}
name_tid = {}
tid = 0
for name in raw['Target']:
    if name not in name_tid:
        name_tid[name] = tid
        tid_name[tid] = name
        tid += 1

print("Number of retweeters: " + str(len(name_sid)))
print("Number of retweeted: " + str(len(name_tid)))
# Assemble ratings dataframe used to train the model
# (names replaced by their integer ids, weights copied unchanged)
ratings = pd.DataFrame()
ratings['Source'] = [name_sid[x] for x in raw['Source']]
ratings['Target'] = [name_tid[x] for x in raw['Target']]
ratings['Weight'] = raw['Weight']
ratings.head()

Number of retweeters: 25137
Number of retweeted: 8405


Unnamed: 0,Source,Target,Weight
0,0,0,1
1,0,1,4
2,0,2,1
3,0,3,1
4,0,4,1


In [None]:
# Per-source and per-target bookkeeping derived from the ratings table.
source_list = list(set(ratings['Source']))
target_list = list(set(ratings['Target']))
target_retweeted_by = {}         # tid -> list of sids that retweeted it
target_retweeted_count = {}      # tid -> Counter(sid -> retweets of tid by sid)
target_retweeters = Counter()    # tid -> total retweets received
target_source_count = Counter()  # tid -> number of distinct retweeting sources
source_retweeted = {}            # sid -> list of tids it retweeted
source_retweets = Counter()      # sid -> total retweets published
source_target_count = Counter()  # sid -> number of distinct targets retweeted
interactions = {}                # source label -> Counter(target label -> retweets)
for s, t, r in zip(ratings['Source'], ratings['Target'], ratings['Weight']):
    if sid_name[s] not in interactions:
        interactions[sid_name[s]] = Counter()
    interactions[sid_name[s]][tid_name[t]] += r
    source_retweets[s] += r
    if t not in target_retweeted_count:
        target_retweeted_count[t] = Counter()
    # Accumulate rather than overwrite so repeated (source, target) rows add up,
    # matching how `interactions` is built above.
    target_retweeted_count[t][s] += r
    if s not in source_retweeted:
        source_retweeted[s] = []
    if t not in source_retweeted[s]:
        source_retweeted[s].append(t)
        source_target_count[s] += 1
    if t not in target_retweeted_by:
        target_retweeted_by[t] = []
    if s not in target_retweeted_by[t]:
        target_retweeted_by[t].append(s)
        target_source_count[t] += 1
    # Sum the weights: adding 1 per row made this a second copy of
    # target_source_count instead of a retweet total.
    target_retweeters[t] += r
with open("labeled_ratings.csv", "w") as f:
    f.write("Source,Target,Weight\n")
    for s, tw in interactions.items():
        for t, w in tw.items():
            f.write(str(s) + "," + str(t) + "," + str(w) + "\n")
print("Number of sources: " + str(len(source_list)))
print("Number of targets: " + str(len(target_list)))
print("Total number of retweet interactions: " + str(sum(ratings['Weight'])))
print()
print("Targets with most retweets")
print("tid\tretweets")
for x, c in target_retweeters.most_common(10):
    print(tid_name[x] + "\t" + str(c))
print()
print("Targets with most unique sources retweeting them")
print("tid\tsources")
for x, c in target_source_count.most_common(10):
    print(tid_name[x] + "\t" + str(c))
print()
# People who retweeted x also retweeted y
for x, c in target_retweeters.most_common(10):
    also_retweeted = Counter()
    for sid, tids in source_retweeted.items():
        if len(tids) > 1:
            if x in tids:
                for tid in tids:
                    if tid != x:
                        also_retweeted[tid] += 1
    msg = "Sources that retweeted " + tid_name[x]
    msg += " also retweeted " + str(len(also_retweeted)) + " other accounts."
    print(msg)
    # Separate names here so the outer loop's x and c are not shadowed.
    for other_tid, shared in also_retweeted.most_common(10):
        print("Retweeted " + tid_name[other_tid] + " " + str(shared) + " times.")
    print("")

print()
print("Sources that published the most retweets")
print("sid\tretweets")
for x, c in source_retweets.most_common(10):
    print(sid_name[x] + "\t" + str(c))
print()
print("Sources that retweeted the most unique targets")
print("sid\ttargets")
for x, c in source_target_count.most_common(10):
    print(sid_name[x] + "\t" + str(c))

# Community detection over the labelled interaction graph
communities = get_communities(interactions)
community_sids = {}
community_sizes = Counter()
for mod, names in communities.items():
    community_sizes[mod] = len(names)
print(len(communities))
print(community_sizes)

Number of sources: 25137
Number of targets: 8405
Total number of retweet interactions: 95893

Targets with most retweets
tid	retweets
user_025343	5477
user_000009	3480
user_020135	2350
user_001418	2323
user_001690	1708
user_000755	1100
user_004286	1067
user_025303	788
user_004000	750
user_000596	638

Targets with most unique sources retweeting them
tid	sources
user_025343	5477
user_000009	3480
user_020135	2350
user_001418	2323
user_001690	1708
user_000755	1100
user_004286	1067
user_025303	788
user_004000	750
user_000596	638

Sources that retweeted user_025343 also retweeted 873 other accounts.
Retweeted user_000009 195 times.
Retweeted user_001418 116 times.
Retweeted user_020135 105 times.
Retweeted user_000755 85 times.
Retweeted user_004286 62 times.
Retweeted user_000596 52 times.
Retweeted user_001690 52 times.
Retweeted user_000107 46 times.
Retweeted user_004000 30 times.
Retweeted user_000340 27 times.

Sources that retweeted user_000009 also retweeted 1992 other accounts.
Retw

## Choose accounts for poisoning experiment

In [None]:
# Pick the target account to be boosted and the high-profile account that the
# target should come to be seen as similar to.
# high_profile_tid was chosen from the original labeled dataset.
# It is a high-profile Twitter account that gets a lot of engagement
high_profile_tid = 191
high_profile_name = tid_name[high_profile_tid]
# The same account only has a source id if it also appears as a retweeter
high_profile_sid = None
if high_profile_name in name_sid:
    high_profile_sid = name_sid[high_profile_name]
print("high_profile_tid: " + str(high_profile_tid) + " == " + tid_name[high_profile_tid])
print("high_profile_sid: " + str(high_profile_sid))
# The target account was selected based on a few criteria:
# - it is highly retweeted in the original dataset (top 10)
# - the original dataset contains plenty of accounts that haven't retweeted it and high_profile_tid
#   (thus enabling us to create a large number of amplifier candidates below)
target_tid = 4451
target_name = tid_name[target_tid]
# As above: target_sid stays None unless the target also appears as a retweeter
target_sid = None
if target_name in name_sid:
    target_sid = name_sid[target_name]
print("target_tid: " + str(target_tid) + " == " + tid_name[target_tid])
print("target_sid: " + str(target_sid))
# Feel free to change these values for other experiments

# Pick a list of accounts that engaged with the high profile account in order to compare
# similarity values before and after poisoning
# Candidates are sources that retweeted more than 50 distinct targets,
# one of which is the high-profile account.
num_controls = 20
control_candidates = []
for sid, tids in source_retweeted.items():
    if len(tids) > 50:
        if high_profile_tid in tids:
            control_candidates.append(sid)
print("Candidates for control accounts: " + str(len(control_candidates)))
controls = random.sample(control_candidates, num_controls)
# For consistency's sake, here's a hard-coded list of control candidates
# (selected by running the above code once)
# controls = [229, 6266, 340, 124, 25, 4000, 89, 4347, 1947, 20144, 14, 22, 107, 13426, 237, 708, 1560, 62, 9, 11]
print(controls)

high_profile_tid: 191 == user_025303
high_profile_sid: None
target_tid: 4451 == user_004286
target_sid: 4286
Candidates for control accounts: 53
[41, 1399, 6266, 14, 128, 25, 708, 408, 614, 1560, 280, 89, 22, 4286, 3, 1608, 350, 0, 4000, 130]


In [None]:
# Find accounts that engaged with both high_profile and target
# (target_retweeted_by maps a target id to the list of source ids that retweeted it)
retweeted_target = target_retweeted_by[target_tid]
print("Number of accounts that retweeted target:")
print(len(retweeted_target))
retweeted_high_profile = target_retweeted_by[high_profile_tid]
print("Number of accounts that retweeted high-profile:")
print(len(retweeted_high_profile))
# Source ids present in both lists
retweeted_both = set(retweeted_target).intersection(set(retweeted_high_profile))
print("Number of accounts that retweeted both:")
print(len(retweeted_both))

Number of accounts that retweeted target:
1067
Number of accounts that retweeted high-profile:
788
Number of accounts that retweeted both:
13


In [None]:
# Feature analysis of communities
# For every community with enough amplifier candidates, collect descriptive
# features (size, retweet volume, pagerank, proximity to the target, ...).

retweeted_target = target_retweeted_by[target_tid]
retweeted_target_count = target_retweeted_count[target_tid]
retweeted_high_profile = target_retweeted_by[high_profile_tid]
retweeted_high_profile_count = target_retweeted_count[high_profile_tid]

G, N = make_nx_graph(interactions)
pr = nx.pagerank(G)

# `controls` holds source ids, while target_retweeted_count is keyed by target
# id. Translate each control account to its target id through its name;
# controls that were never retweeted have no target id and are left out.
# Using the source id directly as a key looked up unrelated accounts.
control_tids = []
for control_sid in controls:
    control_name = sid_name[control_sid]
    if control_name in name_tid:
        control_tids.append(name_tid[control_name])

num_amplifiers = 200
community_features = {}
for mod, names in communities.items():
    # Amplifier candidates: community members that retweeted neither the
    # high-profile account nor the target.
    amplifier_candidates = []
    for name in names:
        if name in name_sid:
            sid = name_sid[name]
            if sid in source_retweeted:
                rtw = source_retweeted[sid]
                if high_profile_tid not in rtw and target_tid not in rtw:
                    amplifier_candidates.append(sid)
    # Communities too small to supply num_amplifiers amplifiers are skipped
    if len(amplifier_candidates) < num_amplifiers:
        continue
    print("Mod: " + str(mod) + " size: " + str(len(names)))
    community_features[mod] = {}
    community_features[mod]["Community size"] = len(names)

    community_total_retweets = 0
    for n in names:
        if n in name_sid:
            community_total_retweets += source_retweets[name_sid[n]]
    community_features[mod]["Total retweets"] = community_total_retweets

    # NOTE: the values are scaled by 1e5 and 1e4 respectively; the "10e5" and
    # "10e4" in the labels are kept because they are dictionary keys.
    mpr = get_mean(pr, names)
    community_features[mod]['Mean pagerank * 10e5'] = mpr * 100000

    jaccard = get_jaccard_mean(interactions, names, target_name)
    mod_name = 'Mean Jaccard coefficient between accounts in community and target * 10e4'
    community_features[mod][mod_name] = jaccard * 10000

    sid_list = [name_sid[x] for x in names if x in name_sid]
    # Set copy of sid_list for constant-time membership tests below
    sid_set = set(sid_list)
    rtw_target_sids = set(retweeted_target).intersection(sid_set)
    community_features[mod]['Unique accounts in community that retweeted target'] = len(rtw_target_sids)
    rtw_target_count = 0
    for sid, count in retweeted_target_count.items():
        if sid in rtw_target_sids:
            rtw_target_count += count
    community_features[mod]['Total retweets of target'] = rtw_target_count
    rhps = set(retweeted_high_profile).intersection(sid_set)
    community_features[mod]['Unique accounts in community that retweeted high-profile'] = len(rhps)
    rhpc = 0
    for sid, count in retweeted_high_profile_count.items():
        if sid in rhps:
            rhpc += count
    community_features[mod]['Total retweets of high-profile'] = rhpc
    community_retweet_counts = [source_retweets[x] for x in sid_list]
    community_features[mod]['Mean retweets per account'] = np.mean(community_retweet_counts)
    community_features[mod]['Max retweet count'] = max(community_retweet_counts)

    controls_in_mod = sid_set.intersection(set(controls))
    community_features[mod]['Number of control accounts'] = len(controls_in_mod)

    # Count (community member, control account) retweet relationships and the
    # retweets they carry.
    rtw_controls_sids = 0
    rtw_controls_count = 0
    for control_tid in control_tids:
        if control_tid in target_retweeted_count:
            rcl = target_retweeted_count[control_tid]
            for s, c in rcl.items():
                if s in sid_set:
                    rtw_controls_sids += 1
                    rtw_controls_count += c
    community_features[mod]['Accounts in this community that retweeted control accounts'] = rtw_controls_sids
    community_features[mod]['Total control account retweets published by this community'] = rtw_controls_count

    target_mean_path_len = get_mean_distance(interactions, target_name, names)
    community_features[mod]['Mean path length between community nodes and target'] = target_mean_path_len

print(json.dumps(community_features, indent=4))

Mod: 5 size: 1732
Mod: 4 size: 6788
Mod: 13 size: 2027
Mod: 6 size: 2128
Mod: 7 size: 2200
Mod: 8 size: 1790
Mod: 9 size: 368
Mod: 11 size: 1011
Mod: 0 size: 316
Mod: 14 size: 2832
Mod: 15 size: 2118
Mod: 16 size: 4731
Mod: 18 size: 1700
Mod: 19 size: 1403
Mod: 1 size: 263
{
    "5": {
        "Community size": 1732,
        "Total retweets": 6993,
        "Mean pagerank * 10e5": 3.3609910753994643,
        "Mean Jaccard coefficient between accounts in community and target * 10e4": 2.464483096763413,
        "Unique accounts in community that retweeted target": 25,
        "Total retweets of target": 30,
        "Unique accounts in community that retweeted high-profile": 17,
        "Total retweets of high-profile": 179,
        "Mean retweets per account": 12.4875,
        "Max retweet count": 3614,
        "Number of control accounts": 3,
        "Accounts in this community that retweeted control accounts": 16,
        "Total control account retweets published by this community": 41,

In [None]:
def highlight_max(s):
    """Return one CSS string per entry of `s`, marking the maxima in yellow.

    Entries equal to ``s.max()`` get ``'background-color: yellow'``; all
    other entries get an empty string.
    """
    peak = s.max()
    return ['background-color: yellow' if value == peak else '' for value in s]
def highlight_min(s):
    """Return one CSS string per entry of `s`, marking the minima in orange.

    Entries equal to ``s.min()`` get ``'background-color: orange'``; all
    other entries get an empty string.
    """
    floor = s.min()
    return ['background-color: orange' if value == floor else '' for value in s]
# One column per community and one row per feature
fe = pd.DataFrame(community_features)
# Highlight each feature's largest value in yellow and smallest in orange,
# showing numbers with two decimals
fe.style.apply(highlight_max, axis=1).apply(highlight_min, axis=1).format("{:.2f}")

Unnamed: 0,5,4,13,6,7,8,9,11,0,14,15,16,18,19,1
Community size,1732.0,6788.0,2027.0,2128.0,2200.0,1790.0,368.0,1011.0,316.0,2832.0,2118.0,4731.0,1700.0,1403.0,263.0
Total retweets,6993.0,30847.0,8341.0,4002.0,2937.0,2903.0,916.0,2753.0,422.0,8948.0,7996.0,7462.0,5060.0,4509.0,300.0
Mean pagerank * 10e5,3.36,3.87,3.04,2.66,2.71,2.46,2.45,2.53,2.42,3.28,3.07,2.94,3.15,2.96,2.73
Mean Jaccard coefficient between accounts in community and target * 10e4,2.46,2.6,10.34,0.48,0.23,3.02,1.12,1.5,0.38,0.65,8.1,0.08,0.49,1.35,0.1
Unique accounts in community that retweeted target,25.0,45.0,810.0,5.0,12.0,8.0,2.0,16.0,0.0,17.0,87.0,6.0,3.0,26.0,1.0
Total retweets of target,30.0,45.0,1866.0,6.0,12.0,15.0,2.0,16.0,0.0,18.0,133.0,6.0,4.0,29.0,1.0
Unique accounts in community that retweeted high-profile,17.0,451.0,29.0,24.0,18.0,143.0,4.0,5.0,2.0,48.0,20.0,3.0,5.0,13.0,0.0
Total retweets of high-profile,179.0,1918.0,195.0,93.0,21.0,144.0,26.0,67.0,8.0,146.0,175.0,3.0,8.0,25.0,0.0
Mean retweets per account,12.49,10.43,4.95,1.91,1.35,1.66,3.74,4.07,1.51,3.19,4.78,1.58,2.99,3.35,1.14
Max retweet count,3614.0,3762.0,2230.0,316.0,99.0,14.0,453.0,1337.0,83.0,474.0,1559.0,47.0,200.0,839.0,4.0


## Build model

In [None]:
# Given a ratings dataframe containing columns "Source", "Target", "Weight"
# train a collaborative filtering model and return the target and source weight embeds
def make_model_collab(ratings, epochs):
    """Train a fastai collaborative-filtering model and return its embeddings.

    Args:
        ratings: DataFrame with "Source", "Target" and "Weight" columns.
        epochs: Number of epochs passed to ``fit_one_cycle``.

    Returns:
        ``(target_w, source_w)``: the learned weights for every entry of
        ``dls.classes['Target']`` and ``dls.classes['Source']``.
    """
    weights = ratings["Weight"]
    min_rating, max_rating = min(weights), max(weights)
    print(f"Min rating: {min_rating} Max rating: {max_rating}")
    dls = CollabDataLoaders.from_df(ratings, item_name='Target', bs=64)
    learn = collab_learner(dls, n_factors=50, y_range=(min_rating, max_rating))
    learn.fit_one_cycle(epochs)
    # Model weights
    # NOTE(review): rows follow the order of dls.classes[...]; presumably that
    # list does not line up one-to-one with the integer ids used to index the
    # similarity matrices elsewhere - confirm against the callers.
    model = learn.model
    return (
        model.weight(dls.classes['Target'], is_item=True),
        model.weight(dls.classes['Source'], is_item=False),
    )

def make_model_nn(ratings, epochs):
    """Train a fastai neural collaborative-filtering model and return its embeddings.

    Args:
        ratings: DataFrame with "Source", "Target" and "Weight" columns
            (assumes "Source" is the first column, which fastai then uses as
            the user column - TODO confirm for other callers).
        epochs: Number of epochs passed to ``fit_one_cycle``.

    Returns:
        ``(target_w, source_w)`` as numpy arrays: the weights of the second
        and first embedding layers, with the first row of each dropped.
    """
    min_rating = min(ratings["Weight"])
    max_rating = max(ratings["Weight"])
    print("Min rating: " + str(min_rating) + " Max rating: " + str(max_rating))
    dls = CollabDataLoaders.from_df(ratings, item_name='Target', bs=64)
    # Embedding sizes are looked up by column name. The previous keys
    # ('userId', 'movieId') matched no column of this dataset, so the
    # requested size of 50 was never applied.
    learn = collab_learner(dls, use_nn=True,
                           emb_szs={'Source': 50, 'Target': 50},
                           layers=[256, 128], y_range=(min_rating, max_rating))

    learn.fit_one_cycle(epochs)
    target_w = to_np(learn.model.embeds[1].weight[1:])
    source_w = to_np(learn.model.embeds[0].weight[1:])
    return target_w, source_w

def make_model(ratings, model_type, epochs):
    """Train the model variant selected by `model_type`.

    ``"nn"`` selects make_model_nn; any other value selects
    make_model_collab. Returns whatever the selected trainer returns.
    """
    print("Model type: " + model_type)
    trainer = make_model_nn if model_type == "nn" else make_model_collab
    return trainer(ratings, epochs)

# Training settings used by the cells below.
# model_type "nn" selects make_model_nn; any other value selects make_model_collab.
epochs = 5
model_type = "default"

In [None]:
# Train collab model on base dataset
target_w, source_w = make_model(ratings, model_type, epochs)

# Embedding shapes
print(target_w.shape)
print(source_w.shape)

# Calculate cosine similarity matrix between all targets in the set
t_matrix = cosine_similarity(target_w)
                            
print(t_matrix.shape)
print()

# Calculate cosine similarity matrix between all sources in the set
s_matrix = cosine_similarity(source_w)

print(s_matrix.shape)
print()

Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8812.553711,8495.390625,00:15
1,4816.820312,4669.12207,00:17
2,2437.08252,3102.752197,00:17
3,1614.862305,2611.380371,00:15
4,1467.526245,2539.028076,00:13


torch.Size([7251, 50])
torch.Size([21071, 50])
(7251, 7251)

(21071, 21071)



## Recommendations by target similarity

In [None]:
t_max_matches = 100 # top_n matches when doing target similarity

# Show closest matches to selected targets
samples = [target_tid, high_profile_tid]
print_similar_to_targets(samples, t_matrix)
# Similarity between the target and the high-profile account before poisoning
print_target_similarity(target_tid, high_profile_tid, t_matrix)

Target: user_004286 similar to:
user_004286 1.0000
user_029017 0.9463
user_029316 0.9452
user_029150 0.9427
user_028743 0.9376
user_029228 0.9373
user_029816 0.9370
user_028817 0.9361
user_013935 0.9360
user_029312 0.9356

Target: user_025303 similar to:
user_025303 1.0000
user_025403 0.9370
user_025581 0.9361
user_025511 0.9351
user_025376 0.9341
user_026830 0.9320
user_025505 0.9313
user_025148 0.9290
user_025582 0.9282
user_025413 0.9279

user_004286 similarity to user_025303: 0.8578


In [None]:
# graph tid similarity, to make a visualization
# tid_inter maps a target label to a Counter of {similar target label: similarity}
tid_inter = {}
# Only pairs at least this similar become edges
threshold = 0.99
print(len(tid_name))
for tid, name in tid_name.items():
    # Up to 50 nearest neighbours; None when tid lies outside the matrix
    matches = get_most_similar(tid, t_matrix, 50)
    if matches is not None:
        for item in matches:
            tid2, sim = item
            name2 = tid_name[tid2]
            # Skip the self-match
            if name != name2:
                if sim >= threshold:
                    if name not in tid_inter:
                        tid_inter[name] = Counter()
                    tid_inter[name][name2] = sim
print("Saving")
# Written as a Source,Target,Weight edge list
with open("tid_inter.csv", "w") as f:
    f.write("Source,Target,Weight\n")
    for source, targets in tid_inter.items():
        for target, weight in targets.items():
            f.write(str(source)+","+str(target)+","+str(weight)+"\n")
print("Done")

8405
Saving
Done


In [None]:
# Build recommendations for source based on who they've retweeted
# For each target retweeted by a source, see if we have an entry in most_similar
# If we do, add each item to the recommended counter
# Assign the value to be the source's rating multiplied by the similarity score
# We'll also record what the user has already retweeted so we can recommend a target they haven't yet retweeted

# for target, num_retweets in get_source_retweets(source):
#    for similar, similarity in get_most_similar(target):
#        recommended[similar] += num_retweets * similarity

def get_user_recommendations_by_target(ratings, sid, t_matrix):
    """Score recommendations for source `sid` from target-to-target similarity.

    For every target the source retweeted, each of its most similar targets
    (up to the notebook global `t_max_matches`) receives
    ``similarity * retweet weight``, accumulated per recommended target.

    Args:
        ratings: DataFrame with "Source", "Target" and "Weight" columns.
        sid: Source id to build recommendations for.
        t_matrix: Target-to-target similarity matrix.

    Returns:
        ``(seen_recommendations, not_seen_recommendations)``: two Counters of
        target id -> score, split by whether the source already retweeted the
        target. Filling stops once both hold at least 10 entries.
    """
    s_ratings = ratings.loc[ratings['Source'] == sid]
    s_ratings = s_ratings.sort_values(by="Weight", ascending=False)
    recommended = Counter()
    seen = set()
    for tid, trating in zip(s_ratings['Target'], s_ratings['Weight']):
        # Valid rows are 0 .. len(t_matrix) - 1, so the bound check must be
        # inclusive (it used to let tid == len(t_matrix) through).
        if tid >= len(t_matrix):
            continue
        seen.add(tid)
        matches = get_most_similar(tid, t_matrix, t_max_matches)
        if matches is not None:
            for t, r in matches:
                recommended[t] += r * trating

    # Now we'll build a recommendations list that contains the highest scored items
    # calculated above that the user hasn't already rated
    seen_recommendations = Counter()
    not_seen_recommendations = Counter()
    for tid, score in recommended.most_common():
        if len(seen_recommendations) >= 10 and len(not_seen_recommendations) >= 10:
            break
        if tid not in seen:
            not_seen_recommendations[tid] = score
        else:
            seen_recommendations[tid] = score
    return seen_recommendations, not_seen_recommendations

def print_recommendations_by_target(sid, seen_recommendations, not_seen_recommendations):
    """Print a source's top retweeted targets followed by its recommendations.

    Reads the notebook globals `ratings`, `sid_name`, `tid_name`,
    `target_retweeters` and `target_tid`. A trailing ``[X]`` marks the target
    account and ``[*]`` marks targets among the source's ten most-retweeted.

    Args:
        sid: Source id the recommendations belong to.
        seen_recommendations: Counter of target id -> score for targets the
            source already retweeted.
        not_seen_recommendations: Counter of target id -> score for targets
            the source has not retweeted.
    """
    own = ratings.loc[ratings['Source'] == sid].sort_values(by="Weight", ascending=False)
    s_r = list(zip(own['Target'], own['Weight']))
    ind_rating = dict(s_r)
    top10 = [tid for tid, _ in s_r[:10]]

    def markers(tid):
        # Suffix flagging the boosted target and the user's own top-10 entries
        suffix = ""
        if tid == target_tid:
            suffix += " [X]"
        if tid in top10:
            suffix += " [*]"
        return suffix

    # Now let's print the output and see if it's sane
    print(f"User: {sid_name[sid]} retweeted {len(s_r)} different accounts.")
    print()
    for tid, trating in s_r[:10]:
        print(f"Retweeted by user: {trating} times, total retweets: {target_retweeters[tid]}\t  {tid_name[tid]}")
    print()
    print("Recommended (seen):")
    for x, c in seen_recommendations.most_common(10):
        line = "%.4f" % c + f"\t(retweeted by user: {ind_rating[x]} times,"
        line += f" total retweets: {target_retweeters[x]})\t{tid_name[x]}"
        print(line + markers(x))
    print()
    print("Recommended (not seen):")
    for x, c in not_seen_recommendations.most_common(10):
        line = "%.4f" % c + f"\t (total retweets: {target_retweeters[x]})\t{tid_name[x]}"
        print(line + markers(x))
    print("=====================================================")
    print()

def print_user_recommendations_by_target(ratings, sid, t_matrix):
    """Compute and print the target-based recommendations for source `sid`."""
    recommendations = get_user_recommendations_by_target(ratings, sid, t_matrix)
    print_recommendations_by_target(sid, *recommendations)

In [None]:
# Display target-based recommendations for control set
# (one report per control account, computed on the unpoisoned ratings)
for n in controls:
    print_user_recommendations_by_target(ratings, n, t_matrix)

User: user_000041 retweeted 171 different accounts.

Retweeted by user: 14 times, total retweets: 788	  user_025303
Retweeted by user: 4 times, total retweets: 2	  user_027283
Retweeted by user: 3 times, total retweets: 6	  user_004906
Retweeted by user: 3 times, total retweets: 101	  user_025218
Retweeted by user: 2 times, total retweets: 77	  user_025764
Retweeted by user: 2 times, total retweets: 43	  user_025266
Retweeted by user: 2 times, total retweets: 20	  user_024491
Retweeted by user: 2 times, total retweets: 82	  user_027313
Retweeted by user: 2 times, total retweets: 1	  user_027277
Retweeted by user: 2 times, total retweets: 126	  user_025171

Recommended (seen):
48.8365	(retweeted by user: 1 times, total retweets: 29)	user_025942
42.7630	(retweeted by user: 2 times, total retweets: 126)	user_025171 [*]
40.4249	(retweeted by user: 2 times, total retweets: 245)	user_025233
35.6384	(retweeted by user: 1 times, total retweets: 1)	user_009511
32.3609	(retweeted by user: 1 time

## Recommendations by source similarity

In [None]:
s_max_matches = 100 # top_n matches when doing source similarity

# Print 10 closest sources for control set
# (each list starts with the account itself at similarity 1.0)
print_similar_to_sources(controls, s_matrix)

User: user_000041 similar to:
user_000041 1.0000
user_000009 0.9965
user_001337 0.9951
user_001294 0.9941
user_000081 0.9940
user_001196 0.9940
user_001382 0.9935
user_000004 0.9926
user_000023 0.9922
user_000113 0.9921

User: user_001399 similar to:
user_001399 1.0000
user_002612 0.9282
user_010184 0.9250
user_008121 0.9249
user_007165 0.9222
user_020202 0.9214
user_020235 0.9211
user_003773 0.9195
user_018445 0.9193
user_007467 0.9189

User: user_006266 similar to:
user_006266 1.0000
user_012485 0.8891
user_010670 0.8844
user_006267 0.8771
user_014171 0.8707
user_003740 0.8653
user_000946 0.8622
user_004077 0.8565
user_014186 0.8555
user_009665 0.8504

User: user_000014 similar to:
user_000014 1.0000
user_000304 0.9957
user_001213 0.9945
user_001382 0.9943
user_001104 0.9938
user_001214 0.9922
user_000009 0.9920
user_001617 0.9918
user_002581 0.9910
user_000204 0.9907

User: user_000128 similar to:
user_000128 1.0000
user_014245 0.9254
user_010120 0.9219
user_002604 0.9141
user_01251

In [None]:
# Recommendations based on source similarity
# From the previously calculated source similarities, calculate recommendations thus:
# For each similar source, obtain their list of target ratings
# Record a counter for each target where we add a value: similarity * rating
# Once we have a ranked list of recommendations, choose the top items
# based on whether the original user has rated the target or not

# for similar_source, similarity in get_most_similar(source):
#     for target, num_retweets in get_source_retweets(similar_source):
#         recommended[target] += similarity * num_retweets


def get_user_recommendations_by_source(ratings, sid, s_matrix):
    """Score targets for source ``sid`` using the ratings of similar sources.

    Every source returned by ``get_most_similar(sid, s_matrix, s_max_matches)``
    contributes ``similarity * Weight`` to each target it has a row for in
    ``ratings``. The accumulated scores are then split by whether ``sid``
    itself already has a row for that target.

    Args:
        ratings: DataFrame with 'Source', 'Target' and 'Weight' columns.
        sid: Source id of the user to build recommendations for.
        s_matrix: Source similarity matrix handed to ``get_most_similar``.

    Returns:
        A ``(seen_recommendations, not_seen_recommendations)`` tuple of
        Counters mapping target id -> score. Targets are visited in
        descending score order and collection stops once *both* Counters
        hold at least 10 entries, so either one may contain more than 10.
    """
    # Targets this user already has a rating for.
    seen = set(ratings.loc[ratings['Source'] == sid, 'Target'])

    recommended = Counter()
    matches = get_most_similar(sid, s_matrix, s_max_matches)
    if matches is not None:
        # Use a separate loop name so the `sid` argument is not overwritten.
        for similar_sid, similarity in matches:
            rows = ratings.loc[ratings['Source'] == similar_sid]
            for tid, weight in zip(rows['Target'], rows['Weight']):
                recommended[tid] += similarity * weight

    # Now we'll build a recommendations list that contains the highest scored
    # items calculated above, split by whether the user has already rated them
    seen_recommendations = Counter()
    not_seen_recommendations = Counter()
    for tid, score in recommended.most_common():
        if len(seen_recommendations) >= 10 and len(not_seen_recommendations) >= 10:
            break
        if tid in seen:
            seen_recommendations[tid] = score
        else:
            not_seen_recommendations[tid] = score
    return seen_recommendations, not_seen_recommendations

def print_recommendations_by_source(sid, seen_recommendations, not_seen_recommendations):
    """Print a report of source-based recommendations for source ``sid``.

    Reads the module-level ``ratings``, ``sid_name``, ``tid_name``,
    ``target_retweeters`` and ``target_tid``. The report lists the user's ten
    highest-weighted targets, then up to ten "seen" and ten "not seen"
    recommendations. A line is tagged `` [X]`` when the target equals
    ``target_tid`` and `` [*]`` when it is one of the user's ten listed targets.

    Args:
        sid: Source id of the user being reported on.
        seen_recommendations: Counter of target id -> score for targets the
            user has already rated.
        not_seen_recommendations: Counter of target id -> score for targets
            the user has not rated.
    """
    own_rows = ratings.loc[ratings['Source'] == sid].sort_values(by="Weight", ascending=False)
    own_pairs = list(zip(own_rows['Target'], own_rows['Weight']))
    tid_rating = dict(own_pairs)
    top10 = []

    def markers(tid):
        # Suffix flags appended to a recommendation line.
        suffix = ""
        if tid == target_tid:
            suffix += " [X]"
        if tid in top10:
            suffix += " [*]"
        return suffix

    # Now let's print the output and see if it's sane
    print("User: " + sid_name[sid] + " retweeted " + str(len(own_pairs)) + " different accounts.")
    print()
    for tid, weight in own_pairs[:10]:
        top10.append(tid)
        line = "Retweeted by user: " + str(weight) + " times, total retweets: " + str(target_retweeters[tid])
        print(line + "\t  " + tid_name[tid])
    print()

    print("Recommended (seen):")
    for tid, score in seen_recommendations.most_common(10):
        line = "%.4f" % score
        line += " (retweeted by user: " + str(tid_rating[tid]) + " times,"
        line += " total retweets: " + str(target_retweeters[tid]) + ")" + "\t"
        line += tid_name[tid]
        print(line + markers(tid))
    print()

    print("Recommended (not seen):")
    for tid, score in not_seen_recommendations.most_common(10):
        line = "%.4f" % score
        line += " (total retweets: " + str(target_retweeters[tid]) + ")\t"
        line += tid_name[tid]
        print(line + markers(tid))
    print("=====================================================")
    print()

def print_user_recommendations_by_source(ratings, sid, s_matrix):
    """Compute the source-based recommendations for ``sid`` and print them."""
    already_rated, not_yet_rated = get_user_recommendations_by_source(
        ratings, sid, s_matrix
    )
    print_recommendations_by_source(
        sid,
        already_rated,
        not_yet_rated,
    )

In [None]:
# Print source-based recommendations for control set
# (one report per member of `controls`, computed from `ratings` and `s_matrix`)
for n in controls:
    print_user_recommendations_by_source(ratings, n, s_matrix)

User: user_000041 retweeted 171 different accounts.

Retweeted by user: 14 times, total retweets: 788	  user_025303
Retweeted by user: 4 times, total retweets: 2	  user_027283
Retweeted by user: 3 times, total retweets: 6	  user_004906
Retweeted by user: 3 times, total retweets: 101	  user_025218
Retweeted by user: 2 times, total retweets: 77	  user_025764
Retweeted by user: 2 times, total retweets: 43	  user_025266
Retweeted by user: 2 times, total retweets: 20	  user_024491
Retweeted by user: 2 times, total retweets: 82	  user_027313
Retweeted by user: 2 times, total retweets: 1	  user_027277
Retweeted by user: 2 times, total retweets: 126	  user_025171

Recommended (seen):
392.8223 (retweeted by user: 14 times, total retweets: 788)	user_025303 [*]
182.9197 (retweeted by user: 2 times, total retweets: 245)	user_025233
128.1706 (retweeted by user: 3 times, total retweets: 101)	user_025218 [*]
103.9492 (retweeted by user: 1 times, total retweets: 3480)	user_000009
83.8019 (retweeted by

## Validation: count how many times target appeared in top_n recommendations for each source

In [None]:
def validate_target_target(ratings, sid, t_matrix, target, top_n):
    """Check whether ``target`` is a top target-based recommendation for ``sid``.

    Args:
        ratings: Ratings DataFrame passed to get_user_recommendations_by_target.
        sid: Source id of the user to check.
        t_matrix: Target similarity matrix.
        target: Target id to look for.
        top_n: Number of highest-scored "not seen" recommendations to inspect.

    Returns:
        True if ``target`` is among the ``top_n`` highest-scored
        recommendations the user has not already rated, else False.
    """
    _, not_seen = get_user_recommendations_by_target(ratings, sid, t_matrix)
    return target in [tid for tid, _score in not_seen.most_common(top_n)]

def validate_target_source(ratings, sid, s_matrix, target, top_n):
    """Check whether ``target`` is a top source-based recommendation for ``sid``.

    Args:
        ratings: Ratings DataFrame passed to get_user_recommendations_by_source.
        sid: Source id of the user to check.
        s_matrix: Source similarity matrix.
        target: Target id to look for.
        top_n: Number of highest-scored "not seen" recommendations to inspect.

    Returns:
        True if ``target`` is among the ``top_n`` highest-scored
        recommendations the user has not already rated, else False.
    """
    _, not_seen = get_user_recommendations_by_source(ratings, sid, s_matrix)
    return target in [tid for tid, _score in not_seen.most_common(top_n)]

In [None]:
# Count the control users for whom the target shows up in the top_n
# target-based recommendations computed from `ratings` and `t_matrix`.
ret = 0
top_n = 3
for sid in controls:
    if validate_target_target(ratings, sid, t_matrix, target_tid, top_n):
        ret += 1
print("Target was in top "+str(top_n)+" target-based recommendations for "+str(ret)+" users in control list.")

Target was in top 3 target-based recommendations for 0 users in control list.


In [None]:
# Count the control users for whom the target shows up in the top_n
# source-based recommendations computed from `ratings` and `s_matrix`.
res = 0
top_n = 3
for sid in controls:
    if validate_target_source(ratings, sid, s_matrix, target_tid, top_n):
        res += 1
print("Target was in top "+str(top_n)+" source-based recommendations for "+str(res)+" users in control list.")

Target was in top 3 source-based recommendations for 1 users in control list.


In [None]:
# STOP

## Poisoning experiment 1 - randomly chosen amplifiers, varying numbers of amplifiers and retweets
- with differing numbers of amplifiers and retweets:
    - repeat "iterations" times
        - create new poisoned dataframe based on supplied parameters
        - save csv for gephi visualization
        - train model
        - run source-based and target-based recommendations, see how often target appears in top_n recommendations
        - record all results to be graphed later

In [None]:
# Run poisoning experiment 1
# Note that this cell can take a number of hours to run

# These are the experiments to run
# Each pair of numbers denotes [num_amplifiers, num_retweets]
# A poisoned copy of the dataset is generated as follows:
# 1. Make a copy of the original dataset
# 2. Randomly select num_amplifiers accounts from amplifier candidates
# 3. For each selected amplifier, add two rows to the copied dataset:
#    amplifier - target - num_retweets
#    amplifier - high_profile_user - num_retweets
# 4. Perform the rest of the experiment (train model, analyze recommendations)
experiments = [[0,0],
               [10,10], [10,20], [10,50], [10,100],
               [20,10], [20,20], [20,50], [20,100],
               [50,10], [50,20], [50,50], [50,100],
               [100,1], [100,5], [100,10], [100,20], [100,50],
               [200,1], [200,5], [200,10], [200,20], [200,50],
               [500,1], [500,5], [500,10], [500,20], [500,50],
               [1000,1], [1000,5], [1000,10], [1000,20], [1000,50],
               [2000,1], [2000,5], [2000,10], [2000,20], [2000,50],
               [4000,1], [4000,5], [4000,10], [4000,20], [4000,50]]

samples = [target_tid, high_profile_tid]
top_n = 3       # how many "not seen" recommendations are inspected per user
iterations = 10 # repetitions per [num_amplifiers, num_retweets] pair
epochs = 5      # training epochs passed to make_model

# 1. Pick random accounts (not in control set) to do the boosting
# that haven't engaged with either high profile or target
# NOTE(review): the control-set filter below compares each account's
# retweeted *target* ids against `controls`, which is iterated as source ids
# elsewhere in this notebook -- confirm this is the intended way of excluding
# control-set members.
control_set = set(controls)  # built once rather than once per source
amplifier_candidates = []
for sid, tids in source_retweeted.items():
    if len(tids) == 0:
        continue
    if not control_set.isdisjoint(tids):
        continue
    if high_profile_tid not in tids and target_tid not in tids:
        amplifier_candidates.append(sid)
print("Number of random amplifier candidates: " + str(len(amplifier_candidates)))

# Loop through the experiment parameters
# For each set of parameters, perform the experiment iterations number of times
result_source = []
result_target = []
i = 1
save_dir = "US2020/exp1"
os.makedirs(save_dir, exist_ok=True)
for amps, r in experiments:
    for n in range(iterations):
        print()
        print("Experiment:" + str(i) + " amps:" + str(amps) + " r:" + str(r) + " take:" + str(n))
        i += 1

        # Build the poisoned dataset (also saved as csv at save_path)
        save_path = save_dir + "/" + str(amps) + "_" + str(r) + "_" + str(n) + ".csv"
        new_ratings = get_poisoned_dataset(ratings, amplifier_candidates, amps, r, save_path)
        msg = "Base dataset length: " + str(len(ratings))
        msg += " Poisoned dataset length: " + str(len(new_ratings))
        print(msg)

        # Train a fresh model and derive both similarity matrices
        new_target_w, new_source_w = make_model(new_ratings, model_type, epochs)
        new_t_matrix = cosine_similarity(new_target_w)
        print_target_similarity(target_tid, high_profile_tid, new_t_matrix)
        new_s_matrix = cosine_similarity(new_source_w)

        # Target-based recommendations: count control users that get the target
        ret = sum(
            1 for sid in controls
            if validate_target_target(new_ratings, sid, new_t_matrix, target_tid, top_n)
        )
        msg = "Target was in top " + str(top_n)
        msg += " target recommendations for " + str(ret)
        msg += " users in control list."
        print(msg)
        result_target.append([amps, r, n, ret])
        # Results are rewritten after every run so partial progress is kept
        with open(save_dir + "/result_target.json", "w") as f:
            json.dump(result_target, f, indent=4)

        # Source-based recommendations: same count using the source matrix
        res = sum(
            1 for sid in controls
            if validate_target_source(new_ratings, sid, new_s_matrix, target_tid, top_n)
        )
        msg = "Target was in top " + str(top_n)
        msg += " source recommendations for " + str(res)
        msg += " users in control list."
        print(msg)
        result_source.append([amps, r, n, res])
        with open(save_dir + "/result_source.json", "w") as f:
            json.dump(result_source, f, indent=4)

Number of random amplifier candidates: 23256

Experiment:1 amps:0 r:0 take:0
Base dataset length: 52920 Poisoned dataset length: 52920
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8794.023438,8474.701172,00:15
1,4793.515137,4715.09668,00:17
2,2482.21167,3186.33667,00:18
3,1646.312012,2704.375244,00:16
4,1493.106689,2633.339844,00:13


user_004286 similarity to user_025303: 0.8395
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:2 amps:0 r:0 take:1
Base dataset length: 52920 Poisoned dataset length: 52920
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8797.510742,8507.350586,00:16
1,4780.299316,4720.615723,00:17
2,2479.473877,3168.660645,00:18
3,1572.855591,2682.748535,00:16
4,1452.523438,2611.063965,00:14


user_004286 similarity to user_025303: 0.8706
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:3 amps:0 r:0 take:2
Base dataset length: 52920 Poisoned dataset length: 52920
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8806.810547,8533.323242,00:16
1,4774.619141,4752.689941,00:17
2,2405.743896,3209.59375,00:17
3,1637.464355,2723.090088,00:15
4,1468.29126,2651.237061,00:13


user_004286 similarity to user_025303: 0.8998
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:4 amps:0 r:0 take:3
Base dataset length: 52920 Poisoned dataset length: 52920
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8800.289062,8450.008789,00:15
1,4727.339844,4671.356934,00:17
2,2389.022705,3153.755371,00:17
3,1585.594849,2680.343262,00:15
4,1407.343872,2610.307129,00:12


user_004286 similarity to user_025303: 0.8317
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:5 amps:0 r:0 take:4
Base dataset length: 52920 Poisoned dataset length: 52920
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8786.551758,8509.749023,00:15
1,4764.883301,4729.192871,00:16
2,2436.162598,3179.987793,00:17
3,1574.234619,2694.327393,00:15
4,1385.106567,2622.516602,00:12


user_004286 similarity to user_025303: 0.9125
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:6 amps:0 r:0 take:5
Base dataset length: 52920 Poisoned dataset length: 52920
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8789.201172,8528.486328,00:14
1,4756.785156,4793.092285,00:16
2,2397.587158,3268.953613,00:16
3,1599.799316,2787.300537,00:15
4,1438.879395,2715.955322,00:12


user_004286 similarity to user_025303: 0.7124
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:7 amps:0 r:0 take:6
Base dataset length: 52920 Poisoned dataset length: 52920
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8811.308594,8512.632812,00:14
1,4774.03418,4721.980957,00:16
2,2436.042236,3182.324951,00:16
3,1637.547607,2698.079102,00:14
4,1403.356812,2626.590332,00:12


user_004286 similarity to user_025303: 0.8037
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:8 amps:0 r:0 take:7
Base dataset length: 52920 Poisoned dataset length: 52920
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8819.961914,8561.300781,00:14
1,4836.584473,4781.94873,00:16
2,2443.851074,3236.755371,00:16
3,1667.016968,2749.552246,00:15
4,1432.996582,2677.547119,00:12


user_004286 similarity to user_025303: 0.8629
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:9 amps:0 r:0 take:8
Base dataset length: 52920 Poisoned dataset length: 52920
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8786.44043,8481.875977,00:15
1,4729.607422,4759.030273,00:16
2,2448.022949,3216.440674,00:17
3,1600.604736,2730.206299,00:15
4,1417.925659,2658.614746,00:13


user_004286 similarity to user_025303: 0.8646
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:10 amps:0 r:0 take:9
Base dataset length: 52920 Poisoned dataset length: 52920
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8796.760742,8487.323242,00:15
1,4758.299316,4780.20752,00:16
2,2416.272949,3257.828369,00:17
3,1640.935059,2774.660156,00:15
4,1443.584595,2702.44751,00:12


user_004286 similarity to user_025303: 0.8780
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:11 amps:10 r:10 take:0
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8808.707031,8474.046875,00:15
1,4743.541016,4652.546875,00:16
2,2444.273438,3093.519043,00:17
3,1600.788086,2608.022949,00:15
4,1443.431396,2536.634766,00:13


user_004286 similarity to user_025303: 0.8046
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 4 users in control list.

Experiment:12 amps:10 r:10 take:1
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8793.120117,8475.194336,00:15
1,4755.482422,4731.113281,00:17
2,2390.22168,3206.805176,00:17
3,1573.802734,2729.777588,00:15
4,1443.324097,2659.507812,00:13


user_004286 similarity to user_025303: 0.8938
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:13 amps:10 r:10 take:2
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8790.007812,8508.808594,00:15
1,4787.677246,4769.755371,00:16
2,2469.616455,3225.629395,00:17
3,1647.756714,2738.922852,00:15
4,1432.081177,2666.900146,00:13


user_004286 similarity to user_025303: 0.7019
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:14 amps:10 r:10 take:3
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8788.432617,8528.240234,00:15
1,4788.944336,4775.685059,00:16
2,2455.377197,3225.890137,00:17
3,1631.072388,2738.332764,00:15
4,1464.075684,2666.340332,00:13


user_004286 similarity to user_025303: 0.7601
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:15 amps:10 r:10 take:4
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8803.124023,8559.93457,00:14
1,4756.876953,4894.143066,00:16
2,2459.088867,3370.19165,00:16
3,1665.274902,2883.771973,00:14
4,1451.872925,2811.467041,00:12


user_004286 similarity to user_025303: 0.8463
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:16 amps:10 r:10 take:5
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8791.091797,8486.429688,00:15
1,4767.844238,4734.036621,00:17
2,2435.709717,3181.603027,00:17
3,1611.807617,2694.039307,00:16
4,1493.256836,2622.057373,00:13


user_004286 similarity to user_025303: 0.7504
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:17 amps:10 r:10 take:6
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8795.868164,8540.536133,00:16
1,4785.84375,4774.623047,00:17
2,2448.574707,3218.909424,00:18
3,1628.49585,2729.824219,00:16
4,1437.807739,2657.430908,00:14


user_004286 similarity to user_025303: 0.8930
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:18 amps:10 r:10 take:7
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8819.953125,8552.588867,00:16
1,4812.76709,4801.780273,00:17
2,2482.839355,3250.499023,00:17
3,1664.030518,2763.837402,00:16
4,1484.305542,2691.714844,00:14


user_004286 similarity to user_025303: 0.8708
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:19 amps:10 r:10 take:8
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8798.942383,8457.160156,00:16
1,4780.125977,4725.51416,00:17
2,2540.175049,3201.95459,00:18
3,1601.154663,2722.319336,00:16
4,1456.911133,2651.349365,00:14


user_004286 similarity to user_025303: 0.9242
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 4 users in control list.

Experiment:20 amps:10 r:10 take:9
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8798.813477,8546.665039,00:16
1,4793.113281,4765.26416,00:18
2,2507.332764,3220.179443,00:18
3,1690.422119,2732.29834,00:17
4,1475.110596,2660.557861,00:14


user_004286 similarity to user_025303: 0.8936
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:21 amps:10 r:20 take:0
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8780.974609,8553.141602,00:16
1,4753.237305,4737.008789,00:18
2,2385.070801,3179.966309,00:18
3,1603.580444,2693.985352,00:16
4,1409.143555,2622.313965,00:14


user_004286 similarity to user_025303: 0.9180
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:22 amps:10 r:20 take:1
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8798.19043,8554.675781,00:15
1,4802.921875,4787.532227,00:17
2,2399.631836,3243.80835,00:17
3,1629.916016,2758.656494,00:15
4,1450.937256,2686.831543,00:13


user_004286 similarity to user_025303: 0.8027
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:23 amps:10 r:20 take:2
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8815.477539,8526.21875,00:16
1,4797.008789,4800.039062,00:17
2,2455.447021,3254.63501,00:17
3,1621.991211,2768.188721,00:16
4,1439.141235,2695.909424,00:13


user_004286 similarity to user_025303: 0.7883
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:24 amps:10 r:20 take:3
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8811.725586,8574.025391,00:16
1,4734.93457,4822.448242,00:17
2,2430.025879,3278.858398,00:18
3,1576.518677,2791.177246,00:16
4,1409.987305,2718.580322,00:14


user_004286 similarity to user_025303: 0.7125
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 3 users in control list.

Experiment:25 amps:10 r:20 take:4
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8793.803711,8546.857422,00:16
1,4739.700195,4724.033691,00:17
2,2399.787598,3163.98291,00:17
3,1614.043335,2679.874756,00:16
4,1446.440552,2608.497803,00:13


user_004286 similarity to user_025303: 0.7606
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:26 amps:10 r:20 take:5
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8799.549805,8559.621094,00:17
1,4738.349121,4772.459473,00:18
2,2443.533203,3227.371338,00:19
3,1628.529907,2739.644775,00:16
4,1426.366699,2667.732178,00:13


user_004286 similarity to user_025303: 0.9176
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:27 amps:10 r:20 take:6
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8783.335938,8501.136719,00:15
1,4756.878418,4806.251465,00:16
2,2413.177002,3292.182373,00:17
3,1584.674683,2812.642334,00:15
4,1428.46814,2741.117188,00:13


user_004286 similarity to user_025303: 0.8330
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:28 amps:10 r:20 take:7
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8808.446289,8525.053711,00:15
1,4760.345703,4791.961426,00:16
2,2463.470215,3254.810791,00:17
3,1614.002319,2767.026367,00:15
4,1459.014404,2695.094238,00:13


user_004286 similarity to user_025303: 0.6595
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 3 users in control list.

Experiment:29 amps:10 r:20 take:8
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8822.923828,8518.181641,00:15
1,4771.578613,4785.081055,00:16
2,2476.592285,3257.640625,00:16
3,1654.928711,2777.108887,00:15
4,1525.684448,2705.259033,00:12


user_004286 similarity to user_025303: 0.7899
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:30 amps:10 r:20 take:9
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8807.162109,8541.983398,00:15
1,4776.37793,4781.553223,00:16
2,2449.573242,3249.408447,00:17
3,1601.465332,2760.238037,00:15
4,1453.101318,2687.806641,00:13


user_004286 similarity to user_025303: 0.9310
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 3 users in control list.

Experiment:31 amps:10 r:50 take:0
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8805.078125,8519.025391,00:15
1,4767.146484,4757.890625,00:17
2,2399.9021,3232.088379,00:17
3,1626.989868,2753.715088,00:15
4,1419.236206,2682.855957,00:13


user_004286 similarity to user_025303: 0.8997
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 3 users in control list.

Experiment:32 amps:10 r:50 take:1
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8800.249023,8538.121094,00:15
1,4742.441895,4809.541504,00:17
2,2436.886719,3281.43335,00:17
3,1586.362061,2794.071289,00:16
4,1443.037842,2721.597168,00:13


user_004286 similarity to user_025303: 0.8371
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:33 amps:10 r:50 take:2
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8782.996094,8516.333984,00:15
1,4769.981934,4768.62207,00:16
2,2445.594971,3227.575684,00:16
3,1606.058838,2745.284912,00:15
4,1464.631348,2674.0354,00:12


user_004286 similarity to user_025303: 0.9422
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:34 amps:10 r:50 take:3
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8812.543945,8525.854492,00:15
1,4775.780273,4791.561035,00:16
2,2452.539307,3239.622559,00:17
3,1653.356079,2750.595459,00:15
4,1457.16394,2678.005859,00:13


user_004286 similarity to user_025303: -0.2303
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:35 amps:10 r:50 take:4
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8810.545898,8559.728516,00:15
1,4737.398438,4775.363281,00:17
2,2417.352783,3230.903564,00:17
3,1627.99231,2741.044922,00:15
4,1427.243164,2668.054688,00:13


user_004286 similarity to user_025303: 0.9158
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:36 amps:10 r:50 take:5
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8807.530273,8521.240234,00:15
1,4781.78418,4813.154785,00:16
2,2497.021484,3271.107666,00:17
3,1646.561035,2785.413086,00:15
4,1405.392944,2713.651367,00:13


user_004286 similarity to user_025303: 0.6283
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:37 amps:10 r:50 take:6
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8781.466797,8450.182617,00:15
1,4740.285645,4704.192383,00:17
2,2459.076904,3154.532959,00:17
3,1615.811035,2672.484619,00:15


epoch,train_loss,valid_loss,time
0,8781.466797,8450.182617,00:15
1,4740.285645,4704.192383,00:17
2,2459.076904,3154.532959,00:17
3,1615.811035,2672.484619,00:15
4,1435.589355,2601.530029,00:13


user_004286 similarity to user_025303: 0.9581
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 4 users in control list.

Experiment:38 amps:10 r:50 take:7
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8798.066406,8528.246094,00:15
1,4738.808594,4770.564453,00:17
2,2437.368652,3228.359619,00:17
3,1631.314575,2743.417236,00:15
4,1436.24585,2671.550537,00:13


user_004286 similarity to user_025303: 0.7098
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:39 amps:10 r:50 take:8
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8804.804688,8584.53125,00:15
1,4803.521973,4793.26123,00:17
2,2499.200928,3231.708008,00:17
3,1647.535034,2741.141602,00:15
4,1523.411255,2668.64917,00:13


user_004286 similarity to user_025303: 0.8476
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:40 amps:10 r:50 take:9
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8802.824219,8584.016602,00:16
1,4701.069824,4794.078125,00:17
2,2425.793945,3232.389893,00:17
3,1624.361938,2740.306396,00:16
4,1419.491943,2667.679688,00:13


user_004286 similarity to user_025303: 0.8704
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:41 amps:10 r:100 take:0
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8809.606445,8508.792969,00:15
1,4819.467285,4773.939941,00:17
2,2499.852539,3231.049561,00:17
3,1639.16687,2746.132568,00:15
4,1444.360352,2674.031982,00:13


user_004286 similarity to user_025303: 0.8335
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:42 amps:10 r:100 take:1
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8803.908203,8580.807617,00:16
1,4747.412598,4780.334473,00:17
2,2433.149902,3215.108887,00:17
3,1592.031128,2721.724854,00:16
4,1452.627319,2648.306396,00:13


user_004286 similarity to user_025303: 0.8730
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:43 amps:10 r:100 take:2
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8782.020508,8547.837891,00:15
1,4745.358887,4755.474609,00:17
2,2439.842773,3196.858643,00:17
3,1631.240967,2706.344238,00:16
4,1432.574829,2633.629883,00:13


user_004286 similarity to user_025303: 0.8505
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:44 amps:10 r:100 take:3
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8815.626953,8544.487305,00:15
1,4807.850586,4760.908691,00:17
2,2481.494141,3198.898438,00:17
3,1639.950073,2708.077148,00:15
4,1470.036377,2635.293945,00:13


user_004286 similarity to user_025303: 0.8346
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:45 amps:10 r:100 take:4
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8819.237305,8597.933594,00:15
1,4746.650879,4844.384277,00:17
2,2463.968994,3317.925293,00:17
3,1633.692505,2831.377197,00:15
4,1484.343506,2759.227295,00:13


user_004286 similarity to user_025303: 0.8316
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:46 amps:10 r:100 take:5
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8803.789062,8492.953125,00:15
1,4797.182617,4794.79834,00:16
2,2445.313232,3286.307129,00:17
3,1711.53772,2808.140137,00:15
4,1462.856201,2737.312256,00:13


user_004286 similarity to user_025303: 0.7647
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:47 amps:10 r:100 take:6
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8780.500977,8607.870117,00:15
1,4763.606445,4867.049805,00:16
2,2398.178955,3314.60376,00:16
3,1600.865479,2827.730957,00:15
4,1405.616211,2755.590576,00:12


user_004286 similarity to user_025303: 0.8341
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:48 amps:10 r:100 take:7
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8810.322266,8524.928711,00:15
1,4780.695312,4760.790039,00:17
2,2424.529541,3208.682373,00:17
3,1686.049927,2721.263672,00:15
4,1396.551392,2648.84375,00:13


user_004286 similarity to user_025303: 0.7771
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:49 amps:10 r:100 take:8
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8818.679688,8585.488281,00:16
1,4780.084473,4766.351562,00:17
2,2417.57959,3190.791748,00:17
3,1647.071777,2699.55542,00:16
4,1424.235962,2627.521729,00:13


user_004286 similarity to user_025303: 0.8308
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:50 amps:10 r:100 take:9
Base dataset length: 52920 Poisoned dataset length: 52940
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8810.621094,8470.623047,00:15
1,4850.898926,4752.925293,00:17
2,2497.548584,3213.686768,00:20
3,1656.138184,2731.858643,00:16
4,1456.338989,2660.455322,00:13


user_004286 similarity to user_025303: 0.7752
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:51 amps:20 r:10 take:0
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8796.941406,8476.817383,00:15
1,4767.751953,4733.550781,00:17
2,2440.637695,3182.780273,00:18
3,1585.321411,2698.390381,00:16
4,1402.745605,2627.069336,00:13


user_004286 similarity to user_025303: 0.8717
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:52 amps:20 r:10 take:1
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8818.178711,8490.055664,00:16
1,4796.015137,4700.481445,00:17
2,2463.258057,3159.353027,00:18
3,1624.369751,2678.322021,00:16
4,1443.363525,2606.605713,00:13


user_004286 similarity to user_025303: 0.8761
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:53 amps:20 r:10 take:2
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8801.263672,8575.777344,00:16
1,4775.178711,4787.228027,00:17
2,2445.840576,3226.913574,00:17
3,1654.611938,2736.857178,00:16
4,1457.885742,2664.4104,00:14


user_004286 similarity to user_025303: 0.7712
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:54 amps:20 r:10 take:3
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8802.766602,8566.680664,00:16
1,4734.439941,4728.602539,00:18
2,2330.820557,3171.555908,00:19
3,1595.970459,2683.416016,00:17
4,1398.089233,2611.286621,00:14


user_004286 similarity to user_025303: 0.7513
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:55 amps:20 r:10 take:4
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8800.055664,8528.185547,00:16
1,4754.272461,4788.391602,00:17
2,2433.666748,3280.466064,00:17
3,1618.677002,2807.911377,00:16
4,1410.057007,2737.168457,00:13


user_004286 similarity to user_025303: 0.8151
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:56 amps:20 r:10 take:5
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8782.182617,8548.694336,00:15
1,4717.021973,4770.873535,00:17
2,2455.254395,3248.19751,00:17
3,1639.429932,2762.62085,00:15
4,1393.35791,2690.72168,00:13


user_004286 similarity to user_025303: 0.9295
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:57 amps:20 r:10 take:6
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8819.491211,8537.93457,00:15
1,4817.989746,4781.413574,00:17
2,2418.148682,3225.301758,00:17
3,1642.745239,2738.294678,00:15
4,1431.060669,2666.541504,00:13


user_004286 similarity to user_025303: 0.6646
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:58 amps:20 r:10 take:7
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8812.199219,8545.358398,00:16
1,4799.328125,4814.138184,00:17
2,2437.91748,3291.533936,00:17
3,1599.381104,2808.676758,00:16
4,1428.555786,2737.091309,00:14


user_004286 similarity to user_025303: 0.7834
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:59 amps:20 r:10 take:8
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8768.500977,8557.525391,00:16
1,4749.188965,4829.53418,00:18
2,2442.020264,3293.32959,00:18
3,1636.372681,2802.741211,00:16
4,1469.84082,2730.172607,00:14


user_004286 similarity to user_025303: 0.8712
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:60 amps:20 r:10 take:9
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8803.15918,8513.412109,00:16
1,4735.312012,4738.773926,00:18
2,2386.760498,3221.466797,00:18
3,1556.781372,2741.54248,00:16
4,1402.672974,2670.467285,00:14


user_004286 similarity to user_025303: 0.9113
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:61 amps:20 r:20 take:0
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8789.836914,8588.081055,00:15
1,4768.988281,4834.049316,00:17
2,2403.164795,3287.368164,00:17
3,1612.566528,2797.66626,00:16
4,1451.000488,2725.216553,00:13


user_004286 similarity to user_025303: 0.6985
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 3 users in control list.

Experiment:62 amps:20 r:20 take:1
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8765.699219,8542.075195,00:16
1,4770.463379,4825.253906,00:18
2,2371.97583,3282.134033,00:18
3,1611.066162,2792.916504,00:16
4,1400.67627,2720.231201,00:14


user_004286 similarity to user_025303: 0.9081
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:63 amps:20 r:20 take:2
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8799.5,8533.304688,00:16
1,4808.687012,4786.65918,00:17
2,2455.176025,3249.141357,00:17
3,1612.452637,2764.802002,00:16
4,1491.38623,2693.381104,00:13


user_004286 similarity to user_025303: 0.8604
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 5 users in control list.

Experiment:64 amps:20 r:20 take:3
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8807.376953,8568.688477,00:15
1,4769.256348,4765.151367,00:17
2,2404.253906,3214.611328,00:17
3,1603.53418,2727.931885,00:15
4,1391.671021,2655.979248,00:13


user_004286 similarity to user_025303: 0.9522
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:65 amps:20 r:20 take:4
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8793.637695,8563.064453,00:15
1,4784.022949,4783.27832,00:17
2,2458.300293,3232.183105,00:17
3,1653.856812,2748.230469,00:15
4,1453.70752,2676.886475,00:13


user_004286 similarity to user_025303: 0.8562
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:66 amps:20 r:20 take:5
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8832.15918,8570.356445,00:16
1,4814.20459,4803.470703,00:17
2,2455.58252,3257.888916,00:18
3,1629.271973,2773.572021,00:16
4,1454.292847,2702.101318,00:13


user_004286 similarity to user_025303: 0.7727
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 4 users in control list.

Experiment:67 amps:20 r:20 take:6
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8811.139648,8468.457031,00:15
1,4735.931152,4667.52002,00:17
2,2408.483643,3111.486572,00:17
3,1557.92334,2623.959717,00:15
4,1425.85498,2552.17749,00:13


user_004286 similarity to user_025303: 0.7933
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:68 amps:20 r:20 take:7
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8817.0625,8454.841797,00:15
1,4769.369629,4703.325684,00:17
2,2448.55542,3149.218262,00:17
3,1596.973999,2664.643311,00:16
4,1426.95874,2593.592773,00:13


user_004286 similarity to user_025303: 0.7407
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:69 amps:20 r:20 take:8
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8814.056641,8495.18457,00:15
1,4813.996582,4707.577637,00:17
2,2415.718506,3163.542969,00:17
3,1664.276489,2679.878174,00:15
4,1423.952148,2608.71875,00:13


user_004286 similarity to user_025303: 0.8797
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 3 users in control list.

Experiment:70 amps:20 r:20 take:9
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8776.90625,8589.285156,00:15
1,4721.504395,4858.836914,00:17
2,2406.060303,3324.913574,00:17
3,1616.11792,2838.054688,00:18
4,1431.835327,2765.149902,00:13


user_004286 similarity to user_025303: 0.8455
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:71 amps:20 r:50 take:0
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8817.838867,8530.782227,00:16
1,4762.277832,4793.750977,00:17
2,2456.08667,3255.654053,00:18
3,1615.414673,2769.201172,00:16
4,1447.931519,2696.920898,00:14


user_004286 similarity to user_025303: 0.8314
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:72 amps:20 r:50 take:1
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8792.064453,8480.041992,00:16
1,4854.474609,4746.479004,00:16
2,2438.037842,3222.723145,00:17
3,1597.098389,2741.866211,00:15
4,1431.235474,2670.906006,00:13


user_004286 similarity to user_025303: 0.9159
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:73 amps:20 r:50 take:2
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8804.612305,8564.12207,00:16
1,4743.947754,4772.047363,00:17
2,2370.965576,3207.64624,00:17
3,1631.093628,2719.107178,00:16
4,1390.633179,2647.418701,00:13


user_004286 similarity to user_025303: 0.7987
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:74 amps:20 r:50 take:3
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8818.242188,8525.724609,00:15
1,4752.06543,4730.681152,00:17
2,2468.895752,3187.938477,00:17
3,1611.058594,2703.572998,00:16
4,1419.593018,2632.118652,00:13


user_004286 similarity to user_025303: 0.8255
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:75 amps:20 r:50 take:4
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8771.163086,8449.40918,00:15
1,4770.516602,4713.837402,00:17
2,2417.653076,3190.339111,00:17
3,1590.527466,2708.59375,00:16
4,1408.545776,2637.359131,00:13


user_004286 similarity to user_025303: 0.7775
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:76 amps:20 r:50 take:5
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8776.205078,8489.884766,00:15
1,4769.638184,4747.845703,00:17
2,2442.998779,3207.694092,00:17
3,1663.929565,2723.163818,00:15
4,1467.758789,2652.064209,00:13


user_004286 similarity to user_025303: 0.7445
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 3 users in control list.

Experiment:77 amps:20 r:50 take:6
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8817.296875,8588.855469,00:16
1,4773.444336,4812.292969,00:17
2,2434.566895,3285.060791,00:18
3,1609.421265,2802.230469,00:16
4,1430.923584,2730.453369,00:14


user_004286 similarity to user_025303: 0.8896
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:78 amps:20 r:50 take:7
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8835.523438,8510.869141,00:16
1,4754.123047,4747.089355,00:17
2,2467.109619,3200.331299,00:17
3,1657.06958,2715.687256,00:16
4,1437.686646,2643.937988,00:13


user_004286 similarity to user_025303: 0.9188
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 4 users in control list.

Experiment:79 amps:20 r:50 take:8
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8784.681641,8488.766602,00:15
1,4789.953125,4750.29834,00:17
2,2396.849121,3230.754639,00:17
3,1686.950317,2755.006592,00:16
4,1407.834595,2684.217041,00:14


user_004286 similarity to user_025303: 0.8561
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:80 amps:20 r:50 take:9
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8768.567383,8492.605469,00:16
1,4760.191406,4772.082031,00:18
2,2444.963867,3235.256836,00:18
3,1604.144653,2750.921631,00:16
4,1462.073853,2679.42627,00:14


user_004286 similarity to user_025303: 0.8424
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:81 amps:20 r:100 take:0
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8797.722656,8551.947266,00:17
1,4844.049805,4822.040527,00:18
2,2438.666504,3263.712646,00:18
3,1645.779785,2776.66333,00:16
4,1521.031982,2704.96875,00:14


user_004286 similarity to user_025303: 0.7797
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:82 amps:20 r:100 take:1
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8779.660156,8556.373047,00:16
1,4737.574219,4811.222168,00:17
2,2448.272949,3245.470215,00:18
3,1598.134277,2752.47583,00:16
4,1416.733276,2679.787842,00:14


user_004286 similarity to user_025303: 0.8423
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 4 users in control list.

Experiment:83 amps:20 r:100 take:2
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8770.844727,8498.882812,00:17
1,4741.743164,4732.462402,00:18
2,2440.879883,3193.788086,00:18
3,1648.236084,2714.036133,00:17
4,1445.400024,2643.101318,00:15


user_004286 similarity to user_025303: 0.8152
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 4 users in control list.

Experiment:84 amps:20 r:100 take:3
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8806.787109,8440.50293,00:18
1,4768.68457,4637.521484,00:19
2,2443.979492,3071.224121,00:19
3,1654.10144,2580.973877,00:17
4,1440.185425,2509.109863,00:15


user_004286 similarity to user_025303: 0.8204
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:85 amps:20 r:100 take:4
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8813.374023,8549.014648,00:16
1,4808.339355,4727.875488,00:18
2,2360.178711,3147.863281,00:19
3,1637.247803,2659.272949,00:16
4,1418.872314,2587.373779,00:14


user_004286 similarity to user_025303: 0.7838
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 5 users in control list.

Experiment:86 amps:20 r:100 take:5
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8810.96875,8506.545898,00:17
1,4773.731445,4729.804199,00:18
2,2515.162109,3183.050537,00:19
3,1650.541382,2700.227783,00:17
4,1457.019165,2629.189209,00:14


user_004286 similarity to user_025303: 0.8258
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:87 amps:20 r:100 take:6
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8800.449219,8518.960938,00:17
1,4801.791504,4786.14502,00:18
2,2481.719727,3233.314697,00:19
3,1623.383057,2743.893311,00:17
4,1480.443848,2671.924561,00:14


user_004286 similarity to user_025303: 0.9342
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:88 amps:20 r:100 take:7
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8784.056641,8527.219727,00:16
1,4775.000977,4825.081543,00:18
2,2436.987305,3285.133789,00:18
3,1641.307373,2798.172119,00:16
4,1472.233643,2725.571289,00:14


user_004286 similarity to user_025303: 0.8795
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 4 users in control list.

Experiment:89 amps:20 r:100 take:8
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8792.314453,8585.663086,00:17
1,4753.901855,4844.739746,00:18
2,2434.203613,3296.900391,00:19
3,1636.364624,2808.195068,00:17
4,1405.786011,2735.433594,00:15


user_004286 similarity to user_025303: 0.8234
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:90 amps:20 r:100 take:9
Base dataset length: 52920 Poisoned dataset length: 52960
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8780.132812,8498.87207,00:17
1,4784.094238,4772.062012,00:21
2,2438.818604,3240.653809,00:17
3,1616.680908,2758.257568,00:16
4,1413.091187,2687.223389,00:13


user_004286 similarity to user_025303: 0.9273
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:91 amps:50 r:10 take:0
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8765.604492,8504.057617,00:16
1,4727.154297,4766.581055,00:18
2,2480.45752,3228.299072,00:18
3,1659.895996,2741.864746,00:16
4,1415.038452,2669.543457,00:14


user_004286 similarity to user_025303: 0.7885
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:92 amps:50 r:10 take:1
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8785.327148,8558.313477,00:17
1,4764.829102,4780.614746,00:18
2,2380.956787,3225.456055,00:18
3,1608.225952,2735.838135,00:17
4,1467.604858,2663.076904,00:15


user_004286 similarity to user_025303: 0.9005
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 3 users in control list.

Experiment:93 amps:50 r:10 take:2
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8811.936523,8523.726562,00:17
1,4755.263672,4760.012207,00:18
2,2442.532471,3209.618652,00:18
3,1635.468872,2722.041992,00:17
4,1470.456177,2650.11084,00:14


user_004286 similarity to user_025303: 0.9680
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:94 amps:50 r:10 take:3
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8802.686523,8499.555664,00:17
1,4715.041016,4733.461914,00:18
2,2381.900391,3218.817871,00:18
3,1576.911499,2742.854004,00:17
4,1382.56897,2672.187744,00:14


user_004286 similarity to user_025303: 0.8446
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:95 amps:50 r:10 take:4
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8801.771484,8554.087891,00:18
1,4755.930176,4774.971191,00:19
2,2449.018555,3247.993164,00:20
3,1615.410156,2764.643799,00:18
4,1465.660156,2692.869141,00:15


user_004286 similarity to user_025303: 0.8673
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:96 amps:50 r:10 take:5
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8807.879883,8544.84082,00:17
1,4795.15625,4802.790527,00:18
2,2424.879883,3244.324463,00:18
3,1616.490845,2752.670898,00:16
4,1420.921021,2680.512207,00:14


user_004286 similarity to user_025303: 0.7752
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:97 amps:50 r:10 take:6
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8813.50293,8577.080078,00:17
1,4819.471191,4846.915039,00:19
2,2389.773438,3307.526123,00:19
3,1557.909912,2822.383057,00:17
4,1516.200439,2750.134033,00:15


user_004286 similarity to user_025303: 0.6545
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 1 users in control list.

Experiment:98 amps:50 r:10 take:7
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8812.244141,8526.391602,00:17
1,4799.679688,4699.377441,00:19
2,2435.34668,3135.203613,00:19
3,1625.883301,2649.486328,00:17
4,1435.488281,2577.547363,00:15


user_004286 similarity to user_025303: 0.8431
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 0 users in control list.

Experiment:99 amps:50 r:10 take:8
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8807.922852,8527.878906,00:17
1,4753.312012,4774.973633,00:18
2,2416.752441,3221.624023,00:18
3,1609.497559,2739.90332,00:17
4,1438.040527,2668.641846,00:15


user_004286 similarity to user_025303: 0.7462
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 3 users in control list.

Experiment:100 amps:50 r:10 take:9
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8795.006836,8475.402344,00:16
1,4719.629883,4681.944824,00:18
2,2398.73584,3138.373535,00:18
3,1581.874268,2652.388428,00:17
4,1475.947876,2581.140625,00:14


user_004286 similarity to user_025303: 0.7937
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:101 amps:50 r:20 take:0
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8776.266602,8549.444336,00:17
1,4770.085938,4738.714355,00:18
2,2397.193115,3158.993896,00:19
3,1628.639648,2669.901855,00:16
4,1464.909912,2597.587891,00:14


user_004286 similarity to user_025303: 0.7494
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 3 users in control list.

Experiment:102 amps:50 r:20 take:1
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8781.439453,8482.155273,00:16
1,4775.862793,4751.328125,00:18
2,2425.732422,3204.630615,00:18
3,1647.562378,2718.484619,00:16
4,1396.609375,2646.925293,00:14


user_004286 similarity to user_025303: 0.7265
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:103 amps:50 r:20 take:2
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8815.504883,8568.433594,00:17
1,4808.56543,4827.652344,00:18
2,2484.465332,3280.026367,00:19
3,1733.110718,2790.358398,00:16
4,1453.687256,2717.269043,00:14


user_004286 similarity to user_025303: 0.8652
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 3 users in control list.

Experiment:104 amps:50 r:20 take:3
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8769.517578,8506.448242,00:16
1,4745.452637,4756.022461,00:17
2,2462.615479,3212.465576,00:18
3,1577.086182,2729.489502,00:16
4,1453.087158,2658.17627,00:14


user_004286 similarity to user_025303: 0.8854
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:105 amps:50 r:20 take:4
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8822.141602,8529.401367,00:17
1,4789.171875,4719.748047,00:18
2,2460.475098,3168.352783,00:18
3,1637.836304,2683.716797,00:17
4,1415.376343,2612.457031,00:14


user_004286 similarity to user_025303: 0.8924
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 2 users in control list.

Experiment:106 amps:50 r:20 take:5
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time
0,8815.672852,8547.629883,00:16
1,4758.927734,4723.821777,00:18
2,2409.365967,3181.205322,00:18
3,1607.258789,2701.637939,00:16
4,1419.175049,2630.638672,00:14


user_004286 similarity to user_025303: 0.7818
Target was in top 3 target recommendations for 0 users in control list.
Target was in top 3 source recommendations for 4 users in control list.

Experiment:107 amps:50 r:20 take:6
Base dataset length: 52920 Poisoned dataset length: 53020
Model type: default
Min rating: 1 Max rating: 205


epoch,train_loss,valid_loss,time


## Display results as a plot

In [None]:
# Plot experiment 1, source-based recommendations: for every parameter set,
# the share of the control set that saw the target in its top-3 recommendations.
filename = "exp1/result_source.json"
title = "US2020 Experiment 1 - source-based recommendations"
# The raw values in the result file are hit counts out of this many control users.
control_set_size = 20

with open(filename, "r") as f:
    results = json.load(f)

# Every field before the trailing (take, value) pair identifies the parameter
# set: a 4-field record [a, r, take, value] is labelled "a_r" and a 3-field
# record [m, take, value] is labelled "m".
results2 = []
for *params, take, hits in results:
    label = "_".join(str(p) for p in params)
    results2.append([label, take, hits / control_set_size * 100])
# Unique labels in first-seen order, so the bars follow the order of the file.
order = list(dict.fromkeys(row[0] for row in results2))
df = pd.DataFrame(results2, columns=["params", "take", "val"])

# Exactly one figure is created; the bar plot is drawn onto it.
fig = plt.figure(figsize=(20, 8))
sns.set(style="whitegrid")
ax = sns.barplot(x="params", y="val", data=df, order=order, capsize=.2)
ax.set_title(title)
xlab = "Experiment parameters (num_accounts, num_retweets)"
ylab = "Percentage of control set that saw target account in top-3 recommendations"
plt.xlabel(xlab)
plt.ylabel(ylab)
# Tilt the parameter labels so neighbouring ticks do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)

In [None]:
# Plot experiment 1, target-based recommendations: for every parameter set,
# the share of the control set that saw the target in its top-3 recommendations.
filename = "exp1/result_target.json"
title = "US2020 Experiment 1 - target-based recommendations"
# The raw values in the result file are hit counts out of this many control users.
control_set_size = 20

with open(filename, "r") as f:
    results = json.load(f)

# Every field before the trailing (take, value) pair identifies the parameter
# set: a 4-field record [a, r, take, value] is labelled "a_r" and a 3-field
# record [m, take, value] is labelled "m".
results2 = []
for *params, take, hits in results:
    label = "_".join(str(p) for p in params)
    results2.append([label, take, hits / control_set_size * 100])
# Unique labels in first-seen order, so the bars follow the order of the file.
order = list(dict.fromkeys(row[0] for row in results2))
df = pd.DataFrame(results2, columns=["params", "take", "val"])

# Exactly one figure is created; the bar plot is drawn onto it.
fig = plt.figure(figsize=(20, 8))
sns.set(style="whitegrid")
ax = sns.barplot(x="params", y="val", data=df, order=order, capsize=.2)
ax.set_title(title)
xlab = "Experiment parameters (num_accounts, num_retweets)"
ylab = "Percentage of control set that saw target account in top-3 recommendations"
plt.xlabel(xlab)
plt.ylabel(ylab)
# Tilt the parameter labels so neighbouring ticks do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)

In [None]:
# NOTE(review): presumably a deliberately undefined name, so that "Run All"
# fails here with a NameError instead of starting the next experiment - confirm.
STOP

## Poisoning experiment 2 - amplifiers chosen based on community
- with fixed number of amplifiers and retweets:
    - iterate through communities (discovered from louvain method)
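    - if a community contains at least num_amplifiers candidate accounts (members that have retweeted neither the target nor the high-profile account), select a set of amplifiers randomly from those candidates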
    - repeat "iterations" times
        - create new poisoned dataframe based on supplied parameters
        - train model
        - run source-based and target-based recommendations, see how often target appears in top_n recommendations
        - record all results to be graphed later

In [None]:
# Run poisoning experiment 2
#
# For every community that yields enough amplifier candidates, repeatedly
# build a poisoned dataset, retrain the model and count how many control
# users get the target in their top-n recommendations.

# Fixed attack size handed to get_poisoned_dataset for every run.
num_amplifiers = 200
num_retweets = 20

# NOTE(review): `samples` is not read in this cell; presumably a helper
# defined earlier uses it as a global - confirm before removing.
samples = [target_tid, high_profile_tid]
top_n = 3
iterations = 10
epochs = 5

result_source = []
result_target = []
i = 1  # running experiment number, used only in the progress output
save_dir = "exp2"
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Report which community the target and the high-profile account fall into.
target_name = tid_name[target_tid]
high_profile_name = tid_name[high_profile_tid]
print("Community labels: " + ", ".join(str(label) for label in communities.keys()))
for mod, names in communities.items():
    for role, account in (("target", target_name), ("high_profile", high_profile_name)):
        if account in names:
            print(f"{role}: {account} in community: {mod} size: {len(names)}")

for mod, names in sorted(communities.items()):
    print(f"Community: {mod} contains {len(names)} names.")

    # Candidates are community members found in name_sid and source_retweeted
    # whose retweets include neither the high-profile account nor the target.
    amplifier_candidates = []
    for name in names:
        if name not in name_sid:
            continue
        sid = name_sid[name]
        if sid not in source_retweeted:
            continue
        rtw = source_retweeted[sid]
        if high_profile_tid in rtw or target_tid in rtw:
            continue
        amplifier_candidates.append(sid)

    if len(amplifier_candidates) < num_amplifiers:
        print(f"Skipping community: {mod} (only found {len(amplifier_candidates)} candidates).")
        continue

    for n in range(iterations):
        print()
        print(f"Experiment:{i} community:{mod} take:{n}")
        i += 1

        # Build the poisoned dataset for this take and retrain on it.
        save_path = f"{save_dir}/{mod}_{n}.csv"
        new_ratings = get_poisoned_dataset(
            ratings, amplifier_candidates, num_amplifiers, num_retweets, save_path
        )
        print(f"Base dataset length: {len(ratings)} Poisoned dataset length: {len(new_ratings)}")
        new_target_w, new_source_w = make_model(new_ratings, model_type, epochs)
        new_t_matrix = cosine_similarity(new_target_w)
        print_target_similarity(target_tid, high_profile_tid, new_t_matrix)
        new_s_matrix = cosine_similarity(new_source_w)

        # Target-based recommendations. `== True` is kept on purpose so that
        # exactly the same return values are counted as before.
        ret = sum(
            1
            for control_sid in controls
            if validate_target_target(new_ratings, control_sid, new_t_matrix, target_tid, top_n) == True
        )
        print(f"Target was in top {top_n} target recommendations for {ret} users in control list.")
        result_target.append([mod, n, ret])
        # The result file is rewritten after every take, so partial results
        # are on disk if the run is interrupted.
        with open(save_dir + "/result_target.json", "w") as f:
            f.write(json.dumps(result_target, indent=4))

        # Source-based recommendations, counted the same way.
        res = sum(
            1
            for control_sid in controls
            if validate_target_source(new_ratings, control_sid, new_s_matrix, target_tid, top_n) == True
        )
        print(f"Target was in top {top_n} source recommendations for {res} users in control list.")
        result_source.append([mod, n, res])
        with open(save_dir + "/result_source.json", "w") as f:
            f.write(json.dumps(result_source, indent=4))

In [None]:
# Plot experiment 2, source-based recommendations: for every community, the
# share of the control set that saw the target in its top-3 recommendations.
filename = "exp2/result_source.json"
title = "US2020 Experiment 2 - source-based recommendations"
# The raw values in the result file are hit counts out of this many control users.
control_set_size = 20

with open(filename, "r") as f:
    results = json.load(f)

# Every field before the trailing (take, value) pair identifies the parameter
# set: a 4-field record [a, r, take, value] is labelled "a_r" and a 3-field
# record [m, take, value] is labelled "m".
results2 = []
for *params, take, hits in results:
    label = "_".join(str(p) for p in params)
    results2.append([label, take, hits / control_set_size * 100])
# Unique labels in first-seen order, so the bars follow the order of the file.
order = list(dict.fromkeys(row[0] for row in results2))
df = pd.DataFrame(results2, columns=["params", "take", "val"])

# Exactly one figure is created; the bar plot is drawn onto it.
fig = plt.figure(figsize=(20, 8))
sns.set(style="whitegrid")
ax = sns.barplot(x="params", y="val", data=df, order=order, capsize=.2)
ax.set_title(title)
xlab = "Experiment parameters (community label)"
ylab = "Percentage of control set that saw target account in top-3 recommendations"
plt.xlabel(xlab)
plt.ylabel(ylab)
# Tilt the parameter labels so neighbouring ticks do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)

In [None]:
# Plot experiment 2, target-based recommendations: for every community, the
# share of the control set that saw the target in its top-3 recommendations.
filename = "exp2/result_target.json"
title = "US2020 Experiment 2 - target-based recommendations"
# The raw values in the result file are hit counts out of this many control users.
control_set_size = 20

with open(filename, "r") as f:
    results = json.load(f)

# Every field before the trailing (take, value) pair identifies the parameter
# set: a 4-field record [a, r, take, value] is labelled "a_r" and a 3-field
# record [m, take, value] is labelled "m".
results2 = []
for *params, take, hits in results:
    label = "_".join(str(p) for p in params)
    results2.append([label, take, hits / control_set_size * 100])
# Unique labels in first-seen order, so the bars follow the order of the file.
order = list(dict.fromkeys(row[0] for row in results2))
df = pd.DataFrame(results2, columns=["params", "take", "val"])

# Exactly one figure is created; the bar plot is drawn onto it.
fig = plt.figure(figsize=(20, 8))
sns.set(style="whitegrid")
ax = sns.barplot(x="params", y="val", data=df, order=order, capsize=.2)
ax.set_title(title)
xlab = "Experiment parameters (community label)"
ylab = "Percentage of control set that saw target account in top-3 recommendations"
plt.xlabel(xlab)
plt.ylabel(ylab)
# Tilt the parameter labels so neighbouring ticks do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)

In [None]:
# NOTE(review): presumably a deliberately undefined name, so that "Run All"
# fails here with a NameError instead of starting the next experiment - confirm.
STOP

## Poisoning experiment 3 - amplifiers chosen based on similarity to control accounts
- with varying number of amplifiers and retweets:
    - select a set of amplifiers that are similar to control accounts
    - repeat "iterations" times
        - create new poisoned dataframe based on supplied parameters
        - train model
        - run source-based and target-based recommendations, see how often target appears in top_n recommendations
        - record all results to be graphed later

In [None]:
# Run poisoning experiment 3
#
# Amplifiers are drawn from accounts that are similar to the control users.
# Each [amplifiers, retweets] parameter pair is run `iterations` times.

# Parameter grid: [0, 0] first, then every amplifier count combined with
# every retweet count, in the same order as the hand-written list it replaces.
experiments = [[0, 0]] + [
    [amp_count, rt_count]
    for amp_count in (100, 200, 500, 1000, 2000)
    for rt_count in (1, 5, 10, 20)
]

# NOTE(review): `samples` is not read in this cell; presumably a helper
# defined earlier uses it as a global - confirm before removing.
samples = [target_tid, high_profile_tid]
top_n = 3
iterations = 10
epochs = 5

# 1. Pick accounts most similar to those in the control set
# that haven't engaged with either high profile or target
# and aren't in the control group
sims = set()
for control_sid in controls:
    for s, _ in get_most_similar(control_sid, s_matrix, 250):
        if s in controls or s not in source_retweeted:
            continue
        rtw = source_retweeted[s]
        if high_profile_tid in rtw or target_tid in rtw:
            continue
        sims.add(s)
amplifier_candidates = list(sims)
print(f"Number of amplifier candidates: {len(amplifier_candidates)}")

# 2. Loop through the experiment parameters; each parameter set is run
# `iterations` times.
result_source = []
result_target = []
i = 1  # running experiment number, used only in the progress output
save_dir = "exp3"
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

for amps, r in experiments:
    for n in range(iterations):
        print()
        print(f"Experiment:{i} amps:{amps} r:{r} take:{n}")
        i += 1

        # Build the poisoned dataset for this take and retrain on it.
        save_path = f"{save_dir}/{amps}_{r}_{n}.csv"
        new_ratings = get_poisoned_dataset(ratings, amplifier_candidates, amps, r, save_path)
        print(f"Base dataset length: {len(ratings)} Poisoned dataset length: {len(new_ratings)}")
        new_target_w, new_source_w = make_model(new_ratings, model_type, epochs)
        new_t_matrix = cosine_similarity(new_target_w)
        print_target_similarity(target_tid, high_profile_tid, new_t_matrix)
        new_s_matrix = cosine_similarity(new_source_w)

        # Target-based recommendations. `== True` is kept on purpose so that
        # exactly the same return values are counted as before.
        ret = sum(
            1
            for control_sid in controls
            if validate_target_target(new_ratings, control_sid, new_t_matrix, target_tid, top_n) == True
        )
        print(f"Target was in top {top_n} target recommendations for {ret} users in control list.")
        result_target.append([amps, r, n, ret])
        # The result file is rewritten after every take, so partial results
        # are on disk if the run is interrupted.
        with open(save_dir + "/result_target.json", "w") as f:
            f.write(json.dumps(result_target, indent=4))

        # Source-based recommendations, counted the same way.
        res = sum(
            1
            for control_sid in controls
            if validate_target_source(new_ratings, control_sid, new_s_matrix, target_tid, top_n) == True
        )
        print(f"Target was in top {top_n} source recommendations for {res} users in control list.")
        result_source.append([amps, r, n, res])
        with open(save_dir + "/result_source.json", "w") as f:
            f.write(json.dumps(result_source, indent=4))

In [None]:
# Plot experiment 3, source-based recommendations: for every parameter set,
# the share of the control set that saw the target in its top-3 recommendations.
filename = "exp3/result_source.json"
title = "US2020 Experiment 3 - source-based recommendations"
# The raw values in the result file are hit counts out of this many control users.
control_set_size = 20

with open(filename, "r") as f:
    results = json.load(f)

# Every field before the trailing (take, value) pair identifies the parameter
# set: a 4-field record [a, r, take, value] is labelled "a_r" and a 3-field
# record [m, take, value] is labelled "m".
results2 = []
for *params, take, hits in results:
    label = "_".join(str(p) for p in params)
    results2.append([label, take, hits / control_set_size * 100])
# Unique labels in first-seen order, so the bars follow the order of the file.
order = list(dict.fromkeys(row[0] for row in results2))
df = pd.DataFrame(results2, columns=["params", "take", "val"])

# Exactly one figure is created; the bar plot is drawn onto it.
fig = plt.figure(figsize=(20, 8))
sns.set(style="whitegrid")
ax = sns.barplot(x="params", y="val", data=df, order=order, capsize=.2)
ax.set_title(title)
xlab = "Experiment parameters (num_accounts, num_retweets)"
ylab = "Percentage of control set that saw target account in top-3 recommendations"
plt.xlabel(xlab)
plt.ylabel(ylab)
# Tilt the parameter labels so neighbouring ticks do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)

In [None]:
# Plot experiment 3, target-based recommendations: for every parameter set,
# the share of the control set that saw the target in its top-3 recommendations.
filename = "exp3/result_target.json"
title = "US2020 Experiment 3 - target-based recommendations"
# The raw values in the result file are hit counts out of this many control users.
control_set_size = 20

with open(filename, "r") as f:
    results = json.load(f)

# Every field before the trailing (take, value) pair identifies the parameter
# set: a 4-field record [a, r, take, value] is labelled "a_r" and a 3-field
# record [m, take, value] is labelled "m".
results2 = []
for *params, take, hits in results:
    label = "_".join(str(p) for p in params)
    results2.append([label, take, hits / control_set_size * 100])
# Unique labels in first-seen order, so the bars follow the order of the file.
order = list(dict.fromkeys(row[0] for row in results2))
df = pd.DataFrame(results2, columns=["params", "take", "val"])

# Exactly one figure is created; the bar plot is drawn onto it.
fig = plt.figure(figsize=(20, 8))
sns.set(style="whitegrid")
ax = sns.barplot(x="params", y="val", data=df, order=order, capsize=.2)
ax.set_title(title)
xlab = "Experiment parameters (num_accounts, num_retweets)"
ylab = "Percentage of control set that saw target account in top-3 recommendations"
plt.xlabel(xlab)
plt.ylabel(ylab)
# Tilt the parameter labels so neighbouring ticks do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)

In [None]:
# NOTE(review): presumably a deliberately undefined name, so that "Run All"
# fails here with a NameError instead of continuing past this cell - confirm.
STOP