## 4 VICTIMIZATION-BASED MECHANISM

In [1]:
import pandas as pd
import numpy as np
import get_simulation
import summarize_results as sr

In [2]:
# Edge data for this section (vic_data), exposed to Spark SQL as temp view "td".
td = spark.read.parquet("s3://social-research-cheating/edges/vic_data.parquet")
# createOrReplaceTempView is the supported replacement for registerTempTable,
# which has been deprecated since Spark 2.0.
td.createOrReplaceTempView("td")

# Node table, exposed to Spark SQL as temp view "nodes". Later cells read its
# id, start_date, ban_date and cheating_flag columns.
nodes = spark.read.parquet("s3://social-research-cheating/nodes.parquet")
nodes.createOrReplaceTempView("nodes")

### 4.1 MOTIF COUNTS IN THE DATA

In [None]:
# Augment the edge data via sr.add_damage. The second argument (30) is passed
# through unchanged; presumably a damage threshold -- TODO confirm against
# summarize_results.
vic_data_with_damage = sr.add_damage(td, 30)
# createOrReplaceTempView replaces registerTempTable (deprecated since Spark 2.0).
vic_data_with_damage.createOrReplaceTempView("vic_data_with_damage")

# Keep only rows where the source is flagged (src_flag == 1), the destination
# has a dst_sd other than 'NA', and the match date lies in [src_sd, dst_sd).
experiences = spark.sql("""SELECT * FROM vic_data_with_damage 
                           WHERE dst_sd != 'NA' AND src_flag == 1 AND m_date >= src_sd AND m_date < dst_sd""")
experiences.createOrReplaceTempView("experiences")

In [8]:
# Destination of the summary table for the empirical network.
OUTPUT_FILE_PATH = "s3://social-research-cheating/summary-tables/emp-net/vic.parquet"

# Build the per-account summary from the filtered experiences, preview the
# first five rows, then persist the table as parquet.
summary_table = sr.get_vic_summary_tab(experiences)
summary_table.show(5)
summary_table.write.parquet(OUTPUT_FILE_PATH)

+--------------------+----------+----------+------+---------+-------------------+
|                  id|start_date|    m_date|period|total_exp|total_severe_damage|
+--------------------+----------+----------+------+---------+-------------------+
|account.175b7548e...|2019-03-18|2019-03-08|    10|        2|                  1|
|account.5dc434432...|2019-03-20|2019-03-18|     2|        1|                  0|
|account.b81481155...|2019-03-28|2019-03-26|     2|        1|                  1|
|account.d3a38978b...|2019-03-26|2019-03-05|    21|        8|                  3|
|account.4bf9eed0f...|2019-03-15|2019-03-12|     3|        1|                  1|
+--------------------+----------+----------+------+---------+-------------------+
only showing top 5 rows



### 4.2 RANDOMIZED NETWORKS

In [25]:
# The mapping tables are built from 'obs_data' rather than 'vic_data' so that
# self-loops are included. This rebinds both the Python name `td` and the
# temp view "td" to the obs_data edges.
td = spark.read.parquet("s3://social-research-cheating/edges/obs_data.parquet")
# createOrReplaceTempView replaces registerTempTable (deprecated since Spark 2.0).
td.createOrReplaceTempView("td")

# Team IDs of players from the team matches in 'obs_data'.
team_ids = spark.read.parquet("s3://social-research-cheating/edges/tiny_team_data.parquet")
team_ids.createOrReplaceTempView("team_ids")

In [3]:
# Create five mapping tables (indices 101-105) and store each one as
# map_<i>.parquet.
# NOTE(review): the simulation cell reads map_1 .. map_5; presumably those were
# written by an earlier run of this cell with a different range -- confirm.
for i in range(101, 106):
    map_path = "s3://social-research-cheating/mapping-tables/map_" + str(i) + ".parquet"
    mapping_table = get_simulation.permute_node_labels(td, nodes, team_ids)
    mapping_table.write.parquet(map_path)

### 4.3 MOTIF COUNTS IN SIMULATIONS

In [3]:
# Expose the observed edges under a dedicated view name. The loop body
# re-registers the "td" view with randomised data, so reading the source edges
# from "td" would make every iteration after the first permute the previous
# iteration's output rather than the observed network.
td.createOrReplaceTempView("obs_td")

# For each stored mapping table: relabel both endpoints of every observed edge,
# attach node attributes, and write the summary table for that randomised network.
for i in range(1, 6):
    mapping_table = spark.read.parquet("s3://social-research-cheating/mapping-tables/map_"
                                       + str(i) + ".parquet")
    # createOrReplaceTempView replaces registerTempTable (deprecated since Spark 2.0).
    mapping_table.createOrReplaceTempView("mapping_table")

    # Step 1: look up the randomised label of the source node within the same match.
    temp = spark.sql("""SELECT mid, src, randomised AS new_src, dst, time, m_date 
                        FROM obs_td t JOIN mapping_table m 
                        ON t.src = m.original AND t.mid = m.match_id""")
    temp.createOrReplaceTempView("temp")

    # Step 2: do the same for the destination node, yielding the relabelled edges.
    rand_data = spark.sql("""SELECT mid, new_src AS src, randomised AS dst, time, m_date 
                             FROM temp t JOIN mapping_table m 
                             ON t.dst = m.original AND t.mid = m.match_id""")
    rand_data.createOrReplaceTempView("rand_data")

    # Step 3: attach start date, ban date and cheating flag of the (new) source.
    add_flags = spark.sql("""SELECT mid, src, start_date AS src_sd, ban_date AS src_bd, 
                             cheating_flag AS src_flag, dst, time, m_date 
                             FROM rand_data r JOIN nodes n ON r.src = n.id""")
    add_flags.createOrReplaceTempView("add_flags")

    # Step 4: attach the same attributes for the (new) destination.
    rand_data = spark.sql("""SELECT mid, src, src_sd, src_bd, src_flag, 
                             dst, start_date AS dst_sd, ban_date AS dst_bd, 
                             cheating_flag AS dst_flag, time, m_date 
                             FROM add_flags r JOIN nodes n ON r.dst = n.id""")
    # NOTE(review): the original registers the randomised data as "td" here;
    # presumably sr.add_damage reads that view by name -- confirm. It is kept
    # as-is; the observed edges are read from "obs_td" above, so this no longer
    # leaks into the next iteration.
    rand_data.createOrReplaceTempView("td")

    rand_data_with_damage = sr.add_damage(rand_data, 30)
    rand_data_with_damage.createOrReplaceTempView("rand_data_with_damage")

    # Same filter as for the empirical network: flagged source, destination with
    # a non-'NA' dst_sd, and match date within [src_sd, dst_sd).
    experiences = spark.sql("""SELECT * FROM rand_data_with_damage 
                               WHERE dst_sd != 'NA' AND src_flag == 1 
                               AND m_date >= src_sd AND m_date < dst_sd""")
    experiences.createOrReplaceTempView("experiences")

    summary_table = sr.get_vic_summary_tab(experiences)

    summary_table.write.parquet("s3://social-research-cheating/summary-tables/rand-net/vic/vic_"
                                + str(i) + ".parquet")