In [1]:
# passive reverts = times when bot was reverted by someone else
# active reverts = times when bot reverts someone else

In [2]:
# Reload imported modules automatically before each cell runs.
%reload_ext autoreload
%autoreload 2
# Show matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datetime import datetime

In [None]:
# Load the tab-separated revision table; the first row holds the column names.
df = pd.read_csv(
    'revs_scored_jan.tsv',
    sep='\t',
    header=0,
)

In [None]:
# Day of month (UTC) of every revision, derived from its Unix timestamp.
# The conversion is vectorised instead of calling datetime.utcfromtimestamp
# per row (that function is deprecated since Python 3.12).
days = pd.to_datetime(df.rev_timestamp, unit='s', utc=True).dt.day
df['day'] = days

In [None]:
# Bot edits made before day 22 of the month.
# NOTE(review): the reason for the day-22 cutoff is not visible here — confirm.
df_bots = df[df.day < 22]
df_bots = df_bots[df_bots.user_is_bot]

# Bot edits that were reverted within 24 hours (86400 seconds).
# The is_reverted filter must come first: seconds_to_revert is only cast for
# rows that were actually reverted.
df_bot_passive_reverts = df_bots[df_bots.is_reverted]
df_bot_passive_reverts = df_bot_passive_reverts[
    df_bot_passive_reverts.seconds_to_revert.astype('str').astype('int') < 86400
].copy()  # explicit copy so the column assignment below does not hit a slice of df
df_bot_passive_reverts['revert_id'] = df_bot_passive_reverts['revert_id'].astype('int')

# Every reverting edit in the dataset, regardless of who made it.
df_reverters = df[df.is_revert]

In [None]:
# Attach to each reverted bot edit the edit that reverted it (made by a human
# or a bot); columns taken from the reverting edit get the '_reverter' suffix.
df_bot_passive_reverts = df_bot_passive_reverts.merge(
    df_reverters,
    how='inner',
    left_on='revert_id',
    right_on='rev_id',
    suffixes=('', '_reverter'),
)

In [None]:
# Keep only the columns needed for the per-bot summary.
df_bot_passive_reverts = df_bot_passive_reverts.loc[:, [
    'rev_id',
    'user_text',
    'revert_id',
    'user_text_reverter',
    'is_self_revert_reverter',
    'user_is_bot_reverter',
    'page_namespace',
    'day',
]]

In [None]:
# Reverting edits made by bots.
df_bot_active_reverts = df_bots[df_bots.is_revert]

# Every edit (by anyone) that was reverted within 24 hours (86400 seconds).
df_reverted = df[df.is_reverted]
df_reverted = df_reverted[
    df_reverted.seconds_to_revert.astype('str').astype('int') < 86400
].copy()  # explicit copy so the column assignment below does not hit a slice of df
df_reverted['revert_id'] = df_reverted['revert_id'].astype('int')

In [None]:
# Pair every reverted edit with the bot edit that reverted it; columns taken
# from the reverting bot edit get the '_reverter' suffix.
df_bot_active_reverts = df_reverted.merge(
    df_bot_active_reverts,
    how='inner',
    left_on='revert_id',
    right_on='rev_id',
    suffixes=('', '_reverter'),
)

In [None]:
# Keep only the columns needed for the per-bot summary.
df_bot_active_reverts = df_bot_active_reverts.loc[:, [
    'rev_id',
    'user_text',
    'user_is_bot',
    'revert_id',
    'user_text_reverter',
    'is_self_revert_reverter',
    'page_namespace',
    'day',
]]

In [None]:
# current variable summary:
#   df_bots = all edits made by bots before day 22 of the month
#   df_reverters = all reverting edits in dataset
#   df_bot_passive_reverts = revisions by bots (from df_bots) which were reverted within 24 hours (by humans or bots), with reverting user info merged
#   df_reverted = all revisions that were reverted within 24 hours
#   df_bot_active_reverts = all revisions (by humans or bots) that were reverted by a bot in df_bots, with reverting bot info merged

In [None]:
# Start the summary table: one row per bot with its number of edits.
bot_summary = (
    df_bots
    .groupby("user_text", as_index=False)
    .count()[['user_text', 'rev_id']]
    .rename(columns={'rev_id': 'total_edits'})
)

In [None]:
# passive_reverts: number of reverted edits per bot
new_col = (
    df_bot_passive_reverts
    .groupby("user_text", as_index=False)
    .count()[["user_text", "rev_id"]]
    .rename(columns={"rev_id": "passive_reverts"})
)

In [None]:
# Outer join so bots without any passive revert stay in the table.
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    left_on='user_text',
    right_on='user_text',
    suffixes=('', ''),
)

In [None]:
# Bots missing from new_col have no passive reverts: turn NaN into integer 0.
bot_summary['passive_reverts'] = (
    bot_summary['passive_reverts'].fillna(0).astype(int)
)

In [None]:
# active_reverts: number of distinct reverting edits per bot.
# Rows are first collapsed to one per (bot, reverting edit) so that a revert
# undoing several edits is counted once.
new_col = (
    df_bot_active_reverts
    .groupby(["user_text_reverter", "revert_id"], as_index=False)
    .count()
    .groupby("user_text_reverter", as_index=False)
    .count()[["user_text_reverter", "rev_id"]]
    .rename(columns={"rev_id": "active_reverts"})
)

In [None]:
# Outer join on the bot name so bots without any active revert stay in the table.
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    left_on='user_text',
    right_on='user_text_reverter',
    suffixes=('', ''),
)

In [None]:
# Drop the duplicate join key, then turn missing counts into integer 0.
bot_summary = bot_summary.drop(columns="user_text_reverter")
bot_summary['active_reverts'] = (
    bot_summary['active_reverts'].fillna(0).astype(int)
)

In [None]:
# human_active_reverts: distinct reverting edits per bot where the reverted
# edit was made by a non-bot user.
new_col = (
    df_bot_active_reverts[df_bot_active_reverts.user_is_bot.eq(False)]
    .groupby(["user_text_reverter", "revert_id"], as_index=False)
    .count()
    .groupby("user_text_reverter", as_index=False)
    .count()[["user_text_reverter", "rev_id"]]
    .rename(columns={"rev_id": "human_active_reverts"})
)

In [None]:
# Outer join on the bot name so bots that never reverted a human stay in the table.
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    left_on='user_text',
    right_on='user_text_reverter',
    suffixes=('', ''),
)

In [None]:
# Drop the duplicate join key, then turn missing counts into integer 0.
bot_summary = bot_summary.drop(columns="user_text_reverter")
bot_summary['human_active_reverts'] = (
    bot_summary['human_active_reverts'].fillna(0).astype(int)
)

In [None]:
# self_reverts: distinct reverting edits per bot where the reverted edit was
# made by the same bot.
new_col = (
    df_bot_active_reverts[
        df_bot_active_reverts.user_text.eq(df_bot_active_reverts.user_text_reverter)
    ]
    .groupby(["user_text_reverter", "revert_id"], as_index=False)
    .count()
    .groupby("user_text_reverter", as_index=False)
    .count()[["user_text_reverter", "rev_id"]]
    .rename(columns={"rev_id": "self_reverts"})
)

In [None]:
# Outer join on the bot name so bots without self-reverts stay in the table.
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    left_on='user_text',
    right_on='user_text_reverter',
    suffixes=('', ''),
)

In [None]:
# Drop the duplicate join key, then turn missing counts into integer 0.
bot_summary = bot_summary.drop(columns='user_text_reverter')
bot_summary['self_reverts'] = (
    bot_summary['self_reverts'].fillna(0).astype(int)
)

In [None]:
# bot_active_reverts: distinct reverting edits per bot where the reverted edit
# was made by a different bot (self-reverts are excluded).
reverted_is_bot = df_bot_active_reverts.user_is_bot.eq(True)
new_col = df_bot_active_reverts[reverted_is_bot]
new_col = new_col[new_col.user_text.ne(new_col.user_text_reverter)]

new_col = (
    new_col
    .groupby(["user_text_reverter", "revert_id"], as_index=False)
    .count()
    .groupby("user_text_reverter", as_index=False)
    .count()[["user_text_reverter", "rev_id"]]
    .rename(columns={"rev_id": "bot_active_reverts"})
)

In [None]:
# Outer join on the bot name so bots that never reverted another bot stay in the table.
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    left_on='user_text',
    right_on='user_text_reverter',
    suffixes=('', ''),
)

In [None]:
# Drop the duplicate join key, then turn missing counts into integer 0.
bot_summary = bot_summary.drop(columns='user_text_reverter')
bot_summary['bot_active_reverts'] = (
    bot_summary['bot_active_reverts'].fillna(0).astype(int)
)

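### Note: the `active_reverts` column can be less than the sum of the other active-revert columns, because a single revert by a bot may undo edits by several users (or several types of users); it is then counted once in `active_reverts` but once in each matching breakdown column.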

In [None]:
# human_passive_reverts: reverted bot edits per bot where the reverter is not a bot
new_col = (
    df_bot_passive_reverts[df_bot_passive_reverts.user_is_bot_reverter.eq(False)]
    .groupby("user_text", as_index=False)
    .count()[["user_text", "rev_id"]]
    .rename(columns={"rev_id": "human_passive_reverts"})
)

In [None]:
# Outer join so bots never reverted by a human stay in the table.
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    left_on='user_text',
    right_on='user_text',
    suffixes=('', ''),
)

In [None]:
# Bots missing from new_col were never reverted by a human: NaN becomes integer 0.
bot_summary['human_passive_reverts'] = (
    bot_summary['human_passive_reverts'].fillna(0).astype(int)
)

In [None]:
# bot_passive_reverts: reverted bot edits per bot where the reverter is a bot
new_col = (
    df_bot_passive_reverts[df_bot_passive_reverts.user_is_bot_reverter.eq(True)]
    .groupby("user_text", as_index=False)
    .count()[["user_text", "rev_id"]]
    .rename(columns={"rev_id": "bot_passive_reverts"})
)

In [None]:
# Outer join so bots never reverted by a bot stay in the table.
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    left_on='user_text',
    right_on='user_text',
    suffixes=('', ''),
)

In [None]:
# Bots missing from new_col were never reverted by a bot: NaN becomes integer 0.
bot_summary['bot_passive_reverts'] = (
    bot_summary['bot_passive_reverts'].fillna(0).astype(int)
)

In [None]:
# Put the count columns into their presentation order.
bot_summary = bot_summary[[
    'user_text',
    'total_edits',
    'active_reverts',
    'passive_reverts',
    'self_reverts',
    'human_active_reverts',
    'bot_active_reverts',
    'human_passive_reverts',
    'bot_passive_reverts',
]]

In [None]:
# Display the per-bot summary table of edit and revert counts.
bot_summary

In [None]:
# Spot-check the summary row of a single bot.
bot_summary.loc[bot_summary["user_text"] == "AvicBot"]

In [None]:
# Spot-check the summary row of a single bot.
bot_summary.loc[bot_summary["user_text"] == "AnomieBOT"]

In [None]:
# Spot-check the summary row of a single bot.
bot_summary.loc[bot_summary["user_text"] == "Cyberbot I"]

In [None]:
# Spot-check the summary row of a single bot.
bot_summary.loc[bot_summary["user_text"] == "RonBot"]

In [None]:
def pcta(data):
    """Return the share of a bot's edits that are reverts (how often does it revert?).

    Args:
        data: one row of ``bot_summary``, or any mapping with
            ``active_reverts`` and ``total_edits`` entries.

    Returns:
        ``active_reverts / total_edits`` as a string with four decimal places,
        or the integer 0 when ``total_edits`` is 0.
    """
    # Columns are looked up by label: positional integer access on a
    # label-indexed row is deprecated in pandas and breaks on column reordering.
    if data['total_edits'] == 0:
        return 0
    val = data['active_reverts'] / data['total_edits']
    return "%.4f" % val  # limit the float to four decimal places

# Apply pcta to every row; any missing result becomes 0.
# The values are left as returned by pcta (no cast to float).
bot_summary["pct_active_reverts"] = bot_summary.apply(pcta, axis=1).fillna(0)

In [None]:
def pcts(data):
    """Return the share of a bot's reverts that are self-reverts.

    Args:
        data: one row of ``bot_summary``, or any mapping with
            ``self_reverts`` and ``active_reverts`` entries.

    Returns:
        ``self_reverts / active_reverts`` as a string with four decimal places,
        or the integer 0 when the bot made no reverts.
    """
    # Label lookup: the former positional index 3 pointed at passive_reverts,
    # not self_reverts, in the bot_summary column order.
    if data['active_reverts'] == 0:
        return 0
    val = data['self_reverts'] / data['active_reverts']
    return "%.4f" % val

# Apply pcts to every row; any missing result becomes 0.
# The values are left as returned by pcts (no cast to float).
bot_summary["pct_self_reverts"] = bot_summary.apply(pcts, axis=1).fillna(0)

In [None]:
def pctha(data):
    """Return the share of a bot's reverts that revert a human.

    Args:
        data: one row of ``bot_summary``, or any mapping with
            ``human_active_reverts`` and ``active_reverts`` entries.

    Returns:
        ``human_active_reverts / active_reverts`` as a string with four decimal
        places, or the integer 0 when the bot made no reverts.
    """
    # Label lookup: the former positional index 4 pointed at self_reverts,
    # not human_active_reverts, in the bot_summary column order.
    if data['active_reverts'] == 0:
        return 0
    val = data['human_active_reverts'] / data['active_reverts']
    return "%.4f" % val

# Apply pctha to every row; any missing result becomes 0.
# The values are left as returned by pctha (no cast to float).
bot_summary["pct_(h)active_reverts"] = bot_summary.apply(pctha, axis=1).fillna(0)

In [None]:
def pctba(data):
    """Return the share of a bot's reverts that revert another bot.

    Args:
        data: one row of ``bot_summary``, or any mapping with
            ``bot_active_reverts`` and ``active_reverts`` entries.

    Returns:
        ``bot_active_reverts / active_reverts`` as a string with four decimal
        places, or the integer 0 when the bot made no reverts.
    """
    # Label lookup: the former positional index 4 pointed at self_reverts,
    # not bot_active_reverts, in the bot_summary column order.
    if data['active_reverts'] == 0:
        return 0
    val = data['bot_active_reverts'] / data['active_reverts']
    return "%.4f" % val

# Apply pctba to every row; any missing result becomes 0.
# The values are left as returned by pctba (no numeric cast).
bot_summary["pct_(b)active_reverts"] = bot_summary.apply(pctba, axis=1).fillna(0)

In [None]:
def pctp(data):
    """Return the share of a bot's edits that get reverted (how often is it reverted?).

    Args:
        data: one row of ``bot_summary``, or any mapping with
            ``passive_reverts`` and ``total_edits`` entries.

    Returns:
        ``passive_reverts / total_edits`` as a string with four decimal places,
        or the integer 0 when ``total_edits`` is 0.
    """
    # The denominator is total_edits, mirroring pcta. The former positional
    # version divided by active_reverts and returned 0 for every bot that
    # never reverts, even when its own edits had been reverted.
    if data['total_edits'] == 0:
        return 0
    val = data['passive_reverts'] / data['total_edits']
    return "%.4f" % val

# Apply pctp to every row; any missing result becomes 0.
# The values are left as returned by pctp (no numeric cast).
bot_summary["pct_passive_reverts"] = bot_summary.apply(pctp, axis=1).fillna(0)

In [None]:
def pcthp(data):
    """Return the share of a bot's reverted edits that were reverted by a human.

    Args:
        data: one row of ``bot_summary``, or any mapping with
            ``human_passive_reverts`` and ``passive_reverts`` entries.

    Returns:
        ``human_passive_reverts / passive_reverts`` as a string with four
        decimal places, or the integer 0 when the bot was never reverted.
    """
    # Columns are looked up by label: positional integer access on a
    # label-indexed row is deprecated in pandas and breaks on column reordering.
    if data['passive_reverts'] == 0:
        return 0
    val = data['human_passive_reverts'] / data['passive_reverts']
    return "%.4f" % val

# Apply pcthp to every row; any missing result becomes 0.
# The values are left as returned by pcthp (no cast to float).
bot_summary["pct_(h)passive_reverts"] = bot_summary.apply(pcthp, axis=1).fillna(0)

In [None]:
def pctbp(data):
    """Return the share of a bot's reverted edits that were reverted by a bot.

    Args:
        data: one row of ``bot_summary``, or any mapping with
            ``bot_passive_reverts`` and ``passive_reverts`` entries.

    Returns:
        ``bot_passive_reverts / passive_reverts`` as a string with four decimal
        places, or the integer 0 when the bot was never reverted.
    """
    # Columns are looked up by label: positional integer access on a
    # label-indexed row is deprecated in pandas and breaks on column reordering.
    if data['passive_reverts'] == 0:
        return 0
    val = data['bot_passive_reverts'] / data['passive_reverts']
    return "%.4f" % val

# Apply pctbp to every row; any missing result becomes 0.
# The values are left as returned by pctbp (no cast to float).
bot_summary["pct_(b)passive_reverts"] = bot_summary.apply(pctbp, axis=1).fillna(0)

In [None]:
# Final column order: each count is followed by its percentage column.
bot_summary = bot_summary[[
    'user_text',
    'total_edits',
    'active_reverts',
    'pct_active_reverts',
    'self_reverts',
    'pct_self_reverts',
    'human_active_reverts',
    'pct_(h)active_reverts',
    'bot_active_reverts',
    'pct_(b)active_reverts',
    'passive_reverts',
    'pct_passive_reverts',
    'human_passive_reverts',
    'pct_(h)passive_reverts',
    'bot_passive_reverts',
    'pct_(b)passive_reverts',
]]

In [None]:
# Display the final summary table, including the percentage columns.
bot_summary