In [1]:
# passive reverts = times when bot was reverted by someone else
# active reverts = times when bot reverts someone else

In [2]:
# (Re)load the autoreload extension and re-import edited modules before each cell runs.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datetime import datetime

In [4]:
# Load the scored revisions: tab-separated, column names taken from the first row.
df = pd.read_csv(
    'revs_scored_jan.tsv',
    sep='\t',
    header=0,
)

In [5]:
# Day of the month (UTC) for every revision.
# rev_timestamp is treated as epoch seconds (the original code fed it to
# datetime.utcfromtimestamp, which is deprecated since Python 3.12); the
# conversion is done in one vectorised call instead of a per-row lambda.
days = pd.to_datetime(df.rev_timestamp, unit='s').dt.day
df['day'] = days

In [6]:
# Bot-authored revisions, restricted to days 1-21 of the month (day < 22).
df_bots = df[df.day < 22]
df_bots = df_bots[df_bots.user_is_bot]

# Bot revisions that were reverted in under 24 hours (86400 seconds).
# .copy() makes this an independent frame, so the dtype fix below is a plain
# column assignment rather than a write onto a filtered slice (which raises
# SettingWithCopyWarning).
df_bot_passive_reverts = df_bots[df_bots.is_reverted]
df_bot_passive_reverts = df_bot_passive_reverts[
    df_bot_passive_reverts.seconds_to_revert.astype('str').astype('int') < 86400
].copy()
# revert_id is cast to int so it can be joined against rev_id.
df_bot_passive_reverts['revert_id'] = df_bot_passive_reverts['revert_id'].astype('int')

# Every revision in the dataset that is itself a revert.
df_reverters = df[df.is_revert]

In [7]:
# Join each reverted bot edit to the revision that reverted it
# (bot edit's revert_id == reverter's rev_id). The reverter may be human or
# bot; its columns receive the '_reverter' suffix.

df_bot_passive_reverts = df_bot_passive_reverts.merge(
    df_reverters,
    how='inner',
    left_on='revert_id',
    right_on='rev_id',
    suffixes=('', '_reverter'),
)

In [8]:
# Narrow the merged frame to the reverted edit, its reverter, and context columns.
df_bot_passive_reverts = df_bot_passive_reverts[[
    'rev_id', 'user_text', 'revert_id',
    'user_text_reverter', 'is_self_revert_reverter', 'user_is_bot_reverter',
    'page_namespace', 'day',
]]

In [9]:
# Reverting edits made by the bots in df_bots.
df_bot_active_reverts = df_bots[df_bots.is_revert]

# Every revision (by anyone) that was reverted in under 24 hours (86400 seconds).
# .copy() makes this an independent frame, so the dtype fix below is a plain
# column assignment rather than a write onto a filtered slice (which raises
# SettingWithCopyWarning).
df_reverted = df[df.is_reverted]
df_reverted = df_reverted[
    df_reverted.seconds_to_revert.astype('str').astype('int') < 86400
].copy()
# revert_id is cast to int so it can be joined against rev_id.
df_reverted['revert_id'] = df_reverted['revert_id'].astype('int')

In [10]:
# Pair every reverted revision with the bot revert that undid it
# (reverted edit's revert_id == bot revert's rev_id). Columns from the bot's
# reverting revision receive the '_reverter' suffix.

df_bot_active_reverts = df_reverted.merge(
    df_bot_active_reverts,
    how='inner',
    left_on='revert_id',
    right_on='rev_id',
    suffixes=('', '_reverter'),
)

In [11]:
# Narrow the merged frame to the reverted edit, the reverting bot, and context columns.
df_bot_active_reverts = df_bot_active_reverts[[
    'rev_id', 'user_text', 'user_is_bot', 'revert_id',
    'user_text_reverter', 'is_self_revert_reverter',
    'page_namespace', 'day',
]]

In [12]:
# current variable summary:
#   df_bots = all edits made by bots on days 1-21 of the month (day < 22)
#   df_reverters = all reverting edits in dataset
#   df_bot_passive_reverts = revisions from df_bots that were reverted (by humans or bots) within 24 hours, with reverting user info merged
#   df_reverted = all revisions that were reverted within 24 hours
#   df_bot_active_reverts = revisions (by humans or bots) reverted within 24 hours by a bot in df_bots, one row per reverted revision

In [13]:
# Start the summary table: one row per bot with its revision count
# (count of rev_id values), stored as 'total_edits'.

bot_summary = (
    df_bots
    .groupby("user_text", as_index=False)
    .count()
    .loc[:, ['user_text', 'rev_id']]
    .rename(columns={'rev_id': 'total_edits'})
)

In [14]:
# passive_reverts: number of reverted edits per bot.

new_col = (
    df_bot_passive_reverts
    .groupby("user_text", as_index=False)
    .count()
    .loc[:, ["user_text", "rev_id"]]
    .rename(columns={"rev_id": "passive_reverts"})
)

In [15]:
# Outer join on the bot name so bots without a count are kept (NaN for now).
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    on='user_text',
    suffixes=('', ''),
)

In [16]:
# Bots absent from the count table get 0; the column is cast to int.
bot_summary['passive_reverts'] = (
    bot_summary['passive_reverts'].fillna(0).astype(int)
)

In [17]:
# active_reverts: number of distinct reverting revisions per bot.
# First collapse to one row per (reverting bot, revert_id), then count those
# rows per reverting bot.

new_col = (
    df_bot_active_reverts
    .groupby(["user_text_reverter", "revert_id"], as_index=False)
    .count()
    .groupby("user_text_reverter", as_index=False)
    .count()
    .loc[:, ["user_text_reverter", "rev_id"]]
    .rename(columns={"rev_id": "active_reverts"})
)

In [18]:
# Outer join: summary bot name against the reverting bot's name.
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    left_on='user_text',
    right_on='user_text_reverter',
    suffixes=('', ''),
)

In [19]:
# Drop the join key brought in from the right side, then turn missing counts into integer 0.
bot_summary = bot_summary.drop(columns="user_text_reverter")
bot_summary['active_reverts'] = (
    bot_summary['active_reverts'].fillna(0).astype(int)
)

In [20]:
# human_active_reverts: distinct reverting revisions per bot, considering only
# reverted edits whose author is not a bot.

new_col = (
    df_bot_active_reverts[df_bot_active_reverts.user_is_bot.eq(False)]
    .groupby(["user_text_reverter", "revert_id"], as_index=False)
    .count()
    .groupby("user_text_reverter", as_index=False)
    .count()
    .loc[:, ["user_text_reverter", "rev_id"]]
    .rename(columns={"rev_id": "human_active_reverts"})
)

In [21]:
# Outer join: summary bot name against the reverting bot's name.
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    left_on='user_text',
    right_on='user_text_reverter',
    suffixes=('', ''),
)

In [22]:
# Drop the join key brought in from the right side, then turn missing counts into integer 0.
bot_summary = bot_summary.drop(columns="user_text_reverter")
bot_summary['human_active_reverts'] = (
    bot_summary['human_active_reverts'].fillna(0).astype(int)
)

In [23]:
# self_active_reverts: distinct reverting revisions per bot, considering only
# rows where the reverted edit's author is the reverting bot itself.

new_col = (
    df_bot_active_reverts[
        df_bot_active_reverts.user_text.eq(df_bot_active_reverts.user_text_reverter)
    ]
    .groupby(["user_text_reverter", "revert_id"], as_index=False)
    .count()
    .groupby("user_text_reverter", as_index=False)
    .count()
    .loc[:, ["user_text_reverter", "rev_id"]]
    .rename(columns={"rev_id": "self_active_reverts"})
)

In [24]:
# Outer join: summary bot name against the reverting bot's name.
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    left_on='user_text',
    right_on='user_text_reverter',
    suffixes=('', ''),
)

In [25]:
# Drop the join key brought in from the right side, then turn missing counts into integer 0.
bot_summary = bot_summary.drop(columns='user_text_reverter')
bot_summary['self_active_reverts'] = (
    bot_summary['self_active_reverts'].fillna(0).astype(int)
)

In [26]:
# bot_active_reverts: distinct reverting revisions per bot where the reverted
# edit was made by a *different* bot (self-reverts are excluded).

new_col = df_bot_active_reverts[df_bot_active_reverts.user_is_bot.eq(True)]
new_col = new_col[new_col.user_text.ne(new_col.user_text_reverter)]

new_col = (
    new_col
    .groupby(["user_text_reverter", "revert_id"], as_index=False)
    .count()
    .groupby("user_text_reverter", as_index=False)
    .count()
    .loc[:, ["user_text_reverter", "rev_id"]]
    .rename(columns={"rev_id": "bot_active_reverts"})
)

In [27]:
# Outer join: summary bot name against the reverting bot's name.
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    left_on='user_text',
    right_on='user_text_reverter',
    suffixes=('', ''),
)

In [28]:
# Drop the join key brought in from the right side, then turn missing counts into integer 0.
bot_summary = bot_summary.drop(columns='user_text_reverter')
bot_summary['bot_active_reverts'] = (
    bot_summary['bot_active_reverts'].fillna(0).astype(int)
)

### active_reverts column could be less than sum of other active revert columns because bot may revert multiple users / types of users with one revert

In [29]:
# self_passive_reverts: reverted bot edits whose reverter is the same bot.

new_col = (
    df_bot_passive_reverts[
        df_bot_passive_reverts.user_text_reverter.eq(df_bot_passive_reverts.user_text)
    ]
    .groupby("user_text", as_index=False)
    .count()
    .loc[:, ["user_text", "rev_id"]]
    .rename(columns={"rev_id": "self_passive_reverts"})
)

In [30]:
# Outer join on the bot name so bots without a count are kept (NaN for now).
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    on='user_text',
    suffixes=('', ''),
)

In [31]:
# Bots absent from the count table get 0; the column is cast to int.
bot_summary['self_passive_reverts'] = (
    bot_summary['self_passive_reverts'].fillna(0).astype(int)
)

In [32]:
# human_passive_reverts: reverted bot edits whose reverter is not a bot.

new_col = (
    df_bot_passive_reverts[df_bot_passive_reverts.user_is_bot_reverter.eq(False)]
    .groupby("user_text", as_index=False)
    .count()
    .loc[:, ["user_text", "rev_id"]]
    .rename(columns={"rev_id": "human_passive_reverts"})
)

In [33]:
# Outer join on the bot name so bots without a count are kept (NaN for now).
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    on='user_text',
    suffixes=('', ''),
)

In [34]:
# Bots absent from the count table get 0; the column is cast to int.
bot_summary['human_passive_reverts'] = (
    bot_summary['human_passive_reverts'].fillna(0).astype(int)
)

In [35]:
# bot_passive_reverts: reverted bot edits whose reverter is a bot
# (no self-revert exclusion is applied here).

new_col = (
    df_bot_passive_reverts[df_bot_passive_reverts.user_is_bot_reverter.eq(True)]
    .groupby("user_text", as_index=False)
    .count()
    .loc[:, ["user_text", "rev_id"]]
    .rename(columns={"rev_id": "bot_passive_reverts"})
)

In [36]:
# Outer join on the bot name so bots without a count are kept (NaN for now).
bot_summary = bot_summary.merge(
    new_col,
    how='outer',
    on='user_text',
    suffixes=('', ''),
)

In [37]:
# Bots absent from the count table get 0; the column is cast to int.
bot_summary['bot_passive_reverts'] = (
    bot_summary['bot_passive_reverts'].fillna(0).astype(int)
)

In [38]:
# Fix the column order: identity and totals first, then the active-revert
# breakdown, then the passive-revert breakdown.

bot_summary = bot_summary[[
    'user_text', 'total_edits', 'active_reverts', 'passive_reverts',
    'self_active_reverts', 'human_active_reverts', 'bot_active_reverts',
    'self_passive_reverts', 'human_passive_reverts', 'bot_passive_reverts',
]]

In [39]:
# Display the per-bot count table (one row per bot).
bot_summary

Unnamed: 0,user_text,total_edits,active_reverts,passive_reverts,self_active_reverts,human_active_reverts,bot_active_reverts,self_passive_reverts,human_passive_reverts,bot_passive_reverts
0,AAlertBot,12216,4,4,4,0,0,4,0,4
1,Acebot,562,14,20,14,0,0,20,0,20
2,Amalthea (bot),1202,9,12,9,0,0,12,0,12
3,AnomieBOT,21646,44,253,13,32,0,19,232,21
4,AnomieBOT II,21,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
99,Wiki Feed Bot,63,0,0,0,0,0,0,0,0
100,WoodwardBot,61,0,0,0,0,0,0,0,0
101,WugBot,322,1,0,0,1,0,0,0,0
102,Xqbot,2,0,0,0,0,0,0,0,0


In [40]:
# Show the summary rows for four specific bots.
bot_summary[bot_summary.user_text.isin(["AvicBot", "AnomieBOT", "Cyberbot I", "RonBot"])]

Unnamed: 0,user_text,total_edits,active_reverts,passive_reverts,self_active_reverts,human_active_reverts,bot_active_reverts,self_passive_reverts,human_passive_reverts,bot_passive_reverts
3,AnomieBOT,21646,44,253,13,32,0,19,232,21
6,AvicBot,5876,229,532,229,0,0,532,0,532
22,Cyberbot I,7824,1108,1030,911,196,51,1004,26,1004
82,RonBot,9066,5,687,2,3,0,2,685,2


In [41]:
# Share of a bot's edits that are active reverts (how often does this bot revert?)
def pcta(data):
    """Return active_reverts / total_edits for one bot_summary row.

    Parameters
    ----------
    data : row (e.g. pandas Series) exposing 'active_reverts' and
        'total_edits' by label.

    Returns
    -------
    str with four decimals, or the integer 0 when total_edits is 0.
    """
    # Read columns by label: integer keys on a label-indexed Series are
    # deprecated positional lookups in recent pandas.
    total = data['total_edits']
    if total == 0:
        # Same zero-denominator convention as the other pct* helpers.
        return 0
    return "%.4f" % (data['active_reverts'] / total)  # limit to four decimals

# Compute the ratio row by row. The values are left as returned by pcta
# (not cast to float); fillna(0) replaces any missing result with 0.
bot_summary["pct_active_reverts"] = (
    bot_summary.apply(pcta, axis=1).fillna(0)
)

In [42]:
# Share of a bot's active reverts that are self-reverts (how often does this bot revert itself?)
def pcts(data):
    """Return self_active_reverts / active_reverts for one bot_summary row.

    Parameters
    ----------
    data : row (e.g. pandas Series) exposing 'self_active_reverts' and
        'active_reverts' by label.

    Returns
    -------
    str with four decimals, or the integer 0 when active_reverts is 0.
    """
    # Read columns by label: integer keys on a label-indexed Series are
    # deprecated positional lookups in recent pandas.
    active = data['active_reverts']
    if active == 0:
        return 0
    return "%.4f" % (data['self_active_reverts'] / active)

# Compute the ratio row by row; values stay as returned by pcts (not cast to float).
bot_summary["pct_self_active_reverts"] = bot_summary.apply(pcts, axis=1)
# Write the fillna result back to the same column. The original misspelled the
# target as 'pct_self_active_revertss', which created a stray extra column.
bot_summary['pct_self_active_reverts'] = bot_summary['pct_self_active_reverts'].fillna(0)

In [43]:
# Share of a bot's active reverts that revert a human (of the times this bot reverts, how often is the target a human?)
def pctha(data):
    """Return human_active_reverts / active_reverts for one bot_summary row.

    Parameters
    ----------
    data : row (e.g. pandas Series) exposing 'human_active_reverts' and
        'active_reverts' by label.

    Returns
    -------
    str with four decimals, or the integer 0 when active_reverts is 0.
    """
    # Read columns by label: integer keys on a label-indexed Series are
    # deprecated positional lookups in recent pandas.
    active = data['active_reverts']
    if active == 0:
        return 0
    return "%.4f" % (data['human_active_reverts'] / active)

# Compute the ratio row by row. The values are left as returned by pctha
# (not cast to float); fillna(0) replaces any missing result with 0.
bot_summary["pct_(h)active_reverts"] = (
    bot_summary.apply(pctha, axis=1).fillna(0)
)

In [44]:
# Share of a bot's active reverts that revert another bot (of the times this bot reverts, how often is the target a bot?)
def pctba(data):
    """Return bot_active_reverts / active_reverts for one bot_summary row.

    Parameters
    ----------
    data : row (e.g. pandas Series) exposing 'bot_active_reverts' and
        'active_reverts' by label.

    Returns
    -------
    str with four decimals, or the integer 0 when active_reverts is 0.
    """
    # Read columns by label: integer keys on a label-indexed Series are
    # deprecated positional lookups in recent pandas.
    active = data['active_reverts']
    if active == 0:
        return 0
    return "%.4f" % (data['bot_active_reverts'] / active)

# Compute the ratio row by row. The values are left as returned by pctba
# (not cast to a numeric dtype); fillna(0) replaces any missing result with 0.
bot_summary["pct_(b)active_reverts"] = (
    bot_summary.apply(pctba, axis=1).fillna(0)
)

In [45]:
# Share of a bot's edits that get reverted (how often does this bot get reverted?)
def pctp(data):
    """Return passive_reverts / total_edits for one bot_summary row.

    Parameters
    ----------
    data : row (e.g. pandas Series) exposing 'passive_reverts' and
        'total_edits' by label.

    Returns
    -------
    str with four decimals, or the integer 0 when total_edits is 0.
    """
    # Guard the real denominator. The original tested active_reverts, so a bot
    # that never reverted anyone reported 0 even when its own edits had been
    # reverted.
    # Columns are read by label: integer keys on a label-indexed Series are
    # deprecated positional lookups in recent pandas.
    total = data['total_edits']
    if total == 0:
        return 0
    return "%.4f" % (data['passive_reverts'] / total)

# Compute the ratio row by row. The values are left as returned by pctp
# (not cast to a numeric dtype); fillna(0) replaces any missing result with 0.
bot_summary["pct_passive_reverts"] = (
    bot_summary.apply(pctp, axis=1).fillna(0)
)

In [46]:
# Share of a bot's passive reverts that are self-reverts
#    (of the times this bot gets reverted, how often is it reverted by itself?)
def pctsp(data):
    """Return self_passive_reverts / passive_reverts for one bot_summary row.

    Parameters
    ----------
    data : row (e.g. pandas Series) exposing 'self_passive_reverts' and
        'passive_reverts' by label.

    Returns
    -------
    str with four decimals, or the integer 0 when passive_reverts is 0.
    """
    # Read columns by label: integer keys on a label-indexed Series are
    # deprecated positional lookups in recent pandas.
    passive = data['passive_reverts']
    if passive == 0:
        return 0
    return "%.4f" % (data['self_passive_reverts'] / passive)

# Compute the ratio row by row; fillna(0) replaces any missing result with 0.
bot_summary["pct_self_passive_reverts"] = (
    bot_summary.apply(pctsp, axis=1).fillna(0)
)

In [47]:
# Share of a bot's passive reverts made by humans (of the times this bot gets reverted, how often is the reverter a human?)
def pcthp(data):
    """Return human_passive_reverts / passive_reverts for one bot_summary row.

    Parameters
    ----------
    data : row (e.g. pandas Series) exposing 'human_passive_reverts' and
        'passive_reverts' by label.

    Returns
    -------
    str with four decimals, or the integer 0 when passive_reverts is 0.
    """
    # Read columns by label: integer keys on a label-indexed Series are
    # deprecated positional lookups in recent pandas.
    passive = data['passive_reverts']
    if passive == 0:
        return 0
    return "%.4f" % (data['human_passive_reverts'] / passive)

# Compute the ratio row by row. The values are left as returned by pcthp
# (not cast to float); fillna(0) replaces any missing result with 0.
bot_summary["pct_(h)passive_reverts"] = (
    bot_summary.apply(pcthp, axis=1).fillna(0)
)

In [48]:
# Share of a bot's passive reverts made by bots (of the times this bot gets reverted, how often is the reverter a bot?)
def pctbp(data):
    """Return bot_passive_reverts / passive_reverts for one bot_summary row.

    Parameters
    ----------
    data : row (e.g. pandas Series) exposing 'bot_passive_reverts' and
        'passive_reverts' by label.

    Returns
    -------
    str with four decimals, or the integer 0 when passive_reverts is 0.
    """
    # Read columns by label: integer keys on a label-indexed Series are
    # deprecated positional lookups in recent pandas.
    passive = data['passive_reverts']
    if passive == 0:
        return 0
    return "%.4f" % (data['bot_passive_reverts'] / passive)

# Compute the ratio row by row. The values are left as returned by pctbp
# (not cast to float); fillna(0) replaces any missing result with 0.
bot_summary["pct_(b)passive_reverts"] = (
    bot_summary.apply(pctbp, axis=1).fillna(0)
)

In [49]:
# Final column order: each count column is followed by its percentage column.

bot_summary = bot_summary[[
    'user_text', 'total_edits',
    'active_reverts', 'pct_active_reverts',
    'self_active_reverts', 'pct_self_active_reverts',
    'human_active_reverts', 'pct_(h)active_reverts',
    'bot_active_reverts', 'pct_(b)active_reverts',
    'passive_reverts', 'pct_passive_reverts',
    'self_passive_reverts', 'pct_self_passive_reverts',
    'human_passive_reverts', 'pct_(h)passive_reverts',
    'bot_passive_reverts', 'pct_(b)passive_reverts',
]]

In [50]:
# Display the per-bot table with counts and percentage columns.
bot_summary

Unnamed: 0,user_text,total_edits,active_reverts,pct_active_reverts,self_active_reverts,pct_self_active_reverts,human_active_reverts,pct_(h)active_reverts,bot_active_reverts,pct_(b)active_reverts,passive_reverts,pct_passive_reverts,self_passive_reverts,pct_self_passive_reverts,human_passive_reverts,pct_(h)passive_reverts,bot_passive_reverts,pct_(b)passive_reverts
0,AAlertBot,12216,4,0.0003,4,1.0000,0,0.0000,0,0.0000,4,0.0003,4,1.0000,0,0.0000,4,1.0000
1,Acebot,562,14,0.0249,14,1.0000,0,0.0000,0,0.0000,20,0.0356,20,1.0000,0,0.0000,20,1.0000
2,Amalthea (bot),1202,9,0.0075,9,1.0000,0,0.0000,0,0.0000,12,0.0100,12,1.0000,0,0.0000,12,1.0000
3,AnomieBOT,21646,44,0.0020,13,0.2955,32,0.7273,0,0.0000,253,0.0117,19,0.0751,232,0.9170,21,0.0830
4,AnomieBOT II,21,0,0.0000,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Wiki Feed Bot,63,0,0.0000,0,0,0,0,0,0,0,0,0,0,0,0,0,0
100,WoodwardBot,61,0,0.0000,0,0,0,0,0,0,0,0,0,0,0,0,0,0
101,WugBot,322,1,0.0031,0,0.0000,1,1.0000,0,0.0000,0,0.0000,0,0,0,0,0,0
102,Xqbot,2,0,0.0000,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [51]:
# Show the summary rows (counts and percentages) for four specific bots.
bot_summary[bot_summary.user_text.isin(["AvicBot", "AnomieBOT", "Cyberbot I", "RonBot"])]

Unnamed: 0,user_text,total_edits,active_reverts,pct_active_reverts,self_active_reverts,pct_self_active_reverts,human_active_reverts,pct_(h)active_reverts,bot_active_reverts,pct_(b)active_reverts,passive_reverts,pct_passive_reverts,self_passive_reverts,pct_self_passive_reverts,human_passive_reverts,pct_(h)passive_reverts,bot_passive_reverts,pct_(b)passive_reverts
3,AnomieBOT,21646,44,0.002,13,0.2955,32,0.7273,0,0.0,253,0.0117,19,0.0751,232,0.917,21,0.083
6,AvicBot,5876,229,0.039,229,1.0,0,0.0,0,0.0,532,0.0905,532,1.0,0,0.0,532,1.0
22,Cyberbot I,7824,1108,0.1416,911,0.8222,196,0.1769,51,0.046,1030,0.1316,1004,0.9748,26,0.0252,1004,0.9748
82,RonBot,9066,5,0.0006,2,0.4,3,0.6,0,0.0,687,0.0758,2,0.0029,685,0.9971,2,0.0029
