# Sample summary table stats

In [1]:
# passive reverts = times when bot was reverted by someone else
# active reverts = times when bot reverts someone else

In [2]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datetime import datetime

In [4]:
df = pd.read_csv('revs_scored_jan.tsv', sep='\t', header=0)

In [5]:
days = df.rev_timestamp.map(lambda ts: datetime.utcfromtimestamp(ts).day)
df['day'] = days

In [7]:
df_sample = df[df.day<22]
df_bots = df_sample[df_sample.user_is_bot]

df_bot_passive_reverts = df_bots[df_bots.is_reverted]
df_bot_passive_reverts = df_bot_passive_reverts[df_bot_passive_reverts.seconds_to_revert.astype('str').astype('int')<86400]
df_bot_passive_reverts.revert_id = df_bot_passive_reverts.revert_id.astype('int')

df_reverters = df[df.is_revert]

In [8]:
# merging two datafraemes to create a table of nonhuman and human reverters that reverted a bot

df_bot_passive_reverts = pd.merge(df_bot_passive_reverts, df_reverters, 
             how='inner', 
             left_on='revert_id', 
             right_on='rev_id', 
             suffixes=('', '_reverter')
)

In [9]:
df_bot_passive_reverts = df_bot_passive_reverts[['rev_id','user_text','revert_id','user_text_reverter', 'is_self_revert_reverter','user_is_bot_reverter', 'page_namespace','day']]

In [10]:
df_bot_active_reverts = df_bots[df_bots.is_revert]

df_reverted = df[df.is_reverted]
df_reverted = df_reverted[df_reverted.seconds_to_revert.astype('str').astype('int')<86400]
df_reverted.revert_id = df_reverted.revert_id.astype('int')

In [11]:
# merging two dataframes to create list of bot active reverts with reverted edit info

df_bot_active_reverts = pd.merge(df_reverted, df_bot_active_reverts, 
             how='inner', 
             left_on='revert_id', 
             right_on='rev_id', 
             suffixes=('', '_reverter')
)

In [12]:
df_bot_active_reverts = df_bot_active_reverts[['rev_id','user_text', 'user_is_bot','revert_id','user_text_reverter', 'is_self_revert_reverter', 'page_namespace','day']]

In [13]:
# current variable summary:
#   df_sample = all edits in our time frame
#   df_bots = all edits made by bots in time frame
#   df_reverters = all reverting edits in dataset in time frame
#   df_bot_passive_reverts = all revisions by bots which were reverted (by humans or bots), with reverting user info merged, in time frame
#   df_reverted = all revisions that were reverted, in time frame
#   df_bot_active_reverts = all active reverts by bots of humans and bots in time frame

In [14]:
# total edits in sample
len(df_sample)

3196890

In [26]:
# total bot edits
len(df_sample.user_is_bot == True)

3196890

In [21]:
# percent bot edits
len(df_bots) / len(df_sample)

0.10041790615254201

In [24]:
# total human edits
len(df_sample.user_is_bot == False)

3196890

In [25]:
# percent human edits
len(df_sample.user_is_bot == False) / len(df_sample)

1.0

In [27]:
df_sample

Unnamed: 0,rev_timestamp,page_id,rev_id,prev_rev_id,is_minor,user_text,user_id,seconds_to_prev,curr_bytes,delta_bytes,...,damaging_pred,goodfaith_pred,model_version,user_is_bot,user_is_trusted,user_edit_count,page_rev_count,page_namespace,is_page_redirect,day
0,1546300800,47649229,876227289,869530001,True,BD2412,196446,3696519,1195,-9,...,0.021146,0.985808,0.4.0,False,False,1289980,5,118,0,1
1,1546300801,47642850,876227292,869530006,True,BD2412,196446,3696518,1423,-9,...,0.021847,0.983419,0.4.0,False,False,1289981,5,0,0,1
2,1546300801,59537875,876227290,,False,Twozenhauer,14955567,,7835,,...,0.003826,0.998177,0.4.0,False,False,621,7,0,0,1
3,1546300801,60902958,876227291,869530004,True,BD2412,196446,3696519,1445,-9,...,0.021797,0.983336,0.4.0,False,False,1289980,30,0,0,1
4,1546300802,31392285,876227293,876069768,True,Cyberbot I,16266655,86401,125,-1,...,0.011582,0.992475,0.4.0,True,True,1006365,337,10,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3196885,1548115195,52638564,879556719,879556117,False,Zenmaster190,35619661,314,161568,-158,...,0.135811,0.919670,0.5.0,False,False,70,2630,0,0,21
3196886,1548115196,48708688,879556721,875645077,True,Bender the Bot,28903366,2153333,16739,2,...,0.014670,0.993471,0.5.0,True,True,829059,14,0,0,21
3196887,1548115196,8314839,879556720,877629722,False,Loraof,22399950,1044459,43809,7,...,0.031668,0.982230,0.5.0,False,False,20129,24,0,0,21
3196888,1548115197,59690858,879556722,879555191,False,Adamtt9,19993129,801,13390,33,...,0.005529,0.995818,0.5.0,False,False,64916,134,0,0,21


## having issues with df_sample, numbers don't line up at all.