# Bot-vs-bot reverts
## Getting and processing data

In [None]:
# bot-vs-bot revert table: https://quarry.wmflabs.org/query/17237

!wget https://quarry.wmflabs.org/run/161084/output/0/tsv?download=true -O botvbot.tsv

--2017-03-10 02:05:22--  https://quarry.wmflabs.org/run/161084/output/0/tsv?download=true
Resolving quarry.wmflabs.org (quarry.wmflabs.org)... 10.68.21.68
Connecting to quarry.wmflabs.org (quarry.wmflabs.org)|10.68.21.68|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘botvbot.tsv’

botvbot.tsv             [                   <=>]  31.86M  2.81MB/s             

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:
# Load the tab-separated file into a DataFrame, then show how many rows it has.
df = pd.read_csv("botvbot.tsv", delimiter="\t")
df.shape[0]

In [None]:
# Print the column labels of the loaded table.
print(df.columns)

In [None]:
# Preview the first five rows.
df.head(5)

In [None]:
# Parse the YYYYMMDDHHMMSS-formatted revert timestamps into datetimes, stored in a new column.
df['reverting_timestamp_dt'] = pd.to_datetime(
    df.reverting_timestamp,
    format="%Y%m%d%H%M%S",
)

In [None]:
# Index the frame by the parsed revert time so the time-based grouping below can use the index.
# Guarded so that re-running this cell is a no-op instead of a KeyError: after the first run
# the column has already been moved into the index.
if df.index.name != 'reverting_timestamp_dt':
    df = df.set_index('reverting_timestamp_dt')

### Total reverts by namespace

In [None]:
# Number of rows per page namespace, most frequent first.
df['page_namespace'].value_counts()

## Group reverts by day and namespace

In [None]:
# Bucket rows by day (taken from the datetime index) and by page namespace.
# pd.Grouper(freq=...) replaces pd.TimeGrouper, which was deprecated in pandas 0.21
# and is no longer available in current pandas releases.
gp = df.groupby([pd.Grouper(freq='1D', closed='left'), 'page_namespace'])

In [None]:
# Count rev_id values in every (day, namespace) bucket and peek at the first few.
daily_namespace_g = gp['rev_id'].count()
daily_namespace_g.head(5)

In [None]:
# Namespace ids to keep as columns: 0 through 6, plus 10, 11 and 14.
namespaces = list(range(7)) + [10, 11, 14]

In [None]:
# Pivot the namespace level into columns, keep the selected namespaces, and preview the result.
daily_namespace_g.unstack(level=-1).loc[:, namespaces].head(5)

In [None]:
# Wide table: one row per day, one column per selected namespace.
daily_namespace_unstack = daily_namespace_g.unstack(level=-1).loc[:, namespaces]

## Botplots: daily reverts by namespace

### Daily reverts subplotted by namespace, linear y-axis

In [None]:
# One subplot per namespace, each with its own axes and a linear y-scale.
daily_namespace_unstack.loc[:, namespaces].plot(
    subplots=True,
    figsize=[12, 34],
    sharex=False,
    sharey=False,
    logy=False,
)

### Daily reverts subplotted by namespace, log y-axis

In [None]:
# One subplot per namespace, each with its own axes and a logarithmic y-scale.
daily_namespace_unstack.loc[:, namespaces].plot(
    subplots=True,
    figsize=[12, 34],
    sharex=False,
    sharey=False,
    logy=True,
)

### Daily reverts by namespace, linear y-axis

In [None]:
# All selected namespaces drawn as thin lines on a single set of axes.
daily_namespace_unstack.loc[:, namespaces].plot.line(
    figsize=[14, 12],
    linewidth=1,
)

## Group reverts by week and namespace


In [None]:
# Bucket rows by week (taken from the datetime index) and by page namespace.
# pd.Grouper(freq=...) replaces pd.TimeGrouper, which was deprecated in pandas 0.21
# and is no longer available in current pandas releases.
gp2 = df.groupby([pd.Grouper(freq='1W', closed='left'), 'page_namespace'])

In [None]:
# Count rev_id values in every (week, namespace) bucket and peek at the first few.
weekly_namespace_g = gp2['rev_id'].count()
weekly_namespace_g.head(5)

In [None]:
# Namespace ids to keep as columns: 0 through 6, plus 10, 11 and 14.
namespaces = list(range(7)) + [10, 11, 14]

In [None]:
# Pivot the namespace level into columns, keep the selected namespaces, and preview the result.
weekly_namespace_g.unstack(level=-1).loc[:, namespaces].head(5)

In [None]:
# Wide table: one row per week, one column per selected namespace.
weekly_namespace_unstack = weekly_namespace_g.unstack(level=-1).loc[:, namespaces]

## Botplots: weekly reverts by namespace

### Weekly reverts subplotted by namespace, linear y-axis

In [None]:
# One subplot per namespace, each with its own axes and a linear y-scale.
weekly_namespace_unstack.loc[:, namespaces].plot(
    subplots=True,
    figsize=[12, 34],
    sharex=False,
    sharey=False,
    logy=False,
)

### Weekly reverts subplotted by namespace, log y-axis

In [None]:
# One subplot per namespace, each with its own axes and a logarithmic y-scale.
weekly_namespace_unstack.loc[:, namespaces].plot(
    subplots=True,
    figsize=[12, 34],
    sharex=False,
    sharey=False,
    logy=True,
)

### Weekly reverts by namespace, linear y-axis

In [None]:
# All selected namespaces drawn as lines on a single set of axes.
weekly_namespace_unstack.loc[:, namespaces].plot.line(
    figsize=[14, 12],
    linewidth=2,
)

## Group reverts by week and reverting bot


In [None]:
# Number of rows per reverting user, most frequent first.
df['reverting_user_text'].value_counts()

In [None]:
# How many distinct reverting users appear in the table.
len(df['reverting_user_text'].value_counts())

In [None]:
# Names of the twelve reverting users with the most rows, in descending order of count.
topbots = df['reverting_user_text'].value_counts().head(12).index.tolist()
topbots

In [None]:
# Bucket rows by week (taken from the datetime index) and by reverting user.
# pd.Grouper(freq=...) replaces pd.TimeGrouper, which was deprecated in pandas 0.21
# and is no longer available in current pandas releases.
gp3 = df.groupby([pd.Grouper(freq='1W', closed='left'), 'reverting_user_text'])

In [None]:
# Count rev_id values in every (week, reverting user) bucket and peek at the first few.
user_namespace_g = gp3['rev_id'].count()
user_namespace_g.head(5)

In [None]:
# Pivot the reverting-user level into columns and preview the wide table.
user_namespace_g.unstack(level=-1).head(5)

In [None]:
# Wide table: one row per week, one column per bot listed in topbots.
user_namespace_unstack = user_namespace_g.unstack(level=-1).loc[:, topbots]

## Botplots: weekly reverts by reverting bot

### Weekly reverts subplotted by reverting bot, linear y-axis, not shared y-axis

In [None]:
# One area-chart row per top bot; the x-axis is shared, each row keeps its own y-scale.
fig, axes = plt.subplots(
    nrows=len(topbots),
    ncols=1,
    figsize=[8, 8],
    sharex=True,
)
user_namespace_unstack.loc[:, topbots].plot.area(subplots=True, ax=axes)
fig.subplots_adjust(hspace=0.25)


### Weekly reverts subplotted by reverting bot, linear y-axis, shared y-axis

In [None]:
# One area-chart row per top bot; both the x-axis and the y-axis are shared across rows.
fig, axes = plt.subplots(
    nrows=len(topbots),
    ncols=1,
    figsize=[8, 8],
    sharex=True,
    sharey=True,
)
user_namespace_unstack.loc[:, topbots].plot.area(subplots=True, ax=axes)
fig.subplots_adjust(hspace=0.25)


In [None]:
# Weekly counts for the 'Addbot' column as an area chart.
user_namespace_unstack.loc[:, 'Addbot'].plot.area()

In [None]:
# Weekly counts for the 'AvicBot' column as an area chart.
user_namespace_unstack.loc[:, 'AvicBot'].plot.area()

In [None]:
# Weekly counts for the 'Xqbot' column as an area chart.
user_namespace_unstack.loc[:, 'Xqbot'].plot.area()

In [None]:
# Weekly counts for the 'KLBot2' column as an area chart.
user_namespace_unstack.loc[:, 'KLBot2'].plot.area()


In [None]:
# Weekly counts for the 'RussBot' column as an area chart.
user_namespace_unstack.loc[:, 'RussBot'].plot.area()