### This notebook will investigate how Twitter deletes accounts


In [1]:
import sys
sys.path.insert(0, '../src_clean')
from utils import Loader
from utils import convert_twitter_strings_2_dates
import datetime
import numpy as np
import pandas as pd

import os
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import seaborn as sns
import altair as alt
from altair_saver import save

from utils import mean_diff_bootstrap_ci, cohen_d

from scipy.stats import mannwhitneyu, pearsonr

alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Load necessary data

In [2]:
l = Loader()

decahose_users = l.load_user_data(name = "decahose")

superspreader_accounts = l.load_iffyp_top_infl_fiber_users()

errors = l.load_user_data(name="v2",info_type="errors")
errors["user_id"] = errors["user_id"].astype(str)

In [3]:
decahose_users

Unnamed: 0,u_id,followers_count,following_count,tweet_count,listed_count,name,protected,created_at,username,description,verified,url
0,1000003671780884485,251.333333,192.0,2570.00,0.0,Enchanted LifePath,False,Fri May 25 09:20:29 -0400 2018,TruthWeRTheNews,Enchanted LifePath TV - Search For Answers Fin...,False,http://EnchantedLifePath.com
1,100001226,127.000000,232.0,5515.00,1.0,ramjee,False,Mon Dec 28 12:24:56 -0500 2009,ramjeepasam,,False,
2,1000051927,415.000000,3606.0,548.00,6.0,Vishal Tripathi,False,Sun Dec 09 14:58:56 -0500 2012,vishaltrip,Young Professional @NITIAayog | Lawyer by qual...,False,
3,1000060610518896640,4096.750000,1080.5,5532.75,15.5,Mattea Merta 🇨🇦,False,Fri May 25 13:06:44 -0400 2018,MatteaMerta,Free Thinker | Contributor @TPostMillennial | ...,False,https://www.facebook.com/MatteaMerta/
4,1000075699699486722,32.000000,338.0,16696.50,1.0,Deborah Griffin,False,Fri May 25 14:06:42 -0400 2018,Deborah00274412,"Single,never married, Southern belle over 59 u...",False,
...,...,...,...,...,...,...,...,...,...,...,...,...
47007,999864765177565184,437.000000,813.0,19931.00,1.0,"Christopher Ramírez, 🇵🇷🇺🇸🇪🇸#Conservative⛪",False,Fri May 25 00:08:31 -0400 2018,Christo27341164,"I love italy🇮🇹Denmark🇩🇰,Croatia🇭🇷,Greece🇬🇷Spai...",False,
47008,99988999,70.000000,359.0,7662.00,7.0,Marc Boucher de Lign,False,Mon Dec 28 11:26:52 -0500 2009,MarcBdeL,concepteur et co-fondateur de HolaVie. Narurop...,False,http://www.holavie.org
47009,999951635697848320,25.000000,15.0,261.00,1.0,aramchei,False,Fri May 25 05:53:42 -0400 2018,aramchei,,False,
47010,999962540934221826,111.000000,154.0,8295.00,0.0,Tantivy Trig,False,Fri May 25 06:37:02 -0400 2018,TantivyT,Unwavering commitment to saving lives.\n\nTrut...,False,


In [4]:
superspreader_accounts

['275276082',
 '49023129',
 '2451476942',
 '3507100335',
 '16936686',
 '16589206',
 '940430173517811714',
 '57431771',
 '878916812',
 '39344374',
 '16989178',
 '49682880',
 '1177221975921610753',
 '22677397',
 '941729680381550592',
 '850036892',
 '3220084809',
 '25251653',
 '1036239330984054784',
 '1909921760',
 '101779040',
 '1146225861743403008',
 '3394053112',
 '19553409',
 '19422491',
 '1229160464',
 '58691526',
 '997312910748680192',
 '727137760574857216',
 '115279420',
 '1216789842',
 '19329393',
 '95588504',
 '375721095',
 '1134691864425033730',
 '2176476241',
 '806168918754660352',
 '1398479138',
 '802121664',
 '822901284655210497',
 '21150492',
 '4789453135',
 '39743812',
 '1424605753',
 '1157673159128207361',
 '3155188814',
 '898577623885819904',
 '3041775900',
 '959856286061006848',
 '56491860',
 '138889809',
 '903313767596822528',
 '770781940341288960',
 '22240612',
 '435704007',
 '975217231',
 '769739202963517440',
 '86177206',
 '859592034',
 '1148711615686160384',
 '89911

In [5]:
errors.head()

Unnamed: 0,user_id,status,twitter_error
0,1204429316219691008,User has been suspended,https://api.twitter.com/2/problems/resource-no...
1,1229861495326093312,User has been suspended,https://api.twitter.com/2/problems/resource-no...
2,1211401123275296770,Could not find user with ids,https://api.twitter.com/2/problems/resource-no...
3,2994893355,User has been suspended,https://api.twitter.com/2/problems/resource-no...
4,1083180973276229632,User has been suspended,https://api.twitter.com/2/problems/resource-no...


### Append the user account data to the top FIBers and Influentials

In [6]:
ss_account_info = decahose_users[decahose_users['u_id'].isin(superspreader_accounts)].reset_index(drop=True)

In [7]:
ss_account_info = ss_account_info[
    ["u_id",
     "followers_count",
     "following_count",
     "tweet_count",
     "username",
     "description",
     "verified",
     "protected"]
]
ss_account_info

Unnamed: 0,u_id,followers_count,following_count,tweet_count,username,description,verified,protected
0,1000141948269678592,56366.039735,56903.993377,96973.688742,209acesheepdog,🇺🇸🐶1A🗞~2A🔫CHRISTIAN⛪ CONSERVATIVE 💪\nMAR.30yrs...,False,False
1,1010682068131557376,13237.791667,13281.125000,65105.958333,DebHaslam,WWG1WGA👮‍♂️#MAGA🇺🇸❤️#TRUMP2020❤️🇺🇸 MY PAGE IS ...,False,False
2,101779040,85459.535714,199.464286,136737.321429,Sanjay_Dixit,"IAS//DMET//BITS// Author, Krishna Gopeshvara, ...",True,False
3,1018425398869282817,10784.678218,25.217822,7084.831683,Electroversenet,Accurately documenting Earth Changes as two wo...,False,False
4,1023362107,31915.972868,358.982558,7770.236434,sarawei3,女人的品质，决定着一个民族的未来🙏 基督徒 Graduate Certificate in ...,False,False
...,...,...,...,...,...,...,...,...
245,965259182,54812.569182,37505.399371,467860.204403,baalter,"🇺🇸1A, 2A, USMC-Family, Conservative, Country, ...",False,False
246,966716807452258304,34011.982222,34308.720000,42249.004444,SURSDIK,#TrumpTrain 🤣 🤣 #Draintheswamp 🤣 #Menacoverup...,False,False
247,969444209940889600,42852.500000,28818.166667,181323.166667,ernie_plumley,"God, Jesus, wife, family, and the Reborn Ameri...",False,False
248,975217231,152649.985294,98170.279412,323830.455882,John_KissMyBot,"Donald J Trump is my President, Conservative, ...",False,False


In [8]:
ss_account_info = ss_account_info.merge(
    errors,
    left_on="u_id",
    right_on="user_id",
    how="left"
)

# Users with no 'status' variable in the `errors` object are still active!
ss_account_info["status"] = ss_account_info["status"].fillna("Active")

In [9]:
status_map = {
    'Active':'active',
    'User has been suspended':'suspended',
    'Could not find user with ids':'deleted'
}

ss_account_info.status = ss_account_info.status.map(status_map)

In [10]:
ss_account_info.head()

Unnamed: 0,u_id,followers_count,following_count,tweet_count,username,description,verified,protected,user_id,status,twitter_error
0,1000141948269678592,56366.039735,56903.993377,96973.688742,209acesheepdog,🇺🇸🐶1A🗞~2A🔫CHRISTIAN⛪ CONSERVATIVE 💪\nMAR.30yrs...,False,False,,active,
1,1010682068131557376,13237.791667,13281.125,65105.958333,DebHaslam,WWG1WGA👮‍♂️#MAGA🇺🇸❤️#TRUMP2020❤️🇺🇸 MY PAGE IS ...,False,False,1.0106820681315572e+18,suspended,https://api.twitter.com/2/problems/resource-no...
2,101779040,85459.535714,199.464286,136737.321429,Sanjay_Dixit,"IAS//DMET//BITS// Author, Krishna Gopeshvara, ...",True,False,,active,
3,1018425398869282817,10784.678218,25.217822,7084.831683,Electroversenet,Accurately documenting Earth Changes as two wo...,False,False,,active,
4,1023362107,31915.972868,358.982558,7770.236434,sarawei3,女人的品质，决定着一个民族的未来🙏 基督徒 Graduate Certificate in ...,False,False,1023362107.0,suspended,https://api.twitter.com/2/problems/resource-no...


In [11]:
ss_account_info.u_id.nunique()

250

In [12]:
verified_map={
    True : "verified",
    False : "not verified"
}

ss_account_info.verified = ss_account_info.verified.map(verified_map)
ss_account_info

Unnamed: 0,u_id,followers_count,following_count,tweet_count,username,description,verified,protected,user_id,status,twitter_error
0,1000141948269678592,56366.039735,56903.993377,96973.688742,209acesheepdog,🇺🇸🐶1A🗞~2A🔫CHRISTIAN⛪ CONSERVATIVE 💪\nMAR.30yrs...,not verified,False,,active,
1,1010682068131557376,13237.791667,13281.125000,65105.958333,DebHaslam,WWG1WGA👮‍♂️#MAGA🇺🇸❤️#TRUMP2020❤️🇺🇸 MY PAGE IS ...,not verified,False,1010682068131557376,suspended,https://api.twitter.com/2/problems/resource-no...
2,101779040,85459.535714,199.464286,136737.321429,Sanjay_Dixit,"IAS//DMET//BITS// Author, Krishna Gopeshvara, ...",verified,False,,active,
3,1018425398869282817,10784.678218,25.217822,7084.831683,Electroversenet,Accurately documenting Earth Changes as two wo...,not verified,False,,active,
4,1023362107,31915.972868,358.982558,7770.236434,sarawei3,女人的品质，决定着一个民族的未来🙏 基督徒 Graduate Certificate in ...,not verified,False,1023362107,suspended,https://api.twitter.com/2/problems/resource-no...
...,...,...,...,...,...,...,...,...,...,...,...
245,965259182,54812.569182,37505.399371,467860.204403,baalter,"🇺🇸1A, 2A, USMC-Family, Conservative, Country, ...",not verified,False,965259182,suspended,https://api.twitter.com/2/problems/resource-no...
246,966716807452258304,34011.982222,34308.720000,42249.004444,SURSDIK,#TrumpTrain 🤣 🤣 #Draintheswamp 🤣 #Menacoverup...,not verified,False,966716807452258304,suspended,https://api.twitter.com/2/problems/resource-no...
247,969444209940889600,42852.500000,28818.166667,181323.166667,ernie_plumley,"God, Jesus, wife, family, and the Reborn Ameri...",not verified,False,969444209940889600,suspended,https://api.twitter.com/2/problems/resource-no...
248,975217231,152649.985294,98170.279412,323830.455882,John_KissMyBot,"Donald J Trump is my President, Conservative, ...",not verified,False,975217231,suspended,https://api.twitter.com/2/problems/resource-no...


### Make figures

In [13]:
ss_account_info.head()

Unnamed: 0,u_id,followers_count,following_count,tweet_count,username,description,verified,protected,user_id,status,twitter_error
0,1000141948269678592,56366.039735,56903.993377,96973.688742,209acesheepdog,🇺🇸🐶1A🗞~2A🔫CHRISTIAN⛪ CONSERVATIVE 💪\nMAR.30yrs...,not verified,False,,active,
1,1010682068131557376,13237.791667,13281.125,65105.958333,DebHaslam,WWG1WGA👮‍♂️#MAGA🇺🇸❤️#TRUMP2020❤️🇺🇸 MY PAGE IS ...,not verified,False,1.0106820681315572e+18,suspended,https://api.twitter.com/2/problems/resource-no...
2,101779040,85459.535714,199.464286,136737.321429,Sanjay_Dixit,"IAS//DMET//BITS// Author, Krishna Gopeshvara, ...",verified,False,,active,
3,1018425398869282817,10784.678218,25.217822,7084.831683,Electroversenet,Accurately documenting Earth Changes as two wo...,not verified,False,,active,
4,1023362107,31915.972868,358.982558,7770.236434,sarawei3,女人的品质，决定着一个民族的未来🙏 基督徒 Graduate Certificate in ...,not verified,False,1023362107.0,suspended,https://api.twitter.com/2/problems/resource-no...


In [14]:
suspended_users = ss_account_info[ss_account_info['status']=='suspended'].reset_index(drop=True).copy()

suspended_users = suspended_users[['u_id', 'verified','status', 'followers_count']]

verified_split = (suspended_users['verified'].value_counts() / len(suspended_users)).reset_index()
verified_split

Unnamed: 0,index,verified
0,not verified,0.973214
1,verified,0.026786


In [15]:
SPLIT_VALUE = 150_000

suspended_users.loc[:, 'prominent'] = None

suspended_users.loc[suspended_users['followers_count'] > SPLIT_VALUE, "prominent"] = f"> {int(SPLIT_VALUE/1000)}K followers"
suspended_users.loc[suspended_users['followers_count'] < SPLIT_VALUE, "prominent"] = f"< {int(SPLIT_VALUE/1000)}K followers"

prominent_split = (suspended_users['prominent'].value_counts() / len(suspended_users)).reset_index()
prominent_split

Unnamed: 0,index,prominent
0,< 150K followers,0.901786
1,> 150K followers,0.098214


In [16]:
verified_bar = alt.Chart(verified_split).mark_bar(color='black').encode(
    x = alt.X(
        "verified:Q",
        axis=alt.Axis(
            orient='top',
            title = None,
            labels = False,
            ticks = False,
            values = np.arange(0,1.2,.2)
        ),
        scale = alt.Scale(domain = (0,1))
    ),
    y = alt.Y("index:N", title = None)
).properties(width=800)


verified_text = alt.Chart(verified_split).mark_text(
    dx=5,
    fontSize=14,
    align='left'
).encode(
    x = alt.X(
        "verified:Q",
        axis=alt.Axis(
            orient='top',
            title = ['Percent of suspended superspreaders'],
            format = "%"
        ),
        scale = alt.Scale(domain = (0,1))
    ),
    y = alt.Y("index:N", title = None),
    text = alt.Text(
        "verified:Q",
        format = ".1%",
        
    )
).properties(width=800)

verified_fig = alt.layer(verified_bar, verified_text)

# --------------------------------
prominent_bar = alt.Chart(prominent_split).mark_bar(color='dodgerblue').encode(
    x = alt.X(
        "prominent:Q",
        axis=alt.Axis(
            title = ['Percentage of suspended superspreaders'],
            format = "%",
            values = np.arange(0,1.2,.2)
        ),
        scale = alt.Scale(domain = (0,1))
    ),
    y = alt.Y("index:N", title = None)
).properties(width=800)

prominent_text = alt.Chart(prominent_split).mark_text(
    dx=5,
    fontSize=14,
    align='left'
).encode(
    x = alt.X(
        "prominent:Q",
        axis=alt.Axis(
            format = "%"
        ),
        scale = alt.Scale(domain = (0,1))
    ),
    y = alt.Y("index:N", title = None),
    text = alt.Text(
        "prominent:Q",
        format = ".1%",
        
    )
).properties(width=800)

prominent_fig = alt.layer(prominent_bar, prominent_text)

# --------------------------------
chart = alt.vconcat(
    verified_fig,
    prominent_fig,
    spacing=5
).configure_axis(
    titleFontSize=15,
    labelFontSize=15
)

chart

In [17]:
verified_split

Unnamed: 0,index,verified
0,not verified,0.973214
1,verified,0.026786


In [18]:
prominent_split.index

RangeIndex(start=0, stop=2, step=1)

In [19]:
prominent_split.sort_values("prominent", inplace=True)

verified_split.sort_values("verified", inplace=True)

In [20]:
verified_split

Unnamed: 0,index,verified
1,verified,0.026786
0,not verified,0.973214


In [21]:
d['verified']

NameError: name 'd' is not defined

In [None]:
mpl.rcParams['font.size'] = 14

fig, ax = plt.subplots(nrows=2, figsize=(10,2))

nudge = 0.005

ax[0].barh(
    y = verified_split['index'],
    width = verified_split.verified,
    color = 'black',
    zorder=3
)

ax[0].tick_params(axis='x', length=0)
ax[0].set_xticklabels([])
ax[0].grid(axis='x', zorder=0)
ax[0].set_xlim(0,1.01)

ax[0].spines['top'].set_visible(False)
ax[0].spines['right'].set_visible(False)
ax[0].spines['bottom'].set_visible(False)
ax[0].spines['left'].set_visible(False)

for i, d in verified_split.iterrows():
    ax[0].text(
        x = d['verified']+nudge,
        y = d['index'],
        s = f"{d['verified']:.0%}",
        va='center'
    )


# -------

ax[1].barh(
    y = prominent_split['index'],
    width = prominent_split.prominent,
    color = '#1e90ff',
    zorder=3
)

for i, d in prominent_split.iterrows():
    ax[1].text(
        x = d['prominent'] + nudge,
        y = d['index'],
        s = f"{d['prominent']:.0%}",
        va='center'
    )

ax[1].tick_params(axis='x', length=0)

ax[1].grid(axis='x', zorder=0)
ax[1].set_xlim(0,1.01)

ax[1].spines['top'].set_visible(False)
ax[1].spines['right'].set_visible(False)
ax[1].spines['bottom'].set_visible(False)
ax[1].spines['left'].set_visible(False)

ax[1].xaxis.set_major_formatter(mticker.PercentFormatter(xmax=1))

ax[1].set_xlabel("Percentage of suspended superspreaders")

plt.tight_layout()

fig.savefig(
    "../figures/suspended_users_150.pdf",
    bbox_inches='tight',
    dpi=600
)