## Topics of human vs. bot Twitter accounts

Compare the topics (hashtags) found in the 15 most recent tweets of 10 human accounts and 10 bot accounts.

#### Scrape Data

In [232]:
import requests
import re
import pandas as pd
import altair as alt
from tweepy import Client

from keys import bearer_token, consumer_key, consumer_secret, access_token, access_token_secret

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [233]:
# Single authenticated Twitter API client shared by the scraping cells below.
# The credentials come from the `keys` module imported above, not from literals here.
client = Client(
    bearer_token=bearer_token,
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token=access_token,
    access_token_secret=access_token_secret,
    # Return the raw requests.Response; the scraping cells call .json() on it.
    return_type=requests.Response,
    # Per tweepy's documented option: wait out rate limits instead of raising.
    wait_on_rate_limit=True
)

In [234]:
# Twitter user IDs of the accounts treated as human in this comparison.
human_ids = [
    35010567,
    1176309327331676160,
    1341724669041250305,
    19489239,
    123618103,
    1463283936931430408,
    317200582,
    890011658839441408,
    1427197998744940549,
    705982544,
]
human_hashtags = []

# `user_id` rather than `id`, so the builtin id() is not shadowed for the
# rest of the kernel session.
for user_id in human_ids:
    # The client is configured to return raw HTTP responses; decode the JSON here.
    payload = client.get_users_tweets(
        user_id,
        max_results=15
    ).json()

    # The payload has no "data" key when the request yields no tweets
    # (e.g. an empty or inaccessible timeline). Treat that as "no hashtags"
    # instead of aborting the whole scrape with a KeyError.
    for tweet in payload.get("data", []):
        # Lower-case every tag so that e.g. #AI and #ai count as one topic.
        human_hashtags.extend(
            tag.lower() for tag in re.findall(r"#\w+", tweet["text"])
        )

# One row per distinct hashtag with the number of times it occurred.
df_human = (
    pd.DataFrame({"hashtag": human_hashtags})
    .groupby(['hashtag'])['hashtag']
    .count()
    .reset_index(name='count')
)

print("Unique hashtags for human accounts: %s" % len(df_human['hashtag']))


Unique hashtags for human accounts: 95


In [235]:
# Twitter user IDs of the accounts treated as bots in this comparison.
# NOTE(review): 1406650146 was listed twice in the original list, so that
# account's tweets were fetched and counted twice. The duplicate is removed,
# which leaves 9 distinct bot accounts.
# TODO: add the intended tenth bot ID to match the 10 human accounts.
bot_ids = [
    62438818,
    827113150772236288,
    1406650146,
    50367257,
    1257116113898536961,
    90466734,
    1574491792346693663,
    1451598702154379265,
    882153804
]
# Guard against the same account being sampled (and counted) more than once.
assert len(set(bot_ids)) == len(bot_ids), "duplicate account ID in bot_ids"

bot_hashtags = []

# `user_id` rather than `id`, so the builtin id() is not shadowed for the
# rest of the kernel session.
for user_id in bot_ids:
    # The client is configured to return raw HTTP responses; decode the JSON here.
    payload = client.get_users_tweets(
        user_id,
        max_results=15
    ).json()

    # The payload has no "data" key when the request yields no tweets
    # (e.g. an empty or inaccessible timeline). Treat that as "no hashtags"
    # instead of aborting the whole scrape with a KeyError.
    for tweet in payload.get("data", []):
        # Lower-case every tag so that e.g. #AI and #ai count as one topic.
        bot_hashtags.extend(
            tag.lower() for tag in re.findall(r"#\w+", tweet["text"])
        )

# One row per distinct hashtag with the number of times it occurred.
df_bot = (
    pd.DataFrame({"hashtag": bot_hashtags})
    .groupby(['hashtag'])['hashtag']
    .count()
    .reset_index(name='count')
)

print("Unique hashtags for bot accounts: %s" % len(df_bot['hashtag']))

Unique hashtags for bot accounts: 177


#### Data Visualization

In [236]:
# Horizontal bar chart of the highest-ranked hashtags from the human accounts.
chart_human = (
    alt.Chart(df_human)
    # Rank hashtags by descending count and keep ranks 1-9 (rank < 10).
    # rank() assigns tied counts the same rank, so more than nine bars can appear.
    .transform_window(
        rank='rank(count)',
        sort=[alt.SortField('count', order='descending')],
    )
    .transform_filter(alt.datum.rank < 10)
    .mark_bar()
    .encode(
        alt.X('count:Q', axis=alt.Axis(title='Count')),
        # sort='-x' orders the hashtag axis by descending x value (the count).
        alt.Y('hashtag:N', axis=alt.Axis(title='Hashtag'), sort='-x'),
    )
    .properties(title="Most Common Hashtags by Human Accounts")
    .configure_title(color="black", fontSize=13, anchor="start")
)

chart_human

In [237]:
# Horizontal bar chart of the highest-ranked hashtags from the bot accounts.
chart_bot = (
    alt.Chart(df_bot)
    # Rank hashtags by descending count and keep ranks 1-9 (rank < 10).
    # rank() assigns tied counts the same rank, so more than nine bars can appear.
    .transform_window(
        rank='rank(count)',
        sort=[alt.SortField('count', order='descending')],
    )
    .transform_filter(alt.datum.rank < 10)
    .mark_bar()
    .encode(
        alt.X('count:Q', axis=alt.Axis(title='Count')),
        # sort='-x' orders the hashtag axis by descending x value (the count).
        alt.Y('hashtag:N', axis=alt.Axis(title='Hashtag'), sort='-x'),
    )
    .properties(title="Most Common Hashtags by Bot Accounts")
    .configure_title(color="black", fontSize=13, anchor="start")
)

chart_bot