# How often is X mentioned on Twitter?

#### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_stiles as altstiles

In [3]:
# Register the altair_stiles theme under a single name, then make it the active theme.
THEME_NAME = "stiles"
alt.themes.register(THEME_NAME, altstiles.theme)
alt.themes.enable(THEME_NAME)

ThemeRegistry.enable('grid')

In [4]:
# Raise pandas' display limits to 1000 columns and 1000 rows.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

---

## Get data

#### Define the target

In [5]:
# One entry per search target:
#   "mentioned"    - search text; also the stem of the raw/processed data file names
#   "display_name" - label interpolated into the chart titles
users = [
    dict(mentioned="juul", display_name="juullabs"),
]

In [6]:
# Tabular view of the targets: one row per entry in `users`.
users_df = pd.DataFrame(data=users)

In [7]:
# Display the targets table to confirm what will be queried.
users_df

Unnamed: 0,mentioned,display_name
0,juul,juullabs


In [8]:
for index, row in users_df.iterrows():
    mentioned = row["mentioned"]
    display_name = row["display_name"]
    !twarc2 counts --granularity day --csv --archive f'{mentioned}' data/raw/{mentioned}_mention_counts.csv

100%|████████████| Processed 16 years/16 years [03:43<00:00, 2662 tweets total ]


#### Counts pulled with the Twarc library. [Check it out](https://twarc-project.readthedocs.io/en/latest/twarc2_en_us/). 

In [9]:
# !twarc2 counts --granularity day --csv --archive 'JoeBiden OR (Joe Biden)' data/raw/joe_biden_mention_counts.csv

In [10]:
# Alternative query that also matches the display name (kept for reference).
# NOTE: the `f` before the quoted query is not Python here. IPython fills in
# {mentioned} and {display_name} itself, and the shell would prepend a literal "f"
# to the query text. Drop the `f` before un-commenting this line.
# !twarc2 counts --granularity day --csv --archive f'{mentioned} OR ({display_name})' data/raw/{mentioned}_mention_counts.csv

---

## Read data

#### Get daily mention counts as dataframe

In [11]:
# Load the daily counts CSV for the current target, newest bucket first.
counts_path = f"data/raw/{mentioned}_mention_counts.csv"
src = pd.read_csv(counts_path, parse_dates=["start", "end"]).sort_values(
    by="start", ascending=False
)

#### Process dates

In [12]:
# Derive string keys (year, year-month, full date) from each bucket's start time.
src = src.assign(
    year=lambda frame: pd.to_datetime(frame["start"]).dt.strftime("%Y"),
    month_year=lambda frame: pd.to_datetime(frame["start"]).dt.strftime("%Y-%m"),
    date=lambda frame: pd.to_datetime(frame["start"]).dt.strftime("%Y-%m-%d"),
)

In [13]:
# Show the row(s) for the most recent date in the file.
latest_date = src["date"].max()
src.loc[src["date"] == latest_date]

Unnamed: 0,start,end,day_count,year,month_year,date
30,2022-06-28 00:00:00+00:00,2022-06-28 13:12:44+00:00,0,2022,2022-06,2022-06-28


In [14]:
# Collapse to one row per date, summing counts for any rows that share a date.
# The aggregation is named by string ("sum") instead of passing the Python builtin,
# which recent pandas releases warn about.
src_grouped = (
    src.groupby(["date", "year", "month_year"]).agg({"day_count": "sum"}).reset_index()
)

#### Slim down and re-order the dataframe

In [15]:
# Keep only the reporting columns, in display order, most recent date first.
df = (
    src_grouped.loc[:, ["year", "month_year", "date", "day_count"]]
    .sort_values(by="date", ascending=False)
    .copy()
)

In [16]:
# Preview the five most recent days (df is sorted by date, descending).
df.head()

Unnamed: 0,year,month_year,date,day_count
5943,2022,2022-06,2022-06-28,0
5942,2022,2022-06,2022-06-27,1
5941,2022,2022-06,2022-06-26,0
5940,2022,2022-06,2022-06-25,0
5939,2022,2022-06,2022-06-24,0


#### When was the first mention? 

In [17]:
# Earliest date with at least one mention. Dates are "YYYY-MM-DD" strings, so the
# minimum is the earliest day no matter how df happens to be sorted.
mentioned_days = df.loc[df["day_count"] > 0, "date"]
assert not mentioned_days.empty, "no days with mentions found in the counts data"
first = mentioned_days.min()
first

'2009-10-29'

#### How many total mentions? 

In [18]:
# Total mentions across every day in df.
df["day_count"].sum()

2662

#### Average mentions? 

In [19]:
# Mean mentions per day over every day in df, including the days before the
# first mention.
df["day_count"].mean()

0.4478465679676985

#### Make a new dataframe starting from first mention

In [20]:
# Keep only the days on or after the first mention (string comparison on ISO dates).
df_complete = df.loc[df["date"] >= first]

#### Which day had the most mentions? 

In [21]:
# Row(s) whose daily count equals the overall maximum.
peak_count = df_complete["day_count"].max()
df_complete.loc[df_complete["day_count"] == peak_count]

Unnamed: 0,year,month_year,date,day_count
2972,2014,2014-05,2014-05-10,330


In [22]:
# The ten days with the highest counts, largest first.
df_complete.sort_values(by="day_count", ascending=False).iloc[:10]

Unnamed: 0,year,month_year,date,day_count
2972,2014,2014-05,2014-05-10,330
2973,2014,2014-05,2014-05-11,225
2978,2014,2014-05,2014-05-16,110
2969,2014,2014-05,2014-05-07,70
2974,2014,2014-05,2014-05-12,66
3040,2014,2014-07,2014-07-17,40
3619,2016,2016-02,2016-02-16,39
2975,2014,2014-05,2014-05-13,33
3133,2014,2014-10,2014-10-18,29
2967,2014,2014-05,2014-05-05,28


#### Chart it

In [23]:
# Area chart of daily counts. df_complete is sorted newest-first, so head(365 * 10)
# keeps roughly the most recent ten years of days.
# The title says "Daily" because this chart plots one point per date.
alt.Chart(df_complete.head(365 * 10)).mark_area().encode(
    x=alt.X("date:T", axis=alt.Axis(format="%b. %Y", tickCount=5), title=""),
    y=alt.Y("day_count:Q", title=" ", axis=alt.Axis(tickCount=5, format="0,M")),
).properties(width=900, title=f"Daily mentions of {display_name} on Twitter")

---

## Aggregate 

#### Groupby month/year

In [24]:
# Monthly totals from the first mention onward, most recent month first.
# The aggregation is named by string ("sum") instead of passing the Python builtin,
# which recent pandas releases warn about.
months = (
    df_complete.groupby(["month_year"])
    .agg({"day_count": "sum"})
    .sort_values("month_year", ascending=False)
    .reset_index()
)

In [25]:
# Display the monthly totals (newest month first).
months

Unnamed: 0,month_year,day_count
0,2022-06,2
1,2022-05,2
2,2022-04,0
3,2022-03,0
4,2022-02,0
5,2022-01,0
6,2021-12,0
7,2021-11,0
8,2021-10,1
9,2021-09,0


#### Chart it

In [26]:
# Area chart of monthly totals. `months` is sorted newest-first, so head(120)
# keeps the most recent 120 months.
alt.Chart(months.head(120)).mark_area().encode(
    alt.X("month_year:T", title="", axis=alt.Axis(format="%b. %Y", tickCount=5)),
    alt.Y("day_count:Q", title=" ", axis=alt.Axis(format="0,M", tickCount=5)),
).properties(title=f"Monthly mentions of {display_name} on Twitter", width=900)

## Exports

In [27]:
# Write the monthly and daily series to data/processed/, keeping only rows after
# the end of 2014 (string comparison on the ISO-formatted keys).
months_after_2014 = months.loc[months["month_year"] > "2014-12"]
days_after_2014 = df_complete.loc[df_complete["date"] > "2014-12-31"]

months_after_2014.to_csv(
    f"data/processed/twitter_mentions_{mentioned}_months.csv", index=False
)
days_after_2014.to_csv(
    f"data/processed/twitter_mentions_{mentioned}_days.csv", index=False
)

In [28]:
# Rounded mean of the monthly totals for months after 2021-01.
# NOTE(review): the strict ">" leaves January 2021 itself out; confirm that is intended.
months.loc[months["month_year"] > "2021-01", "day_count"].mean().round()

1.0