# How often is 'x' mentioned on Twitter?
##### *Counts pulled with the Twarc library. [Check it out](https://twarc-project.readthedocs.io/en/latest/twarc2_en_us/).*

#### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt

In [3]:
# Raise pandas' display limits so wide/long frames are shown without truncation.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

---

#### Read data

In [4]:
# Disabled alternate configuration: the same load step pointed at the USC
# daily-mentions file. Everything in this cell is commented out, so it has no effect.
# mentioned = "USC"

# src = pd.read_csv(
#     "../data/raw/usc_mentions_daily.csv", parse_dates=["start", "end"]
# ).sort_values("start", ascending=False)

In [5]:
# Label used in chart titles and in the export filename.
mentioned = "Elon Musk"

# Read the daily counts, parsing both window boundaries as datetimes, then
# order the rows newest-first by window start.
raw_csv = "../data/raw/elonmusk_mentions_daily_full_name.csv"
src = pd.read_csv(raw_csv, parse_dates=["start", "end"])
src = src.sort_values("start", ascending=False)

#### First five rows

In [6]:
# Preview the first five rows of the sorted frame.
src.head(n=5)

Unnamed: 0,start,end,day_count
30,2022-04-14 00:00:00+00:00,2022-04-14 17:43:29+00:00,529321
29,2022-04-13 00:00:00+00:00,2022-04-14 00:00:00+00:00,81340
28,2022-04-12 00:00:00+00:00,2022-04-13 00:00:00+00:00,83145
27,2022-04-11 00:00:00+00:00,2022-04-12 00:00:00+00:00,182471
26,2022-04-10 00:00:00+00:00,2022-04-11 00:00:00+00:00,95829


#### Process dates

In [7]:
# Convert the window start once, then derive the three string-formatted
# date columns (year, year-month, full date) from that single series.
start_ts = pd.to_datetime(src["start"])
src["year"] = start_ts.dt.strftime("%Y")
src["month_year"] = start_ts.dt.strftime("%Y-%m")
src["date"] = start_ts.dt.strftime("%Y-%m-%d")

#### Slim down and re-order the dataframe

In [8]:
# Keep just the derived date columns and the count, as an independent copy.
keep_cols = ["year", "date", "month_year", "day_count"]
df = src.loc[:, keep_cols].copy()

In [9]:
# Show the last five rows of the slimmed frame.
df.tail(n=5)

Unnamed: 0,year,date,month_year,day_count
5863,2006,2006-03-25,2006-03,0
5862,2006,2006-03-24,2006-03,0
5861,2006,2006-03-23,2006-03,0
5860,2006,2006-03-22,2006-03,0
5859,2006,2006-03-21,2006-03,0


#### When was the first mention? 

In [10]:
# Earliest date with at least one mention. Taking the minimum does not depend on
# how the rows are ordered; "date" holds zero-padded YYYY-MM-DD strings, so the
# smallest string is also the earliest day.
df.loc[df["day_count"] > 0, "date"].min()

'2007-03-21'

#### Define that as a variable

In [11]:
# Earliest date (YYYY-MM-DD string) with a non-zero count. Using min() rather than
# "last row of the filtered frame" keeps this correct whatever the row order is;
# zero-padded ISO date strings compare in calendar order.
first = df.loc[df["day_count"] > 0, "date"].min()

#### How many total mentions? 

In [12]:
# Sum of the daily counts across every row of df.
df["day_count"].sum()

33300103

#### Average mentions per day?

In [13]:
# Mean of the daily counts over every row of df, rounded to the nearest integer.
mean_daily = df["day_count"].mean()
round(mean_daily)

5674

#### Make a new dataframe starting from first mention

In [14]:
# Keep only rows dated on or after the first mention. Both sides are
# YYYY-MM-DD strings, so the string comparison follows calendar order.
since_first = df["date"] >= first
df_all = df.loc[since_first].copy()

#### Which day had the most mentions?

In [15]:
# Reference tweet: https://twitter.com/elonmusk/status/1463828765151272962
# NOTE(review): nothing in this notebook ties this link to the peak day — confirm it is still the relevant tweet.

In [16]:
# Row(s) whose daily count equals the largest daily count in df_all.
peak_day_count = df_all["day_count"].max()
df_all.loc[df_all["day_count"] == peak_day_count]

Unnamed: 0,year,date,month_year,day_count
30,2022,2022-04-14,2022-04,529321


In [17]:
# Five rows with the highest daily counts, largest first.
df_all.sort_values(by="day_count", ascending=False).head(n=5)

Unnamed: 0,year,date,month_year,day_count
30,2022,2022-04-14,2022-04,529321
21,2022,2022-04-05,2022-04,385590
20,2022,2022-04-04,2022-04,256011
686,2020,2020-05-06,2020-05,239530
642,2020,2020-07-25,2020-07,194997


#### Chart it

In [18]:
# Disabled area-chart template. It refers to `source` and `net_generation`,
# which are not defined in any visible cell of this notebook, so it is kept
# commented out and has no effect.
# alt.Chart(source).mark_area().encode(
#     x="year:T",
#     y="net_generation:Q",
# )

In [19]:
# Area chart of daily counts for the first 1,000 rows of df_all.
# Axis styling is built up front so the encoding below stays short.
day_axis = alt.Axis(grid=False, tickCount=6, format="%b. %Y")
count_axis = alt.Axis(
    domainOpacity=0,
    gridWidth=0.6,
    gridColor="#dddddd",
    offset=6,
    tickSize=0,
    tickCount=6,
)

daily_chart = (
    alt.Chart(df_all.head(1000))
    .mark_area()
    .encode(
        x=alt.X("date:T", axis=day_axis, title="Day"),
        y=alt.Y("day_count:Q", axis=count_axis, title="Daily mentions"),
    )
    .properties(width=600, height=300, title=f"Mentions of {mentioned} on Twitter")
    .configure_view(strokeOpacity=0)
)
daily_chart

---

## Aggregate 

#### Groupby month/year

In [20]:
# Preview: total mentions per (year, month_year) pair, first five groups.
monthly_preview = df_all.groupby(["year", "month_year"])[["day_count"]].sum()
monthly_preview.reset_index().head(n=5)

Unnamed: 0,year,month_year,day_count
0,2007,2007-03,4
1,2007,2007-04,0
2,2007,2007-05,4
3,2007,2007-06,0
4,2007,2007-07,0


In [21]:
# Sum the daily counts within each (year, month_year) group and flatten the
# group keys back into ordinary columns.
monthly_totals = df_all.groupby(["year", "month_year"])[["day_count"]].sum()
df_all_months = monthly_totals.reset_index()

#### Which month had the most mentions?

In [22]:
# Row(s) of the monthly table whose total equals the largest monthly total.
peak_month_total = df_all_months["day_count"].max()
max_count = df_all_months.loc[df_all_months["day_count"] == peak_month_total]

In [23]:
# Display the month row(s) whose summed day_count equals the monthly maximum.
max_count

Unnamed: 0,year,month_year,day_count
181,2022,2022-04,2164874


In [24]:
# Area chart of the monthly totals. Axis and chart titles are set explicitly so
# the figure is readable on its own, matching the daily chart earlier in the notebook.
alt.Chart(df_all_months).mark_area().encode(
    x=alt.X("month_year:T", title="Month"),
    y=alt.Y("day_count:Q", title="Monthly mentions"),
).properties(width=700, title=f"Monthly mentions of {mentioned} on Twitter")

---

#### Exports

In [25]:
# Write the monthly totals to the processed-data folder without the index.
# `mentioned` is interpolated verbatim, so any spaces or capitals in it end up
# in the filename.
export_path = f"../data/processed/twitter_mentions_{mentioned}.csv"
df_all_months.to_csv(export_path, index=False)