# How often is 'x' mentioned on Twitter?
##### *Counts pulled with the Twarc library. [Check it out](https://twarc-project.readthedocs.io/en/latest/twarc2_en_us/).*

#### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt

In [3]:
# Raise pandas' display limits so frames of up to 1000 columns / 1000 rows
# are shown in full rather than truncated.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

---

#### Read data

In [4]:
# mentioned = "USC"

# src = pd.read_csv(
#     "../data/raw/usc_mentions_daily.csv", parse_dates=["start", "end"]
# ).sort_values("start", ascending=False)

In [None]:
# Label for the term being analysed; it is also interpolated into the
# export filename at the end of the notebook.
mentioned = "Elon Musk"

# Load the daily counts with `start`/`end` parsed as dates, then order the
# rows newest-first.
src = pd.read_csv(
    "../data/raw/elonmusk_mentions_daily_full_name.csv",
    parse_dates=["start", "end"],
)
src = src.sort_values(by="start", ascending=False)

#### First five rows

In [None]:
# Preview the first five rows of the raw counts.
src.head(n=5)

#### Process dates

In [None]:
# Convert `start` once, then derive string keys at three granularities:
# year ("YYYY"), month ("YYYY-MM") and day ("YYYY-MM-DD").
start_ts = pd.to_datetime(src["start"])
src["year"] = start_ts.dt.strftime("%Y")
src["month_year"] = start_ts.dt.strftime("%Y-%m")
src["date"] = start_ts.dt.strftime("%Y-%m-%d")

#### Slim down and re-order the dataframe

In [None]:
# Preview the first five rows again, now including the derived date columns.
src.head(n=5)

In [None]:
# Keep only the date keys and the daily count, in this column order, as an
# independent copy of the source frame.
df = src.loc[:, ["year", "month_year", "date", "day_count"]].copy()

#### When was the first mention? 

In [None]:
# Rows are ordered newest-first, so the last row with a non-zero count is
# the earliest day with a mention.
df.loc[df["day_count"].gt(0)].tail(1)

#### Define that as a variable

In [None]:
# Date string ("YYYY-MM-DD") of the earliest day with at least one mention:
# the last matching row, because the frame is ordered newest-first.
first = df.loc[df["day_count"] > 0, "date"].iloc[-1]

#### How many total mentions? 

In [None]:
# Total mentions across every day in the frame.
df["day_count"].sum()

#### Average mentions per day?

In [None]:
# Mean daily count over every row in `df`, zero-count days included.
df["day_count"].mean()

#### Make a new dataframe starting from first mention

In [None]:
# Keep days on or after the first mention. `date` holds "YYYY-MM-DD"
# strings, which compare chronologically when compared as text.
df_complete = df.loc[df["date"].ge(first)]

#### Which day had the most mentions?

In [None]:
# Show every day whose count equals the overall maximum (ties included).
peak_count = df_complete["day_count"].max()
df_complete.loc[df_complete["day_count"].eq(peak_count)]

#### Chart it

In [None]:
# Area chart of daily counts. The frame is ordered newest-first, so the
# first 365 * 2 rows are the most recent days in the data.
recent_days = df_complete.head(365 * 2)
alt.Chart(recent_days).mark_area(color="red").encode(
    x=alt.X("date:T"),
    y=alt.Y("day_count:Q"),
).properties(width=900)

---

## Aggregate 

#### Groupby month/year

In [None]:
# Total mentions per calendar month ("YYYY-MM"), newest month first, with
# `month_year` restored as a regular column.
# The aggregation is named by the string "sum" rather than the builtin
# `sum`: passing the builtin callable to `.agg` is deprecated in recent
# pandas and emits a FutureWarning.
months = (
    df_complete.groupby("month_year")
    .agg({"day_count": "sum"})
    .sort_values("month_year", ascending=False)
    .reset_index()
)

#### Chart monthly totals: which month-year was max?

In [None]:
# Area chart of the monthly totals, one point per "YYYY-MM" key.
alt.Chart(months).mark_area(color="red").encode(
    x=alt.X("month_year:T"),
    y=alt.Y("day_count:Q"),
).properties(width=900)

---

#### Exports

In [None]:
# Write the monthly totals to disk. The original cell referenced
# `df_all_months`, which is never defined in this notebook and raised a
# NameError; the monthly aggregate built above is named `months`.
# NOTE(review): confirm the monthly frame (not the daily `df_complete`) is
# the intended export.
months.to_csv(
    f"../data/processed/twitter_mentions_{mentioned}.csv", index=False
)