# How often is X mentioned on Twitter?

#### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_stiles as altstiles

In [3]:
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

---

## Get data

#### Counts pulled with the Twarc library. [Check it out](https://twarc-project.readthedocs.io/en/latpst/twarc2_en_us/). 

In [11]:
# !twarc2 counts --granularity minute --start-time '2022-04-24' --csv --archive 'Elon Musk' data/raw/elon_musk_minute_tweet_counts.csv

100%|█████████████| Processed 2 days/2 days [00:03<00:00, 3723675 tweets total ]


In [12]:
# !twarc2 counts --granularity minute --start-time '2022-04-24' --csv --archive '@elonmusk' data/raw/@elonmusk_minute_tweet_counts.csv

100%|█████████████| Processed 2 days/2 days [00:03<00:00, 4365681 tweets total ]


---

## Read data

#### @elonmusk

In [43]:
mentioned = "@elonmusk"

src1 = pd.read_csv(
    "data/raw/@elonmusk_minute_tweet_counts.csv", parse_dates=["start", "end"]
).sort_values("start", ascending=False)

In [94]:
mentioned = "Elon Musk"

src2 = pd.read_csv(
    "data/raw/elon_musk_minute_tweet_counts.csv", parse_dates=["start", "end"]
).sort_values("start", ascending=False)

In [201]:
src = pd.concat([src1, src2]).reset_index(drop=True)

#### Process dates

In [202]:
src["start_date_pst"] = (
    pd.to_datetime(src["start"])
    .dt.tz_convert("US/Pacific")
    .dt.strftime("%Y-%m-%d %H:%M:%S")
)
src["end_date_pst"] = (
    pd.to_datetime(src["end"])
    .dt.tz_convert("US/Pacific")
    .dt.strftime("%Y-%m-%d %H:%M:%S")
)

In [203]:
src["start_pst"] = (
    pd.to_datetime(src["start"]).dt.tz_convert("US/Central").dt.strftime("%H:%M:%S")
)
src["end_pst"] = (
    pd.to_datetime(src["end"]).dt.tz_convert("US/Central").dt.strftime("%H:%M:%S")
)

In [204]:
src.head()

Unnamed: 0,start,end,minute_count,start_date_pst,end_date_pst,start_pst,end_pst
0,2022-04-26 13:40:00+00:00,2022-04-26 13:40:48+00:00,1483,2022-04-26 06:40:00,2022-04-26 06:40:48,08:40:00,08:40:48
1,2022-04-26 13:39:00+00:00,2022-04-26 13:40:00+00:00,1969,2022-04-26 06:39:00,2022-04-26 06:40:00,08:39:00,08:40:00
2,2022-04-26 13:38:00+00:00,2022-04-26 13:39:00+00:00,1914,2022-04-26 06:38:00,2022-04-26 06:39:00,08:38:00,08:39:00
3,2022-04-26 13:37:00+00:00,2022-04-26 13:38:00+00:00,1908,2022-04-26 06:37:00,2022-04-26 06:38:00,08:37:00,08:38:00
4,2022-04-26 13:36:00+00:00,2022-04-26 13:37:00+00:00,1998,2022-04-26 06:36:00,2022-04-26 06:37:00,08:36:00,08:37:00


#### Slim down and re-order the dataframe

In [205]:
df = src[
    ["start_date_pst", "end_date_pst", "start_pst", "end_pst", "minute_count"]
].copy()

In [206]:
df[df["minute_count"] == df["minute_count"].max()]

Unnamed: 0,start_date_pst,end_date_pst,start_pst,end_pst,minute_count
1075,2022-04-25 12:45:00,2022-04-25 12:46:00,14:45:00,14:46:00,17458


In [207]:
df_grouped = df.groupby(["start_date_pst"]).agg({"minute_count": sum}).reset_index()

In [208]:
monday = df_grouped[
    (df_grouped["start_date_pst"] > "2022-04-25")
    & (df_grouped["start_date_pst"] < "2022-04-26")
]

#### Chart it

In [1]:
alt.Chart(monday).mark_area().encode(
    x=alt.X("start_date_pst:T", axis=alt.Axis(format="%-H:%M", tickCount=5), title=""),
    y=alt.Y("minute_count:Q", title="", axis=alt.Axis(tickCount=6, format="0,M")),
).properties(width=900, title="Monday: Mentions per minute of Elon Musk on Twitter")

NameError: name 'alt' is not defined

---

## Exports

In [210]:
monday.to_csv(f"data/processed/elon_twitter_mentions_monday.csv", index=False)