# Analyzing Barstool Sports post metadata

#### Python tools and notebook settings

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import datetime as dt

In [3]:
# Widen pandas' display limits: up to 100 columns and 1000 rows, and never
# truncate the text inside a cell (max_colwidth=None).
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [4]:
# Today's date as an ISO "YYYY-MM-DD" string.
today = dt.date.today().isoformat()

---

In [5]:
# Load the Barstool post metadata CSV. The path is written relative to the
# home directory (pandas expands "~") rather than one user's absolute
# /Users/... path, so the notebook is not tied to a single account.
# "id" is kept as a string; the three date columns are parsed to datetimes.
src_df = pd.read_csv(
    "~/data/barstool.csv",
    parse_dates=["week_start", "published_date", "timestamp"],
    dtype={"id": str},
)

In [6]:
# Number of rows loaded from the CSV.
src_df.shape[0]

224735

#### Parse dates for aggregates

In [7]:
# Derive calendar fields used for aggregation and drop the two date columns
# that are not needed downstream.
#
# - drop(..., errors="ignore") + assign returns a new frame instead of mutating
#   in place, so re-running this cell does not raise KeyError on the
#   already-dropped columns.
# - month_year (first day of the post's month) is computed directly from the
#   datetime column rather than by gluing year/month strings together; a
#   missing published_date then yields NaT instead of an unparseable string.
src_df = src_df.drop(columns=["timestamp", "week_start"], errors="ignore").assign(
    year=lambda d: d["published_date"].dt.year,
    month=lambda d: d["published_date"].dt.month,
    month_year=lambda d: d["published_date"].dt.to_period("M").dt.to_timestamp(),
)

In [8]:
# Work on an independent (deep) copy so src_df stays untouched by later cells.
df = src_df.copy(deep=True)

---

## Rico

In [20]:
# Keep only posts whose author is exactly "Rico Bosco" (missing author names
# are treated as empty strings and therefore excluded), restricted to the
# columns used in the rest of this section.
rico_columns = [
    "id",
    "title",
    "author_name",
    "comment_count",
    "branch_url",
    "published_date",
    "published_hour",
    "published_day",
    "first_tag",
    "year",
    "month",
    "month_year",
]
is_rico = df["author_name"].fillna("").eq("Rico Bosco")
rico_df = df.loc[is_rico, rico_columns]

In [36]:
# Preview the first five of Rico's posts.
rico_df.head(n=5)

Unnamed: 0,id,title,author_name,comment_count,branch_url,published_date,published_hour,published_day,first_tag,year,month,month_year
0,3442295,Friendly Reminder: A Christmas Story Sucks,Rico Bosco,0,https://bars.tl/3442295,2022-11-02,12,Wednesday,movies,2022,11,2022-11-01
47,3442117,The Newly Announced Adidas x Mighty Ducks Collab Is Fire!,Rico Bosco,13,https://bars.tl/3442117,2022-11-01,16,Tuesday,mighty-ducks,2022,11,2022-11-01
57,3442129,There Is ZERO Need To Wake Someone Up Who Is Sleeping On The Couch To Move Them To Their Bed,Rico Bosco,36,https://bars.tl/3442129,2022-11-01,15,Tuesday,kfc-radio,2022,11,2022-11-01
69,3442089,"Leave It To Derek Jeter To Embrace The ""Mr.November"" Nickname He Got In The World Series That The Yankees Lost After He Hit A Lowly .148",Rico Bosco,21,https://bars.tl/3442089,2022-11-01,13,Tuesday,mlb,2022,11,2022-11-01
94,3441852,P Diddy Fighting As The Joker Is Just An Absurd Visual,Rico Bosco,42,https://bars.tl/3441852,2022-11-01,10,Tuesday,celebrity-fight,2022,11,2022-11-01


---

#### Has Rico's output increased since mid-October, when he [stormed off](https://www.youtube.com/watch?v=7mMa5tU7Lfw) the Dave Portnoy Show?

In [39]:
# Count Rico's posts per calendar month, newest month first.
# NOTE: despite the name, each row is a month (month_year), not a day;
# "year" is carried along as a grouping key so later cells can filter on it.
daily_counts = (
    rico_df.groupby(["month_year", "year"])["id"]
    .count()
    .to_frame("count")
    .sort_values("month_year", ascending=False)
    .reset_index()
)

In [41]:
# Show the five most recent months of counts.
daily_counts.head(n=5)

Unnamed: 0,month_year,year,count
0,2022-11-01,2022,5
1,2022-10-01,2022,45
2,2022-09-01,2022,32
3,2022-08-01,2022,40
4,2022-07-01,2022,20


In [50]:
# Bar chart of Rico's monthly post counts for years after 2016.
# Axis titles and a chart title are set explicitly so the figure can be read
# on its own instead of showing the raw column names.
alt.Chart(daily_counts[daily_counts["year"] > 2016]).mark_bar().encode(
    x=alt.X("month_year:T", title="Month"),
    y=alt.Y("count:Q", title="Posts published"),
).properties(width=650, title="Rico Bosco posts per month, 2017 onward")