In [None]:
%pip install fastparquet "ipywidgets>=7.0.0" matplotlib pandas seaborn

In [None]:
import os
import pandas as pd

def load_file_to_df(file_name: str) -> pd.DataFrame:
    """Read a parquet file into a DataFrame.

    The base directory depends on where the notebook is running:

    * if the current working directory is exactly ``/drive``, the file is
      read from ``/drive/<file_name>``;
    * otherwise it is read from ``<DATA_DIR>/marts/<file_name>``, where
      ``DATA_DIR`` is taken from the environment.

    Args:
        file_name: File name (may contain sub-directories, e.g.
            ``"reports/foo.parquet"``) relative to the base directory.

    Returns:
        Whatever ``pd.read_parquet`` returns for the resolved path.

    Raises:
        FileNotFoundError: If the ``/drive`` branch is not taken and the
            ``DATA_DIR`` environment variable is not set.
    """
    if os.getcwd() == "/drive":
        return pd.read_parquet(f'/drive/{file_name}')

    data_dir = os.getenv("DATA_DIR")
    if data_dir is None:
        # Without this guard the f-string below would silently produce the
        # path "None/marts/<file_name>", which hides the real problem.
        raise FileNotFoundError(
            f"Cannot locate '{file_name}': the DATA_DIR environment variable is not set"
        )

    return pd.read_parquet(f'{data_dir}/marts/{file_name}')

In [None]:
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

# Bar chart of Medium blog posts per day over the trailing 90 days.
df = load_file_to_df("fct_medium_blogs.parquet")

# One row per published_date; num_blogs counts the non-null story_url values.
df_blogs_per_day = df.groupby(["published_date"])["story_url"].count().rename("num_blogs").sort_index().reset_index()
df_blogs_per_day["published_date"] = pd.to_datetime(df_blogs_per_day["published_date"])

# Timezone-naive "now" in UTC. Equivalent to datetime.utcnow(), which is
# deprecated since Python 3.12.
utc_now = pd.Timestamp.now(tz="UTC").tz_localize(None)
df_blogs_per_day = df_blogs_per_day[df_blogs_per_day["published_date"] > (utc_now - pd.Timedelta(days=90))]

fig, ax = plt.subplots(figsize=(12, 8), dpi=200)
ax.set_title("Number of relevant Medium blogs per day")
# barplot returns the Axes it drew on; do not rebind `fig` to it, so that
# `fig` keeps referring to the Figure created above.
sns.barplot(x="published_date", y="num_blogs", data=df_blogs_per_day, estimator=sum, errorbar=None, ax=ax)
# Not used within this cell; kept in case later cells reference it.
x_dates = df_blogs_per_day['published_date'].dt.strftime('%Y-%m-%d').sort_values().unique()
# Limit the number of x tick intervals so the date labels stay legible.
ax.xaxis.set_major_locator(ticker.MaxNLocator(4))

In [None]:
# Bar chart of cumulative GitHub Action billable minutes by run start date.
# The frame ends up indexed by run_started_date with that column removed,
# the same shape the original in-place index assignment + drop produced,
# but built without mutating the loaded frame in place.
df_github_action_usage = load_file_to_df("reports/rpt_github_action_usage.parquet").set_index("run_started_date")

fig, ax = plt.subplots(figsize=(12, 8), dpi=200)
ax.set_title("Github Action billable minutes per month")
# Plot from reset_index() so "run_started_date" is a real column: resolving
# x= against an index level name is not supported by every seaborn version.
# The returned Axes is not assigned to `fig`, which stays bound to the Figure.
sns.barplot(x="run_started_date", y="billable_minutes_cum_sum", data=df_github_action_usage.reset_index(), ax=ax)
# Limit the number of x tick intervals so the date labels stay legible.
ax.xaxis.set_major_locator(ticker.MaxNLocator(4))