In [26]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
%matplotlib inline

In [11]:
# Load the processed watch-history export.
# Forward slashes are used so the relative path resolves on Windows, Linux and
# macOS alike (backslashes are only treated as separators on Windows).
df = pd.read_csv('data/processed/history/watch-history.csv')
print(df.columns)

Index(['video_title', 'video_url', 'channel_name', 'channel_url',
       'date_watched'],
      dtype='object')


In [12]:
# Parse the watch timestamps into timezone-aware (UTC) datetimes
watched_utc = pd.to_datetime(df['date_watched'], utc=True)
df['date_watched'] = watched_utc

In [14]:
# Derive one calendar column per datetime component of date_watched.
# Each column is named after the `.dt` attribute it is read from.
watched_dt = df['date_watched'].dt
for component in ('year', 'month', 'day', 'weekday', 'hour'):
    df[component] = getattr(watched_dt, component)

In [32]:
# Count watched videos along each calendar dimension.
# NOTE: .count() only counts rows whose video_url is not missing.
per_year = df.groupby('year')['video_url'].count()
per_month = df.groupby('month')['video_url'].count()
per_day_of_week = df.groupby('weekday')['video_url'].count()
per_hour = df.groupby('hour')['video_url'].count()

# One entry per panel of the 2x2 grid, listed in row-major order so the
# subplot titles line up with the traces:
# (row, col, counts, subplot title, x-axis label)
panels = [
    (1, 1, per_year, 'Videos watched per year', 'Year'),
    (1, 2, per_month, 'Videos watched per month', 'Month'),
    (2, 1, per_day_of_week, 'Videos watched per day of week', 'Day of week'),
    (2, 2, per_hour, 'Videos watched per hour', 'Hour of day'),
]

# Build the figure: one bar chart per panel, each with labelled axes
fig = make_subplots(rows=2, cols=2, subplot_titles=[panel[3] for panel in panels])
for row, col, counts, panel_title, x_label in panels:
    fig.add_trace(go.Bar(x=counts.index, y=counts.values, name=panel_title), row=row, col=col)
    fig.update_xaxes(title_text=x_label, row=row, col=col)
    fig.update_yaxes(title_text='Videos', row=row, col=col)

# Years and months are whole numbers: force integer ticks so the axis never
# shows fractional values such as 2019.5
fig.update_xaxes(dtick=1, row=1, col=1)
fig.update_xaxes(dtick=1, row=1, col=2)

# pandas' .dt.weekday encodes Monday as 0 and Sunday as 6; show names instead
weekday_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
fig.update_xaxes(tickmode='array', tickvals=list(range(7)), ticktext=weekday_names, row=2, col=1)

# Use a real layout title for the whole figure (the previous paper-anchored
# annotation repeated the first subplot's title and mislabelled the figure).
# The legend is hidden because each panel is already identified by its title.
fig.update_layout(title_text='Watch history overview', title_x=0.5, showlegend=False)

fig.show()

In [33]:
# Save the interactive figure as a standalone HTML file in the working directory
html_output = 'watch-history.html'
fig.write_html(html_output)