In [4]:
import pandas as pd
import plotly.express as px
import nbformat
# Load the URL list. The file is read without a header row, so the columns
# are numbered from 0. NOTE(review): the renaming below assumes exactly three
# columns in the CSV -- confirm against the file.
df = pd.read_csv('urls_cleaned.csv', header=None, encoding='utf-8')

# Parse the first 8 characters of column 0 as a YYYYMMDD date. Values that do
# not match the format become NaT (errors='coerce') instead of raising.
df[3] = pd.to_datetime(df[0].astype(str).str[:8], format="%Y%m%d", errors='coerce')

df.columns = ['id', 'url', 'status', 'date']

# 'YYYY-WW' label per row. '%U' counts weeks starting on Sunday; days before
# the year's first Sunday fall in week 00.
df['year_week'] = df['date'].dt.strftime('%Y-%U')

# Sunday that starts each row's week. This is computed arithmetically rather
# than by re-parsing 'year_week', which avoids a strftime/strptime round trip
# and its week-00 edge case (a Sunday that belongs to the previous year).
# dayofweek is Monday=0 .. Sunday=6, so (dayofweek + 1) % 7 is the number of
# days elapsed since the most recent Sunday. NaT rows stay NaT.
df['week_start_date'] = df['date'] - pd.to_timedelta(
    (df['date'].dt.dayofweek + 1) % 7, unit='D'
)

# Number of non-null URLs per week, keyed by the week's starting Sunday.
# Rows whose date failed to parse (NaT) are dropped by groupby.
df_weekly = df.groupby('week_start_date')['url'].count().reset_index()

# Non-null counts of every column per 'YYYY-WW' label, ordered by label.
df_year_week = df.groupby('year_week').count().reset_index()
df_year_week = df_year_week.sort_values('year_week')

# Plot the weekly URL counts as a line chart, one point per week start date.
fig = px.line(
    df_weekly,
    x='week_start_date',
    y='url',
    template='seaborn',
    labels=dict(week_start_date='Week Starting', url='Number of URLs'),
)

# Set the chart title (centred horizontally via title_x=0.5) and axis titles.
fig.update_layout(
    title='Number of URLs per Week',
    title_x=0.5,
    xaxis_title='Week Starting',
    yaxis_title='Number of URLs',
)

# Render the figure.
fig.show()
