# In [None]:
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime


def assign_tags(line):
    """Classify one line of a bookmarks export by its leading markup.

    Args:
        line: A single line of text; matching is done on its very first
            characters, so leading whitespace is significant.

    Returns:
        "TAG_ITEM" for a bookmark link, "TAG_SEP" for list open/close
        separators, "TAG_FOLDER" for a folder heading, and "TAG_OTHER"
        for anything else.
    """
    # Checked in order; the first matching prefix group decides the tag.
    prefix_groups = (
        (("<DT><A HREF=",), "TAG_ITEM"),
        (("<DT><p>", "</DL><p>", "<DL><p>"), "TAG_SEP"),
        (("<DT><H3",), "TAG_FOLDER"),
    )
    for prefixes, tag in prefix_groups:
        if line.startswith(prefixes):
            return tag
    return "TAG_OTHER"


def parse_line(html_string):
    """Parse one HTML line and extract the bookmark/folder attributes.

    The first ``<a>`` element is used if present, otherwise the first
    ``<h3>`` element.

    Args:
        html_string: A single line of HTML markup.

    Returns:
        A tuple ``(href, add_date, icon, element_value, last_modify)``.
        Attributes missing from the element are ``None``; if the line
        contains neither an ``<a>`` nor an ``<h3>`` element, every entry
        is ``None``.
    """
    soup = BeautifulSoup(html_string, 'html.parser')
    element = soup.find('a') or soup.find('h3')

    if element is None:
        return None, None, None, None, None

    # html.parser lower-cases attribute names, so every lookup must use the
    # lower-case spelling; an upper-case 'LAST_MODIFIED' key never matches
    # and would always yield None.
    href = element.get('href')
    add_date = element.get('add_date')
    last_modify = element.get('last_modified')
    icon = element.get('icon')
    element_value = element.text

    return href, add_date, icon, element_value, last_modify


def get_time_string(timestamp):
    """Format a Unix timestamp as a ``YYYY-MM`` month label.

    Args:
        timestamp: Seconds since the epoch, as anything ``int()`` accepts
            (for example an integer or a numeric string).

    Returns:
        The year and month of the timestamp in local time, e.g. "2023-11".
    """
    seconds = int(timestamp)
    moment = datetime.fromtimestamp(seconds)
    return f"{moment:%Y-%m}"


def plot_bookmarks(df):
    """Draw a bar chart of how many bookmarks were added in each month.

    Args:
        df: DataFrame with an ``add_date_str`` column; each distinct value
            becomes one bar, ordered by the sorted label.
    """
    monthly_counts = df["add_date_str"].value_counts().sort_index()
    axes = monthly_counts.plot.bar(
        figsize=(20, 3),
        title="My bookmarks creation profile",
    )
    axes.set_ylabel('Number of book marks')
    axes.set_xlabel('Bookmarks Add Month')


def main(file_path="./bookmarks/bookmarks_11_12_23.html",
         output_path="./bookmarks/bookmarks.csv"):
    """Parse a bookmarks export, plot its creation profile and save a CSV.

    Every bookmark line is paired with the most recently seen folder
    heading above it (folder name, folder add date, folder last-modified
    date). The resulting table is plotted per month and written to disk.

    Args:
        file_path: Path of the bookmarks HTML export to read.
        output_path: Path of the CSV file to write.
    """
    # Decode explicitly instead of relying on the platform default encoding.
    # NOTE(review): assumes the export is UTF-8, as browser bookmark exports
    # normally are -- confirm for other sources.
    with open(file_path, 'r', encoding="utf-8") as file:
        lines = [line.strip() for line in file]

    results = []
    current_folder = current_a_date = current_m_date = None
    for line in lines:
        tag = assign_tags(line)
        # Only bookmark items and folder headings carry data.
        if tag not in ("TAG_ITEM", "TAG_FOLDER"):
            continue

        href, add_date, icon, text, last_modify = parse_line(line)
        if tag == "TAG_FOLDER":
            # Remember the folder so the following items can be attributed.
            current_folder = text
            current_a_date = add_date
            current_m_date = last_modify
        else:
            results.append((current_folder, current_a_date, current_m_date,
                            text, href, add_date, icon))

    df_results = pd.DataFrame(
        results,
        columns=["folder", "folder_add_date", "folder_last_modify",
                 "element", "href", "add_date", "icon"],
    )
    # Skip missing add dates rather than crashing in get_time_string; such
    # rows keep a missing month label and are not counted in the plot.
    df_results["add_date_str"] = df_results["add_date"].map(
        get_time_string, na_action="ignore")

    plot_bookmarks(df_results)

    df_results.to_csv(output_path, index=False)


# Run the bookmark analysis only when executed as a script, not on import.
if __name__ == "__main__":
    main()