#### Disclaimer

*Note: This was not part of my final project. I just put it here as an example of how I started out. See fantasymap.ipynb for narrative and details.*

In [None]:
import json
import os
from datetime import datetime, timedelta
from urllib.parse import urlparse, urljoin

import pandas as pd
import pytz
import requests
from bs4 import BeautifulSoup

In [None]:
# Need to import several files, since my browser has separate profiles.
# Forward slashes are used on purpose: "\B" inside a normal string literal is
# an invalid escape sequence, and "/" works as a path separator on Windows too.

browser_edge_personal = pd.read_csv("DayInTheLifeData/BrowserHistory_2_25_24_personal.csv")
browser_edge_work = pd.read_csv("DayInTheLifeData/BrowserHistory_2_25_24_work.csv")

In [None]:
# Parse the DateTime column straight into timezone-aware UTC timestamps.
# utc=True converts values that already carry an offset and localizes naive
# values as UTC, so this works whether or not the export includes a timezone
# (a bare .dt.tz_convert('UTC') raises TypeError on naive timestamps).
browser_edge_personal['DateTime'] = pd.to_datetime(browser_edge_personal['DateTime'], utc=True)
browser_edge_work['DateTime'] = pd.to_datetime(browser_edge_work['DateTime'], utc=True)

# Calculate the date threshold (2 days ago) in UTC timezone
two_days_ago = datetime.now(pytz.utc) - timedelta(days=2)

# Filter the DataFrames to include only rows less than 2 days old.
# .copy() makes each result an independent frame, so adding columns to it
# later does not trigger pandas' SettingWithCopyWarning.
browser_edge_personal = browser_edge_personal[browser_edge_personal['DateTime'] > two_days_ago].copy()
browser_edge_work = browser_edge_work[browser_edge_work['DateTime'] > two_days_ago].copy()

In [None]:
def add_bare_url_column(df, url_column_name):
    """Add a 'BareURL' column holding the network location of each URL.

    Args:
        df (DataFrame): Frame to extend; it is modified in place.
        url_column_name (str): Name of the column containing the full URLs.

    Returns:
        DataFrame: The same ``df`` object, now with a 'BareURL' column.
        Rows whose URL is missing (NaN/None) or is not a string get ''.
    """
    def extract_domain(url):
        # Missing cells arrive as float NaN, which urlparse cannot handle.
        if not isinstance(url, str):
            return ""
        return urlparse(url).netloc

    # Add a new column with just the bare URL (domain)
    df['BareURL'] = df[url_column_name].apply(extract_domain)
    return df

In [None]:
# Derive the BareURL (domain) column for both browser profiles
browser_edge_personal, browser_edge_work = (
    add_bare_url_column(history, 'NavigatedToUrl')
    for history in (browser_edge_personal, browser_edge_work)
)

In [None]:
# For gathering favicons

def fetch_favicon_duckduckgo(domain, timeout=10):
    """Download the favicon for a domain from DuckDuckGo's icon endpoint.

    Args:
        domain (str): Bare host name, e.g. "example.com".
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout a stalled connection would block indefinitely.

    Returns:
        bytes | None: The raw response body, or None when ``domain`` is empty
        or not a string, or when the request fails (the error is printed).
    """
    # Nothing sensible can be requested for a missing or empty domain.
    if not isinstance(domain, str) or not domain:
        return None

    # Construct the URL for fetching the favicon.ico
    url = f"https://icons.duckduckgo.com/ip3/{domain}.ico"
    try:
        # Fetch the favicon.ico image
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching favicon for {domain}: {e}")
        return None
    return response.content

# Function to fetch favicons for each domain in the DataFrame
def fetch_favicons_df(df):
    """Fetch the favicon for every distinct domain in ``df['BareURL']``.

    Args:
        df (DataFrame): Frame with a 'BareURL' column of domains.

    Returns:
        dict: Maps each domain to the value returned by
        ``fetch_favicon_duckduckgo`` for it, in first-seen order.
    """
    # dict.fromkeys drops repeated domains while keeping their order, so each
    # domain costs one HTTP request instead of one request per history row.
    return {
        domain: fetch_favicon_duckduckgo(domain)
        for domain in dict.fromkeys(df['BareURL'])
    }




In [None]:
def fetch_favicons_df(df):
    """Collect favicons for the distinct domains found in ``df['BareURL']``.

    Returns a dict mapping each domain to whatever
    ``fetch_favicon_duckduckgo`` returned for it. A domain whose fetch raises
    is reported on stdout and left out of the result.
    """
    icons_by_domain = {}
    # A set collapses repeated domains so each one is requested only once
    for host in set(df['BareURL']):
        try:
            icon = fetch_favicon_duckduckgo(host)
        except Exception as err:
            print(f"Error fetching favicon for {host}: {err}")
            continue
        icons_by_domain[host] = icon
    return icons_by_domain


In [None]:
import os

def fetch_favicons_df(df, folder_name):
    """
    Fetches favicons for unique domains in a DataFrame and saves them to a folder.

    Args:
        df (DataFrame): The DataFrame containing the 'BareURL' column with domain URLs.
            It is modified in place.
        folder_name (str): The name of the folder to save the favicons.
            Created (including parents) if it does not exist.

    Returns:
        DataFrame: The original DataFrame with a new 'FaviconPath' column containing
        the path of the saved favicon for each row, or None where the domain is
        empty/missing or no favicon could be fetched.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(folder_name, exist_ok=True)

    favicon_paths = {}
    for domain in set(df['BareURL']):
        # Skip empty or missing (NaN) domains: there is nothing to request
        if not isinstance(domain, str) or not domain:
            continue

        favicon = fetch_favicon_duckduckgo(domain)
        if favicon is None:
            continue

        # Domains may carry characters that are illegal in file names
        # (e.g. the ':' of "localhost:8080" on Windows); replace those with
        # '_'. Ordinary host names pass through unchanged.
        safe_name = "".join(
            ch if ch.isalnum() or ch in "._-" else "_" for ch in domain
        )
        file_path = os.path.join(folder_name, f"{safe_name}_favicon.ico")
        with open(file_path, "wb") as f:
            f.write(favicon)

        # Record the path only once the file has actually been written
        favicon_paths[domain] = file_path
        print(f"Favicon for {domain} saved as {file_path}")

    df['FaviconPath'] = df['BareURL'].map(favicon_paths.get)
    return df


In [None]:
# Download favicons for both profiles, each into its own folder
browser_edge_personal, browser_edge_work = (
    fetch_favicons_df(history, icon_folder)
    for history, icon_folder in (
        (browser_edge_personal, "browser_edge_personal_favicons"),
        (browser_edge_work, "browser_edge_work_favicons"),
    )
)

In [None]:
# Export - General: write each processed history frame to its own CSV
for history, csv_name in (
    (browser_edge_personal, 'browser_edge_personal_processed.csv'),
    (browser_edge_work, 'browser_edge_work_processed.csv'),
):
    history.to_csv(csv_name, index=False)

In [None]:
# Export for After Effects

def convert_to_ae_format(df, output_file):
    """Write the history rows as a JavaScript array for an After Effects script.

    The output has the form ``var csvData = [ {...}, {...}, ];`` with one
    object per DataFrame row.

    Args:
        df (DataFrame): Frame with the columns 'DateTime', 'NavigatedToUrl',
            'PageTitle', 'BareURL' and 'FaviconPath'.
        output_file (str): Path of the script file to create or overwrite.

    Returns:
        None. The script is written to ``output_file``.
    """
    fields = ("DateTime", "NavigatedToUrl", "PageTitle", "BareURL", "FaviconPath")

    def js_string(value):
        # json.dumps emits a double-quoted literal with quotes, backslashes,
        # control characters and non-ASCII text escaped. Page titles with
        # quotes or Windows paths with backslashes would otherwise corrupt
        # the generated script. Plain values come out unchanged.
        return json.dumps(str(value))

    # Explicit encoding so the result does not depend on the platform default
    with open(output_file, 'w', encoding='utf-8') as f:
        # Write the beginning of the script
        f.write('var csvData = [\n')

        # Write each row as an object in the JavaScript array
        for _, row in df.iterrows():
            members = ',\n'.join(
                f'        {name}: {js_string(row[name])}' for name in fields
            )
            f.write('    {\n' + members + '\n    },\n')

        # Write the end of the script
        f.write('];')

# Example usage: export the personal-profile history for After Effects
convert_to_ae_format(
    df=browser_edge_personal,
    output_file='after_effects_script.jsx',
)
