# TimeSnapper Activity Log Generator
This notebook extracts activities from TimeSnapper screenshots, including code editing, ChatGPT discussions, web searches, and email checks.
It uses OCR to read the screenshots and generate a log of actions taken during coding sessions.

In [None]:
%pip install pytesseract pillow pandas datetime ace_tools

In [None]:
import pytesseract
from PIL import Image
import re
import json
import pandas as pd
from datetime import datetime
import os

# Function to extract text from image using OCR
def extract_text_from_image(image_path):
    """Run OCR on a single image file and return the recognized text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Whatever ``pytesseract.image_to_string`` produces for the image.
    """
    # Open the image in a with-block so its file handle is released as soon
    # as OCR finishes; this function is called once per screenshot, so an
    # unclosed handle would accumulate across a whole directory.
    with Image.open(image_path) as image:
        return pytesseract.image_to_string(image)

# Function to classify activity based on extracted text
def classify_activity(text):
    """Map OCR text to an activity label, or None when nothing matches.

    Rules are tried in order and the first hit wins:
      1. "chatgpt" anywhere (case-insensitive)       -> "ChatGPT Discussion"
      2. an "http://", "https://" or "www." fragment -> "Web Search"
      3. "inbox" or "email" (case-insensitive)       -> "Email Check"
      4. "pycharm" or ".py" (case-insensitive)       -> "Code Editing"
    """
    lowered = text.lower()

    if "chatgpt" in lowered:
        return "ChatGPT Discussion"

    # The URL pattern is matched against the untouched text, so it is
    # case-sensitive, unlike the keyword checks.
    if re.search(r"(https?://|www\.)", text):
        return "Web Search"

    if any(keyword in lowered for keyword in ("inbox", "email")):
        return "Email Check"

    if any(marker in lowered for marker in ("pycharm", ".py")):
        return "Code Editing"

    return None

# Function to extract metadata from image (timestamps, file names, etc.)
def extract_metadata(image_path):
    """Build the metadata record for one screenshot.

    The capture time is read from the file name when it contains a
    ``YYYY-MM-DD--HH-MM-SS`` stamp (e.g. '2024-09-10--14-05-33 UTC.jpg'), so
    log rows reflect when the screenshot was taken rather than when it was
    processed. If the name carries no parseable stamp, the current local time
    is used instead.

    Args:
        image_path: Path of the screenshot file.

    Returns:
        dict with:
          "timestamp": time formatted as "%Y-%m-%d %H:%M:%S"
          "filename":  ``image_path`` unchanged
    """
    captured_at = None
    # Only the base name is searched so date-like directory names are ignored.
    stamp = re.search(
        r"\d{4}-\d{2}-\d{2}--\d{2}-\d{2}-\d{2}", os.path.basename(image_path)
    )
    if stamp:
        try:
            captured_at = datetime.strptime(stamp.group(0), "%Y-%m-%d--%H-%M-%S")
        except ValueError:
            # Digits had the right shape but are not a real date/time
            # (e.g. month 13); fall through to the processing-time fallback.
            captured_at = None

    if captured_at is None:
        captured_at = datetime.now()

    metadata = {
        "timestamp": captured_at.strftime("%Y-%m-%d %H:%M:%S"),
        "filename": image_path
    }
    return metadata

# Function to scan a directory and get all image files
def get_screenshot_files(directory):
    """Return the screenshot image paths found directly inside a directory.

    A file qualifies when its name ends in .png, .jpg or .jpeg (compared
    case-insensitively) and contains "UTC", matching the naming pattern
    '2024-09-10--XX-XX-XX UTC.jpg'.

    Args:
        directory: Directory to scan (not recursive).

    Returns:
        List of ``os.path.join(directory, name)`` paths, sorted. Because the
        file names start with the capture date and time, sorted order is
        chronological order.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    # os.listdir returns entries in arbitrary order; sort so the resulting
    # activity log is deterministic and in capture order.
    files = sorted(
        os.path.join(directory, f)
        for f in os.listdir(directory)
        if f.lower().endswith(image_extensions) and "UTC" in f
    )
    print("Files found:", files)  # Print the files being detected
    return files

# Function to process screenshots and generate activity log
def process_screenshots(screenshot_paths):
    """OCR each screenshot, classify it, and collect the recognized ones.

    Args:
        screenshot_paths: Iterable of image file paths.

    Returns:
        List of dicts with keys "timestamp", "activity", "filename" and
        "details" (the first 100 characters of the OCR text). Screenshots
        for which classify_activity yields no label are left out.
    """
    log_entries = []

    for screenshot in screenshot_paths:
        ocr_text = extract_text_from_image(screenshot)
        # Debug aid: dump the raw OCR output for every screenshot.
        print(f"Extracted text from {screenshot}:", ocr_text)

        label = classify_activity(ocr_text)
        info = extract_metadata(screenshot)

        # Unclassified screenshots do not appear in the log.
        if not label:
            continue

        entry = dict(
            timestamp=info["timestamp"],
            activity=label,
            filename=info["filename"],
            details=ocr_text[:100],  # Summary of extracted text
        )
        log_entries.append(entry)

    return log_entries

# Function to save the activity log as a JSON file
def save_activity_log(activities, output_path):
    """Write the activity records to ``output_path`` as indented JSON.

    Args:
        activities: List of activity dicts as returned by process_screenshots.
        output_path: Destination file; overwritten if it already exists.
    """
    with open(output_path, "w", encoding="utf-8") as log_file:
        # ensure_ascii=False keeps any non-ASCII OCR text readable in the file.
        json.dump(activities, log_file, indent=2, ensure_ascii=False)


# Specify the directory containing screenshots
screenshot_directory = '/Users/michaeloboyle/Documents/GitHub/scrantenna/2024-09-10'

# Get list of all screenshot files from the directory
screenshot_files = get_screenshot_files(screenshot_directory)

# Process screenshots and generate log
activities = process_screenshots(screenshot_files)

# Convert to dataframe for visualization
df = pd.DataFrame(activities)

# Display the dataframe
print(df)

# Optionally, save dataframe to a CSV file for further use
df.to_csv('activity_log.csv', index=False)

# Save the log as a JSON file
save_activity_log(activities, 'activity_log.json')