In [1]:
import os
import requests
import json
import base64
import time
from glob import glob

def encode_image_to_base64(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    The file is opened in binary mode, so any file type works; the result
    is a ``str`` (the base64 bytes decoded as UTF-8).
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()

    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def analyze_image_with_llava(image_path, prompt, model="llava",
                             api_url="http://localhost:11434/api/generate",
                             timeout=300):
    """Send an image and a prompt to an Ollama model and return its answer.

    Args:
        image_path: Path of the image file to analyze.
        prompt: Text prompt sent alongside the image.
        model: Name of the Ollama model to query.
        api_url: URL of the Ollama ``generate`` endpoint.
        timeout: Seconds ``requests`` waits for the server before giving
            up. Because streaming is disabled, the reply only arrives once
            generation has finished, so the default is deliberately
            generous. Pass ``None`` to wait indefinitely.

    Returns:
        The value of the ``response`` field of the JSON reply, or
        ``"No response generated"`` when that field is absent. If the
        request fails or the reply is not valid JSON, a human-readable
        error string is returned instead of raising.
    """
    # The payload carries the image inline as base64 text.
    base64_image = encode_image_to_base64(image_path)

    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "images": [base64_image]
    }

    try:
        # Without a timeout a stalled server would block the run forever.
        response = requests.post(api_url, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()
    except requests.exceptions.RequestException as e:
        return f"Error connecting to Ollama API: {str(e)}"
    except ValueError as e:
        # Older requests versions raise a plain ValueError on a non-JSON
        # body, which the handler above does not cover.
        return f"Error parsing Ollama API response: {str(e)}"

    return result.get('response', "No response generated")

# Ordered (keyword, prompt) rules: the first keyword found in the image name
# wins, so the order of this tuple is significant.
_PROMPT_RULES = (
    ("heatmap", "This is a heatmap visualization of user activity data. Please analyze the patterns visible in this heatmap. What time periods show the highest activity? Are there any notable patterns across users or time periods? What insights can you provide about user behavior based on this visualization?"),
    ("distribution", "This visualization shows the distribution of keys pressed. Please analyze which keys are most frequently used and what this might indicate about user typing patterns or behavior. Are certain keys dominating the distribution? What might this suggest about the nature of the input?"),
    ("timeline", "This timeline visualization shows activity over time. What patterns are visible in the timeline? Are there periods of high activity or inactivity? Can you identify any rhythms, clusters, or unusual patterns in the data? What might these patterns suggest about user behavior?"),
    ("speed", "This graph shows typing speed over time. Please analyze the variation in typing speed. Are there periods of faster or slower typing? Is the typing speed consistent or highly variable? What might the typing speed patterns suggest about the user's behavior or state?"),
    ("rhythm", "This visualization shows keystroke rhythm patterns. What can you infer about the typing rhythm? Are keystrokes evenly spaced or clustered? Are there patterns that might indicate copying and pasting versus natural typing? What conclusions might be drawn from the rhythm patterns shown?"),
    ("engagement", "This shows user engagement metrics. Please analyze the levels of engagement across different users. Which users appear most engaged? Are there patterns in the types of engagement shown? What conclusions might you draw about different user behaviors?"),
    ("intervals", "This graph shows the distribution of time intervals between keystrokes. What does this distribution tell us about typing patterns? Are the intervals consistent or highly variable? Are there outliers that might indicate pauses or breaks in typing? What might this suggest about the nature of the typing activity?"),
    ("polar", "This is a polar chart showing activity distribution by hour of day. Which hours show the highest activity? Are there clear patterns in the daily cycle? What might these patterns suggest about when users are most active or the nature of their activity?"),
)

# Used when no keyword in _PROMPT_RULES matches the image name.
_DEFAULT_PROMPT = "Please analyze this data visualization and provide insights about the patterns, trends, and anomalies you observe. What are the key takeaways from this visualization? What does it reveal about user behavior or system activity?"

# General guidance appended to every prompt.
_PROMPT_SUFFIX = " If you notice any potential indicators of unusual patterns or anomalies that might warrant further investigation, please highlight those as well."


def _select_prompt(image_name):
    """Return the full prompt for ``image_name`` based on its keywords."""
    for keyword, prompt in _PROMPT_RULES:
        if keyword in image_name:
            return prompt + _PROMPT_SUFFIX
    return _DEFAULT_PROMPT + _PROMPT_SUFFIX


def main():
    """Analyze every PNG in the visualization folders and write a report.

    For each directory that exists, every ``*.png`` file is sent to
    ``analyze_image_with_llava`` with a prompt chosen from the file name,
    and the returned text is written to ``graph_inferences.md`` under a
    heading for that image. Missing directories are reported and skipped.
    """
    # Directories containing our visualization images
    directories = ['tab_switch_visuals', 'keystroke_visuals']

    # Output file for inferences
    output_file = "graph_inferences.md"

    # Explicit UTF-8: model output may contain characters that the platform
    # default encoding cannot represent.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("# AI-Generated Inferences for Data Visualizations\n\n")

        for directory in directories:
            if not os.path.isdir(directory):
                print(f"Directory {directory} not found. Skipping.")
                continue

            f.write(f"## {directory.replace('_', ' ').title()}\n\n")

            # glob returns files in arbitrary order; sort so the report
            # layout is the same on every run.
            image_files = sorted(glob(f"{directory}/*.png"))

            for image_path in image_files:
                # splitext removes only the extension, whereas replacing
                # '.png' would also strip it from the middle of a name.
                image_name = os.path.splitext(os.path.basename(image_path))[0]
                display_name = image_name.replace('_', ' ').title()

                print(f"Analyzing {display_name}...")

                prompt = _select_prompt(image_name)

                # Get inference from LLaVa
                inference = analyze_image_with_llava(image_path, prompt)

                # Markdown treats backslashes as escapes, so normalize
                # Windows path separators for the image link.
                link_target = image_path.replace(os.sep, "/")

                f.write(f"### {display_name}\n\n")
                f.write(f"![{display_name}]({link_target})\n\n")
                f.write("**AI Analysis:**\n\n")
                f.write(f"{inference}\n\n")

                # Avoid rate limiting
                time.sleep(2)

    print(f"Analysis complete! Results saved to {output_file}")

if __name__ == "__main__":
    main()

Analyzing Activity Timeline...
Analyzing Hourly Polar Chart...
Analyzing Key Distribution...
Analysis complete! Results saved to graph_inferences.md
