In [None]:
from pathlib import Path

from syftbox.server.analytics import parse_analytics_logs

# Load logs

In [None]:
import os

# Directory containing the server's analytics logs. Set the
# SYFTBOX_ANALYTICS_LOGS_DIR environment variable to point the notebook at
# another location; without it, the original developer-local path is used.
logs_dir = Path(os.environ.get("SYFTBOX_ANALYTICS_LOGS_DIR", "/Users/eelco/dev/syft/.server/data/logs/"))

# Each parsed log is accessed below as a mapping with "endpoint", "email" and
# "timestamp" keys (plus "event_name" for /log_event entries).
logs = parse_analytics_logs(logs_dir)

# Calculate metrics

In [None]:
from collections import defaultdict

# user email -> {event key -> most recent log entry stored under that key}.
# Later entries overwrite earlier ones, so "latest" presumably relies on
# `logs` being in chronological order -- TODO confirm against the parser.
latest_events_per_user = defaultdict(dict)

for entry in logs:
    endpoint = entry["endpoint"]
    user_events = latest_events_per_user[entry["email"]]
    # Client-side events all arrive through /log_event; additionally index
    # them by event_name so each kind of client event gets its own key.
    if endpoint == "/log_event":
        user_events[entry["event_name"]] = entry
    # Every entry (including /log_event ones) is also tracked by endpoint.
    user_events[endpoint] = entry

# Convert to a plain dict so later lookups cannot silently create new users.
latest_events_per_user = dict(latest_events_per_user)

In [None]:
import pandas as pd


def create_user_metrics(latest_events_per_user):
    """Build a per-user table of last-activity timestamps.

    Args:
        latest_events_per_user: Mapping of user -> {event key -> log entry},
            where every log entry is a mapping with a "timestamp" value.

    Returns:
        A DataFrame indexed by "user" with one column per event key seen for
        any user (holding that event's timestamp), plus two derived columns:

        - "last_seen": the latest timestamp across all of the user's events.
        - "last_file_event": the latest timestamp among the /sync/create,
          /sync/delete and /sync/apply_diff events, or None if the user has
          none of them.

        For empty input, an empty frame with the "user" index and only the
        two derived columns is returned.
    """
    # Endpoints that count as a file creation, deletion or modification.
    modification_endpoints = ("/sync/create", "/sync/delete", "/sync/apply_diff")

    metrics = []
    for user, endpoints in latest_events_per_user.items():
        timestamps = {name: event["timestamp"] for name, event in endpoints.items()}

        # One column per event key, holding its last access date.
        user_data = {"user": user, **timestamps}

        # default=None keeps a user without any events from raising ValueError.
        user_data["last_seen"] = max(timestamps.values(), default=None)
        user_data["last_file_event"] = max(
            (ts for name, ts in timestamps.items() if name in modification_endpoints),
            default=None,
        )

        metrics.append(user_data)

    if not metrics:
        # A frame built from an empty list has no "user" column, so
        # set_index("user") would raise KeyError; build the columns explicitly.
        return pd.DataFrame(columns=["user", "last_seen", "last_file_event"]).set_index("user")

    # Convert to DataFrame, setting user as the index.
    return pd.DataFrame(metrics).set_index("user")


# Per-user table: one row per user, one column per event key, plus the
# derived "last_seen" and "last_file_event" columns.
user_metrics = create_user_metrics(latest_events_per_user)

In [None]:
from datetime import datetime, timedelta, timezone


def calculate_metrics_with_start_date(user_metrics, start_date):
    """Count users whose latest activity of each kind is on or after start_date.

    Args:
        user_metrics: DataFrame with one row per user and timestamp columns
            such as "/register", "last_seen", "last_file_event" and
            "app_install".
        start_date: Lower bound (inclusive) that the timestamps are compared
            against; it must be comparable with the column values.

    Returns:
        Dict mapping a human-readable metric description to the number of
        users whose timestamp in the matching column is >= start_date.
        Missing values never count, and a missing column counts as 0.
    """

    def count_since(column):
        # A column is absent when no user has a value for it (for example,
        # nobody ever installed an app). That means zero matching users, not
        # a KeyError.
        if column not in user_metrics.columns:
            return 0
        return int(user_metrics[column].ge(start_date).sum())

    return {
        "number of users that registered": count_since("/register"),
        "number of users that connected to server": count_since("last_seen"),
        "number of users that created, modified, or deleted a file": count_since("last_file_event"),
        "number of users that installed an app": count_since("app_install"),
    }


def calculate_global_metrics(user_metrics):
    """Summarise user activity over the last day, week and month.

    Args:
        user_metrics: Per-user DataFrame of last-activity timestamps, passed
            on unchanged to calculate_metrics_with_start_date.

    Returns:
        DataFrame with one column per time window ("1 day", "1 week",
        "1 month") and one row per metric description.
    """
    # Timezone-aware "now" in UTC; presumably the logged timestamps are
    # timezone-aware too -- TODO confirm.
    now = datetime.now(timezone.utc)

    # Window label -> how far back the window reaches (a month is 30 days).
    lookbacks = {
        "1 day": timedelta(days=1),
        "1 week": timedelta(weeks=1),
        "1 month": timedelta(days=30),
    }

    return pd.DataFrame(
        {label: calculate_metrics_with_start_date(user_metrics, now - span) for label, span in lookbacks.items()}
    )


# Active-user counts for the 1-day, 1-week and 1-month (30-day) windows
# ending at the time this cell runs.
global_metrics = calculate_global_metrics(user_metrics)

# Display results

| Metric                  | Description                                                                |
|-------------------------|----------------------------------------------------------------------------|
| `/register`         | Timestamp of when the user registered.                                     |
| `app_install`           | Timestamp of the most recent app install by the user.                      |
| `/log_event`      | Last client-side log event recorded for the user.                          |
| `last_seen`             | Timestamp of the user's most recent activity across all tracked events.    |
| `last_file_event`    | Most recent timestamp of any file-related activity (creation/modification/deletion). |
| `/sync/create`     | Timestamp of the last file created by the user.                            |
| `/sync/get_metadata`| Timestamp of the last time file metadata was accessed by the user.         |
| `/sync/apply_diff`    | Timestamp of the last file modification by the user.                       |
| `/sync/delete`     | Timestamp of the last file deletion by the user.                           |

In [None]:
# Display the per-user table of last-activity timestamps.
user_metrics

In [None]:
# Display the active-user counts per time window.
global_metrics