In [None]:
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta

In [None]:
def check_tracks(df):
    """
    Summarize the time coverage of one tracking DataFrame.

    The frame must provide a 'time' column (unix timestamp in milliseconds)
    and a 'track_id' column. Timestamps are shifted by +2 hours (Cape Town
    local time) before any metric is derived. The input frame is not modified.

    Returns a pandas Series with the entries:
    - 'Date': calendar date of the first row (after the +2 h shift)
    - 'Entries': number of rows
    - 'Unique Tracks': number of distinct track IDs
    - 'Start Time': earliest timestamp, formatted 'HH:MM:SS'
    - 'End Time': latest timestamp, formatted 'HH:MM:SS'
    - 'Missing Minutes Count': number of minutes without any observation
    - 'Missing Minutes': those minutes as a sorted list of 'HH:MM' strings

    For an empty frame the counts are 0, the list is empty and the
    date/start/end entries are None.
    """
    columns = ['Date', 'Entries', 'Unique Tracks', 'Start Time', 'End Time',
               'Missing Minutes Count', 'Missing Minutes']

    # Nothing to analyse: return an explicit "empty" row instead of failing
    # on the first-row lookup below.
    if df.empty:
        return pd.Series([None, 0, 0, None, None, 0, []], index=columns)

    # Work on a separate Series so the caller's 'time' column keeps its raw
    # millisecond values. The +2 h shift converts to Cape Town local time.
    local_time = pd.to_datetime(df['time'], unit='ms') + pd.Timedelta(hours=2)

    date = local_time.iloc[0].date()
    rows = df.shape[0]
    unique_tracks = df['track_id'].nunique()
    start_time = local_time.min()
    end_time = local_time.max()

    # Build the expected minute grid on whole-minute boundaries. The observed
    # minutes are floored, so the grid must be floored too; otherwise a start
    # time such as 00:13:20 yields 00:13:20, 00:14:20, ... and no grid point
    # ever matches an observed minute.
    all_minutes = pd.date_range(start=start_time.floor('min'),
                                end=end_time.floor('min'),
                                freq='min')
    observed_minutes = pd.DatetimeIndex(local_time.dt.floor('min').unique())
    missing_minutes = all_minutes.difference(observed_minutes).strftime('%H:%M').tolist()
    missing_minutes_count = len(missing_minutes)

    return pd.Series(
        [date, rows, unique_tracks,
         start_time.strftime('%H:%M:%S'), end_time.strftime('%H:%M:%S'),
         missing_minutes_count, missing_minutes],
        index=columns,
    )

# Directory containing CSV files
directory = '../data-clean/tracking/unlinked/'

# Column layout of the summary table (matches the Series returned by check_tracks)
result_columns = ['Date', 'Entries', 'Unique Tracks', 'Start Time', 'End Time',
                  'Missing Minutes Count', 'Missing Minutes']

# Collect one summary row per CSV file. Rows are gathered in a list and turned
# into a DataFrame once at the end: concatenating inside the loop re-copies the
# whole table on every iteration and warns when the initial frame is empty.
# Filenames are sorted so the table order is reproducible (os.listdir returns
# entries in arbitrary order).
result_rows = []
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.csv'):
        continue
    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)
    result_rows.append(check_tracks(df))

# An empty list still yields a frame with the expected columns.
results_df = pd.DataFrame(result_rows, columns=result_columns)

# Print the final table
print(results_df)