In [None]:
import os
import pandas as pd
import numpy as np
import json
from datetime import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.dates import DateFormatter
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
from matplotlib.colors import LinearSegmentedColormap
import seaborn as sns

def plot_with_background(ax, alpha=.5,
                         image_path='../data-raw/background/image3195.png',
                         image_extent=(0, 51, -0.02, 14.214)):
    """
    Draw a background image underneath whatever is plotted on ``ax``.

    Parameters:
    - ax: The matplotlib axis object to draw the background on.
    - alpha: Opacity passed to ``imshow`` for the background image. Default is 0.5.
    - image_path: Path of the image file to load. Defaults to the background
      image used throughout this notebook.
    - image_extent: ``(left, right, bottom, top)`` in data coordinates used to
      scale and position the image. Pass ``None`` to let matplotlib use its
      default extent (pixel coordinates).

    Returns:
    The same axis object, so the call can be chained.
    """
    # Load the background image from disk
    img = mpimg.imread(image_path)

    # zorder=-1 keeps the image behind any lines/markers drawn on the axis.
    # ``extent=None`` is imshow's own default, so one call covers both the
    # "explicit extent" and "no extent" cases.
    ax.imshow(img, aspect='auto', extent=image_extent, zorder=-1, alpha=alpha)

    return ax

## Data

In [None]:
# Raw (unlinked) tracks and the number of distinct raw track ids
test_unlinked = pd.read_csv('../data-clean/tracking/unlinked/2024-06-20.csv')
n_unlink = test_unlinked['track_id'].nunique()

# Linked tracks: `mapping` maps raw_track_id -> linked track_id. After the left
# merge both frames contribute a track_id column (suffixed _x / _y); the linked
# id wins where a mapping exists, otherwise the raw id is kept.
mapping = pd.read_csv('../data-clean/tracking/linked/2024-06-20.csv')
test_linked = (
    test_unlinked
    .merge(mapping, left_on='track_id', right_on='raw_track_id', how='left')
    .assign(track_id=lambda merged: merged['track_id_y'].combine_first(merged['track_id_x']))
    .drop(columns=['track_id_x', 'track_id_y', 'raw_track_id'])
)

# Matched ids (preview of the first 30 rows is the cell output)
matches = pd.read_csv('../data-clean/tracking/matched/2024-06-20.csv')
matches.head(30)

### No. of tracks in TB area

In [None]:
def calculate_tb_time(df, min_time=5, max_time=float('inf')):
    """
    Calculate the total time spent in the TB area per track_id, along with start and end times.

    A track is kept only if its time in the TB patient area lies within
    [min_time, max_time] AND its time in the TB staff area is below 1 minute.

    Parameters:
    df (pd.DataFrame): DataFrame containing columns 'track_id' (int), 'time' (int, unix timestamp in ms),
        'in_tb_pat' (bool) and 'in_tb_cs' (bool).
    min_time (int): Minimum time spent in the TB area in minutes for filtering. Default is 5 minutes.
    max_time (float): Maximum time spent in the TB area in minutes for filtering. Default is infinity.

    Returns:
    pd.DataFrame: DataFrame with columns 'track_id', 'total_time_in_tb', 'total_time_in_tb_staff',
        'start_time', and 'end_time' (times formatted as 'HH:MM:SS'), sorted by 'start_time'.
        If no track qualifies, an empty DataFrame with these columns is returned.

    Notes:
    Durations are computed as (number of flagged rows) / 60, i.e. this assumes one row per
    second per track -- TODO confirm against the tracking export.
    """
    columns = ['track_id', 'total_time_in_tb', 'total_time_in_tb_staff', 'start_time', 'end_time']
    results = []

    for track_id, group in df.groupby('track_id'):
        # Rows of this track that lie inside the TB patient area
        tb_pat_group = group[group['in_tb_pat']]
        if tb_pat_group.empty:
            continue

        # Row counts converted to minutes (see the sampling-rate note in the docstring)
        tb_pat_time = group['in_tb_pat'].sum() / 60
        tb_staff_time = group['in_tb_cs'].sum() / 60

        # Keep tracks with enough patient-area time and (almost) no staff-area time
        if min_time <= tb_pat_time <= max_time and tb_staff_time < 1:
            start_time = tb_pat_group['time'].min()
            end_time = tb_pat_group['time'].max()

            results.append({
                'track_id': track_id,
                'total_time_in_tb': tb_pat_time,
                'total_time_in_tb_staff': tb_staff_time,
                'start_time': pd.to_datetime(start_time, unit='ms').strftime('%H:%M:%S'),
                'end_time': pd.to_datetime(end_time, unit='ms').strftime('%H:%M:%S')
            })

    # An empty record list would produce a column-less frame, and sorting it by
    # 'start_time' raises KeyError; return a correctly-shaped empty frame instead.
    if not results:
        return pd.DataFrame(columns=columns)

    # 'start_time' is a zero-padded HH:MM:SS string, so lexicographic order is
    # chronological within a single day.
    result_df = pd.DataFrame(results, columns=columns).sort_values('start_time')

    return result_df

# Linked tracks with at least 5 minutes in the TB patient area (table is the cell output)
tracks_in_tb_pat = calculate_tb_time(test_linked, min_time=5)
tracks_in_tb_pat

In [None]:
def plot_selected_tracks(track_df, selected_id):
    """
    Plots the tracks of selected track IDs from the given DataFrame.

    Each track is drawn as a line over the background image, with an open
    circle at its first row, a cross at its last row and small dots for the
    rows in between. Rows are plotted in the order they appear in ``track_df``
    (presumably chronological -- verify against the loader).

    Track IDs that have no rows in ``track_df`` are skipped with a message
    instead of raising an IndexError.

    Parameters:
    track_df (pd.DataFrame): DataFrame containing columns 'time', 'track_id', 'position_x', 'position_y'.
    selected_id (list of int): List of track IDs to be plotted.

    Returns:
    None
    """
    # Create a subplot with the specified size
    fig, ax = plt.subplots(figsize=(10, 6))

    # Add the background image to the subplot (fully opaque)
    ax = plot_with_background(ax, 1)

    # Restrict once to the requested IDs so the per-track lookups scan fewer rows
    filtered_df = track_df[track_df['track_id'].isin(selected_id)]

    # Plot each track_id separately
    for track_id in selected_id:
        track_data = filtered_df[filtered_df['track_id'] == track_id]

        # Without this guard, .iloc[0] below fails for IDs absent from track_df
        if track_data.empty:
            print(f'Track {track_id} not found; skipping.')
            continue

        xs = track_data['position_x']
        ys = track_data['position_y']

        # Plot the line segments
        ax.plot(xs, ys, label=f'Track {track_id}')

        # Plot the first point as an open circle
        ax.scatter(xs.iloc[0], ys.iloc[0],
                   edgecolor='black', facecolor='none', s=50, label=f'Start {track_id}')

        # Plot the last point as a cross
        ax.scatter(xs.iloc[-1], ys.iloc[-1],
                   color='black', marker='x', s=50, label=f'End {track_id}')

        # Plot the intermediate points as closed circles
        if len(track_data) > 2:
            ax.scatter(xs.iloc[1:-1], ys.iloc[1:-1],
                       color='black', s=10)

    # Add labels and legend
    ax.set_xlabel('Position X')
    ax.set_ylabel('Position Y')
    ax.set_title('Selected Tracks')
    # Only build a legend when at least one track was drawn (avoids the
    # "no artists with labels" warning on an empty plot)
    if ax.get_legend_handles_labels()[0]:
        ax.legend()
    plt.show()

# One figure per track that passed the TB-area time filter
for track_id in tracks_in_tb_pat['track_id']:
    plot_selected_tracks(test_linked, [track_id])

## Evaluate matching

### Plot matched tracks

In [None]:
# One figure per matched record; only strictly positive track ids are plotted
for track_id in matches['track_id']:
    if not track_id > 0:
        continue
    plot_selected_tracks(test_linked, [track_id])

In [None]:
def plot_tracks_in_tb_pat(track_df, start_time, end_time):
    """
    Plot every track that has at least one 'in_tb_pat' row inside [start_time, end_time].

    If no row in the time window is flagged 'in_tb_pat', a message is printed and
    the selection falls back to rows in the window whose position lies in the box
    8 <= position_x <= 11, 2.5 <= position_y <= 5. The selected tracks are then
    drawn in full (all of their rows, not just those inside the window).

    Parameters:
    track_df (pd.DataFrame): DataFrame containing columns 'time', 'track_id', 'position_x', 'position_y', 'in_tb_pat'.
    start_time (int): Lower bound (inclusive) compared against the 'time' column.
    end_time (int): Upper bound (inclusive) compared against the 'time' column.

    Returns:
    None
    """
    # Rows that fall inside the requested time window (bounds inclusive)
    in_window = (track_df['time'] >= start_time) & (track_df['time'] <= end_time)

    # Primary selection: rows in the window that are flagged as inside TB Pat
    candidates = track_df[in_window & (track_df['in_tb_pat'])]
    if candidates.empty:
        print('No tracks found in TB Pat between the specified times. Extending TB Pat boundaries.')
        # Fallback selection: rows in the window inside a fixed bounding box
        candidates = track_df[in_window & (track_df['position_x'] >= 8) & (track_df['position_x'] <= 11) & (track_df['position_y'] >= 2.5) & (track_df['position_y'] <= 5)]
    track_ids = candidates['track_id'].unique()

    fig, ax = plt.subplots(figsize=(10, 6))
    ax = plot_with_background(ax, 1)

    # Draw each selected track with its own line colour
    for tid in track_ids:
        path = track_df[track_df['track_id'] == tid]
        xs = path['position_x']
        ys = path['position_y']

        # Line through all rows of the track
        ax.plot(xs, ys, label=f'Track {tid}')

        # First row: open circle
        ax.scatter(xs.iloc[0], ys.iloc[0],
                   edgecolor='black', facecolor='none', s=50, label=f'Start {tid}')

        # Last row: cross
        ax.scatter(xs.iloc[-1], ys.iloc[-1],
                   color='black', marker='x', s=50, label=f'End {tid}')

        # Rows in between: small filled dots
        if len(path) > 2:
            ax.scatter(xs.iloc[1:-1], ys.iloc[1:-1],
                       color='black', s=10)

    # Axis labels, title and legend
    ax.set_xlabel('Position X')
    ax.set_ylabel('Position Y')
    ax.set_title('Tracks in TB Pat between Specified Times')
    ax.legend()
    plt.show()

# For every unmatched record (track_id == -1), show the raw tracks seen in
# TB Pat during that record's start/completion window
for i in matches.index:
    if matches.loc[i, 'track_id'] != -1:
        continue
    plot_tracks_in_tb_pat(test_unlinked, matches.loc[i, 'start_time'], matches.loc[i, 'completion_time'])