## Libraries

In [None]:
import os
import pandas as pd
import numpy as np
import json
from datetime import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.dates import DateFormatter
from matplotlib.patches import Polygon as matPolygon
from matplotlib.collections import PatchCollection
from matplotlib.colors import LinearSegmentedColormap
import seaborn as sns
from shapely.geometry import Point, Polygon

## Clinic with main areas

In [None]:
def plot_with_background(ax, alpha=.5,
                         image_path='../data-raw/background/image3195.png',
                         image_extent=(0, 51, -0.02, 14.214)):
    """
    Draws a background image on the given axes, behind any other artists.

    Parameters:
    - ax: The matplotlib axis object to draw on.
    - alpha: Opacity of the background image (0 = invisible, 1 = opaque).
    - image_path: Path of the image file to load.
    - image_extent: (left, right, bottom, top) data coordinates the image
      is stretched to, or None to use matplotlib's default placement.

    Returns:
    - The same axis object, with the image added.
    """
    # Load the background image
    img = mpimg.imread(image_path)

    # zorder=-1 keeps the image underneath everything plotted afterwards
    imshow_kwargs = {'aspect': 'auto', 'zorder': -1, 'alpha': alpha}

    # Only pass an extent when one was requested, so the image is scaled
    # and positioned in plot coordinates
    if image_extent is not None:
        imshow_kwargs['extent'] = image_extent

    ax.imshow(img, **imshow_kwargs)

    return ax

# Read the background config JSON into `data`
with open('../data-raw/background/config.json') as f:
    data = json.load(f)

# Zones of interest. Each zone is a dict holding its four corner vertices
# ([x, y] pairs), a type tag and a display name.

# Entries and exits
exit_top = {
    'geometry': [[0.0, 12.0], [8.0, 12.0], [8.0, 14.214], [0.0, 14.214]],
    'type': 'ZONE',
    'name': 'Main exit',
}
exit_left = {
    'geometry': [[0.0, 5.5], [3.0, 5.5], [3.0, 12.0], [0.0, 12.0]],
    'type': 'ZONE',
    'name': 'West exit',
}
exit_right = {
    'geometry': [[47.0, 5.5], [51.0, 5.5], [51.0, 9.75], [47.0, 9.75]],
    'type': 'ZONE',
    'name': 'East exit',
}

# Care rooms
care_rooms = {
    'geometry': [[8.0, 8.25], [47.0, 8.25], [47.0, 9.75], [8.0, 9.75]],
    'type': 'ZONE',
    'name': 'Care rooms',
}

# TB survey area
tb_area = {
    'geometry': [[9.8, 2.7], [10.9, 2.7], [10.9, 3.4], [9.8, 3.4]],
    'type': 'ZONE',
    'name': 'TB chair',
}

# Sputum area
sputum_area = {
    'geometry': [[43.5, 0], [47.5, 0], [47.5, 2.0], [43.5, 2.0]],
    'type': 'ZONE',
    'name': 'Sputum delivery',
}

# Collect the zones in drawing order
geometries = [exit_top, exit_left, exit_right, care_rooms, tb_area, sputum_area]


def plot_with_background_geom(ax, geometries):
    """
    Draws the background image and overlays labelled geometries on it.

    Parameters:
    - ax: The matplotlib axes object where the plot will be drawn.
    - geometries: A list of geometry dictionaries, each containing 'geometry'
      (a sequence of [x, y] vertices), 'type' ('ZONE' for a closed, filled
      area; any other value is drawn as an open outline) and 'name' keys.

    Returns:
    - The modified axes object with the geometries plotted.
    """
    # Plot the background image first, fully opaque
    ax = plot_with_background(ax, 1)

    # One viridis colour per geometry
    colors = plt.cm.viridis(np.linspace(0, 1, len(geometries)))

    for color, geometry in zip(colors, geometries):
        # Extract the coordinates directly from the 'geometry' key
        coords = geometry['geometry']

        # Zones are closed and filled. Anything else (e.g. 'LINE') is left
        # open and must not be filled either: a filled patch is shaded as if
        # it were closed even when closed=False.
        is_zone = geometry['type'] == 'ZONE'

        patch = matPolygon(
            xy=coords,
            closed=is_zone,
            fill=is_zone,
            color=color,
            label=geometry['name'],
            alpha=0.5,
        )
        ax.add_patch(patch)

        # Label the geometry with its name, anchored at the first vertex
        ax.annotate(geometry['name'], xy=coords[0], color='white', weight='bold')

    # Legend below the plot; skipped when there is nothing to list so that
    # matplotlib does not warn about a legend without labelled artists
    if len(geometries) > 0:
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), shadow=True, ncol=2)

    return ax

# Draw the background image with all zones overlaid on a single 16x12 figure
fig, ax = plt.subplots(figsize=(16, 12))
ax = plot_with_background_geom(ax, geometries)
plt.show()

## Check number and order

In [None]:
# Load the clinical TB cases and derive a 'YYYY-MM-DD' date string per case
tb_cases = pd.read_csv('../data-clean/clinical/tb_cases.csv')
tb_cases['start_time'] = pd.to_datetime(tb_cases['start_time'])
tb_cases['date'] = tb_cases['start_time'].dt.strftime('%Y-%m-%d')

# List all files in the directory
files = os.listdir('../data-clean/tracking/linked-tb/')

# Keep only the CSV files
filtered_files = [f for f in files if f.endswith('.csv')]

# Extract dates from the filenames: strip only the trailing extension, and
# sort because os.listdir returns entries in arbitrary order
dates = sorted(os.path.splitext(f)[0] for f in filtered_files)

# Create a dataframe with column dates
date_df = pd.DataFrame({'date': dates})

# For each date, get the number of rows in tb_cases.
# na=False: cases without a start time have no date string and match nothing.
date_df['num_cases'] = date_df['date'].apply(
    lambda x: tb_cases[tb_cases['date'].str.startswith(x, na=False)].shape[0]
)

# For each date, load the corresponding file and count the distinct tracks
# among rows with category == "sure"
date_df['tb_tracks'] = date_df['date'].apply(
    lambda x: pd.read_csv(f'../data-clean/tracking/linked-tb/{x}.csv')
    .query('category == "sure"')['new_track_id']
    .nunique()
)
# Check if all integers from 1 to num_cases are present in the order for each date
def check_order(date, num_cases, df=None):
    """
    Reports which order numbers 1..num_cases have no "sure" track on a date.

    Parameters:
    - date: Date string used to locate the linked-tb CSV file.
    - num_cases: Number of clinical cases expected on that date.
    - df: Optional, already loaded linked-tb dataframe. When given, the CSV
      file is not read.

    Returns:
    - The string '0' when every order number is present, otherwise the set
      of missing order numbers as strings.
    """
    if df is None:
        df = pd.read_csv(f'../data-clean/tracking/linked-tb/{date}.csv')
    df_sure = df.query('category == "sure"')

    # One order value per track (first row of each new_track_id)
    orders = df_sure.drop_duplicates('new_track_id')['order']

    # The 'order' column may hold ints, floats (when some values are missing)
    # or strings, depending on what the CSV contains. Normalise every
    # integral value to its plain integer string so it can be compared with
    # the expected order numbers; anything non-numeric is ignored.
    numeric_orders = pd.to_numeric(orders, errors='coerce').dropna()
    track_order = {
        str(int(value))
        for value in numeric_orders
        if float(value).is_integer()
    }

    case_order = set(map(str, range(1, num_cases + 1)))
    missing_integers = case_order - track_order
    if not missing_integers:
        return '0'
    return missing_integers

# Run the order check once per date
date_df['missing_orders'] = date_df.apply(lambda r: check_order(r['date'], r['num_cases']), axis=1)

# Keep the dates that need attention: either the number of "sure" tracks
# differs from the number of cases, or some order number is missing
count_mismatch = date_df['num_cases'] != date_df['tb_tracks']
order_gap = date_df['missing_orders'] != '0'
filtered_date_df = date_df[count_mismatch | order_gap]
print(filtered_date_df)

In [None]:
def load_data(date):
    """
    Reads the unlinked tracking CSV for one date.

    Parameters:
    - date: Name of the file (without the '.csv' extension) inside the
      unlinked tracking folder.

    Returns:
    - The dataframe, with its 'time' column converted from epoch
      milliseconds to datetimes.
    """
    csv_path = os.path.join('../data-clean/tracking/unlinked/', f"{date}.csv")
    frame = pd.read_csv(csv_path)
    frame['time'] = pd.to_datetime(frame['time'], unit='ms')
    return frame

def subset_tb(dat):
    """
    Keeps only the tracks that have at least one row with 'in_check_tb' set
    and at least one row with 'in_sputum' set.

    Parameters:
    - dat: Tracking dataframe with 'track_id', 'in_check_tb' and 'in_sputum'
      columns.

    Returns:
    - All rows of the qualifying tracks.
    """
    def _seen_in_both(track):
        return track['in_check_tb'].any() and track['in_sputum'].any()

    return dat.groupby('track_id').filter(_seen_in_both)
    

# Disabled alternative: process every CSV in the unlinked folder.
# NOTE(review): the names collected this way still end in '.csv', and
# load_data appends '.csv' again -- strip the extension before enabling.
# dates = [f for f in os.listdir('../data-clean/tracking/unlinked/') if f.endswith('.csv')]
# Only a single date is processed; this rebinds the `dates` list built above
dates = ['2024-06-20']
# One filtered dataframe per date, in the same order as `dates`
dat_list = []
for date in dates:
    dat = load_data(date)
    # Keep only the tracks flagged in both in_check_tb and in_sputum
    sub_dat = subset_tb(dat)
    dat_list.append(sub_dat)

In [None]:
# Number of distinct track_id values kept for the first entry of dat_list
len(dat_list[0]['track_id'].unique())

In [None]:
# Total elapsed time of a track, optionally restricted to one state
def compute_total_time(group, column = None):
    """
    Sums the gaps between consecutive timestamps of a group.

    Parameters:
    - group: Dataframe with a 'time' column (and `column`, if given).
    - column: Optional name of a boolean column. When given, only the gaps
      ending on a row where that column is True are counted.

    Returns:
    - The summed time as a timedelta.
    """
    ordered = group.sort_values(by='time')

    # Gap between each row and the previous one; the first row has none
    gaps = ordered['time'].diff().fillna(pd.Timedelta(seconds=0))

    if column is None:
        return gaps.sum()
    return gaps[ordered[column]].sum()

# Function to format timedelta as minutes and seconds
def format_timedelta(td):
    """
    Formats a duration as "<minutes>m <seconds>s".

    Fractional seconds are truncated. Negative durations are rendered with a
    leading minus sign on the whole value (e.g. "-1m 30s").

    Parameters:
    - td: A timedelta-like object providing total_seconds().

    Returns:
    - The formatted string.
    """
    total_seconds = int(td.total_seconds())

    # divmod floors, which would turn -90 s into "-2m 30s"; split the
    # magnitude instead and put the sign in front
    sign = '-' if total_seconds < 0 else ''
    minutes, seconds = divmod(abs(total_seconds), 60)
    return f"{sign}{minutes}m {seconds}s"

# Function to compute the total times per track
def compute_and_print_total_times(df_tb):
    """
    Builds a per-track table of time spent with 'in_check_tb' set and of
    total tracked time.

    Despite its name, the function prints nothing; it returns the table.

    Parameters:
    - df_tb: Tracking dataframe with 'track_id', 'time', 'in_check_tb' and
      'in_tb_pat' columns.

    Returns:
    - A dataframe with columns 'track_id', 'tb_time' and 'total_time' (both
      times formatted as "<minutes>m <seconds>s"). The columns are present
      even when no track qualifies.
    """
    columns = ['track_id', 'tb_time', 'total_time']

    # Drop the tracks whose first row has in_tb_pat set.
    # NOTE(review): "first" is the first row in the frame's current order,
    # which is presumably chronological -- confirm against the data.
    df_tb = df_tb.groupby('track_id').filter(lambda x: not x['in_tb_pat'].iloc[0])

    # One result row per remaining track
    results = [
        {
            'track_id': track_id,
            'tb_time': format_timedelta(compute_total_time(group, 'in_check_tb')),
            'total_time': format_timedelta(compute_total_time(group)),
        }
        for track_id, group in df_tb.groupby('track_id')
    ]

    # Explicit columns keep the schema stable for an empty result
    return pd.DataFrame(results, columns=columns)

# Build the per-track timing table for the first entry of dat_list and print it
time_in_tb = compute_and_print_total_times(dat_list[0])
print(time_in_tb)

In [None]:
# Draw a single track on an axes
def plot_track(df, ax):
    """
    Plots one track as a blue line with small dots, marking its first point
    in red and its last point in green.

    Parameters:
    - df: Rows of a single track, with 'position_x', 'position_y' and
      'track_id' columns.
    - ax: The matplotlib axes object to draw on.
    """
    xs = df['position_x']
    ys = df['position_y']

    # The whole path
    ax.plot(xs, ys, marker='o', markersize=1, linestyle='-', color='blue')

    # Bigger dots for the two end points: first row, then last row
    for position, colour in ((0, 'red'), (-1, 'green')):
        ax.plot(xs.iloc[position], ys.iloc[position], marker='o', markersize=3, color=colour)

    track_label = df['track_id'].iloc[0]
    ax.set_title(f"Track ID: {track_label}")
    ax.set_xlabel('Position X')
    ax.set_ylabel('Position Y')

# Get unique track_ids
unique_track_ids = dat_list[0]['track_id'].unique()

if len(unique_track_ids) == 0:
    # plt.subplots rejects a grid with zero rows, so there is nothing to draw
    print('No tracks to plot')
else:
    # One 8x6 panel per track, stacked vertically
    fig, axs = plt.subplots(len(unique_track_ids), 1, figsize=(8, len(unique_track_ids) * 6))

    # plt.subplots returns a bare Axes instead of an array when there is a
    # single panel; np.atleast_1d makes both cases iterable the same way
    for ax, track_id in zip(np.atleast_1d(axs), unique_track_ids):
        track_df = dat_list[0][dat_list[0]['track_id'] == track_id]
        plot_with_background_geom(ax, geometries)  # Apply the background to each subplot
        plot_track(track_df, ax)

    plt.tight_layout()
    plt.show()

In [None]:
unique_track_ids = dat_list[0]['track_id'].unique()

# Moving-average window sizes (counted in rows, not in time units) and the
# line colour used for each of them
ma_windows = [(5, 'red'), (10, 'green'), (30, 'blue'), (60, 'purple')]

for track_id in unique_track_ids:
    # .copy() gives an independent frame, so adding the MA columns below
    # neither touches dat_list[0] nor triggers SettingWithCopyWarning
    track_df = dat_list[0][dat_list[0]['track_id'] == track_id].copy()

    # Calculate moving averages over the rows in their current order
    # NOTE(review): presumably the rows are already time-ordered -- confirm
    for window, _ in ma_windows:
        track_df[f'MA_{window}'] = track_df['person_height'].rolling(window=window).mean()

    # Compute overall maximum of person_height
    max_person_height = track_df['person_height'].max()

    plt.figure()
    plt.plot(track_df['time'], track_df['person_height'], marker='o', label=f'Person Height ({max_person_height:.2f})')

    # One line per moving average; its maximum is shown in the legend
    for window, colour in ma_windows:
        moving_average = track_df[f'MA_{window}']
        plt.plot(track_df['time'], moving_average, color=colour, label=f'MA {window} ({moving_average.max():.2f})')

    plt.title(f"Track ID: {track_id}")
    plt.xlabel('Time')
    plt.ylabel('Person Height')
    plt.legend()
    plt.grid(True)
    plt.show()