# Inference

Inference for both the challenge format and general-purpose use.

In [None]:
import torch 
import torch.nn.functional as F
import numpy as np
import os
import pandas as pd
from utils.padding import pad_batch, FEATURE_PADDING_VALUE, LABEL_PADDING_VALUE, pad_tensor
from utils.features import getFeatures
from models.models import ClassificationModel, RegressionModel
import statistics
from utils.postprocessing import combined_cps_k_focused
from tqdm import tqdm
from torch.utils.data import DataLoader, ConcatDataset
from pathlib import Path
from utils.timeseriesdataset import TimeSeriesDataset
import pickle 
import matplotlib.pyplot as plt 

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('The model is running on:', DEVICE) 

# Batch size used by the DataLoader in the general inference section.
BATCH_SIZE = 32

# One network per predicted quantity (alpha, K, state), all placed on DEVICE.
AlphaModel = RegressionModel().to(DEVICE)
KModel = RegressionModel().to(DEVICE)
StateModel = ClassificationModel().to(DEVICE)

# map_location remaps the stored tensors onto DEVICE, so checkpoints saved on a GPU
# still load when the notebook runs on a CPU-only machine.
AlphaModel.load_state_dict(torch.load("models/optimal_weights/alpha_weights", map_location=DEVICE))
KModel.load_state_dict(torch.load("models/optimal_weights/k_weights", map_location=DEVICE))
StateModel.load_state_dict(torch.load("models/optimal_weights/state_weights", map_location=DEVICE))

# Inference only: switch every model to evaluation mode.
AlphaModel.eval()
KModel.eval()
StateModel.eval()

# General 

Runs the models on a pickled dataset and saves the predictions and ground truth as `.npy` files, e.g. for plotting and extracting time series.

In [None]:
PICKLE_FILE = "<data as .pkl dataloader class in torch>"

# Load the dataset.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only open pickles that come from a trusted source.
with open(PICKLE_FILE, "rb") as f:
    data = pickle.load(f)

# Results are written next to the pickle file. Fall back to the current directory
# when PICKLE_FILE has no directory component: os.makedirs("") raises FileNotFoundError.
SAVE_DIR = os.path.dirname(PICKLE_FILE) or "."
os.makedirs(SAVE_DIR, exist_ok=True)
print("Saving to dir:", SAVE_DIR)

# `data` is passed to ConcatDataset, so it is expected to be an iterable of datasets.
# shuffle=False keeps the saved predictions in dataset order.
concat_data = ConcatDataset(data)
dataloader = DataLoader(concat_data, batch_size=BATCH_SIZE, shuffle=False, collate_fn=pad_batch)
print("Test data: ", len(data), "DataLoader Sizes:", len(dataloader))

In [None]:
# Per-batch accumulators; they are concatenated into single tensors in the next cell.
pred_a_list, pred_k_list, pred_state_list = [], [], []
gt_a_list, gt_k_list, gt_state_list = [], [], []

# Wrapping the dataloader lets tqdm advance itself, and the context manager closes
# the bar even when a batch raises (the manual update()/close() pair did not).
with torch.no_grad(), tqdm(dataloader, desc='Testing', position=0) as progress_bar:
    for inputs, alpha_labels, k_labels, state_labels in progress_bar:
        inputs = inputs.to(DEVICE)

        # Get predictions: the regression heads drop their trailing axis,
        # the classifier output is reduced to the most likely class index.
        pred_alpha_per_track = AlphaModel(inputs).squeeze(-1)
        pred_k_per_track = KModel(inputs).squeeze(-1)
        pred_state_per_track = torch.argmax(StateModel(inputs), dim=-1)

        # Pad predictions before appending
        # (presumably pad_tensor brings every batch to a common length so that
        # torch.cat works later - TODO confirm against utils.padding).
        pred_a_list.append(pad_tensor(pred_alpha_per_track))
        pred_k_list.append(pad_tensor(pred_k_per_track))
        pred_state_list.append(pad_tensor(pred_state_per_track))

        # Append ground truth (do not pad ground truth)
        gt_a_list.append(alpha_labels)
        gt_k_list.append(k_labels)
        gt_state_list.append(state_labels)

In [None]:
# Join the per-batch chunks along the first axis, predictions first, then labels.
pred_a, pred_k, pred_state = (
    torch.cat(chunks, dim=0) for chunks in (pred_a_list, pred_k_list, pred_state_list)
)
gt_a, gt_k, gt_state = (
    torch.cat(chunks, dim=0) for chunks in (gt_a_list, gt_k_list, gt_state_list)
)

print("Predictions shape:", pred_k.shape)
print("Ground truth shape:", gt_k.shape)

# Move every tensor to host memory and write it as a .npy file inside SAVE_DIR.
print("Converting to numpy and saving...")
arrays_to_save = {
    "pred_a.npy": pred_a,
    "pred_k.npy": pred_k,
    "pred_state.npy": pred_state,
    "gt_a.npy": gt_a,
    "gt_k.npy": gt_k,
    "gt_state.npy": gt_state,
}
for file_name, tensor in arrays_to_save.items():
    np.save(os.path.join(SAVE_DIR, file_name), tensor.cpu().numpy())
print("Processing completed successfully.")


# For challenge format

Function to predict on input tracks

In [None]:
def getPredictions(df, max_size=200):

    """
    Predicts alpha, k, and state values for a single input series and detects changepoints.
    This function is specifically designed for the challenge.

    Args:
        df (pd.DataFrame): A pandas DataFrame containing the input series in its "x" and "y" columns.
        max_size (int): The number of time steps fed to the models. Shorter series are padded
            with FEATURE_PADDING_VALUE, longer ones are truncated to the first max_size steps.
            Default: 200.

    Returns:
        tuple: (final_predictions, pred_alpha_list, pred_k_list, pred_states_list), where
            final_predictions (list) is a flat list holding, for every segment in order,
            [k, alpha, state, end_changepoint], and the remaining three items are the
            per-step alpha, k and state sequences returned by combined_cps_k_focused.
    """

    # Replace NaN / +-inf in the features by 0 so the networks never see non-finite values.
    features = np.nan_to_num(getFeatures(df["x"].values, df["y"].values), nan=0.0, posinf=0.0, neginf=0.0)
    features = torch.tensor(features, dtype=torch.float32, device=DEVICE).unsqueeze(0)
    length = features.size(1)

    if length < max_size:
        # Pad along the time axis only (the last axis is left untouched).
        features = F.pad(features, (0, 0, 0, max_size - length), value=FEATURE_PADDING_VALUE)
    elif length > max_size:
        features = features[:, :max_size]
        print(f"Note that the input series is longer than the maximum size. The input series has been truncated to the first {max_size} values.")
        # Only max_size steps are left to read back from the models.
        length = max_size

    with torch.no_grad():
        # convert to numpy arrays for downstream analysis; flatten() already yields a
        # 1-D array, and [:length] drops the outputs that belong to padded steps
        pred_alpha_list = AlphaModel(features).cpu().numpy().flatten()[:length]
        pred_k_list = KModel(features).cpu().numpy().flatten()[:length]
        states_log_probs = StateModel(features)
        pred_states_list = torch.argmax(states_log_probs, dim=-1).cpu().numpy().flatten()[:length]

    merged_cps, _, _, pred_alpha_list, pred_k_list, pred_states_list = combined_cps_k_focused(pred_alpha_list, pred_k_list, pred_states_list)

    # list() guarantees that 0 is prepended; `[0] + ndarray` would broadcast-add instead.
    boundaries = [0] + list(merged_cps)
    final_predictions = []

    for start, end in zip(boundaries[:-1], boundaries[1:]):

        # An empty segment has no mean and no mode (statistics.mode would raise), so skip it.
        if end <= start:
            continue

        log_k_plus1 = np.mean(pred_k_list[start:end])
        final_alpha = np.mean(pred_alpha_list[start:end])
        final_state = statistics.mode(pred_states_list[start:end])

        # The K output is treated as log10(K + 1); invert that transform.
        final_k = 10**log_k_plus1 - 1

        # Relabel state 0 as state 2 when the segment's K exceeds 0.1
        # (presumably 0 = immobile and 2 = freely diffusing - TODO confirm the label encoding).
        if final_k > 0.1 and final_state == 0:
            final_state = 2

        final_predictions.extend([final_k, final_alpha, int(final_state), end])

    return final_predictions, pred_alpha_list, pred_k_list, pred_states_list

In [None]:
# Root folder of the challenge data; it is read as <root>/exp_<n>/trajs_fov_<m>.csv below.
challenge_data_path = "<path to track 2>"

# Number of experiments and of fields of view per experiment, plus the challenge track.
N_EXP, N_FOVS = 9, 30
track = 2

# Output tree for the submission files: <path_results>/track_<track>/
path_results = 'new_scoring/res/'
path_track = os.path.join(path_results, f'track_{track}/')

# Create both output levels up front; existing folders are left as they are.
for out_dir in (path_results, path_track):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Write one submission file per experiment and field of view, one line per trajectory:
# "<traj_idx>,<K>,<alpha>,<state>,<changepoint>,..." as produced by getPredictions.
for exp in range(1, N_EXP + 1):

    path_exp = os.path.join(path_track, f'exp_{exp}/')
    os.makedirs(path_exp, exist_ok=True)

    for fov in range(N_FOVS):

        df = pd.read_csv(os.path.join(challenge_data_path, f'exp_{exp}', f'trajs_fov_{fov}.csv'))

        traj_idx = df.traj_idx.unique()

        submission_file = os.path.join(path_exp, f'fov_{fov}.txt')

        # 'w' (not 'a'): re-running this cell must overwrite the file rather than
        # append a second copy of every trajectory to an existing submission.
        with open(submission_file, 'w') as f:
            for idx in traj_idx:

                sub_df = df[df.traj_idx == idx]

                pred, _, _, _ = getPredictions(sub_df)

                # The trajectory index is written as an integer, followed by the flat prediction list.
                prediction_final = [int(idx)] + pred
                formatted_numbers = ','.join(map(str, prediction_final))

                f.write(formatted_numbers + '\n')

Comparison to reference

In [None]:
from andi_datasets.utils_challenge import codalab_scoring

# The scorer reads from and writes to the same folder.
# NOTE(review): presumably it expects the predictions under "new_scoring/res" (written above)
# and the reference data in a sibling folder - confirm against the andi_datasets docs.
input_dir = output_dir = "new_scoring"

codalab_scoring(INPUT_DIR=input_dir, OUTPUT_DIR=output_dir)

Visualising tracks

In [None]:
# Labelled benchmark data. Set the ANDI_TRACK2_DIR environment variable to point the
# notebook at another copy; the original author's local path stays as the default.
challenge_data_path = os.environ.get(
    "ANDI_TRACK2_DIR",
    "/home/haidiri/Desktop/AnDiChallenge2024/benchmark_andi_track_2/track_2",
)

N_EXP = 9
N_FOVS = 30
track = 2

# One entry per trajectory: ground truth read from the CSV columns, per-step
# predictions as returned by getPredictions, and the raw coordinates.
gt_a, gt_k, gt_state = [], [], []
pred_a, pred_k, pred_state = [], [], []
x_coor, y_coor = [], []

# Only experiment 2 is collected here.
for exp in [2]:
    for fov in tqdm(range(N_FOVS), desc=f"exp_{exp}"):
        df = pd.read_csv(os.path.join(challenge_data_path, f'exp_{exp}', f'trajs_fov_{fov}.csv'))
        traj_idx = df.traj_idx.unique()

        for idx in traj_idx:
            sub_df = df[df.traj_idx == idx]
            _, pa, pk, ps = getPredictions(sub_df)

            pred_a.append(pa)
            pred_k.append(pk)
            pred_state.append(ps)

            gt_a.append(sub_df["alpha"].values)
            gt_k.append(sub_df["D"].values)
            gt_state.append(sub_df["state"].values)

            # Stored as plain arrays, like the other per-trajectory lists.
            x_coor.append(sub_df["x"].values)
            y_coor.append(sub_df["y"].values)
                

In [None]:
# Position of the trajectory to inspect within the lists collected in the previous cell.
INDEX = 66

# Trajectory in the x-y plane.
fig, ax = plt.subplots()
ax.plot(x_coor[INDEX], y_coor[INDEX])
ax.set(title=f"Trajectory {INDEX}", xlabel="x", ylabel="y")

# getPredictions maps the per-step K output v to 10**v - 1 before reporting it, so the
# same transform is applied here; otherwise the prediction (log scale) and the
# ground-truth "D" column would be drawn on different scales.
# NOTE(review): assumes the "D" column holds K on a linear scale - confirm.
pred_k_linear = 10 ** np.asarray(pred_k[INDEX], dtype=float) - 1

# One figure per quantity: prediction in red, ground truth in blue, against the step index.
panels = [
    ("alpha", pred_a[INDEX], gt_a[INDEX]),
    ("state", pred_state[INDEX], gt_state[INDEX]),
    ("K", pred_k_linear, gt_k[INDEX]),
]
for quantity, predicted, truth in panels:
    fig, ax = plt.subplots()
    ax.scatter(np.arange(len(predicted)), predicted, color="red", label="prediction")
    ax.scatter(np.arange(len(truth)), truth, color="blue", label="ground truth")
    ax.set(title=f"{quantity} - trajectory {INDEX}", xlabel="time step", ylabel=quantity)
    ax.legend()

print(np.unique(gt_k[INDEX]), np.unique(gt_a[INDEX]))
plt.show()

