In [None]:
import os
import shutil
import math
import numpy as np
import pandas as pd
import json

In [None]:
#adds new types of ids
#assigns new ids starting from 0 to both annotations and images in val.json and train.json respectively
def transformIds(annotations_df, images_df, seq_length=150):
  """Re-number image and annotation ids so that both start at 0.

  Images receive the ids 0..n-1 in their current row order and annotations
  receive the ids 0..m-1 in theirs. Every annotation's ``image_id`` is
  rewritten to the new id of the image it pointed to. Two columns are added
  to the images frame:

  * ``frame_id``: the new image id modulo ``seq_length``
  * ``first_frame_image_id``: the new image id rounded down to a multiple of
    ``seq_length``

  The input frames are not modified; modified copies are returned.

  Args:
    annotations_df: DataFrame with at least the columns ``id`` and
      ``image_id``.
    images_df: DataFrame with at least the column ``id``.
    seq_length: Number of frames per sequence used to derive ``frame_id``
      and ``first_frame_image_id``. Defaults to 150.

  Returns:
    Tuple ``(annotations_df_new, images_df_new)``.

  Raises:
    ValueError: If ``seq_length`` is not positive.
    KeyError: If an annotation refers to an ``image_id`` that does not occur
      in ``images_df['id']``.
  """
  if seq_length <= 0:
    raise ValueError(f'seq_length must be positive, got {seq_length}')

  annotations_df_new = annotations_df.copy()
  images_df_new = images_df.copy()

  # np.arange keeps an integer dtype even when a frame is empty.
  new_image_ids = np.arange(len(images_df))
  new_annotation_ids = np.arange(len(annotations_df))

  # old image id -> new (positional) image id
  image_id_change_map = {
      old_id: new_id
      for new_id, old_id in enumerate(images_df['id'].to_numpy())
  }

  # Plain arrays are assigned positionally, so a non-default index on the
  # (typically filtered) input frames cannot misalign the values.
  images_df_new['id'] = new_image_ids
  images_df_new['frame_id'] = new_image_ids % seq_length
  images_df_new['first_frame_image_id'] = (new_image_ids // seq_length) * seq_length

  annotations_df_new['id'] = new_annotation_ids
  annotations_df_new['image_id'] = annotations_df_new['image_id'].apply(
      lambda old_id: image_id_change_map[old_id])

  return annotations_df_new, images_df_new

In [None]:
#returns the annotations and images referring to the image_ids of the given sequences
def getPartialAnnotations(annotations, images, sequenceSample, seq_length=150):
  """Select the annotations and images that belong to the given sequences.

  Sequence ``k`` (1-based) is taken to own the image ids
  ``(k - 1) * seq_length + 1`` through ``k * seq_length`` inclusive.

  Args:
    annotations: DataFrame with an ``image_id`` column.
    images: DataFrame with an ``id`` column.
    sequenceSample: Iterable of sequence names (e.g. directory names). Names
      for which ``int(name)`` raises ``ValueError`` are reported on stdout
      and skipped.
    seq_length: Number of images per sequence. Defaults to 150.

  Returns:
    Tuple ``(filtered_annotations, filtered_images)`` containing only the
    rows whose image id falls into one of the selected sequences.

  Raises:
    ValueError: If ``seq_length`` is not positive.
  """
  if seq_length <= 0:
    raise ValueError(f'seq_length must be positive, got {seq_length}')

  # Collect the ids as integers in a set: no float conversion and no repeated
  # array re-allocation while looping over the sequences.
  image_ids_set = set()
  for sequence in sequenceSample:
    try:
      sequence_id = int(sequence)
    except ValueError:
      print(f'the dir name {sequence} couldnt be cast as int')
      continue
    first_image_id = (sequence_id - 1) * seq_length + 1
    image_ids_set.update(range(first_image_id, first_image_id + seq_length))

  filtered_annotations = annotations[annotations['image_id'].isin(image_ids_set)]
  filtered_images = images[images['id'].isin(image_ids_set)]
  return filtered_annotations, filtered_images

In [None]:
#copies images from a set of sequences into a new folder
def copyPartialSequence(subdirectoryPath, sequenceSample, dsPath):
  """Copy the contents of the selected sequence folders into one flat folder.

  For every name in ``sequenceSample`` all entries of ``dsPath/<name>`` are
  copied into ``subdirectoryPath`` under their own file name. Because the
  destination is flat, entries with the same name coming from different
  sequences end up at the same destination path.

  Args:
    subdirectoryPath: Existing destination directory.
    sequenceSample: Iterable of sequence directory names below ``dsPath``.
    dsPath: Root directory that contains one sub-directory per sequence.
  """
  for sequenceName in sequenceSample:
    sequenceDir = os.path.join(dsPath, sequenceName)
    for fileName in os.listdir(sequenceDir):
      sourcePath = os.path.join(sequenceDir, fileName)
      targetPath = os.path.join(subdirectoryPath, fileName)
      shutil.copy(sourcePath, targetPath)

In [None]:
#receives bbox as relative values for x_center, y_center, width, height
#returns bbox as absolute values for x_left, y_top, width, height
def adjustBoundingBox(bbox, image_width=1024, image_height=1024):
  """Convert a relative, centre-based bbox into an absolute top-left bbox.

  Args:
    bbox: Sequence of four numbers ``(x_center, y_center, width, height)``
      given relative to the image size.
    image_width: Image width in pixels used to scale the horizontal values.
      Defaults to 1024.
    image_height: Image height in pixels used to scale the vertical values.
      Defaults to 1024.

  Returns:
    List ``[x_left, y_top, width, height]`` in pixels.

  Raises:
    ValueError: If ``bbox`` does not contain exactly four values.
  """
  x_center, y_center, rel_width, rel_height = bbox

  width = rel_width * image_width
  height = rel_height * image_height

  # Move the reference point from the box centre to its top-left corner.
  x_left = x_center * image_width - (width / 2)
  y_top = y_center * image_height - (height / 2)
  return [x_left, y_top, width, height]

In [None]:
#receives dataset as formatted in this project
#returns dataset formatted as documented in https://github.com/timmeinhardt/trackformer/blob/main/docs/TRAIN.md
def _buildSplitAnnotations(base_dict, annotations_df, images_df, split_sequences):
  """Build the annotation dictionary of one split (train or val)."""
  split_annotations_df, split_images_df = getPartialAnnotations(annotations_df, images_df, split_sequences)
  split_annotations_df, split_images_df = transformIds(split_annotations_df, split_images_df)

  # Shallow copy: every key of the source file is kept, only the entries
  # below are replaced or added for this split.
  split_dict = base_dict.copy()
  split_dict['annotations'] = split_annotations_df.to_dict(orient='records')
  split_dict['images'] = split_images_df.to_dict(orient='records')
  split_dict['sequences'] = np.sort(split_sequences).tolist()
  split_dict['frame_range'] = {"start": 0.0,
                               "end": 1.0}
  split_dict['type'] = 'instances'
  return split_dict


def createCOCODataset(dataset_path, annotations_path, output_dir_path, train_proportion=0.8, seed=None):
  """Convert the project dataset into a train/val COCO-style dataset.

  The target layout is the one described in
  https://github.com/timmeinhardt/trackformer/blob/main/docs/TRAIN.md.

  Steps:
    1. Load the annotation JSON; strip the directory part from every image
       ``file_name``, rename the annotation column ``label`` to ``track_id``
       and convert every bbox with ``adjustBoundingBox``.
    2. Randomly split the sequence directories found in ``dataset_path`` into
       a train and a val part.
    3. Copy the files of both parts into ``<output_dir_path>/train`` and
       ``<output_dir_path>/val``.
    4. Write ``train.json`` and ``val.json`` into
       ``<output_dir_path>/annotations``.

  Args:
    dataset_path: Directory that contains one sub-directory per sequence.
    annotations_path: Path of the annotation JSON; it must contain the keys
      ``images`` and ``annotations``.
    output_dir_path: Directory the converted dataset is written to. It is
      created (including missing parents) if it does not exist.
    train_proportion: Fraction of the sequences assigned to the train split
      (rounded down). Defaults to 0.8.
    seed: Optional seed for the random split. With ``None`` NumPy's global
      random state is used, so the split differs between runs unless the
      caller seeded it.

  Raises:
    ValueError: If ``train_proportion`` is outside ``[0, 1]``.
    KeyError: If the annotation file lacks ``images`` or ``annotations``.
  """
  if not 0.0 <= train_proportion <= 1.0:
    raise ValueError(f'train_proportion must be within [0, 1], got {train_proportion}')

  with open(annotations_path, 'r') as file:
    annotationsDict = json.load(file)

  images_df = pd.DataFrame(annotationsDict['images'])
  # Images are copied into a flat folder, so only the bare file name is kept.
  images_df['file_name'] = images_df['file_name'].apply(lambda name : name.split('/')[-1])
  # Same sequence length (150 frames) that the id helpers in this notebook use.
  images_df['seq_length'] = 150

  annotations_df = pd.DataFrame(annotationsDict['annotations'])
  annotations_df = annotations_df.rename(columns={'label': 'track_id'})
  annotations_df = annotations_df.sort_values(by='track_id')
  annotations_df['bbox'] = annotations_df['bbox'].apply(adjustBoundingBox)

  # 'info' is not carried over into the output; tolerate files without it.
  annotationsDict.pop('info', None)

  # Only directories are sequences; stray files would break the copy step.
  # Sorting makes the split reproducible for a given seed, because the order
  # returned by os.listdir is arbitrary.
  sequences = np.array(
      sorted(entry for entry in os.listdir(dataset_path)
             if os.path.isdir(os.path.join(dataset_path, entry))),
      dtype=str)

  num_samples = int(len(sequences) * train_proportion)
  random_source = np.random if seed is None else np.random.RandomState(seed)
  train_sequences = sequences[random_source.choice(len(sequences), size=num_samples, replace=False)]
  val_sequences = sequences[~np.isin(sequences, train_sequences)]

  train_ds_path = os.path.join(output_dir_path, 'train')
  val_ds_path = os.path.join(output_dir_path, 'val')
  annotations_dir_path = os.path.join(output_dir_path, 'annotations')
  # makedirs also creates output_dir_path itself and any missing parents.
  for directory in (train_ds_path, val_ds_path, annotations_dir_path):
    os.makedirs(directory, exist_ok=True)

  copyPartialSequence(train_ds_path, train_sequences, dataset_path)
  copyPartialSequence(val_ds_path, val_sequences, dataset_path)

  train_annotations_dict = _buildSplitAnnotations(annotationsDict, annotations_df, images_df, train_sequences)
  val_annotations_dict = _buildSplitAnnotations(annotationsDict, annotations_df, images_df, val_sequences)

  train_annotations_filename = "train.json"
  with open(os.path.join(annotations_dir_path, train_annotations_filename), 'w') as outfile:
    json.dump(train_annotations_dict, outfile, indent=4)

  val_annotations_filename = "val.json"
  with open(os.path.join(annotations_dir_path, val_annotations_filename), 'w') as outfile:
    json.dump(val_annotations_dict, outfile, indent=4)

# Additional Steps
Since the documentation at https://github.com/timmeinhardt/trackformer/blob/main/docs/TRAIN.md is incomplete, two additional steps are necessary: the sequences used for training/validation must also be included in separate directories in the dataset, and the original annotations JSON must be provided. These adjustments were made manually.