This notebook transforms the CalMS21 dataset into .pkl files, which are then used by the 'Creating_Inputs_for_models.ipynb' notebook.

In [None]:
import os
import json
import numpy as np
import pandas as pd

In [None]:
# @title Download and unzip the data
import os, requests, zipfile

# Seconds to wait for the server to connect / send data before giving up,
# so a stalled connection cannot hang the cell forever.
DOWNLOAD_TIMEOUT = 60


def _download(url, fname, timeout=DOWNLOAD_TIMEOUT):
  """
  Fetch `url` and write the response body to `fname`.

  Prints a failure message and returns False when the request cannot be
  completed or the server answers with a non-OK status; nothing is written
  in that case. Returns True once the file has been written.
  """
  try:
    r = requests.get(url, timeout=timeout)
  except (requests.ConnectionError, requests.Timeout):
    print("!!! Failed to download data !!!")
    return False
  if r.status_code != requests.codes.ok:
    print("!!! Failed to download data !!!")
    return False
  with open(fname, "wb") as fid:
    fid.write(r.content)
  return True


# Download the dataset archive (skipped when it is already on disk).
fname = 'task1_classic_classification.zip'
url = "https://data.caltech.edu/records/s0vdx-0k302/files/task1_classic_classification.zip?download=1"

if os.path.isfile(fname):
  print('Data have already been downloaded!!!')
else:
  _download(url, fname)

if not os.path.exists('task1_classic_classification'):
  # Fail with an explicit message instead of letting ZipFile raise on a
  # missing archive after an unsuccessful download.
  if not os.path.isfile(fname):
    raise FileNotFoundError(
        f"Cannot unzip '{fname}': the archive is missing (download failed?)")
  # Unzip the file
  with zipfile.ZipFile(fname, 'r') as zip_ref:
    zip_ref.extractall('.')


# Download the script
fname = 'calms21_convert_to_npy.py'
url = "https://data.caltech.edu/records/s0vdx-0k302/files/calms21_convert_to_npy.py?download=1"

if not os.path.isfile(fname):
  _download(url, fname)

In [None]:
# Run the downloaded conversion script, reading from and writing to the current directory.
!python calms21_convert_to_npy.py  --input_directory '.' --output_directory '.'

Saving ./calms21_task1_train
Saving ./calms21_task1_test


In [None]:
def load_task1_data(data_path):
  """
  Read a converted task 1 .npy file.

  Returns a (dataset, vocabulary) tuple:
      dataset is the mapping stored under the 'annotator-id_0' key;
      vocabulary tells you how to map behavior names to class ids. It is
      taken from the metadata of the first sequence, because it is the same
      for all sequences in this dataset.
  """
  # .item() extracts the single Python object held by the loaded array.
  annotator_sequences = np.load(data_path, allow_pickle=True).item()['annotator-id_0']
  # Any sequence key works for the vocabulary lookup; use the first one.
  first_sequence_id = list(annotator_sequences)[0]
  return annotator_sequences, annotator_sequences[first_sequence_id]['metadata']['vocab']

In [None]:
# Load the converted train and test files produced in the current directory.
# The vocabulary is kept from the training file only; the one returned for the
# test file is discarded.
training_data, vocab = load_task1_data('./calms21_task1_train.npy')
test_data, _ = load_task1_data('./calms21_task1_test.npy')

In [None]:
## Creating dictionaries that map recordings to arrays of shape (num_frames, num_keypoints, num_dimensions) for resident and intruder mouse
## This will be the 'coordinates' input for the keypoint-moseq model
## (the target shape assumes the raw per-mouse keypoints are (num_frames, num_dimensions, num_keypoints) -- TODO confirm)

coordinates_resident = {}
confidences_resident = {}
coordinates_intruder = {}
confidences_intruder = {}
annotations = {}

for sequence_id, sequence in training_data.items():

  # Short recording name: 'm' followed by characters 18-19 of the sequence id.
  recording = 'm' + sequence_id[18:20]

  # Two sequence ids that share those characters would silently overwrite each
  # other in every dictionary below, so stop instead of losing a recording.
  if recording in annotations:
    raise ValueError(
        f"Recording key '{recording}' derived from '{sequence_id}' is already in use")

  coord_data = sequence['keypoints']
  conf_data = sequence['scores']

  # Index 0 on axis 1 is stored as the resident mouse, index 1 as the intruder.
  # The transpose swaps the last two axes of each per-mouse keypoint array.
  coordinates_resident[recording] = np.transpose(coord_data[:, 0, :, :], (0, 2, 1))
  coordinates_intruder[recording] = np.transpose(coord_data[:, 1, :, :], (0, 2, 1))
  confidences_resident[recording] = conf_data[:, 0, :]
  confidences_intruder[recording] = conf_data[:, 1, :]
  annotations[recording] = sequence['annotations']


In [None]:
# Mount Google Drive at /content/drive (requires the google.colab package, i.e. a Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
## Exporting the data files:
import os
import pickle

folder_path = '/content/drive/MyDrive/NMA_Project/'

# Create only the final folder when it is missing. os.mkdir does not create
# parent directories, so this still fails loudly if the parent path does not
# exist instead of silently writing somewhere unexpected.
if not os.path.isdir(folder_path):
    os.mkdir(folder_path)

# Output file stem -> dictionary to pickle; each is written to '<stem>.pkl'.
exports = {
    'coordinates_resident': coordinates_resident,
    'confidences_resident': confidences_resident,
    'coordinates_intruder': coordinates_intruder,
    'confidences_intruder': confidences_intruder,
    'annotations': annotations,
}

for stem, payload in exports.items():
    with open(os.path.join(folder_path, stem + '.pkl'), 'wb') as file:
        pickle.dump(payload, file)

print(f'Dictionary exported to {folder_path}')

Dictionary exported to /content/drive/MyDrive/NMA_Project/
