## Load and preprocess data using `movement`
## Imports

In [1]:
from pathlib import Path
from matplotlib import pyplot as plt

from movement.io import load_poses

# Use function from utils.py, located within the same directory
from utils import reshape_loaded_ds

## Define and create paths
First, let's locate the data folder on the current machine and check that it contains the expected subfolders.

In [7]:
# Replace this path with the location of the data folder on your system
data_folder = Path("/mnt/Data/in2research2024")
# Fail early, with a readable message, if the path is missing or is not a folder
assert data_folder.is_dir(), f"Data folder not found: {data_folder}"
print(f"Data will be loaded from {data_folder}")

# The following mouse IDs must be present as subfolders in the data folder
mouse_ids = ["SB019", "SB021"]
for mouse_id in mouse_ids:
    # is_dir() (rather than exists()) also rejects a regular file with that name
    assert (data_folder / mouse_id).is_dir(), (
        f"Subfolder not found: {data_folder / mouse_id}"
    )
    print(f"Subfolder {mouse_id} has been found")

Data will be loaded from /mnt/Data/in2research2024
Subfolder SB019 has been found
Subfolder SB021 has been found


Now let's create subfolders for saving plots and reports.

In [8]:
# Plots go into a "plots" subfolder of the data folder (created if missing)
plot_folder = data_folder.joinpath("plots")
plot_folder.mkdir(exist_ok=True)
print(f"Plots will be saved in {plot_folder}")

# Reports go into a sibling "reports" subfolder (created if missing)
report_folder = data_folder.joinpath("reports")
report_folder.mkdir(exist_ok=True)
print(f"Reports will be saved in {report_folder}")

Plots will be saved in /mnt/Data/in2research2024/plots
Reports will be saved in /mnt/Data/in2research2024/reports


## Load data into a `movement` dataset
Select a single file containing predicted pose tracks.

In [10]:
# Pick one mouse and one pose-tracks file inside that mouse's subfolder
mouse_id = "SB019"
file_name = "220719_SB019_FM001_female1_2022-07-19-164002DLC_resnet50_shanice_allNov29shuffle1_196000_filtered.csv"
file_path = data_folder / mouse_id / file_name
# Check the file up front so that a wrong name or path fails here, with a
# clear message, rather than later inside the loader
assert file_path.is_file(), f"Pose file not found: {file_path}"
print(f"Will load data from {file_path}")

Will load data from /mnt/Data/in2research2024/SB019/220719_SB019_FM001_female1_2022-07-19-164002DLC_resnet50_shanice_allNov29shuffle1_196000_filtered.csv


Load data with `movement` and reshape it to the desired format.

Note that tracking was performed with single-animal DeepLabCut models,
even though the data contains two animals ("individuals"): "resident" and "intruder".
The keypoint names are prefixed with the animal name, e.g. "resident_nose" and "intruder_nose",
so we'll use that prefix to split the data into two individuals.
See the `reshape_loaded_ds` function in `utils.py` for the implementation.


In [16]:
# NOTE(review): fps=50 is assumed to match the recording frame rate - confirm
ds = load_poses.from_dlc_file(file_path, fps=50)
print("Data has been loaded successfully.")

individuals = ["resident", "intruder"]

# Loaded keypoints are named "<individual>_<keypoint>". Recover the bare
# keypoint names from the first individual's entries by stripping the full
# prefix (underscore included), so that a keypoint name which itself contains
# an underscore is kept intact instead of being cut at its first underscore.
all_keypoints = ds.keypoints.values
name_prefix = f"{individuals[0]}_"
keypoints_names = [
    str(kpt)[len(name_prefix):]
    for kpt in all_keypoints
    if str(kpt).startswith(name_prefix)
]
print(f"Keypoints found: {keypoints_names}")

# Split the single loaded "individual" into one entry per animal
# (see reshape_loaded_ds in utils.py)
ds_new = reshape_loaded_ds(ds, individuals, keypoints_names)
print(f"Data has been reshaped successfully into a dataset with two individuals: {individuals}")

Data has been loaded successfully.
Keypoints found: ['nose', 'leftear', 'rightear', 'butt', 'neck', 'lefthip', 'righthip', 'leftshoulder', 'rightshoulder', 'lowerback']
Data has been reshaped successfully into a dataset with two individuals: ['resident', 'intruder']


In [17]:
# Display the reshaped dataset as the cell output
ds_new