## Step 1
Importing all packages needed for this step

In [None]:
import os
from utils import reshape_loaded_ds
from pathlib import Path
from matplotlib import pyplot as plt
from movement.io import load_poses , save_poses
from movement.utils.reports import report_nan_values


Defining the data folder and creating the output folders for plots, clean data, and reports

In [None]:
# Root folder that holds the input file; every output folder is created inside it.
data_folder = Path(r"C:\Users\Yousuf\Desktop\Project\Data")

# Resolve all output locations first ...
plot_folder_0 = data_folder / "plots_thresholded"
plot_folder_1 = data_folder / "plots_interpolated"
plot_folder_2 = data_folder / "plots_smoothed"
clean_data_folder = data_folder / "clean_data"
report_folder = data_folder / "reports"

# ... then create them in the same order; exist_ok=True makes re-running this cell safe.
for output_folder in (
    plot_folder_0,
    plot_folder_1,
    plot_folder_2,
    clean_data_folder,
    report_folder,
):
    output_folder.mkdir(exist_ok=True)

In [None]:
# Name of the DeepLabCut file to process; it is looked up inside data_folder.
file_name = "220719_SB019_FM001_male2_2022-07-19-172457DLC_resnet50_shanice_allNov29shuffle1_196000_filtered.csv"
file_path = data_folder.joinpath(file_name)


Loading the data with movement and reshaping it so that the resident and intruder are handled as separate individuals

In [None]:
# Load the DeepLabCut file, passing fps=50 to the loader.
ds = load_poses.from_dlc_file(file_path, fps=50)
all_keypoints = ds.keypoints.values

# Keypoint labels are expected to look like "<animal>_<bodypart>". Split on the
# first underscore only, so a body part that itself contains an underscore
# (e.g. "tail_base") is kept whole instead of being truncated to "tail".
resident_keypoints = []
intruder_keypoints = []

for kp in all_keypoints:
    bodypart = kp.split("_", 1)[1]
    if kp.startswith("resident"):
        resident_keypoints.append(bodypart)
    else:
        # Every label that does not start with "resident" is counted as intruder.
        intruder_keypoints.append(bodypart)

# Only the intruder's body-part list is handed to the helper.
# NOTE(review): presumably both animals share the same body parts — confirm.
ds_new = reshape_loaded_ds(ds, ["resident", "intruder"], intruder_keypoints)
ds_new

Selecting the relevant time period from the data

In [None]:
# Keep only the part of the recording whose time coordinate lies in the 28-324 range.
# NOTE(review): units are presumably seconds, since the file was loaded with an fps — confirm.
time_window = slice(28, 324)
ds_new = ds_new.sel(time=time_window)
ds_new

Plotting **position** data over time for all individuals and keypoints. 

In [None]:
# Pull the position variable out of the dataset; the bare name on the last line displays it.
position = ds_new["position"]
position

In [None]:
# Facet grid: one row per keypoint, one column per spatial axis, one line colour per individual.
position.plot.line(
    x="time",
    row="keypoints",
    col="space",
    hue="individuals",
    aspect=2,
    size=2.5,
)

Plotting **confidence** data with xarray for all individuals and keypoints

In [None]:
# Pull the confidence variable out of the dataset; the bare name on the last line displays it.
confidence = ds_new["confidence"]
confidence

In [None]:
# Facet grid: one row per keypoint, one line colour per individual.
confidence.plot.line(
    x="time",
    row="keypoints",
    hue="individuals",
    aspect=2,
    size=2.5,
)

## Step 2

Plotting confidence histograms for each individual

In [None]:
# Two side-by-side panels: one confidence histogram per individual.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

colors = ["blue", "orange"]
for panel_index, name in enumerate(confidence.individuals.values):
    # All confidence values belonging to this individual, across keypoints and time.
    per_individual = confidence.sel(individuals=name)
    per_individual.plot.hist(
        ax=ax[panel_index],
        bins=50,
        alpha=1,
        color=colors[panel_index],
        label=name,
    )



Plotting confidence histograms for each individual and keypoint

In [None]:
# Grid of confidence histograms: one row per keypoint, one column per individual.
# The grid is sized from the data rather than assuming exactly 10 keypoints and
# 2 individuals, so a different keypoint count no longer breaks the indexing or
# puts the x-label on the wrong row.
individual_names = confidence.individuals.values
keypoint_names = confidence.keypoints.values
n_rows, n_cols = len(keypoint_names), len(individual_names)
last_row = n_rows - 1

# squeeze=False keeps `ax` two-dimensional even with a single row or column,
# so ax[j, i] is always valid.
fig, ax = plt.subplots(
    figsize=(10, 14), nrows=n_rows, ncols=n_cols, sharex=True, squeeze=False
)

colors = ["blue", "orange"]
for i, individual in enumerate(individual_names):
    # Cycle through the palette if there are more individuals than colours.
    color = colors[i % len(colors)]
    for j, keypoint in enumerate(keypoint_names):
        da = confidence.sel(keypoints=keypoint, individuals=individual)
        da.plot.hist(bins=30, alpha=0.7, label=keypoint, ax=ax[j, i], color=color, density=True)
        ax[j, i].set_xlim([0, 1])
        # Label only the outer edges of the grid: individual names on the top row,
        # keypoint names on the left column, the x-axis label on the bottom row.
        ax[j, i].set_title(individual if j == 0 else "")
        ax[j, i].set_ylabel(keypoint if i == 0 else "")
        ax[j, i].set_xlabel("confidence" if j == last_row else "")
            


Filtering data by confidence with set threshold

In [None]:
# Filter the data by confidence with a threshold of 0.8 and store the result
# in the dataset as a new variable.
ds_new["position_thresholded"] = ds_new.move.filter_by_confidence(threshold=0.8)

# Plot the stored variable, then save the current figure to the thresholded-plots folder.
thresholded_position = ds_new["position_thresholded"]
thresholded_position.plot.line(
    x="time",
    row="keypoints",
    col="space",
    hue="individuals",
    aspect=2,
    size=2.5,
)
threshold_plot_path = plot_folder_0 / "SB019_male2_position_threshold-80.png"
plt.savefig(threshold_plot_path)

Interpolating over missing data points, up to a set maximum gap

In [None]:
# Interpolate the thresholded positions over time with max_gap=25 and store
# the result in the dataset as a new variable.
ds_new["position_interpolated"] = ds_new.move.interpolate_over_time(
    max_gap=25,
    data_vars=["position_thresholded"],
)

# Plot the stored variable, then save the current figure to the interpolated-plots folder.
interpolated_position = ds_new["position_interpolated"]
interpolated_position.plot.line(
    x="time",
    row="keypoints",
    col="space",
    hue="individuals",
    aspect=2,
    size=2.5,
)
interpolated_plot_path = (
    plot_folder_1 / "SB019_male2_position_interpolated_threshold-80_max-gap-25.png"
)
plt.savefig(interpolated_plot_path)

Applying a median filter to the data with a set window size and minimum number of periods

In [None]:
# Median-filter the interpolated positions (window of 7, min_periods=2) and
# store the result in the dataset as a new variable.
window = 7
ds_new["position_smoothed"] = ds_new.move.median_filter(
    window,
    min_periods=2,
    data_vars=["position_interpolated"],
)

# Plot the stored variable, then save the current figure to the smoothed-plots folder.
smoothed_position = ds_new["position_smoothed"]
smoothed_position.plot.line(
    x="time",
    row="keypoints",
    col="space",
    hue="individuals",
    aspect=2,
    size=2.5,
)
smoothed_plot_path = (
    plot_folder_2 / "SB019_male2_position_smoothed_threshold-80_max-gap-25.png"
)
plt.savefig(smoothed_plot_path)

Saving clean data and generating reports

In [None]:
    
# Keep only the smoothed positions and the confidence values, and expose the
# smoothed variable under the name "position" for the writer and the report.
ds_to_save = ds_new[["position_smoothed", "confidence"]].copy()
ds_to_save = ds_to_save.rename({"position_smoothed": "position"})

# Build the output names from the file stem so that only the extension is
# swapped. str.replace(".csv", ...) would also rewrite a ".csv" occurring earlier
# in the name, and would leave a non-csv name unchanged, so the report could end
# up with the input file's own name.
file_stem = Path(file_name).stem
new_file_name = f"{file_stem}_clean.csv"
clean_file_path = clean_data_folder / new_file_name

# Remove any output left over from a previous run before writing the new file.
clean_file_path.unlink(missing_ok=True)
save_poses.to_dlc_file(ds_to_save, clean_file_path, split_individuals=False)

# Text report of NaN values for the cleaned position data.
nan_report = report_nan_values(ds_to_save.position, f"clean data for {file_name}")

report_file_name = f"{file_stem}_clean_nan_report.txt"
# Explicit encoding so the report does not depend on the platform's default locale.
with open(report_folder / report_file_name, "w", encoding="utf-8") as f:
    f.write(nan_report)
