- Selects videos of interest: only usable videos with none of the annotated issue flags set ("perfect" videos), up to 3 videos per patient.
- Copies video files from a source directory (`dataset_path`) to a target directory (`target_path`) based on the 'ParsedFullVideoID' column in the DataFrame.
- Exports the updated DataFrame to a CSV file in the target directory.

In [None]:
import os
import shutil
import pandas as pd
from tqdm import tqdm

In [None]:
# Annotation table; a bare file name, so it is resolved against the current working directory.
csv_path = r"melda_swallow_annotations_final.csv"
# Source directory on the network share; videos are looked up as <dataset_path>/<PatientID>/<ParsedFullVideoID>.
dataset_path = r"\\fauad.fau.de\shares\ANKI\Projects\Swallowing\Data\from_Melda"
# Target directory that receives the copied videos and the exported CSV.
target_path = r"\\fauad.fau.de\shares\ANKI\Projects\Swallowing\Data\dataset_martin\Melda"


In [None]:
# Load the annotation table from csv_path into a DataFrame.
df = pd.read_csv(csv_path)
# Summary statistics of the loaded table (displayed as the cell output).
df.describe()

### select data to use

In [None]:
# Keep only "perfect" videos: marked usable and with none of the issue flags set.
issue_flag_columns = [
    "visibility_issues",
    "severe_movement",
    "bad_view",
    "wrong_perspective",
    "software_artifact",
    "bolus_present_initially",
    "accessory_artifact",
    "artifact_detected",
]
usable_mask = df["is_usable"] == 1
# A row is issue-free when the sum over all of its flag columns is zero.
issue_free_mask = df[issue_flag_columns].sum(axis=1) == 0
df = df[usable_mask & issue_free_mask]
# Summary statistics of the filtered table (displayed as the cell output).
df.describe()

In [None]:
# Count how many rows (videos) df currently holds for each PatientID
# (displayed as the cell output).
df.groupby("PatientID").size()

In [None]:
# Keep at most 3 videos per patient: head(3) takes the first 3 rows of each
# PatientID group in the DataFrame's current row order.
# NOTE(review): this selection is deterministic, not random. If a random pick
# is intended, the rows would have to be shuffled first -- confirm the intent.
df = df.groupby("PatientID").head(3)

In [None]:
# Display the current selection as the cell output.
df

In [None]:
def _normalize_patient_id(raw_id):
    """Return a patient ID as a plain integer string (e.g. 12.0 -> "12").

    String values keep only the part before the first comma; any other value
    is converted to text and cut at the first ".". The remaining text is
    parsed with int(), so a ValueError is raised when it is not an integer
    (for example for NaN).
    """
    if isinstance(raw_id, str):
        return str(int(raw_id.split(',')[0]))
    return str(int(str(raw_id).split('.')[0]))


# Work on an explicit copy: df is a filtered selection of the loaded table, and
# assigning a column to such a selection triggers pandas' SettingWithCopyWarning.
df = df.copy()

# Convert the PatientID column to integer-like strings so that it can be used
# as a folder name when building the source path below.
df['PatientID'] = df['PatientID'].apply(_normalize_patient_id)

# Ensure the target directory exists (no error if it is already there).
os.makedirs(target_path, exist_ok=True)

# Copy videos to the target directory, remembering every source file that is missing.
missing_videos = []
for video_id, patient_id in zip(df['ParsedFullVideoID'], df['PatientID']):
    print(f"Copying video {video_id}...")
    source_video = os.path.join(dataset_path, patient_id, video_id)
    target_video = os.path.join(target_path, video_id)

    # Skip (but report) videos that are not present in the source directory.
    if not os.path.exists(source_video):
        missing_videos.append(source_video)
        print(f"WARNING: source video not found, skipping: {source_video}")
        continue

    print("Video exists. Copying...")
    shutil.copy(source_video, target_video)

# Export the DataFrame to CSV in the target folder. All selected rows are
# written, including rows whose video file was not found.
df.to_csv(os.path.join(target_path, 'exported_videos.csv'), index=False)

if missing_videos:
    copied_count = len(df) - len(missing_videos)
    print(
        f"DataFrame exported; {copied_count} of {len(df)} videos copied, "
        f"{len(missing_videos)} source videos were missing:"
    )
    for missing_video in missing_videos:
        print(f"  {missing_video}")
else:
    print("Videos copied and DataFrame exported successfully.")