In [None]:
import os
import pandas as pd
# Root folder that contains one "<dancer>_<pattern>" sub-folder per recording set.
base_folder = "/content/drive/MyDrive/Dance_project_data"

# Dancer IDs to process; range(9, 10) yields only "D9".
dancers = [f"D{dancer_id}" for dancer_id in range(9, 10)]

# Pattern suffixes that are combined with each dancer ID to build folder names.
patterns = ["All_CSVs", "NewZigZag", "Circle", "train"]

# Cleaned DataFrames, filled in by the processing loop and keyed by file name.
cleaned_dataframes = dict()

def clean_file(file_path):
    """Keep the first column plus the labeled "Marker" columns of one CSV file.

    The third line of the file is treated as the column-type row: every
    column whose entry there is exactly "Marker" is kept, together with
    column 0. The file is then parsed with that third line as the header, and
    any kept column whose first row below the header contains "Unlabeled" is
    dropped.

    Parameters
    ----------
    file_path : str or path-like
        CSV file to read.

    Returns
    -------
    pandas.DataFrame or None
        The filtered frame, or None when the file could not be processed
        (the reason is printed).
    """
    try:
        # Only the third line is needed to locate the marker columns, so read
        # just three lines. readline() returns '' at end of file instead of
        # raising, which keeps short files usable and error messages meaningful.
        with open(file_path, 'r') as f:
            header_lines = [f.readline() for _ in range(3)]
        marker_line = header_lines[2].strip().split(',')
        marker_indices = [i for i, val in enumerate(marker_line) if val.strip() == "Marker"]
        if not marker_indices:
            # Without this guard the result would silently contain column 0 only.
            raise ValueError('no "Marker" columns found on line 3')
        columns_to_keep = [0] + marker_indices
        # skiprows=2 makes the third line the header, so the integer positions
        # computed above line up with the parsed columns.
        df = pd.read_csv(file_path, skiprows=2, usecols=columns_to_keep, engine="python")
        if df.empty:
            raise ValueError("no rows below the header line")
        # The first row below the header is searched for "Unlabeled" entries.
        unlabeled_mask = df.iloc[0].astype(str).str.contains("Unlabeled", na=False)
        return df.loc[:, ~unlabeled_mask]

    except Exception as e:
        # Best-effort batch processing: report the problem and let the caller skip the file.
        print(f"Error in file {file_path}: {e}")
        return None
# Walk every "<dancer>_<pattern>" folder, clean each raw CSV with clean_file()
# and write the result next to the input as "cleaned_<name>".
for dancer in dancers:
    for pattern in patterns:
        folder_path = os.path.join(base_folder, f"{dancer}_{pattern}")
        # isdir (rather than exists) also rejects a regular file with that
        # name, which os.listdir could not handle.
        if not os.path.isdir(folder_path):
            print(f"Skipping {folder_path} (Folder not found)")
            continue
        # Outputs are written into the same folder as the inputs, so skip
        # anything that already carries the "cleaned_" prefix; otherwise a
        # re-run of this cell would re-clean its own output and produce
        # "cleaned_cleaned_*.csv". Sorting makes the processing order stable.
        files = sorted(
            f for f in os.listdir(folder_path)
            if f.endswith(".csv") and not f.startswith("cleaned_")
        )
        for file in files:
            file_path = os.path.join(folder_path, file)
            df_cleaned = clean_file(file_path)
            if df_cleaned is not None:
                # NOTE: keyed by bare file name, so identically named files in
                # different folders overwrite each other in this dict.
                cleaned_dataframes[file] = df_cleaned
                save_path = os.path.join(folder_path, f"cleaned_{file}")
                df_cleaned.to_csv(save_path, index=False)
                print(f"Cleaned and saved: {file}: {save_path}")
print("\n All dancer files cleaned!")

In [None]:
import pathlib
BASE_DIR = pathlib.Path("/content/drive/MyDrive/Dance_original_data")
# Match the extension case-insensitively in a single pass: globbing "*.csv"
# and "*.CSV" separately misses mixed-case names such as ".Csv" and lists
# every file twice on case-insensitive filesystems. is_file() keeps
# directories whose name happens to end in ".csv" out of the list.
csv_files = sorted(
    path for path in BASE_DIR.rglob("*")
    if path.is_file() and path.suffix.lower() == ".csv"
)
print(f"\nFound {len(csv_files)} CSV file(s):")
for f in csv_files:
    print("  -", f.relative_to(BASE_DIR))

In [None]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
# Input tree that is searched for CSV files, and the root under which the
# cleaned copies are written.
BASE_DIR = Path("/content/drive/MyDrive/Dance_original_data")
OUTPUT_DIR = Path("/content/drive/MyDrive/Dance_clean_data")

# Create the output root if it is missing; an existing folder is accepted.
# Parent folders are not created, so the parent must already exist.
OUTPUT_DIR.mkdir(exist_ok=True)

def clean_csv(input_path, output_folder):
    """Clean one CSV file and write it as both ``.csv`` and ``.npy``.

    The file is read with its first five rows as a multi-level header. Columns
    whose second header level contains "Unlabeled" or "Frame" are removed,
    rows that still contain missing values are dropped, and the remainder is
    cast to float. The result is written under ``output_folder`` at the same
    path the input has relative to ``BASE_DIR``, with a ``.npy`` copy of the
    values next to it.

    Parameters
    ----------
    input_path : str or pathlib.Path
        CSV file located under ``BASE_DIR``.
    output_folder : str or pathlib.Path
        Root folder for the cleaned output.

    Returns
    -------
    None
        Failures are caught and reported with a printed message.
    """
    # Accept plain strings as well as Path objects; the code below (including
    # the error message) relies on Path attributes.
    input_path = Path(input_path)
    output_folder = Path(output_folder)
    try:
        df = pd.read_csv(input_path, header=[0, 1, 2, 3, 4])
        # Each column label is a 5-tuple (one entry per header row); index 1
        # is the level that is checked for "Unlabeled" / "Frame".
        cols_to_keep = [col for col in df.columns if "Unlabeled" not in col[1] and "Frame" not in col[1]]
        # Select the columns BEFORE dropping rows: a missing value in a column
        # that is discarded anyway must not remove the whole row.
        df = df[cols_to_keep].dropna().astype(float)
        # Mirror the input's location below BASE_DIR inside the output folder.
        rel_path = input_path.relative_to(BASE_DIR)
        cleaned_csv_path = output_folder / rel_path
        cleaned_csv_path.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(cleaned_csv_path, index=False)
        npy_path = cleaned_csv_path.with_suffix(".npy")
        np.save(npy_path, df.values)
        print(f"Cleaned: {rel_path}")
    except Exception as e:
        # Best-effort batch processing: report and continue with the next file.
        print(f"Failed: {input_path.name} — {e}")

# Collect CSV files regardless of extension case. The earlier listing cell
# also reports upper-case ".CSV" files, and globbing only "*.csv" here would
# leave those listed but never cleaned. Sorting makes the order stable.
csv_files = sorted(
    path for path in BASE_DIR.rglob("*")
    if path.is_file() and path.suffix.lower() == ".csv"
)
print(f"Found {len(csv_files)} CSVs")

for csv_file in csv_files:
    clean_csv(csv_file, OUTPUT_DIR)