<a href="https://colab.research.google.com/github/margaretxie/Test_Paper3_with_UCO/blob/main/UCO_v2_cam234_got_1296_videos_estimated_counts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#test paper 3 - the HK one - with UCO dataset (only cam234, not cam 0 1, total 1296 videos )

# Paper 3 - https://link.springer.com/article/10.1007/s13755-023-00258-3  -skeleton based repeptition count method
# The paper's code - webpage: https://github.com/YuChengHSU/repetition-counting/tree/main

# UCO dataset [paper20] - downloaded from  http://rabinf24.uco.es/avaphy/    (Outlook Emailed got access )


# v2: apply paper3's model - with KPs which visibility >=threshold
# v1: extract each videos' KPs  (this code only for half cameras (2,3,4))
# v0: framework of looping all the .mp4 files, with name PnAn (Person*Action*)

In [2]:
# setup

v="V2"

# !! need to put "rep-count.py" in the same folder/location.
# The rep-count.py file could be found from github or https://github.com/YuChengHSU/repetition-counting/blob/main/rep-count.py

from google.colab import drive
import pandas as pd
import numpy as np
import os
import subprocess
from datetime import datetime

import pytz


# Mount Google Drive
drive.mount('/content/drive')

results = []

base_path = '/content/drive/My Drive/repetition_dataset/UCO/extracedKP_npy_cam234'

filterKP_path  =  "/content/drive/My Drive/repetition_dataset/UCO/filteredKP_npy_cam234"   #  filtered .npy files here

save_path = '/content/drive/My Drive/repetition_dataset/UCO/results_cam234'

threshold = 0.5

f=25 # (frame rate)

# Ensure the output directory exists
os.makedirs(base_path, exist_ok=True)





Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:


if not os.path.exists(filterKP_path):
    os.makedirs(filterKP_path)

# List all .npy files in the folder
npy_files = [f for f in os.listdir(base_path) if f.endswith('.npy')]

# Initialize an empty list to store results
results = []

#####

if not os.path.exists(filterKP_path):
    os.makedirs(filterKP_path)

# List all .npy files in the folder
npy_files = [f for f in os.listdir(base_path) if f.endswith('.npy')]


# Initialize an empty list to store results and skipped files
results = []
skipped_files = []

# Process each .npy file
for npy_file in npy_files:
    video_name = npy_file.replace(".npy", ".mp4")
    frame_rate = f

    # Construct the filename for the keypoints and load them
    kp_filename = npy_file
    print("\nKP filename:", kp_filename)

    keypoints_path = os.path.join(base_path, kp_filename)
    keypoints = np.load(keypoints_path)
    frame_number = keypoints.shape[0]
    print("Frame number: ", frame_number)

    if frame_number == 0:
        skipped_files.append(kp_filename)
        continue

    # Calculate the average visibility for each keypoint
    average_visibility = np.mean(keypoints[:, :, 3], axis=0)

    # Filter out keypoints where the average visibility is less than threshold
    valid_kp_indices = np.where(average_visibility >= threshold)[0]
    filtered_keypoints = keypoints[:, valid_kp_indices, :3]  # Select only x, y, z dimensions

    # Save the filtered keypoints to a new file
    filtered_kp_path = os.path.join(filterKP_path, f"filtered_{kp_filename}")
    np.save(filtered_kp_path, filtered_keypoints)

    # Prepare the command to run the external Python script with the necessary arguments
    command = [
        "python", "rep-count.py",
        "--data", filtered_kp_path,
        "-j", str(len(valid_kp_indices)),
        "-d", "3",  # Only three dimensions are considered (x, y, z)
        "-f", str(frame_rate),
        "--wins", "256",
        "--noverlap", "1"
    ]

    # Execute the command and capture the output
    try:
        result = subprocess.run(command, check=True, capture_output=True, text=True)
        output = result.stdout.strip()
        estimated_count = int(output.split()[-1])  # Parse the count from the output
        print("Output:", output)
    except subprocess.CalledProcessError as e:
        print("Failed to execute command:", e)
        print("Error output:", e.stderr)
        estimated_count = None

    # Append results to the list
    results.append([video_name, len(valid_kp_indices), valid_kp_indices.tolist(), estimated_count, frame_number])

# Get the correct current time
local_tz = pytz.timezone('America/New_York')  # Replace with system local time zone if different
current_time = datetime.now(local_tz)
print("Local time:", current_time.strftime('%Y-%m-%d %I:%M %p'))

# Generate file name
current_time_str = current_time.strftime('%Y%m%d%I%M%p').lower()


# Define output file name: with used KP number, and current time
file_name = f"{v}_Cam234_output_{current_time_str}.csv"
print("Saved file name:", file_name)



# Make sure the save path exists
os.makedirs(save_path, exist_ok=True)

# Define the full output CSV path
output_csv_path = os.path.join(save_path, file_name)

# Convert results to DataFrame and save to CSV
results_df = pd.DataFrame(results, columns=['Video Name', 'KP Numbers', 'KP Used', 'Estimated Count', 'Frame Number'])
results_df.to_csv(output_csv_path, index=False)

print("\n######## First five rows of results_df:\n")
print(results_df.head(5))

print("\n######## Last five rows of results_df:\n")
print(results_df.tail(5))

print(f"\nResults saved to {output_csv_path}")

# Print the number of skipped files and their names
print(f"\nNumber of .npy files skipped due to frame number = 0: {len(skipped_files)}")
print("Skipped files:")
for skipped_file in skipped_files:
    print(skipped_file)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Frame number:  785
Output: Namespace(data='/content/drive/My Drive/repetition_dataset/UCO/filteredKP_npy_cam234/filtered_p12a09c3.npy', j=33, d=3, f=25, wins=256, noverlap=1)
Estimated count 4

KP filename: p12a09c4.npy
Frame number:  784
Output: Namespace(data='/content/drive/My Drive/repetition_dataset/UCO/filteredKP_npy_cam234/filtered_p12a09c4.npy', j=27, d=3, f=25, wins=256, noverlap=1)
Estimated count 4

KP filename: p12a10c2.npy
Frame number:  761
Output: Namespace(data='/content/drive/My Drive/repetition_dataset/UCO/filteredKP_npy_cam234/filtered_p12a10c2.npy', j=33, d=3, f=25, wins=256, noverlap=1)
Estimated count 6

KP filename: p12a10c3.npy
Frame number:  761
Output: Namespace(data='/content/drive/My Drive/repetition_dataset/UCO/filteredKP_npy_cam234/filtered_p12a10c3.npy', j=33, d=3, f=25, wins=256, noverlap=1)
Estimated count 6

KP filename: p12a10c4.npy
Frame number:  760
Output: Namespace(data='/content/dri

In [14]:
# to confirm whether 1296 files, only for checking
import os


def count_files_in_folder(folder_path):
    file_count = 0
    for root, dirs, files in os.walk(folder_path):
        file_count += len(files)
    return file_count


# Count the files in the folder
file_count = count_files_in_folder(filterKP_path)
print(f"Total number of files in folder '{filterKP_path}': {file_count} - should be {1296 - len(skipped_files)} (1296 - # of skipped files) ")


Total number of files in folder '/content/drive/My Drive/repetition_dataset/UCO/filteredKP_npy_cam234': 1295 - should be 1295 (1296 - # of skipped files) 


In [5]:
# saved the estimated counts of the 1296 videos

# get correct current time (before the time got from code was earlier than correct time 4 hours)

from datetime import datetime
import pytz

# Check system current time
system_time = datetime.now()
print("System current time:", system_time)

# Specify local time zone !!!!
local_tz = pytz.timezone('America/New_York')  # Replace with system local time zone !!!
current_time = datetime.now(local_tz)
print("Local time:", current_time.strftime('%Y-%m-%d %I:%M %p'))

# Generate file name

current_time_str = current_time.strftime('%Y%m%d%I%M%p').lower()

# define output file name: with used KP number, and current time

file_name = f"{v}_Cam234_output_{current_time_str}.csv"
print("Saved file name:", file_name)

output_csv_path = os.path.join(save_path, file_name)


# Convert results to DataFrame and save to CSV
results_df = pd.DataFrame(results, columns=['Video Name', 'KP Numbers', 'KP Used', 'Estimated Count', 'Frame Number'])
#output_csv_path = os.path.join(base_path, file_name)
results_df.to_csv(output_csv_path, index=False)

print("\n########First five row_s of results_df:\n")
print(results_df.head(5))

print("\n########Last five row_s of results_df:\n")
print(results_df.tail(5))

print(f"\nResults saved to {output_csv_path}")

System current time: 2024-05-26 20:08:50.058650
Local time: 2024-05-26 04:08 PM
Saved file name: V2_Cam234_output_202405260408pm.csv

########First five row_s of results_df:

    Video Name  KP Numbers                                            KP Used  \
0  p6a03c4.mp4          28  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...   
1  p6a04c2.mp4          28  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...   
2  p6a04c3.mp4          28  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...   
3  p6a04c4.mp4          28  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...   
4  p6a05c2.mp4          28  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14,...   

   Estimated Count  Frame Number  
0                4           793  
1                4          1010  
2                6          1010  
3                4          1010  
4                5           707  

########Last five row_s of results_df:

       Video Name  KP Numbers  \
1290  p6a02c2.mp4          26   
1291  p6a02c3.mp4    