<h1>To make the process of dividing and sorting data into 80-20 faster, divide the data first before running the program</h1>


<h3>NOTE: Make sure that all directories exist before running this script!</h3>
<strong>Data</strong>
<ul> 
    <li>correct_raw</li>
    <li>incorrect_raw</li>
    
</ul>

<ul>
    <li>c_top_1</li>
    <li>c_top_2</li>
    <li>c_bot_1</li>
    <li>c_mid_1</li>
    <li>i_top_1</li>
    <li>i_top_2</li>
    <li>i_bot_1</li>
    <li>i_mid_1</li>
</ul>

<strong>LIBRARIES</strong>

In [1]:
import cv2 as cv
import numpy as np
import mediapipe as mp
from matplotlib import pyplot as plt
import time
import math
import os
import pathlib
import shutil
import random
from collections import Counter
from shutil import rmtree

<h1>Extract and copy each video frame to a list (<i>vid_copy</i>)</h1>

In [2]:
def estimate(origin, origin_str, label):
    """Run pose estimation on every video yielded by ``origin`` and segmentize it.

    Each file is opened as a video and every frame is passed through MediaPipe
    Pose. For each frame in which a pose is detected, a tuple
    ``(frame, y, round(y, 1))`` is stored, where ``y`` is the y-value of pose
    landmark 11. The collected list is then handed to ``segmentize``.

    Args:
        origin: Iterable of ``(root, directories, files)`` tuples, e.g. the
            result of ``os.walk``. If it is a generator it is consumed by this
            call and must be recreated before calling ``estimate`` again.
        origin_str: Path of the folder being walked. Kept for backward
            compatibility; file paths are built from each walk ``root`` so
            that files inside sub-folders resolve correctly.
        label: Label prefix forwarded to ``segmentize``.

    Raises:
        IOError: If a file cannot be opened as a video.
    """
    mpPose = mp.solutions.pose

    for root, _directories, files in origin:
        for file in files:
            video = os.path.join(root, file)
            cap = cv.VideoCapture(video)
            print(f"Processing Video: {video}")

            if not cap.isOpened():
                cap.release()
                raise IOError("Cant Load Video")

            vid_copy = []  # (frame, y, rounded y) for every frame with a detected pose

            try:
                # A fresh Pose object per video; the context manager closes it afterwards.
                with mpPose.Pose() as pose:
                    # Pressing any key in the preview window stops reading this video early.
                    while cv.waitKey(1) < 0:
                        hasFrame, frame = cap.read()
                        if not hasFrame:
                            break

                        imgRGB = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
                        results = pose.process(imgRGB)

                        if results.pose_landmarks:
                            # Landmark 11 is the reference landmark.
                            # NOTE(review): MediaPipe's landmark table lists index 11 as the
                            # LEFT shoulder (12 is the right one) - confirm which is intended.
                            lm = results.pose_landmarks.landmark[11]
                            vid_copy.append((frame, lm.y, round(lm.y, 1)))

                        cv.imshow("Realtime", frame)
                        cv.waitKey(1)
            finally:
                # Runs on end-of-video, on key press and on errors alike,
                # so the capture handle and the window never leak.
                cap.release()
                cv.destroyAllWindows()

            if not vid_copy:
                # No pose found in any frame: there is nothing to segmentize.
                print(f"No pose landmarks detected, skipping: {video}")
                continue

            # Segmentize after each video iteration
            segmentize(vid_copy, file, label)

<h1>Extract squat segments from <i>vid_copy</i></h1>
<h3> GUIDE: vid_copy[<i>element_index</i>][<i>tuple_index</i>] | <i>tuple_index</i>: 0 = frame, 1 = r_shoulder(11) y-value, 2 = y-value rounded to 1 decimal place</h3>

<strong> SQUAT SEGMENTS: </strong>
<ul>
    <li>top_1: first element of <i>vid_copy</i></li>
    <li>top_2: last element of <i>vid_copy</i></li>
    <li>bot_1: element of <i>vid_copy</i> where the shoulder is lowest in the frame, i.e. the largest y-value (2nd element of tuple)</li>
    <li>mid_1: element of <i>vid_copy</i> between bot_1 and top_1</li>
</ul>


<i>Note: I'm storing the segments into a data structure. This may seem unnecessary since we can simply save the images into the directory and all's fine but this MIGHT come in handy in the future. </i>

In [3]:
# directory utilities
path = os.walk(os.path.abspath(os.getcwd())) # os.walk generator over the current working directory
# Folders that hold the raw videos for each class.
c_origin_str = os.path.abspath(os.getcwd()) + "/correct_raw"
i_origin_str = os.path.abspath(os.getcwd()) + "/incorrect_raw"
# os.walk returns a one-shot generator: once estimate() has iterated over
# c_origin / i_origin, re-run this cell before calling estimate() again.
c_origin = os.walk(c_origin_str)
i_origin = os.walk(i_origin_str)

# Label prefixes used in folder names ("i" is passed with incorrect_raw, "c" with correct_raw).
labels = ["i", "c"]
# Segment names; the order matches the order in which segmentize() fills its list.
classes = ["top_1" , "top_2", "bot_1", "mid_1"]

# Cell output only: shows that c_origin_str is a plain str.
type(c_origin_str)

str

In [4]:
def segmentize(vid_copy, file, label):
    """Pick the four squat segment frames of one video and save them as PNGs.

    Segments, in the order of the module-level ``classes`` list:
        top_1: frame of the first entry of ``vid_copy``.
        top_2: frame of the last entry of ``vid_copy``.
        bot_1: frame with the largest landmark y-value (first one on ties).
        mid_1: frame halfway between the first entry whose rounded y exceeds
            the first entry's rounded y and the first entry whose rounded y
            equals the bot_1 entry's rounded y.

    Args:
        vid_copy: List of ``(frame, y, rounded_y)`` tuples where ``rounded_y``
            is ``y`` rounded to 1 decimal place.
        file: Name of the source video; its extension is replaced by ``.png``.
        label: Label prefix of the output folders (``{label}_{class_name}``).

    Raises:
        IOError: If an image could not be written.
    """
    if not vid_copy:
        # Nothing was detected in the video, so there are no frames to pick from.
        print(f"No frames with pose landmarks for {file}; nothing saved")
        return

    top_1 = vid_copy[0][0]
    top_2 = vid_copy[-1][0]

    # max() returns the first maximal element, matching a strict ">" scan.
    bot1_idx = max(range(len(vid_copy)), key=lambda i: vid_copy[i][1])
    bot_1 = vid_copy[bot1_idx][0]

    # Tuple index 2 holds the y-value rounded to 1 decimal place.
    start_yval = vid_copy[0][2]
    bottom_yval = vid_copy[bot1_idx][2]

    # First entry that has moved away from the top_1 level (0 if none has).
    change_idx = next(
        (i for i, entry in enumerate(vid_copy) if entry[2] > start_yval), 0
    )
    # First entry that has reached the bot_1 level.
    change_idx_bot = next(
        (j for j, entry in enumerate(vid_copy) if entry[2] == bottom_yval), 0
    )
    mid_1 = vid_copy[(change_idx + change_idx_bot) // 2][0]

    segments = [top_1, top_2, bot_1, mid_1]

    # splitext copes with extensions of any length (".mp4", ".mpeg", ...).
    stem = os.path.splitext(file)[0]
    base_dir = os.path.abspath(os.getcwd())

    # Save images from segments list
    for class_name, image in zip(classes, segments):
        out_dir = f"{base_dir}/{label}_{class_name}"
        os.makedirs(out_dir, exist_ok=True)
        out_path = f"{out_dir}/{stem}.png"
        # cv.imwrite reports failure through its return value.
        if not cv.imwrite(out_path, image):
            raise IOError(f"Could not write image: {out_path}")

In [5]:
# One class is processed per run. The call for the "i" label is disabled here;
# uncomment it to (re)generate the i_* segment images.
# estimate(i_origin, i_origin_str, "i")

# Extract and save the segment images for the videos in correct_raw.
estimate(c_origin, c_origin_str, "c" )

In [6]:
def create_class_directories(dir_str):
    """Create one ``{label}_{class_name}`` folder inside ``dir_str`` per label/class pair.

    Args:
        dir_str: Existing parent folder in which the class folders are created.

    Raises:
        FileExistsError: If one of the class folders already exists.
    """
    folder_names = [
        f"{label}_{class_name}" for label in labels for class_name in classes
    ]
    for folder_name in folder_names:
        os.mkdir(f"{dir_str}/{folder_name}")

In [7]:
def copy_files(filename, val, condition, label):
    """Copy the segment images of one sample into the split folder tree.

    For every class, ``{cwd}/{label}_{class}/{filename}`` is copied to
    ``{cwd}/{val}/{condition}/{label}_{class}/{filename}``. When ``condition``
    is ``"validation"`` each image is additionally copied to
    ``{cwd}/{val}/test/`` as ``{stem}_{class index}.png``.

    Args:
        filename: Image file name, present in every ``{label}_{class}`` folder.
        val: Name of the split folder under the current working directory.
        condition: Sub-folder of the split, e.g. ``"train"`` or ``"validation"``.
        label: Label prefix of the class folders.
    """
    dir_str = os.path.abspath(os.getcwd())

    source = f"{dir_str}/{label}_"
    destination = f"{dir_str}/{val}"
    stem = os.path.splitext(filename)[0]

    for ctr, class_name in enumerate(classes):
        src_path = f"{source}{class_name}/{filename}"
        shutil.copy(src_path, f"{destination}/{condition}/{label}_{class_name}/{filename}")

        if condition == "validation":
            # NOTE(review): the test file name carries no label, so samples of
            # different labels that share a file name overwrite each other here.
            shutil.copy(src_path, f"{destination}/test/{stem}_{ctr}.png")

In [8]:
def div_data(val, seed=None):
    """Randomly split the samples of every label into train and validation sets.

    The sample list of a label is taken from its ``{label}_top_1`` folder.
    ``val`` percent of the samples (rounded down) are copied as ``"train"``,
    the rest as ``"validation"``.

    Args:
        val: Percentage of samples that go to the train set; also the name of
            the split folder passed on to ``copy_files``.
        seed: Optional seed for a reproducible split. ``None`` (default) gives
            a different random split on every call.
    """
    rng = random.Random(seed)

    for label in labels:
        dir_str = f"{os.path.abspath(os.getcwd())}/{label}_top_1"
        # Sorted so that a given seed selects the same files regardless of
        # the order in which the file system lists them.
        filenames = sorted(next(os.walk(dir_str), (None, None, []))[2])  # [] if no file

        # Integer arithmetic: the float form len * (val * 0.01) can land just
        # below the exact value and truncate one short (e.g. 100 files at 58%).
        train_count = int(len(filenames) * val // 100)
        train_files = rng.sample(filenames, train_count)

        train_set = set(train_files)
        test_files = [name for name in filenames if name not in train_set]

        for filename in train_files:
            copy_files(filename, val, "train", label)

        for filename in test_files:
            copy_files(filename, val, "validation", label)

        print(label, " files: ", len(filenames))


In [9]:
def create_copies():
    """Create the ``100/train`` folder tree under the current directory and fill it.

    The split percentage is fixed at 100, so ``div_data`` assigns every sample
    to the train set; that is why no ``validation`` or ``test`` folders are
    created here.

    Raises:
        FileExistsError: If one of the folders to create already exists.
    """
    train_percent = 100
    base_dir = os.path.abspath(os.getcwd())
    dir_str = f"{base_dir}/{train_percent}"
    train_dir = f"{dir_str}/train"

    # Split folder first, then its train sub-folder.
    for folder in (dir_str, train_dir):
        os.mkdir(folder)
    create_class_directories(train_dir)

    div_data(train_percent)
    print("Directory Created!")

In [10]:
# Build the 100/train folder tree and copy the extracted segment images into it.
create_copies()

i  files:  307
c  files:  272
Directory Created!


In [11]:
def augment_data(label):
    """Add a horizontally flipped copy of every training image of ``label``.

    The file list is read from ``{cwd}/100/train/{label}_bot_1/`` and reused
    for every class folder. Each flipped image is saved next to its source as
    ``{stem}(1).png``.

    Files that are themselves the flipped copy of another listed file are
    skipped, so running this function again does not produce ``(1)(1)`` files.

    Args:
        label: Label prefix of the class folders to augment.
    """
    for i in range(100, 101, 10):
        dir_str = f"{os.path.abspath(os.getcwd())}/{i}/train/"
        filenames = next(os.walk(dir_str + f"{label}_bot_1/"), (None, None, []))[2]  # [] if no file

        # Keep only source images: "name(1).png" is treated as an augmented
        # copy when "name.png" is present in the same listing.
        known = set(filenames)
        originals = []
        for filename in filenames:
            stem, ext = os.path.splitext(filename)
            if stem.endswith("(1)") and f"{stem[:-3]}{ext}" in known:
                continue
            originals.append(filename)

        for class_name in classes:
            class_dir = f"{dir_str+label}_{class_name}"

            for filename in originals:
                img = cv.imread(f"{class_dir}/{filename}")
                if img is None:
                    # cv.imread returns None for a missing or unreadable file.
                    print(f"Could not read {class_dir}/{filename}, skipping")
                    continue

                img_flipped = cv.flip(img, 1)  # flip code 1 = horizontal flip
                file_dir = f"{class_dir}/{os.path.splitext(filename)[0]}(1).png"
                cv.imwrite(file_dir, img_flipped)

            print(f"Done Augmenting {label}_{class_name}")

        print(f"**************Done Augmenting {i}*************************")

In [12]:
# Add flipped copies of the training images of the "i" label.
augment_data("i")

Done Augmenting i_top_1
Done Augmenting i_top_2
Done Augmenting i_bot_1
Done Augmenting i_mid_1
**************Done Augmenting 100*************************


In [13]:
# Add flipped copies of the training images of the "c" label.
augment_data("c")

Done Augmenting c_top_1
Done Augmenting c_top_2
Done Augmenting c_bot_1
Done Augmenting c_mid_1
**************Done Augmenting 100*************************
