In [1]:
#import psycopg2
import os
import json
import csv
import pandas as pd
from datetime import datetime as dt
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

Plan:
1) Data Collection
    - prase the JSON files
    - collect frames where there is only 1 person and the dance style is either ballet or tap
2) Feature Extraction
    - we have 17 keypoint coordinates we are looking at:
        - nose, R/L eye, R/L ear, R/L shoulder, R/L elbow, R/L wrist, R/L hip, R/L knee, R/L ankle
        - these coordinates will serve as our input data
3) Split the data into train/validation/test sets
4) Model
    - classification model

In [2]:
directory = '/Users/Shruti/Downloads/Dance_Dataset/densepose/txt'

#get directories without hidden files
directories = [d for d in os.listdir(directory) if os.path.isdir(os.path.join(directory, d))]
directories.sort()

#Setup dictionary to collect file names
all_file_names = {}

#Identify all file names
for d in directories:
    #get file names without hidden files
    files = [f for f in os.listdir(os.path.join(directory, d)) if os.path.isfile(os.path.join(directory, d, f))]
    files.sort()
    all_file_names[d] = files

In [3]:
for d in directories:
    if d == 'ballet':
        print('ballet')

ballet


In [4]:
#Count the number of videos in each video type
#Determined by checking last number string on each image and
#checking if it matches '0001'
num_videos = {}
first_img_indexes = {}

for d in directories:
    video_count = 0
    num_images = len(all_file_names[d])
    for i in range(num_images):
        if all_file_names[d][i].split('.')[0].split('_')[-1] == '0001':
            video_count += 1
    num_videos[d] = video_count

In [5]:
#Print out summary results
print(f"{'dance':<9} | {'Num Images':<10} | {'Num Videos':<9}")
print("-"*35)
for k,v in all_file_names.items():
    print(f'{k:<9} | {len(v):<10} | {num_videos[k]}')

dance     | Num Images | Num Videos
-----------------------------------
ballet    | 22410      | 89
break     | 25622      | 95
cha       | 28098      | 98
flamenco  | 24755      | 88
foxtrot   | 23738      | 79
jive      | 29100      | 106
latin     | 24460      | 90
pasodoble | 26607      | 98
quickstep | 24036      | 82
rumba     | 27262      | 94
samba     | 25807      | 96
square    | 27453      | 97
swing     | 26337      | 95
tango     | 24020      | 80
tap       | 28541      | 95
waltz     | 24380      | 80


Function: Number of people in a frame

In [6]:
def person_count(file_name):
    '''Input a JSON file and get the number of people in each frame. '''
    f = open(file_name, "r")
    j = json.load(f)
    f.close

    people_count = len(j)

    return people_count

In [7]:
# Testing results, should return 5
peeps = person_count('/Users/Shruti/Downloads/Dance_Dataset/densepose/txt/ballet/-5Yp-vToI2E_016_0001.json')
peeps

5

Extract frames where the dance type is 'ballet' or 'tap' and there is just 1 person in the frame.

In [8]:
def person_count(file_name):
    '''Input a JSON file and get the number of people in each frame. '''
    f = open(file_name, "r")
    j = json.load(f)
    f.close

    people_count = len(j)

    return people_count

In [9]:
# Setup lists to collect file paths
selected_files_ballet = []
selected_files_tap = []

# Identify all file names
for d in directories:
    if d == 'ballet':
        # Get file names without hidden files
        files = [f for f in os.listdir(os.path.join(directory, d)) if os.path.isfile(os.path.join(directory, d, f))]
        files.sort()
        for file_name in files:
            file_path = os.path.join(directory, d, file_name)
            # Count people in each frame
            num_people = person_count(file_path)
            if num_people == 1:  # Check if there is only one person
                selected_files_ballet.append(file_path)
    elif d == 'tap':
        # Get file names without hidden files
        files = [f for f in os.listdir(os.path.join(directory, d)) if os.path.isfile(os.path.join(directory, d, f))]
        files.sort()
        for file_name in files:
            file_path = os.path.join(directory, d, file_name)
            # Count people in each frame
            num_people = person_count(file_path)
            if num_people == 1:  # Check if there is only one person
                selected_files_tap.append(file_path)

# Print the sizes of the selected files lists
print("Number of selected files for ballet:", len(selected_files_ballet))
print("Number of selected files for tap:", len(selected_files_tap))

Number of selected files for ballet: 8351
Number of selected files for tap: 7078


In [10]:
print(selected_files_ballet[0])
print(selected_files_tap[0])

/Users/Shruti/Downloads/Dance_Dataset/densepose/txt/ballet/-sEgjXxjxIw_102_0026.json
/Users/Shruti/Downloads/Dance_Dataset/densepose/txt/tap/-ZxOm8B-BX4_020_0001.json


Machine Learning Classification Model - Random Forest

In [11]:
def get_body_position_info(file_name):
    '''Input a JSON file and get the body position of each person.  Outer list is
    list of people.  Inner list is x,y position of each body part for that person'''
    f = open(file_name, "r")
    j = json.load(f)
    f.close
    people= []

    for i,person in enumerate(j):
        #Set up list to collect person's body location
        person_i = []
        count = 0

        #add each body part position to the list
        for body_part in person:
            count+=1
            if count == 1: continue
            else: person_i.append(body_part[1])

        #Append entire list to list of people
        people.append(person_i)

    return people

In [12]:
get_body_position_info(selected_files_tap[0])

[[[1008.1487426757812, 361.0282897949219],
  [1025.12255859375, 348.037841796875],
  [995.1687622070312, 347.03857421875],
  [1045.091796875, 365.02532958984375],
  [971.2056884765625, 364.0260925292969],
  [1086.0286865234375, 447.9643249511719],
  [935.2611083984375, 450.9621276855469],
  [1168.90087890625, 458.95623779296875],
  [829.42431640625, 433.974609375],
  [1213.83154296875, 417.98638916015625],
  [762.5274658203125, 379.0150451660156],
  [1074.047119140625, 675.7966918945312],
  [979.193359375, 687.787841796875],
  [1068.056396484375, 839.6760864257812],
  [922.2811279296875, 839.6760864257812],
  [1132.956298828125, 996.5606689453125],
  [954.2318725585938, 1009.5511474609375]]]

In [13]:
def get_body_part_labels(file_name):
    '''Input a JSON file and get the body position of each person.  Outer list is
    list of people.  Inner list is x,y position of each body part for that person'''
    f = open(file_name, "r")
    j = json.load(f)
    f.close()
    
    # Extract body part labels from the first person entry in the JSON file
    body_part_labels = [body_part[0] for body_part in j[0][1:]]
   
    return body_part_labels

In [14]:
get_body_part_labels(selected_files_tap[0])

['nose',
 'left_eye',
 'right_eye',
 'left_ear',
 'right_ear',
 'left_shoulder',
 'right_shoulder',
 'left_elbow',
 'right_elbow',
 'left_wrist',
 'right_wrist',
 'left_hip',
 'right_hip',
 'left_knee',
 'right_knee',
 'left_ankle',
 'right_ankle']

**Creating Feature Map**

In [15]:
# Concatenate the lists of selected files for ballet and tap dance styles
all_selected_files = selected_files_ballet + selected_files_tap

# Split data into training and testing sets
train_files, test_files = train_test_split(all_selected_files, test_size=0.2, random_state=42)

# Initialize lists to store feature data and corresponding labels
train_feature_data = []
train_feature_data_flattened = []

train_labels = []

# Iterate through each JSON file representing a frame in the training set
for file_path in train_files:
    # Get body position info for the single person in the frame
    body_positions = get_body_position_info(file_path)
    
    train_feature_data.append(body_positions)

    # Flatten the list of x, y positions
    flattened_positions = [coord for point in body_positions for coord in point]
    
    # Append the flattened positions to the feature data list
    train_feature_data_flattened.append(flattened_positions)
    
    # Determine the label based on the file path (e.g., ballet or tap)
    # 1 for ballet
    # 0 for tap
    label = 1 if file_path in selected_files_ballet else 0
    train_labels.append(label)

# Convert the training feature data and labels into numpy arrays
X_train = np.array(train_feature_data)
X_train_flattened = np.array(train_feature_data_flattened)
y_train = np.array(train_labels)

print("Number of frames (rows) in X_train:", len(X_train_flattened))
print("Number of features (columns) in X_train:", len(X_train_flattened[0]))
print("Number of labels in y_train:", len(y_train))

Number of frames (rows) in X_train: 12343
Number of features (columns) in X_train: 17
Number of labels in y_train: 12343


In [16]:
for row in X_train_flattened:
    print(row)

[[565.23010254 170.81893921]
 [575.22070312 160.83850098]
 [564.23101807 160.83850098]
 [604.19354248 175.80915833]
 [557.23760986 175.80915833]
 [629.17010498 233.69567871]
 [563.23199463 250.66242981]
 [683.11950684 243.67611694]
 [528.26477051 266.63113403]
 [738.06793213 249.66438293]
 [492.29852295 268.62719727]
 [666.13543701 332.50198364]
 [637.16259766 344.47851562]
 [688.11480713 505.1635437 ]
 [786.02294922 340.48635864]
 [691.11199951 631.9151001 ]
 [881.93292236 341.484375  ]]
[[596.04138184 177.81181335]
 [603.03338623 170.81315613]
 [588.05059814 170.81315613]
 [615.01965332 171.81297302]
 [580.05975342 173.81257629]
 [631.00128174 212.80514526]
 [571.07006836 217.80418396]
 [649.97949219 255.79693604]
 [521.12738037 242.79940796]
 [623.01043701 292.78988647]
 [471.18466187 269.79425049]
 [628.00476074 313.78585815]
 [589.04943848 315.78549194]
 [621.01275635 408.76773071]
 [598.03912354 406.76812744]
 [607.02880859 485.75305176]
 [608.02764893 483.75341797]]
[[690.942443

In [17]:
for row in y_train:
    print(row)

1
1
0
0
1
1
0
0
1
1
0
1
0
1
0
1
0
1
0
1
1
1
1
0
0
1
0
1
0
0
1
0
0
1
0
1
0
1
0
1
0
1
1
1
0
1
0
0
0
0
1
1
0
1
0
0
0
1
1
1
1
0
0
0
1
0
1
1
1
0
1
1
1
1
0
1
1
1
1
1
1
0
1
0
0
1
1
0
1
1
0
1
1
1
0
0
0
0
1
1
1
0
1
1
1
0
1
1
0
0
1
0
1
0
1
0
0
1
1
1
0
1
1
0
0
0
1
0
0
0
1
0
1
1
0
0
1
1
0
0
1
0
0
1
0
0
0
0
1
1
1
1
1
0
0
0
0
0
1
1
1
0
0
0
1
0
0
1
0
0
0
1
0
1
1
0
0
0
1
0
1
0
1
1
0
0
1
0
0
1
0
1
1
1
0
1
0
1
0
1
1
0
0
0
1
1
0
0
1
1
0
1
0
1
1
0
0
1
0
0
0
1
1
1
0
0
1
0
1
1
1
0
0
1
0
0
1
1
0
1
1
1
1
0
1
0
0
1
1
0
1
1
1
1
1
1
0
0
1
0
1
0
1
1
0
0
1
1
0
1
1
0
0
0
1
0
1
0
1
1
1
0
1
1
1
0
0
0
0
0
1
0
0
0
0
1
1
1
1
0
1
0
1
1
0
0
1
0
0
0
0
1
1
1
1
1
0
0
1
1
1
0
1
1
1
0
1
1
1
0
1
0
1
1
1
0
1
1
0
1
1
0
1
0
1
1
0
0
1
1
1
0
0
1
1
1
0
1
1
1
1
1
0
1
0
1
0
1
1
0
1
1
1
1
0
1
1
0
0
1
1
1
1
1
1
1
0
0
0
1
0
1
1
1
0
1
1
1
1
0
0
1
0
1
1
1
1
1
0
0
1
1
1
1
1
0
0
1
0
0
0
1
0
0
1
0
1
1
1
1
1
0
0
0
0
1
0
1
0
0
0
1
0
1
1
0
0
0
1
1
1
1
0
0
1
1
0
1
1
1
1
1
1
1
0
0
0
1
1
1
0
0
1
1
1
1
1
0
0
0
1
0
0
1
1
0
1
1
1
1
1
1
0
0
0
0
1
1
1
0


In [18]:
# Initialize lists to store feature data and corresponding labels for the testing set
test_feature_data = []
test_labels = []

# Iterate through each JSON file representing a frame in the testing set
for file_path in test_files:
    # Get body position info for the single person in the frame
    body_positions = get_body_position_info(file_path)
    
    # Flatten the list of x, y positions
    flattened_positions = [coord for point in body_positions for coord in point]
    
    # Append the flattened positions to the feature data list for the testing set
    test_feature_data.append(flattened_positions)

     # Determine the label based on the file path (e.g., ballet or tap)
    # 1 for ballet
    # 0 for tap
    label = 1 if file_path in selected_files_ballet else 0
    test_labels.append(label)

# Convert the testing feature data and labels into numpy arrays
X_test_flattened = np.array(test_feature_data)
y_test = np.array(test_labels)

print("Number of frames (rows) in X_test:", len(X_test_flattened))
print("Number of features (columns) in X_test:", len(X_test_flattened[0]))
print("Number of labels in y_test:", len(y_test))


Number of frames (rows) in X_test: 3086
Number of features (columns) in X_test: 17
Number of labels in y_test: 3086


In [19]:
# Define the body part labels
body_part_labels = [
    "nose_x", "nose_y", 
    "left_eye_x", "left_eye_y", 
    "right_eye_x", "right_eye_y", 
    "left_ear_x", "left_ear_y", 
    "right_ear_x", "right_ear_y", 
    "left_shoulder_x", "left_shoulder_y", 
    "right_shoulder_x", "right_shoulder_y", 
    "left_elbow_x", "left_elbow_y", 
    "right_elbow_x", "right_elbow_y", 
    "left_wrist_x", "left_wrist_y", 
    "right_wrist_x", "right_wrist_y", 
    "left_hip_x", "left_hip_y", 
    "right_hip_x", "right_hip_y", 
    "left_knee_x", "left_knee_y", 
    "right_knee_x", "right_knee_y", 
    "left_ankle_x", "left_ankle_y", 
    "right_ankle_x", "right_ankle_y"
]

# Create a dictionary to map body part labels to column indices
body_part_mapping = {label: i for i, label in enumerate(body_part_labels)}

# Access the x-coordinate of the left eye for all frames
left_eye_x_coordinates = X_train_flattened[:, body_part_mapping["left_eye_x"]]
print(left_eye_x_coordinates)
print(len(left_eye_x_coordinates))

# Access the y-coordinate of the right shoulder for all frames
right_shoulder_y_coordinates = X_train_flattened[:, body_part_mapping["right_shoulder_y"]]
# print(right_shoulder_y_coordinates)
print(len(right_shoulder_y_coordinates))

# Access both x and y coordinates of the nose for all frames
nose_coordinates = X_train_flattened[:, [body_part_mapping["nose_x"], body_part_mapping["nose_y"]]]
# print(nose_coordinates)
print(len(nose_coordinates))

[[ 564.23101807  160.83850098]
 [ 588.05059814  170.81315613]
 [ 683.96746826  185.35261536]
 ...
 [1033.99133301  581.4720459 ]
 [ 879.79620361  465.55227661]
 [ 601.72113037  367.9515686 ]]
12343
12343
12343


In [20]:
print(X_train_flattened.shape)
print(X_train.shape)

(12343, 17, 2)
(12343, 1, 17, 2)


In [21]:
df_train = pd.DataFrame(X_train.reshape(X_train.shape[0], -1), columns=body_part_labels)
df_train['label'] = y_train
df_train

Unnamed: 0,nose_x,nose_y,left_eye_x,left_eye_y,right_eye_x,right_eye_y,left_ear_x,left_ear_y,right_ear_x,right_ear_y,...,right_hip_y,left_knee_x,left_knee_y,right_knee_x,right_knee_y,left_ankle_x,left_ankle_y,right_ankle_x,right_ankle_y,label
0,565.230103,170.818939,575.220703,160.838501,564.231018,160.838501,604.193542,175.809158,557.237610,175.809158,...,344.478516,688.114807,505.163544,786.022949,340.486359,691.112000,631.915100,881.932922,341.484375,1
1,596.041382,177.811813,603.033386,170.813156,588.050598,170.813156,615.019653,171.812973,580.059753,173.812576,...,315.785492,621.012756,408.767731,598.039124,406.768127,607.028809,485.753052,608.027649,483.753418,1
2,690.942444,191.340958,698.913879,184.354553,683.967468,185.352615,709.874573,191.340958,674.003174,191.340958,...,356.020599,712.863892,455.826416,673.006714,455.826416,704.892456,551.640015,681.974609,555.632263,0
3,602.562134,152.006775,612.522583,141.016220,590.609619,140.017075,623.479065,149.009354,570.688721,147.011063,...,380.810120,635.431580,500.707062,555.748047,502.705353,639.415771,603.618591,571.684753,636.590271,0
4,918.204346,413.830139,934.191528,400.848663,919.203552,399.850067,969.163452,419.821594,978.156250,420.820190,...,688.438599,886.229980,846.213684,928.196350,835.229309,1044.103394,896.142517,952.177124,977.027161,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12338,749.537048,421.660767,758.515686,398.662872,750.534668,396.663055,766.496704,402.662506,772.482483,397.662964,...,631.641479,779.465881,777.628052,850.297424,775.628235,744.548889,937.613403,835.333008,938.613281,1
12339,436.691437,46.170338,460.679230,34.186569,438.690430,33.187920,501.658417,44.173042,433.692963,41.177101,...,321.797028,491.663483,480.581970,389.715332,487.572510,479.669586,688.300659,415.702118,630.379089,0
12340,1026.991577,579.472473,1026.991577,515.486511,1033.991333,581.472046,1016.991760,435.503998,1018.991760,432.504669,...,654.456055,1003.992126,803.423462,838.996094,805.423035,1009.991943,948.391724,778.997559,944.392578,1
12341,884.795166,482.533386,904.791077,465.552277,879.796204,465.552277,942.783386,460.557831,867.798645,462.555603,...,738.248901,698.833008,844.131104,698.833008,842.133301,607.851562,1002.954407,1105.750244,978.981079,1


**Model**

X (feature matrix): 
- the x,y positions of the different body parts for each frame in our dataset
- each row in the matrix corresponds to one frame
- each column represents either the x or y position of a specific body part
- there are a total of 17 attributes - nose, R/L eye, R/L ear, R/L shoulder, R/L elbow, R/L wrist, R/L hip, R/L knee, R/L ankle 
- our matrix has a total of 34 columns (17x2 for each x,y coordinate)
- our matrix has a total of 15429 rows (8351 ballet frames; 7078 tap frames)

Y (target variable)
- represents the output label, in our case the dance style associated with each frame (ballet OR tap)

In [23]:
X_train_flattened_reshaped = X_train_flattened.reshape(X_train_flattened.shape[0], -1)
X_test_flattened_reshaped = X_test_flattened.reshape(X_test_flattened.shape[0], -1)
print(X_train_flattened_reshaped.shape)
print(X_test_flattened_reshaped.shape)
print(y_train.shape)
print(y_test.shape)

(12343, 34)
(3086, 34)
(12343,)
(3086,)


In [25]:
X_train_flattened_reshaped = X_train_flattened.reshape(X_train_flattened.shape[0], -1)
X_test_flattened_reshaped = X_test_flattened.reshape(X_test_flattened.shape[0], -1)

# Initialize the Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train_flattened_reshaped, y_train)

# Make predictions on the test data
y_pred = model.predict(X_test_flattened_reshaped)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9854180168502916


In [26]:
print(y_pred.shape)

(3086,)
