In [12]:
import json
import numpy as np
import random
import glob

The purpose of this notebook is to create a train and test set for use in modeling. The test set will be 20% of the videos. The videos will be shuffled and then concatenated into one long array for each set.

I'm also going to save the d2m and yt sets as npz instead of json because this seems to be an faster, better data structure! This only needs to happen once. The json files are slow to load, so skip the next few cells in the future.

In [None]:
# #Load both d2m sets (original and scaled).
# with open('../bopbot_data/d2m_dict_data.json', 'r') as f:
#     d2m_dict_data = json.load(f)
# with open('../bopbot_data/d2m_scaled.json', 'r') as f:
#     d2m_scaled = json.load(f)   

In [None]:
# #Convert the data back to arrays from lists.
# for key in d2m_dict_data:
#     d2m_dict_data[key] = np.array(d2m_dict_data[key])
# for key in d2m_scaled:
#     d2m_scaled[key] = np.array(d2m_scaled[key])

In [None]:
# #Save the d2m sets as npz files.
# np.savez_compressed('../bopbot_data/d2m_orig.npz',d2m_orig_dict = d2m_dict_data)
# np.savez_compressed('../bopbot_data/d2m_scaled.npz',d2m_scaled_dict = d2m_scaled)

In [3]:
# #Load both yt sets (original and scaled).
# with open('../bopbot_data/yt_data.json', 'r') as f:
#     yt_data = json.load(f)
# #Convert the data back to arrays from lists.
# for key in yt_data:
#     yt_data[key] = np.array(yt_data[key])
# #Save the yt sets as npz files.
# np.savez_compressed('../bopbot_data/yt_orig.npz',yt_orig_dict = yt_data)

In [None]:
# #Load both yt sets (original and scaled).
# with open('../bopbot_data/yt_scaled.json', 'r') as f:
#     yt_scaled = json.load(f)
# #Convert the data back to arrays from lists.
# for key in yt_scaled:
#     yt_scaled[key] = np.array(yt_scaled[key])
# #Save the yt sets as npz files.
# np.savez_compressed('../bopbot_data/yt_scaled.npz',yt_scaled_dict = yt_scaled)

In [13]:
#Reload batch0900 because it was empty.
batch0900 = glob.glob("../vid2vid/few-shot-vid2vid/NumpyPoses/batchnew/*.npy")
vid2vid_videos9 = {}
for file in range(len(batch0900)):
    vid2vid_videos9[file] = np.load(batch0900[file])


Now that the d2m and yt data is saved as npz, do the following for those in the event anyone re-runs this notebook. (Which I had to do almost immediately because the kernel crashed. :-/ )

In [2]:
#Load the d2m data. (Just the scaled version... it's already saved as npz)
d2m_scaled = np.load('../bopbot_data/d2m_scaled.npz',allow_pickle=True)
d2m_scaled.files

['d2m_scaled_dict']

In [3]:
#Load the d2m data. (Just the scaled version... it's already saved as npz)
yt_scaled = np.load('../bopbot_data/yt_scaled.npz',allow_pickle=True)
yt_scaled.files

['yt_scaled_dict']

In [4]:
#Load the v2v data. (Just the scaled version... it's already saved as npz)
v2v_scaled = np.load('../bopbot_data/v2v_scaled.npz',allow_pickle=True)
v2v_scaled.files

['batch0',
 'batch1',
 'batch2',
 'batch3',
 'batch4',
 'batch5',
 'batch6',
 'batch7',
 'batch8',
 'batch9']

In [16]:
#reload batch9
batch9_scaled = np.load('../bopbot_data/batch9_scaled.npz',allow_pickle=True)
batch9_scaled.files

['batch9']

We only need to keep a handful of videos for test data. Let's take videos from each of the d2m sets, the yt set and batch9 of vid2vid. (Batch9 is the set that doesn't have any duplicates.) Get the total number of videos in those sets.

In [6]:
d2m_scaled = d2m_scaled['d2m_scaled_dict'][()]
d2m_scaled.keys()

dict_keys(['zumba0000', 'zumba0001', 'zumba0002', 'zumba0003', 'zumba0004', 'zumba0005', 'zumba0006', 'zumba0007', 'zumba0008', 'zumba0009', 'zumba0010', 'zumba0011', 'zumba0012', 'zumba0013', 'zumba0014', 'zumba0015', 'zumba0016', 'zumba0017', 'zumba0018', 'zumba0019', 'zumba0020', 'zumba0021', 'zumba0022', 'zumba0023', 'zumba0024', 'zumba0025', 'zumba0026', 'zumba0027', 'zumba0028', 'zumba0029', 'zumba0030', 'zumba0031', 'zumba0032', 'zumba0033', 'zumba0034', 'zumba0035', 'zumba0036', 'zumba0037', 'zumba0038', 'zumba0039', 'zumba0040', 'zumba0041', 'zumba0042', 'zumba0043', 'zumba0044', 'zumba0045', 'zumba0046', 'zumba0047', 'zumba0048', 'zumba0049', 'zumba0050', 'zumba0051', 'zumba0052', 'zumba0053', 'zumba0054', 'zumba0055', 'zumba0056', 'zumba0057', 'zumba0058', 'zumba0059', 'zumba0060', 'zumba0061', 'zumba0062', 'zumba0063', 'zumba0064', 'zumba0065', 'zumba0066', 'zumba0067', 'zumba0068', 'zumba0069', 'zumba0070', 'zumba0071', 'zumba0072', 'zumba0073', 'zumba0074', 'zumba0075', '

In [37]:
yt_scaled.keys()

dict_keys(['yt0000', 'yt0001', 'yt0002', 'yt0003', 'yt0004', 'yt0005', 'yt0006', 'yt0007', 'yt0008', 'yt0009', 'yt0010', 'yt0011', 'yt0012', 'yt0013', 'yt0014', 'yt0015', 'yt0016', 'yt0017', 'yt0018', 'yt0019', 'yt0020', 'yt0021', 'yt0022', 'yt0023', 'yt0024', 'yt0025', 'yt0026', 'yt0027', 'yt0028', 'yt0029', 'yt0030', 'yt0031', 'yt0032', 'yt0033', 'yt0034', 'yt0035', 'yt0036', 'yt0037', 'yt0038', 'yt0039', 'yt0040', 'yt0041', 'yt0042', 'yt0043', 'yt0044', 'yt0045', 'yt0046', 'yt0047', 'yt0048', 'yt0049', 'yt0050', 'yt0051', 'yt0052', 'yt0053', 'yt0054', 'yt0055', 'yt0056', 'yt0057', 'yt0058', 'yt0059', 'yt0060', 'yt0061', 'yt0062', 'yt0063', 'yt0064', 'yt0065', 'yt0066', 'yt0067', 'yt0068', 'yt0069', 'yt0070', 'yt0071', 'yt0072', 'yt0073', 'yt0074', 'yt0075', 'yt0076', 'yt0077', 'yt0078', 'yt0079', 'yt0080', 'yt0081', 'yt0082', 'yt0083', 'yt0084', 'yt0085', 'yt0086', 'yt0087', 'yt0088', 'yt0089', 'yt0090', 'yt0091', 'yt0092', 'yt0093', 'yt0094'])

In [39]:
batch9.keys()

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,

In [7]:
len(d2m_scaled)

3270

In [8]:
yt_scaled = yt_scaled['yt_scaled_dict'][()]
len(yt_scaled)

95

In [17]:
batch9 = batch9_scaled['batch9'][()]
len(batch9)

8343

In [11]:
batch0 = v2v_scaled['batch0'][()]
print(len(batch0))
batch1 = v2v_scaled['batch1'][()]
print(len(batch1))
batch2 = v2v_scaled['batch2'][()]
print(len(batch2))
batch3 = v2v_scaled['batch3'][()]
print(len(batch3))
batch4 = v2v_scaled['batch4'][()]
print(len(batch4))
batch5 = v2v_scaled['batch5'][()]
print(len(batch5))
batch6 = v2v_scaled['batch6'][()]
print(len(batch6))
batch7 = v2v_scaled['batch7'][()]
print(len(batch7))
batch8 = v2v_scaled['batch8'][()]
print(len(batch8))
batch9 = v2v_scaled['batch9'][()]
print(len(batch9))


55
65
707
216
326
541
738
251
195
0


In [18]:
#Re-save with the new batch9.
np.savez_compressed('../bopbot_data/v2v_scaled.npz',
                    batch0=batch0,
                    batch1=batch1,
                    batch2=batch2,
                    batch3=batch3,
                    batch4=batch4,
                    batch5=batch5,
                    batch6=batch6,
                    batch7=batch7,
                    batch8=batch8,
                    batch9=batch9)


In [15]:
#Re-save the new batch9 (unscaled).
np.savez_compressed('../bopbot_data/v2v_batch9_orig.npz',
                    batch9=batch9)


In [19]:
#Get the total number of videos. Decide how many to pull. 
total_num_vids = 0
total_num_vids = len(d2m_scaled) + len(yt_scaled) + len(batch9)
percent_d2m = len(d2m_scaled)/total_num_vids
percent_yt = len(yt_scaled)/total_num_vids
percent_v2v_b9 = len(batch9)/total_num_vids
print(f'#d2m: {percent_d2m*30}, #yt: {percent_yt*30}, #v2v_b9: {percent_v2v_b9*30}')

#d2m: 8.378886231636487, #yt: 0.24342330030748208, #v2v_b9: 21.37769046805603


In [31]:
print(f'#d2m: {percent_d2m*20}, #yt: {percent_yt*20}, #v2v_b9: {percent_v2v_b9*20}')

#d2m: 5.585924154424325, #yt: 0.16228220020498807, #v2v_b9: 14.251793645370688


Based on the above numbers, I'll pull 2 videos from each of the d2m sets, 1 from the youtube set and 13 from the batch9 v2v set.

In [32]:
#Save those to a new npz and delete them from the original.
ballet = [random.randint(0,1396),random.randint(0,1396)]
hiphop = [random.randint(0,1036),random.randint(0,1036)]
zumba = [random.randint(0,839),random.randint(0,839)]
yt = random.randint(0,94)
b9 = []
for i in range(13):
    b9.append(random.randint(0,8342))

In [33]:
ballet

[632, 1335]

In [35]:
hiphop

[20, 426]

In [36]:
zumba

[148, 647]

In [38]:
yt

55

In [40]:
b9

[5297, 4145, 8219, 1368, 1579, 1764, 4515, 6548, 2821, 1777, 4461, 3922, 7035]

In [41]:
batch9[5297]

array([[[243.11182208,  70.95508847],
        [243.11182208,  70.95508847],
        [268.28482858,  73.21920637],
        ...,
        [212.56257321, 133.55633999],
        [182.00165096, 180.90047715],
        [150.70271136, 238.18281559]],

       [[247.66703637,  71.71308028],
        [247.66703637,  71.71308028],
        [272.11110482,  73.25292948],
        ...,
        [224.01156994, 129.00034748],
        [182.02681359, 184.73894559],
        [148.4155061 , 238.94547675]],

       [[249.98874297,  72.48430193],
        [249.98874297,  72.48430193],
        [274.42502916,  73.20779239],
        ...,
        [236.20636626, 128.9977534 ],
        [185.83830354, 188.54680382],
        [147.67022531, 242.7328417 ]],

       ...,

       [[212.57243074,  32.0126757 ],
        [198.05125846,  54.90651861],
        [180.47996036,  55.67203326],
        ...,
        [212.58358531, 111.42178594],
        [212.55712563, 159.50186518],
        [208.75782789, 203.05111454]],

       [[213.34

In [42]:
#Remove the test videos
test_vids_scaled = {}
test_vids_scaled['ballet0632'] = d2m_scaled['ballet0632']
del d2m_scaled['ballet0632']
test_vids_scaled['ballet1335'] = d2m_scaled['ballet1335']
del d2m_scaled['ballet1335']
test_vids_scaled['hiphop0020'] = d2m_scaled['hiphop0020']
del d2m_scaled['hiphop0020']
test_vids_scaled['hiphop0426'] = d2m_scaled['hiphop0426']
del d2m_scaled['hiphop0426']
test_vids_scaled['zumba0148'] = d2m_scaled['zumba0148']
del d2m_scaled['zumba0148']
test_vids_scaled['zumba0647'] = d2m_scaled['zumba0647']
del d2m_scaled['zumba0647']
test_vids_scaled['yt0055'] = yt_scaled['yt0055']
del yt_scaled['yt0055']
for n in b9:
    key = f'v2v{n}'
    test_vids_scaled[key] = batch9[n]
    del batch9[n]

In [43]:
test_vids_scaled.keys()

dict_keys(['ballet0632', 'ballet1335', 'hiphop0020', 'hiphop0426', 'zumba0148', 'zumba0647', 'yt0055', 'v2v5297', 'v2v4145', 'v2v8219', 'v2v1368', 'v2v1579', 'v2v1764', 'v2v4515', 'v2v6548', 'v2v2821', 'v2v1777', 'v2v4461', 'v2v3922', 'v2v7035'])

In [44]:
#Save with the test videos.
np.savez_compressed('../bopbot_data/test_vids_scaled.npz',
                    test_vids_dict=test_vids_scaled)


In [45]:
#Create one dict with sequential keys.
all_vids_scaled = {}
new_key = 0
for key in d2m_scaled:
    all_vids_scaled[new_key] = d2m_scaled[key]
    new_key += 1
for key in yt_scaled:
    all_vids_scaled[new_key] = yt_scaled[key]
    new_key += 1
for key in batch0:
    all_vids_scaled[new_key] = batch0[key]
    new_key += 1
for key in batch1:
    all_vids_scaled[new_key] = batch1[key]
    new_key += 1
for key in batch2:
    all_vids_scaled[new_key] = batch2[key]
    new_key += 1
for key in batch3:
    all_vids_scaled[new_key] = batch3[key]
    new_key += 1
for key in batch4:
    all_vids_scaled[new_key] = batch4[key]
    new_key += 1
for key in batch5:
    all_vids_scaled[new_key] = batch5[key]
    new_key += 1
for key in batch6:
    all_vids_scaled[new_key] = batch6[key]
    new_key += 1
for key in batch7:
    all_vids_scaled[new_key] = batch7[key]
    new_key += 1
for key in batch8:
    all_vids_scaled[new_key] = batch8[key]
    new_key += 1
for key in batch9:
    all_vids_scaled[new_key] = batch9[key]
    new_key += 1

In [46]:
len(all_vids_scaled)

14782

In [47]:
all_vids_keys = all_vids_scaled.keys()

In [50]:
all_vids_keys = list(all_vids_keys)
all_vids_keys

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [53]:
type(all_vids_keys)

list

In [51]:
#Shuffle the remaining videos (include the other v2v videos, too).
random.shuffle(all_vids_keys)

In [54]:
all_vids_keys

[10293,
 14007,
 4293,
 3604,
 9351,
 8892,
 2782,
 4996,
 5258,
 4590,
 7509,
 12248,
 7978,
 2319,
 11416,
 4342,
 12815,
 1102,
 694,
 6207,
 854,
 4236,
 5456,
 4362,
 3687,
 12738,
 7832,
 2348,
 3654,
 537,
 422,
 14245,
 4317,
 9431,
 5610,
 11750,
 7507,
 12399,
 13948,
 11103,
 3515,
 13246,
 7418,
 9594,
 7494,
 3852,
 6601,
 6975,
 7132,
 13081,
 11395,
 10469,
 1419,
 11932,
 12732,
 8087,
 3747,
 758,
 4411,
 247,
 14654,
 13937,
 5238,
 4325,
 1389,
 10990,
 2181,
 2509,
 11667,
 11050,
 3451,
 11627,
 13306,
 11336,
 8994,
 12295,
 8331,
 3270,
 3064,
 8948,
 6837,
 11997,
 11616,
 1863,
 2848,
 13952,
 5611,
 9967,
 11296,
 8507,
 6988,
 6404,
 7743,
 3401,
 6884,
 9826,
 10655,
 386,
 12970,
 6956,
 13503,
 10861,
 13150,
 6732,
 98,
 12674,
 13849,
 1819,
 821,
 83,
 13417,
 461,
 14248,
 1141,
 6661,
 9252,
 3257,
 13431,
 13639,
 3288,
 14113,
 11782,
 13574,
 14158,
 9147,
 12960,
 8511,
 4121,
 6461,
 13419,
 7779,
 3029,
 8954,
 2855,
 6170,
 6830,
 5421,
 4117,


In [55]:
all_vids_scaled[12815].shape

(127, 14, 2)

In [66]:
vid = all_vids_scaled[12815]
vid.shape

(127, 14, 2)

In [64]:
zeros = np.zeros(28)
zeros = zeros.reshape(-1,14,2)
zeros

array([[[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]]])

In [87]:
test_array = np.concatenate((test,vid,zeros), axis=0)
test_array.shape

(128, 14, 2)

In [85]:
np.zeros([1,14,2])

array([[[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]]])

In [1]:
test = np.empty([0,14,2])
test.shape

NameError: name 'np' is not defined

In [90]:
#Save the dictionary of all videos.
np.savez_compressed('../bopbot_data/all_vids_dict.npz',
                    all_vids_scaled=all_vids_scaled)

In [96]:
all_vids_list = []
zeros_list = np.zeros([1,14,2]).tolist()


In [None]:
#Combine all videos into one list with one frame of zeros between each.
for key in all_vids_keys:
    all_vids_list.append(all_vids_scaled[key].tolist())
    all_vids_list.append(zeros_list)

In [None]:
all_vids_array = np.array(all_vids_list)

In [88]:
# all_vids_array = np.empty([0,14,2])
# zeros = np.zeros([1,14,2])

# #Combine all videos into one array with one frame of zeros between each.
# for key in all_vids_keys:
#     all_vids_array = np.concatenate((all_vids_array,all_vids_scaled[key],zeros), axis=0)

KeyboardInterrupt: 

In [None]:
all_vids_array.shape

In [None]:
#Save the huge array of all training data.
np.save('../bopbot_data/all_vids_scaled.npy',all_vids_array)