## Import Libraries

In [3]:
import pandas as pd
import time
import numpy as np
import math
from itertools import islice 
import json
from pandas.io.json import json_normalize
import os

## Read and parse json files for each video

In [9]:


# this finds our json files
path_to_json = "/mnt/sde/jagadish/userdata/dl_project/tv_json_files_new/jagtv_8306/"
json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]

# here I define my pandas Dataframe with the columns I want to get from the json
jsons_data = pd.DataFrame(columns=['index','person_id','po', 'hl', 'hr'])
df = pd.DataFrame()


# we need both the json and an index number so use enumerate()
for index, js in enumerate(json_files):
    with open(os.path.join(path_to_json, js)) as json_file:
        json_text = json.load(json_file)
        index = (str(json_file).split('/')[-1].split('_')[2])
        person_id = json_text['people'][0]['person_id']
        po = json_text['people'][0]['pose_keypoints_2d']
        hl = json_text['people'][0]['hand_left_keypoints_2d']
        hr = json_text['people'][0]['hand_right_keypoints_2d'] 
        # here I push a list of data into a pandas DataFrame at row given by 'index'
        jsons_data.loc[index] = [index,person_id, po, hl, hr]


# now that we have the pertinent json data in our DataFrame let's look at it
print(jsons_data)

                     index person_id  \
000000001530  000000001530      [-1]   
000000000024  000000000024      [-1]   
000000000820  000000000820      [-1]   
000000000839  000000000839      [-1]   
000000001415  000000001415      [-1]   
000000000953  000000000953      [-1]   
000000001132  000000001132      [-1]   
000000001178  000000001178      [-1]   
000000000712  000000000712      [-1]   
000000000929  000000000929      [-1]   
000000000954  000000000954      [-1]   
000000001300  000000001300      [-1]   
000000000378  000000000378      [-1]   
000000001369  000000001369      [-1]   
000000001318  000000001318      [-1]   
000000000518  000000000518      [-1]   
000000000310  000000000310      [-1]   
000000000723  000000000723      [-1]   
000000000307  000000000307      [-1]   
000000000210  000000000210      [-1]   
000000001382  000000001382      [-1]   
000000000738  000000000738      [-1]   
000000001536  000000001536      [-1]   
000000000750  000000000750      [-1]   


## Preprocessing data

In [10]:
df = jsons_data

In [11]:
df['index'] = df['index'].astype(int)

In [12]:
df=df.sort_index()

In [13]:
df.head(5)

Unnamed: 0,index,person_id,po,hl,hr
0,0,[-1],"[730.418, 492.061, 0.799119, 883.546, 497.954,...","[1043.4, 329.055, 0.376755, 1045.58, 350.318, ...","[1086.79, 673.614, 0.567056, 1087.36, 653.644,..."
1,1,[-1],"[727.496, 494.931, 0.797698, 883.574, 497.942,...","[1042.32, 328.811, 0.239091, 1044.44, 350.569,...","[1085.33, 674.153, 0.52687, 1087.02, 653.821, ..."
2,2,[-1],"[727.534, 494.959, 0.795355, 883.587, 497.855,...","[1042.43, 329.041, 0.242665, 1046.25, 351.387,...","[1084.07, 675.66, 0.515069, 1083.52, 655.665, ..."
3,3,[-1],"[730.385, 494.956, 0.796999, 883.557, 494.988,...","[1044.26, 330.845, 0.226355, 1046.42, 352.958,...","[1083.65, 675.677, 0.537642, 1084.21, 655.344,..."
4,4,[-1],"[727.503, 494.948, 0.798814, 883.542, 494.978,...","[1047.29, 332.332, 0.356608, 1047.84, 354.131,...","[1083, 677.416, 0.552919, 1083, 656.519, 0.584..."


In [14]:
gf = pd.DataFrame()

In [15]:
gf=df

In [16]:
gf.head(1)

Unnamed: 0,index,person_id,po,hl,hr
0,0,[-1],"[730.418, 492.061, 0.799119, 883.546, 497.954,...","[1043.4, 329.055, 0.376755, 1045.58, 350.318, ...","[1086.79, 673.614, 0.567056, 1087.36, 653.644,..."


In [17]:
gf = gf.join(gf['po'].apply(pd.Series).add_prefix('po'))
gf = gf.join(gf['hl'].apply(pd.Series).add_prefix('hl'))
gf = gf.join(gf['hr'].apply(pd.Series).add_prefix('hr'))

In [18]:
gf = gf.drop(['po', 'hl', 'hr', 'person_id', 'index'], axis=1)

In [19]:
gf.head(1)

Unnamed: 0,po0,po1,po2,po3,po4,po5,po6,po7,po8,po9,...,hr53,hr54,hr55,hr56,hr57,hr58,hr59,hr60,hr61,hr62
0,730.418,492.061,0.799119,883.546,497.954,0.737814,886.41,609.801,0.746752,1021.86,...,0.633117,1141.57,670.191,0.480044,1143.85,664.485,0.303567,1141.57,662.773,0.171271


In [20]:
gf = gf.drop(gf.iloc[:, 2::3],axis=1)


In [21]:
gf.head(1)

Unnamed: 0,po0,po1,po3,po4,po6,po7,po9,po10,po12,po13,...,hr48,hr49,hr51,hr52,hr54,hr55,hr57,hr58,hr60,hr61
0,730.418,492.061,883.546,497.954,886.41,609.801,1021.86,686.256,1086.56,668.6,...,1135.29,650.791,1126.16,682.173,1141.57,670.191,1143.85,664.485,1141.57,662.773


In [21]:
mf = pd.DataFrame()

In [22]:
n = gf.shape[1]
i=0
j=0
while i < n:

    col = gf.columns[i][0:2] + "_" + str(j) #col name
    X = gf.columns[i]
    Y = gf.columns[i+1]
    mf[col] = gf[[X, Y]].values.tolist()
    i = i+2
    j= j+1

In [23]:
mf.head(1)

Unnamed: 0,po_0,po_1,po_2,po_3,po_4,po_5,po_6,po_7,po_8,po_9,...,hr_57,hr_58,hr_59,hr_60,hr_61,hr_62,hr_63,hr_64,hr_65,hr_66
0,"[668.524, 506.788]","[812.999, 518.557]","[809.915, 639.184]","[921.712, 689.223]","[1054.22, 689.168]","[821.661, 391.958]","[951.152, 344.82]","[1062.95, 383.175]","[1107.13, 512.647]","[1083.64, 580.34]",...,"[1139.53, 733.978]","[1151.95, 743.16]","[1100.1, 721.015]","[1122.79, 738.299]","[1137.91, 748.561]","[1149.25, 756.662]","[1106.59, 730.197]","[1122.25, 742.079]","[1134.67, 748.561]","[1143.85, 757.202]"


In [24]:
hf=pd.DataFrame()

In [25]:
hf = mf

In [26]:
hf.head(1)

Unnamed: 0,po_0,po_1,po_2,po_3,po_4,po_5,po_6,po_7,po_8,po_9,...,hr_57,hr_58,hr_59,hr_60,hr_61,hr_62,hr_63,hr_64,hr_65,hr_66
0,"[668.524, 506.788]","[812.999, 518.557]","[809.915, 639.184]","[921.712, 689.223]","[1054.22, 689.168]","[821.661, 391.958]","[951.152, 344.82]","[1062.95, 383.175]","[1107.13, 512.647]","[1083.64, 580.34]",...,"[1139.53, 733.978]","[1151.95, 743.16]","[1100.1, 721.015]","[1122.79, 738.299]","[1137.91, 748.561]","[1149.25, 756.662]","[1106.59, 730.197]","[1122.25, 742.079]","[1134.67, 748.561]","[1143.85, 757.202]"


In [27]:
hf.shape[1]

67

## Euclidean distances between body key points

In [28]:
def eudis5(v1, v2): # Function to calculate euclidean distance between two points
    dist = [(a - b)**2 for a, b in zip(v1, v2)]
    dist = math.sqrt(sum(dist))
    return dist

In [29]:
hf['pd'] = ''
hf['pd'] = hf['pd'].apply(list)

In [30]:
n=hf.index
m = hf.shape[1]
for i in n[:] :
    
    ear =[]
    I=1
    for j in range(25) :
        
        for k in range(I,25) :

            X = hf.columns[j]
            Y = hf.columns[k]

            a = np.array(hf[X][i])
            b = np.array(hf[Y][i])
            x = eudis5(a, b)
            ear.append(x)
        I = I + 1
     
    hf.loc[i,'pd'].append(ear[:])

## Euclidean distances between left hand key points

In [34]:
hf['hld'] = ''
hf['hld'] = hf['hld'].apply(list)

In [36]:
n=hf.index
m = hf.shape[1]
for i in n[:] :
    
    ear =[]
    I=26
    for j in range(25,46) :
        
        for k in range(I,46) :

            X = hf.columns[j]
            Y = hf.columns[k]

            a = np.array(hf[X][i])
            b = np.array(hf[Y][i])
            x = eudis5(a, b)
            ear.append(x)
        I = I + 1
     
    hf.loc[i,'hld'].append(ear[:])

## Euclidean distances between right hand key points

In [37]:
hf['hrd'] = ''
hf['hrd'] = hf['hrd'].apply(list)

In [39]:
n=hf.index
m = hf.shape[1]
for i in n[:] :
    
    ear =[]
    I=47
    for j in range(46,67) :
        
        for k in range(I,67) :

            X = hf.columns[j]
            Y = hf.columns[k]

            a = np.array(hf[X][i])
            b = np.array(hf[Y][i])
            x = eudis5(a, b)
            ear.append(x)
        I = I + 1
     
    hf.loc[i,'hrd'].append(ear[:])

In [40]:
hf.head(5)

Unnamed: 0,po_0,po_1,po_2,po_3,po_4,po_5,po_6,po_7,po_8,po_9,...,hr_60,hr_61,hr_62,hr_63,hr_64,hr_65,hr_66,pd,hld,hrd
0,"[668.524, 506.788]","[812.999, 518.557]","[809.915, 639.184]","[921.712, 689.223]","[1054.22, 689.168]","[821.661, 391.958]","[951.152, 344.82]","[1062.95, 383.175]","[1107.13, 512.647]","[1083.64, 580.34]",...,"[1122.79, 738.299]","[1137.91, 748.561]","[1149.25, 756.662]","[1106.59, 730.197]","[1122.25, 742.079]","[1134.67, 748.561]","[1143.85, 757.202]","[[144.95356148091017, 193.70109885336217, 312....","[[25.356668077647758, 50.80675817251097, 69.58...","[[18.37067361312592, 35.96953745879976, 52.335..."
1,"[668.482, 506.847]","[815.763, 521.42]","[809.971, 642.073]","[927.616, 689.123]","[1060.07, 689.155]","[821.662, 391.941]","[951.15, 342.041]","[1060.1, 383.125]","[1098.38, 515.581]","[1080.66, 580.393]",...,"[1123.22, 737.217]","[1137.8, 748.555]","[1149.68, 757.193]","[1104.86, 726.959]","[1122.68, 740.996]","[1135.64, 749.095]","[1147.52, 759.893]","[[148.00022057416, 195.7171637772222, 316.8200...","[[24.79238439521294, 49.94466522062187, 68.376...","[[17.65285271563777, 35.41657415674191, 51.966..."
2,"[668.502, 506.862]","[815.84, 521.49]","[812.732, 642.142]","[930.541, 689.134]","[1060.06, 689.131]","[821.692, 391.961]","[951.136, 342.076]","[1062.97, 383.135]","[1104.2, 518.498]","[1089.45, 589.097]",...,"[1123.92, 737.172]","[1138.17, 749.314]","[1151.37, 758.289]","[1105.44, 728.725]","[1122.86, 741.395]","[1134.48, 749.314]","[1150.31, 758.817]","[[148.06236735916394, 197.74471244511196, 319....","[[25.06168791203013, 51.408595156841095, 70.04...","[[18.181648880120875, 35.790999762510125, 53.6..."
3,"[668.626, 506.815]","[815.892, 521.487]","[812.788, 642.116]","[933.533, 686.32]","[1065.89, 689.148]","[821.715, 391.983]","[951.156, 342.054]","[1063.04, 383.173]","[1107.13, 515.588]","[1086.52, 586.199]",...,"[1124.13, 735.948]","[1138.16, 748.898]","[1151.11, 757.532]","[1105.25, 726.774]","[1123.05, 740.265]","[1136.0, 748.898]","[1149.49, 760.23]","[[147.99507539104135, 197.70949103419392, 319....","[[25.49485063694239, 51.68668106969148, 70.387...","[[18.58155224947574, 34.54875974619057, 51.357..."
4,"[668.727, 509.745]","[818.74, 524.36]","[812.841, 642.179]","[942.354, 689.12]","[1065.92, 689.214]","[821.714, 394.872]","[951.226, 342.079]","[1063.06, 383.135]","[1110.08, 518.502]","[1089.5, 586.228]",...,"[1124.89, 737.255]","[1140.05, 749.889]","[1151.68, 758.48]","[1106.19, 728.158]","[1124.39, 742.308]","[1136.52, 749.889]","[1145.11, 758.48]","[[150.72325100660487, 195.72329792847862, 327....","[[22.771730193377888, 46.54963411241803, 65.78...","[[18.11280972129947, 33.776051930324726, 50.78..."


## Filtering the dataframe with desired columns

In [41]:
df= pd.DataFrame()

In [42]:
df=hf.filter(items=['pd',  'hld', 'hrd'])

In [43]:
df.head(1)

Unnamed: 0,pd,hld,hrd
0,"[[144.95356148091017, 193.70109885336217, 312....","[[25.356668077647758, 50.80675817251097, 69.58...","[[18.37067361312592, 35.96953745879976, 52.335..."


In [44]:
df = df.join(df['pd'].apply(pd.Series).add_prefix('p_'))
df = df.join(df['hld'].apply(pd.Series).add_prefix('hl_'))
df = df.join(df['hrd'].apply(pd.Series).add_prefix('hr_'))

In [45]:
df.head(1)

Unnamed: 0,pd,hld,hrd,p_0,hl_0,hr_0
0,"[[144.95356148091017, 193.70109885336217, 312....","[[25.356668077647758, 50.80675817251097, 69.58...","[[18.37067361312592, 35.96953745879976, 52.335...","[144.95356148091017, 193.70109885336217, 312.0...","[25.356668077647758, 50.80675817251097, 69.582...","[18.37067361312592, 35.96953745879976, 52.3354..."


In [46]:
df = df.join(df['p_0'].apply(pd.Series).add_prefix('pd_'))
df = df.join(df['hl_0'].apply(pd.Series).add_prefix('hld_'))
df = df.join(df['hr_0'].apply(pd.Series).add_prefix('hrd_'))

In [47]:
df.head(1)

Unnamed: 0,pd,hld,hrd,p_0,hl_0,hr_0,pd_0,pd_1,pd_2,pd_3,...,hrd_200,hrd_201,hrd_202,hrd_203,hrd_204,hrd_205,hrd_206,hrd_207,hrd_208,hrd_209
0,"[[144.95356148091017, 193.70109885336217, 312....","[[25.356668077647758, 50.80675817251097, 69.58...","[[18.37067361312592, 35.96953745879976, 52.335...","[144.95356148091017, 193.70109885336217, 312.0...","[25.356668077647758, 50.80675817251097, 69.582...","[18.37067361312592, 35.96953745879976, 52.3354...",144.953561,193.701099,312.06841,426.642554,...,50.202309,30.686542,16.679406,5.426933,19.657506,33.551794,46.017145,14.009737,26.367881,12.607112


In [48]:
df = df.drop(['pd', 'hld', 'hrd','p_0', 'hl_0', 'hr_0'], axis=1)

In [49]:
df.head()

Unnamed: 0,pd_0,pd_1,pd_2,pd_3,pd_4,pd_5,pd_6,pd_7,pd_8,pd_9,...,hrd_200,hrd_201,hrd_202,hrd_203,hrd_204,hrd_205,hrd_206,hrd_207,hrd_208,hrd_209
0,144.953561,193.701099,312.06841,426.642554,191.407601,325.748706,413.342525,438.645131,421.581772,616.585114,...,50.202309,30.686542,16.679406,5.426933,19.657506,33.551794,46.017145,14.009737,26.367881,12.607112
1,148.000221,195.717164,316.820091,431.946025,191.487601,327.203631,410.696714,429.986713,418.688091,618.228075,...,54.064102,31.485597,16.207998,3.457687,22.684571,37.913202,53.893636,15.282519,31.210931,16.054009
2,148.062367,197.744712,319.198245,431.902368,191.4926,327.164185,413.416707,435.853351,428.905367,610.521804,...,54.622294,33.139513,19.126493,1.184223,21.540318,35.598153,54.02634,14.061826,32.511976,18.463367
3,147.995075,197.709491,319.996506,437.108692,191.3704,327.06175,413.339748,438.59175,425.367153,610.500486,...,55.219509,32.947123,17.402817,3.146999,22.334885,37.881841,55.466039,15.563778,33.131176,17.618011
4,150.723251,195.723298,327.180571,435.857088,191.313429,328.508101,414.160122,441.439866,427.667589,612.566824,...,54.669587,31.72188,17.425007,6.57,23.05347,37.311463,49.337512,14.304141,26.284063,12.148802
5,150.711865,195.65874,322.300536,435.852323,195.303574,329.521255,414.14447,432.822632,428.718032,611.545308,...,56.051084,33.842845,19.124147,5.61526,22.267009,37.04252,54.790509,14.778507,32.557106,17.906065
6,150.792838,193.765516,322.278604,435.896622,197.673158,327.065382,414.09404,432.773131,427.660344,612.604931,...,56.660621,33.411246,19.126493,6.444488,23.330611,37.692149,54.680224,14.365212,31.392967,17.184906
7,153.411512,193.837254,325.683124,438.552297,197.698948,328.432403,414.137934,444.507559,430.650422,619.735165,...,57.520749,33.788621,18.933072,6.209809,23.854098,38.6964,56.347283,14.879278,32.511431,17.903668
8,153.203814,191.90693,320.743826,436.208306,197.666898,328.496098,413.188886,444.398167,430.121405,614.262746,...,55.075216,32.487923,17.217109,3.816963,22.663911,37.880673,54.515947,15.270859,31.852687,16.787422
9,153.377865,191.900484,313.539363,433.696477,197.296884,327.678373,413.359563,441.725104,427.38741,614.423043,...,52.844624,30.844524,16.389484,7.33,22.095422,36.6384,46.922094,14.549554,24.831906,10.364771


In [51]:
df.drop([col for col, val in df.sum().iteritems() if val == 0], axis=1, inplace=True)

In [52]:
df.head(1)

Unnamed: 0,pd_0,pd_1,pd_2,pd_3,pd_4,pd_5,pd_6,pd_7,pd_8,pd_9,...,hrd_200,hrd_201,hrd_202,hrd_203,hrd_204,hrd_205,hrd_206,hrd_207,hrd_208,hrd_209
0,144.953561,193.701099,312.06841,426.642554,191.407601,325.748706,413.342525,438.645131,421.581772,616.585114,...,50.202309,30.686542,16.679406,5.426933,19.657506,33.551794,46.017145,14.009737,26.367881,12.607112


## Save the processed data in a csv file for each video

In [53]:
df.to_csv("/mnt/sde/jagadish/userdata/dl_project/tv_test_data/tv_8361.csv", index=True)