## Data cleaning: loading pose keypoints from JSON files



In [2]:
import json
import os
from collections import defaultdict

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
 

In [None]:
class PoseDataset(Dataset):
    """Pose-keypoint dataset built from a directory tree of JSON files.

    Every directory below ``root_dir`` is treated as one pose class; its path
    relative to ``root_dir`` is the class name. Each ``*.json`` file is expected
    to contain a ``"people"`` list whose entries carry a ``"pose_keypoints_2d"``
    list. Only the first person of each file is used, and files with an empty
    ``"people"`` list are skipped.

    Attributes:
        root_dir: The directory that was scanned.
        data: Mapping of pose name -> list of keypoint lists.
        class_to_idx: Mapping of pose name -> integer class label.
        samples: Flat list of ``(keypoints, label)`` pairs, grouped by pose.
        N: Number of samples.
    """

    def __init__(self, root_dir):
        """
        Args:
            root_dir (string): Directory whose sub-directories hold the
                per-pose JSON keypoint files.
        """
        self.root_dir = root_dir
        data = defaultdict(list)
        for subdir, dirs, files in os.walk(root_dir):
            # Sorting in place fixes the traversal order, so class labels are
            # reproducible across file systems.
            dirs.sort()
            # The pose name is the directory path relative to the dataset root.
            poseName = os.path.relpath(subdir, root_dir)
            for fileName in sorted(files):
                # Only JSON files carry keypoints.
                if not fileName.endswith(".json"):
                    continue
                # `subdir` already includes `root_dir`; do not join it twice.
                with open(os.path.join(subdir, fileName)) as f:
                    people = json.load(f)["people"]
                # Keep the first detected person; skip frames without any.
                if len(people) > 0:
                    data[poseName].append(people[0]['pose_keypoints_2d'])

        self.data = dict(data)
        self.class_to_idx = {name: idx for idx, name in enumerate(self.data)}
        self.samples = [
            (keypoints, self.class_to_idx[name])
            for name, keypointLists in self.data.items()
            for keypoints in keypointLists
        ]
        self.N = len(self.samples)

    def __len__(self):
        """Return the number of samples that were loaded."""
        return self.N

    def __getitem__(self, idx):
        """Return ``(keypoints, label)`` for the sample at position ``idx``.

        Args:
            idx: Integer index, or a zero-dimensional tensor holding one.

        Returns:
            A ``(torch.float32 tensor, int)`` pair: the keypoint vector and the
            integer class label from ``class_to_idx``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        keypoints, label = self.samples[idx]
        return torch.tensor(keypoints, dtype=torch.float32), label

In [3]:
root = "/content/drive/Othercomputers/My PC/cs7643-project/output/"

# Maps pose name -> list of keypoint lists (one list per JSON file that
# contains at least one detected person).
data = defaultdict(list)

for subdir, dirs, files in os.walk(root):
    # Sort in place so directories are visited in a fixed order. The class ids
    # derived from the key order of `data` are then reproducible across machines.
    dirs.sort()
    # The pose name is the directory path relative to the dataset root.
    poseName = os.path.relpath(subdir, root)
    # For each file within the pose directory...
    for fileName in sorted(files):
        # ...only look at JSON files.
        if not fileName.endswith(".json"):
            continue
        # `subdir` already contains `root`, so it must not be joined again.
        p = os.path.join(subdir, fileName)
        # Open the file and extract the keypoints of the first person, if any.
        with open(p) as f:
            jsonData = json.load(f)
        people = jsonData["people"]
        if len(people) > 0:
            data[poseName].append(people[0]['pose_keypoints_2d'])

In [8]:
# Convert the {pose name: [keypoint lists]} mapping into a pandas DataFrame
# with one row per sample: columns x0..x74 hold the keypoint values and "Y"
# holds the integer class label.

num_pose_points = 75
# Integer class label per pose name, in the key order of `data`.
y_map = {name:idx for idx,name in enumerate(data.keys())}

feature_columns = [f"x{i}" for i in range(num_pose_points)]

# Collect all rows first and build the frame once. Assigning `df.loc[c]` row by
# row is slow, and pre-allocating a hard-coded number of rows leaves all-NaN
# rows whenever the real sample count is smaller.
rows = []
for poseName,listOfPosePoints in data.items():
    for l in listOfPosePoints:
        if len(l) != num_pose_points:
            raise ValueError(
                f"expected {num_pose_points} keypoint values for pose "
                f"{poseName!r}, got {len(l)}"
            )
        rows.append(list(l) + [y_map[poseName]])

df = pd.DataFrame(rows, columns=feature_columns + ["Y"])
# The sample count is derived from the data instead of being hard-coded.
N = len(df)
np_data = df.to_numpy()

In [10]:
# Persist the assembled pose table as CSV (the row index is written as well).
df.to_csv(
    path_or_buf="/content/drive/Othercomputers/My PC/cs7643-project/output/pose_df.csv"
)

In [83]:

# Every column except the last is a feature; the last column is the class label.
# Explicit dtypes: float32 matches the model parameters (no per-use cast), and
# int64 is what nn.CrossEntropyLoss requires for targets. A plain `int` can be
# 32-bit on some platforms.
x_data = torch.tensor(np_data[:, :-1].astype(np.float32))
y_data = torch.tensor(np_data[:, -1].astype(np.int64))

# Stratify on the labels so train and test keep the same class balance.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=42,
    stratify=y_data,
)

1079

## The DNN

In [65]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [70]:

class DNN(nn.Module):
    """Fully connected classifier with a single sigmoid hidden layer.

    The forward pass is ``Linear -> sigmoid -> Linear`` and returns the raw
    scores of the last layer (no softmax is applied).
    """

    def __init__(self, numClasses=107, features=75, hidden_dim=256):
        """
        Args:
            numClasses: Size of the output layer.
            features: Size of each input vector.
            hidden_dim: Width of the hidden layer.
        """
        super().__init__()

        # Input -> hidden projection.
        self.fc1 = nn.Linear(in_features=features, out_features=hidden_dim)
        # Hidden -> per-class score projection.
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=numClasses)

    def forward(self, x):
        """Return the output-layer scores for the batch ``x``."""
        hidden = torch.sigmoid(self.fc1(x))
        return self.fc2(hidden)


## Training

In [96]:
import torch.optim as optim

NUM_EPOCHS = 10
LOG_EVERY = 1000  # number of samples between two progress reports

model = DNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Cast the features once up front instead of once per sample inside the loop.
train_inputs = X_train.float()

model.train()
for epoch in range(NUM_EPOCHS):  # loop over the dataset multiple times

    running_loss = 0.0
    for i in range(len(train_inputs)):
        # One sample per optimisation step; slicing keeps the leading batch
        # dimension of size 1 that the model and the loss expect.
        inputs, labels = train_inputs[i:i + 1], y_train[i:i + 1]

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss over the last LOG_EVERY samples.
        running_loss += loss.item()
        if i % LOG_EVERY == LOG_EVERY - 1:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / LOG_EVERY))
            running_loss = 0.0

print('Finished Training')

[1,  1000] loss: 4.684
[1,  2000] loss: 4.545
[1,  3000] loss: 4.388
[1,  4000] loss: 4.325
[2,  1000] loss: 4.158
[2,  2000] loss: 4.193
[2,  3000] loss: 4.112
[2,  4000] loss: 4.111
[3,  1000] loss: 3.972
[3,  2000] loss: 3.991
[3,  3000] loss: 3.905
[3,  4000] loss: 3.970
[4,  1000] loss: 3.861
[4,  2000] loss: 3.939
[4,  3000] loss: 3.857
[4,  4000] loss: 3.889
[5,  1000] loss: 3.761
[5,  2000] loss: 3.812
[5,  3000] loss: 3.779
[5,  4000] loss: 3.821
[6,  1000] loss: 3.752
[6,  2000] loss: 3.823
[6,  3000] loss: 3.701
[6,  4000] loss: 3.732
[7,  1000] loss: 3.604
[7,  2000] loss: 3.717
[7,  3000] loss: 3.665
[7,  4000] loss: 3.698
[8,  1000] loss: 3.644
[8,  2000] loss: 3.734
[8,  3000] loss: 3.656
[8,  4000] loss: 3.693
[9,  1000] loss: 3.564
[9,  2000] loss: 3.636
[9,  3000] loss: 3.562
[9,  4000] loss: 3.649
[10,  1000] loss: 3.538
[10,  2000] loss: 3.598
[10,  3000] loss: 3.541
[10,  4000] loss: 3.607
Finished Training


In [95]:
# Shows the current value of `i`, the per-sample loop index used by the
# training cell. NOTE(review): looks like a leftover debugging cell - confirm
# it can be removed.
i

4312

9

## Testing


In [102]:
correct = 0
total = 0
# Switch to inference mode. This is a no-op for plain linear layers, but it
# keeps the cell correct if dropout or batch-norm layers are added later.
model.eval()
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    # `images` holds the test feature vectors; the name is kept from the
    # original cell even though the inputs are pose keypoints.
    images, labels = X_test,y_test
    # calculate outputs by running the whole test set through the network
    outputs = model(images.float())
    # the class with the highest score is the prediction
    predicted = outputs.argmax(dim=1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

# Report the real test-set size instead of a hard-coded, unrelated number.
print('Accuracy of the network on the %d test samples: %d %%' % (
    total, 100 * correct / total))

Accuracy of the network on the 10000 test images: 13 %


In [101]:
# Displays the most recently assigned `outputs` tensor, i.e. the raw scores
# returned by the model. NOTE(review): presumably the test-set scores from the
# evaluation cell - confirm; the full tensor is dumped, not a summary.
outputs

tensor([[ 0.1674, -0.9808, -0.2520,  ...,  2.2150, -0.5100, -3.3449],
        [-0.2813,  0.1347,  1.4681,  ...,  5.3942, -0.7866, -4.8951],
        [ 1.6224,  0.7240,  0.3740,  ...,  2.8399,  0.6893, -5.0767],
        ...,
        [-1.2346, -4.2762, -2.8379,  ..., -4.0123, -1.7167,  1.4468],
        [-0.3394, -1.2395,  0.6135,  ...,  0.0387, -1.3142, -2.5738],
        [-2.1884, -2.6262, -2.8384,  ..., -1.1630, -0.8836,  2.0409]])

In [None]:
# Blocks forever, waking up every 10 seconds. The loop has no exit condition,
# so this cell never finishes and must be interrupted manually.
# NOTE(review): presumably a keep-alive so the hosted session does not go
# idle - confirm; it prevents "Run All" from ever completing.
from time import sleep
while True:
    sleep(10)
