## Data Cleaning from json files



In [96]:
import os
import json
import numpy as np
import pandas as pd
from collections import defaultdict
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import time

pd.options.display.max_rows = 4000

 

In [108]:
class PoseDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, x,y):
        """
        Args:
            root_dir (string): Directory with all the images.
        """
        self.x  = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        X = torch.FloatTensor(self.x.iloc[idx])
        Y = torch.from_numpy(np.array(self.y.iloc[idx])).type(torch.LongTensor)

        sample = (X,Y)

        return sample

In [101]:
torch.FloatTensor(y_train.loc[[0,3]])

tensor([0., 0.])

In [109]:
BATCH_SIZE=32

csv_file_path = "/content/drive/Othercomputers/My PC/cs7643-project/output/pose_df.csv"
pose_data = pd.read_csv(csv_file_path,index_col=0)
#[27,33] only have one sample each
pose_data = pose_data[pose_data["Y"] != 27]
pose_data = pose_data[pose_data["Y"] != 33]

X_train, X_test, y_train, y_test = train_test_split(pose_data.iloc[:,0:-1], pose_data.iloc[:,-1], test_size=0.2, random_state=42,stratify=pose_data.iloc[:,-1])

train_dataset = PoseDataset(x=X_train,y=y_train)
test_dataset = PoseDataset(X_test,y_test)

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE,shuffle=True, num_workers=2)
test_dataloader = DataLoader(test_dataset, batch_size=100,shuffle=False, num_workers=2)

## The DNN

In [57]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [119]:

class DNN(nn.Module):

    def __init__(self,numClasses = 80,features=75,hidden_dim=256):
        super(DNN, self).__init__()

        self.fc1 = nn.Linear(features,hidden_dim)
        self.fc2 = nn.Linear(hidden_dim,numClasses)

    def forward(self, x):
        x = self.fc1(x)
        x = torch.sigmoid(x)
        x = self.fc2(x)

        out = x
        return out


## Training and testing

In [116]:
class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target):
    """Computes the precision@k for the specified values of k"""
    batch_size = target.shape[0]

    _, pred = torch.max(output, dim=-1)

    correct = pred.eq(target).sum() * 1.0

    acc = correct / batch_size

    return acc

def train(epoch, data_loader, model, optimizer, criterion):
    iter_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    for idx, (data, target) in enumerate(data_loader):
        start = time.time()

        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()

        #############################################################################
        # TODO: Complete the body of training loop                                  #
        #       1. forward data batch to the model                                  #
        #       2. Compute batch loss                                               #
        #       3. Compute gradients and update model parameters                    #
        #############################################################################
        model.train()
        out = model(data)
        loss = criterion(out, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################

        batch_acc = accuracy(out, target)

        losses.update(loss, out.shape[0])
        acc.update(batch_acc, out.shape[0])

        iter_time.update(time.time() - start)
        if idx % 100 == 0:
            print(('Epoch: [{0}][{1}/{2}]\t'
                   'Time {iter_time.val:.3f} ({iter_time.avg:.3f})\t'
                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                   'Prec @1 {top1.val:.4f} ({top1.avg:.4f})\t')
                  .format(epoch, idx, len(data_loader), iter_time=iter_time, loss=losses, top1=acc))
            
def validate(epoch, val_loader, model, criterion):
    iter_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    num_class = 10
    cm = torch.zeros(num_class, num_class)
    # evaluation loop
    for idx, (data, target) in enumerate(val_loader):
        start = time.time()

        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        #############################################################################
        # TODO: Complete the body of training loop                                  #
        #       HINT: torch.no_grad()                                               #
        #############################################################################
        model.eval()
        with torch.no_grad():
            out = model(data)
            loss = criterion(out, target)

        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################

        batch_acc = accuracy(out, target)

        # update confusion matrix
        _, preds = torch.max(out, 1)
        # for t, p in zip(target.view(-1), preds.view(-1)):
        #     cm[t.long(), p.long()] += 1

        losses.update(loss, out.shape[0])
        acc.update(batch_acc, out.shape[0])

        iter_time.update(time.time() - start)
        if idx % 100 == 0:
            print(('Epoch: [{0}][{1}/{2}]\t'
                   'Time {iter_time.val:.3f} ({iter_time.avg:.3f})\t')
                  .format(epoch, idx, len(val_loader), iter_time=iter_time, loss=losses, top1=acc))
    # cm = cm / cm.sum(1)
    # per_cls_acc = cm.diag().detach().numpy().tolist()
    # for i, acc_i in enumerate(per_cls_acc):
    #     print("Accuracy of Class {}: {:.4f}".format(i, acc_i))

    print("* Prec @1: {top1.avg:.4f}".format(top1=acc))
    return acc.avg, cm


# def adjust_learning_rate(optimizer, epoch, args):
#     epoch += 1
#     if epoch <= args.warmup:
#         lr = args.learning_rate * epoch / args.warmup
#     elif epoch > args.steps[1]:
#         lr = args.learning_rate * 0.01
#     elif epoch > args.steps[0]:
#         lr = args.learning_rate * 0.1
#     else:
#         lr = args.learning_rate
#     for param_group in optimizer.param_groups:
#         param_group['lr'] = lr

In [None]:
import torch.optim as optim

model = DNN(numClasses=80)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)


for epoch in range(1000):  # loop over the dataset multiple times

    # train loop
    train(epoch, train_dataloader, model, optimizer, criterion)

    # validation loop
    acc, cm = validate(epoch, test_dataloader, model, criterion)


print('Finished Training')

Epoch: [0][0/227]	Time 0.006 (0.006)	Loss 4.3202 (4.3202)	Prec @1 0.0312 (0.0312)	
Epoch: [0][100/227]	Time 0.002 (0.002)	Loss 4.3860 (4.3665)	Prec @1 0.0312 (0.0114)	
Epoch: [0][200/227]	Time 0.002 (0.002)	Loss 4.2555 (4.3119)	Prec @1 0.0938 (0.0261)	
Epoch: [0][0/19]	Time 0.002 (0.002)	
* Prec @1: 0.0680
Epoch: [1][0/227]	Time 0.009 (0.009)	Loss 4.1268 (4.1268)	Prec @1 0.1250 (0.1250)	
Epoch: [1][100/227]	Time 0.002 (0.002)	Loss 4.1160 (4.1404)	Prec @1 0.0312 (0.0770)	
Epoch: [1][200/227]	Time 0.002 (0.002)	Loss 4.0430 (4.1127)	Prec @1 0.1250 (0.0779)	
Epoch: [1][0/19]	Time 0.003 (0.003)	
* Prec @1: 0.0834
Epoch: [2][0/227]	Time 0.004 (0.004)	Loss 4.1043 (4.1043)	Prec @1 0.1250 (0.1250)	
Epoch: [2][100/227]	Time 0.002 (0.002)	Loss 3.9016 (4.0241)	Prec @1 0.1250 (0.0894)	
Epoch: [2][200/227]	Time 0.002 (0.002)	Loss 3.9490 (4.0125)	Prec @1 0.0625 (0.0877)	
Epoch: [2][0/19]	Time 0.002 (0.002)	
* Prec @1: 0.0878
Epoch: [3][0/227]	Time 0.005 (0.005)	Loss 3.9862 (3.9862)	Prec @1 0.1250 (0.

In [69]:
y_train.loc[[0]].values

array([0.])

## Testing


In [102]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    images, labels = X_test,y_test
    # calculate outputs by running images through the network
    outputs = model(images.float())
    # the class with the highest energy is what we choose as prediction
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 13 %


In [101]:
outputs

tensor([[ 0.1674, -0.9808, -0.2520,  ...,  2.2150, -0.5100, -3.3449],
        [-0.2813,  0.1347,  1.4681,  ...,  5.3942, -0.7866, -4.8951],
        [ 1.6224,  0.7240,  0.3740,  ...,  2.8399,  0.6893, -5.0767],
        ...,
        [-1.2346, -4.2762, -2.8379,  ..., -4.0123, -1.7167,  1.4468],
        [-0.3394, -1.2395,  0.6135,  ...,  0.0387, -1.3142, -2.5738],
        [-2.1884, -2.6262, -2.8384,  ..., -1.1630, -0.8836,  2.0409]])

In [None]:
from time import sleep
while True:
    sleep(10)
