In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import numpy as np
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
class data():
    def __init__(self, seq_length, batch_size):
        np.random.seed(100)
        self.seq_length = seq_length
        self.acc_num = 334
        self.no_acc_num = 392
        self.split = 0.95
        self.train_index = 0 
        self.train_batch_size = batch_size
        self.valid_index = 0
        self.valid_batch_size = 10
        self.read_annotation()
        self.shuffle_data()
        
    def shuffle_data(self):
        #img_range = np.arange(0,300-self.seq_length-60,60)
        acc_list = np.arange(1,self.acc_num+1)
        no_acc_list = np.arange(1,self.no_acc_num+1)
        list1 = np.array(np.meshgrid(1,acc_list)).T.reshape(-1,2)
        list2 = np.array(np.meshgrid(0,no_acc_list)).T.reshape(-1,2)
        shuffle_list = np.concatenate([list1, list2], axis=0)
        np.random.shuffle(shuffle_list)
        self.train = shuffle_list[:int(shuffle_list.shape[0]*self.split)]
        self.valid = shuffle_list[int(shuffle_list.shape[0]*self.split):] 
        
    def read_annotation(self):
        annotation_file = '/media/user/Hard_Disk/Dataset/child_accident_2/annotation/accident_frame.txt'
        w = open(annotation_file, "r")
        ann = w.read()
        annotation_data = []
        for i in ann.split("\n"):
            b = i.split(" ")
            if (len(b) > 1):
                annotation_data.append(b[1])
        self.annotation = np.array(annotation_data).astype("int32")
        
    def read_data(self, is_accident, dir_index):
        data = []
        label = []
        img_path = '/media/user/Hard_Disk/Dataset/child_accident_2/image feature/' 
        act_path = '/media/user/Hard_Disk/Dataset/child_accident_2/action feature/' 
        if (is_accident):
            acc_dir = "accident/"
            range_start = self.annotation[dir_index-1] - self.seq_length
            range_end = self.annotation[dir_index-1]
            label.append([0,1])
            
        else:
            acc_dir = "no_accident/"
            range_start = 0
            range_end = self.seq_length
            label.append([1,0])
            
        dir_name = "%04d"%dir_index
        img_npy = []
        act_npy = []
        for j in range(range_start, range_end):
            img_feature = np.load(img_path + acc_dir + dir_name + "/" + str(j) + ".npy")
            img_npy.append(img_feature)
            act_feature = np.load(act_path + acc_dir + dir_name + "/" + str(j) + ".npy")
            act_npy.append(act_feature)
        
            
        return np.array(img_npy), np.array(act_npy) ,np.array(label)
        
    def next_batch(self, mode="train"):
        batch_img = []
        batch_act = []
        batch_y = []
        if (mode == "valid"):
            batch_size = self.valid_batch_size
        elif (mode == "train"):
            batch_size = self.train_batch_size
        for i in range(batch_size):
            if (mode == "train"):
                img, act, label = self.read_data(self.train[self.train_index+i][0], self.train[self.train_index+i][1])
            elif (mode == "valid"):
                img, act, label = self.read_data(self.valid[self.valid_index+i][0], self.valid[self.valid_index+i][1])
            batch_img.append(img)
            batch_act.append(act)
            batch_y.append(label)
        if (mode == "valid"):
            self.valid_index += self.valid_batch_size
        elif (mode == "train"):
            self.train_index += self.train_batch_size
        return np.array(batch_img), np.array(batch_act), np.squeeze(np.array(batch_y))
    
    def has_next(self, mode="train"):
        if (mode == "train"):
            if (self.train_index + self.train_batch_size >= self.train.shape[0]):
                return False
        elif (mode == "valid"):
            if (self.valid_index + self.valid_batch_size >= self.valid.shape[0]):
                return False
        return True
    def display_shape(self):
        print("train shape:",self.train.shape, " valid shape:",self.valid.shape)
        
    def reset_batch(self, mode="train"):
        if (mode == "train"):
            self.train_index = 0
            np.random.shuffle(self.train)
        elif (mode == "valid"):
            self.valid_index = 0
        
   

In [3]:
# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    def __init__(self, img_input_size, act_input_size, hidden_size, embedding_size, embedding_size2, num_layers, num_classes):
        super(RNN, self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.img = nn.Sequential(
            nn.Linear(img_input_size, embedding_size),
            nn.Dropout(0.25),
            nn.ReLU(),
            nn.Linear(embedding_size, embedding_size2)
        )
        self.act = nn.Sequential(
            nn.Linear(act_input_size, embedding_size),
            nn.Dropout(0.25),
            nn.ReLU(),
            nn.Linear(embedding_size, embedding_size2)
        )
            
        
        self.img_fc1 = nn.Linear(img_input_size, embedding_size)
        self.act_fc1 = nn.Linear(act_input_size, embedding_size)
        self.img_fc2 = nn.Linear(embedding_size, embedding_size2)
        self.act_fc2 = nn.Linear(embedding_size, embedding_size2)
        self.lstm = nn.LSTM(embedding_size2*2, hidden_size, num_layers,dropout=0.25, batch_first=True)
        self.fc2 = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x, y):
        # Set initial hidden and cell states 
        x1 = self.img_fc1(x)
        x1 = self.img_fc2(x1)
        x2 = self.act_fc1(y)
        x2 = self.act_fc2(x2)
#         x1 = self.img(x)
#         x = self.act(y)
        x = torch.cat((x1, x2), 2)
#         print(x.shape)
        
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)
        
        # Decode the hidden state of the last time step
        out = self.fc2(out[:, -1, :])
        return out

In [4]:
model = RNN(img_input_size=20*4096, act_input_size=1024, hidden_size=1024, embedding_size=1024, embedding_size2=512, num_layers=1, num_classes=2).to(device)

  "num_layers={}".format(dropout, num_layers))


In [5]:
w = torch.from_numpy(np.array([1,1])).type(torch.FloatTensor).to(device)
criterion = nn.CrossEntropyLoss(weight=w)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [6]:
sequence_length = 100
num_epochs = 20
train_data = data(seq_length=sequence_length, batch_size=50)
train_data.display_shape()

train shape: (689, 2)  valid shape: (37, 2)


In [7]:
def evaluation(model):
    i = 0
    acc = 0
    while(train_data.has_next("valid")):
        img, act, labels = train_data.next_batch("valid")
        labels = torch.from_numpy(labels).to(device)
        img = torch.from_numpy(img.reshape(-1, sequence_length, 20*4096)).to(device)
        act = torch.from_numpy(np.squeeze(act)).to(device)
        outputs = model(img, act)
        outputs = F.softmax(outputs)
        predict = torch.max(outputs, 1)[1]
        target = torch.max(labels, 1)[1]
        loss = criterion(outputs, target)
        correct = (predict == target).squeeze()
        acc += torch.nonzero(correct).size(0) / predict.shape[0]
        i += 1
    train_data.reset_batch("valid")
    print("validation accuracy:",acc/i)

In [8]:

# Loss and optimizer

# Train the model

for epoch in range(100):
    i = 0
    acc = 0
    print("epoch:",epoch)
    while (train_data.has_next("train")):
        img, act, labels = train_data.next_batch("train")
        labels = torch.from_numpy(labels).to(device)
        
        img = torch.from_numpy(img.reshape(-1, sequence_length, 20*4096)).to(device)
        act = torch.from_numpy(np.squeeze(act)).to(device)
        outputs = model(img, act)
        outputs = F.softmax(outputs)
#         outputs, torch.max(labels, 1)[1]
        
        predict = torch.max(outputs, 1)[1]
        target = torch.max(labels, 1)[1]
        loss = criterion(outputs, target)
        
        
        correct = (predict == target).squeeze()
#         print(predict)
#         print(target)
#         print("================")
#         print(correct)
        acc += torch.nonzero(correct).size(0) / predict.shape[0]
        
    
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        i += 1
        if (i+1) % 5 == 0:
            
            print ('Epoch [{}/{}], Step {}, Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, loss.item()))
            print ('train accuracy:',acc/i)
            evaluation(model)
            print("====================================")
    if (epoch % 10 == 0):
        path = "pytorch/model-"+str(epoch)+".ckpt"
        torch.save(model, path)
    train_data.reset_batch("train")
# # Test the model
# with torch.no_grad():
#     correct = 0
#     total = 0
#     for images, labels in test_loader:
#         images = images.reshape(-1, sequence_length, input_size).to(device)
#         labels = labels.to(device)
#         outputs = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

#     print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

# # Save the model checkpoint
# torch.save(model.state_dict(), 'model.ckpt')

epoch: 0




Epoch [1/20], Step 5, Loss: 0.6800
train accuracy: 0.48


  # Remove the CWD from sys.path while we load stuff.


validation accuracy: 0.5333333333333333
Epoch [1/20], Step 10, Loss: 0.6690
train accuracy: 0.5244444444444444
validation accuracy: 0.5


  "type " + obj.__name__ + ". It won't be checked "


epoch: 1
Epoch [2/20], Step 5, Loss: 0.7010
train accuracy: 0.6
validation accuracy: 0.4666666666666666
Epoch [2/20], Step 10, Loss: 0.6626
train accuracy: 0.6199999999999999
validation accuracy: 0.5
epoch: 2
Epoch [3/20], Step 5, Loss: 0.6525
train accuracy: 0.5950000000000001
validation accuracy: 0.4666666666666666
Epoch [3/20], Step 10, Loss: 0.6833
train accuracy: 0.6044444444444446
validation accuracy: 0.5
epoch: 3
Epoch [4/20], Step 5, Loss: 0.6680
train accuracy: 0.655
validation accuracy: 0.5333333333333333
Epoch [4/20], Step 10, Loss: 0.6224
train accuracy: 0.6622222222222222
validation accuracy: 0.5
epoch: 4
Epoch [5/20], Step 5, Loss: 0.6106
train accuracy: 0.7050000000000001
validation accuracy: 0.5333333333333333
Epoch [5/20], Step 10, Loss: 0.6136
train accuracy: 0.6933333333333335
validation accuracy: 0.5666666666666667
epoch: 5
Epoch [6/20], Step 5, Loss: 0.5518
train accuracy: 0.7149999999999999
validation accuracy: 0.5333333333333333
Epoch [6/20], Step 10, Loss: 0.576

Epoch [30/20], Step 10, Loss: 0.3338
train accuracy: 0.9711111111111111
validation accuracy: 0.6666666666666666
epoch: 30
Epoch [31/20], Step 5, Loss: 0.3538
train accuracy: 0.985
validation accuracy: 0.6333333333333333
Epoch [31/20], Step 10, Loss: 0.3338
train accuracy: 0.9711111111111111
validation accuracy: 0.6333333333333333
epoch: 31
Epoch [32/20], Step 5, Loss: 0.3338
train accuracy: 0.965
validation accuracy: 0.6666666666666666
Epoch [32/20], Step 10, Loss: 0.3536
train accuracy: 0.9711111111111114
validation accuracy: 0.6666666666666666
epoch: 32
Epoch [33/20], Step 5, Loss: 0.3336
train accuracy: 0.96
validation accuracy: 0.6333333333333333
Epoch [33/20], Step 10, Loss: 0.3336
train accuracy: 0.9644444444444444
validation accuracy: 0.6333333333333333
epoch: 33
Epoch [34/20], Step 5, Loss: 0.3735
train accuracy: 0.965
validation accuracy: 0.6666666666666666
Epoch [34/20], Step 10, Loss: 0.3535
train accuracy: 0.9666666666666666
validation accuracy: 0.6666666666666666
epoch: 34

Epoch [58/20], Step 10, Loss: 0.4488
train accuracy: 0.731111111111111
validation accuracy: 0.5333333333333333
epoch: 58
Epoch [59/20], Step 5, Loss: 0.3830
train accuracy: 0.7799999999999999
validation accuracy: 0.6
Epoch [59/20], Step 10, Loss: 0.4261
train accuracy: 0.8377777777777777
validation accuracy: 0.5333333333333333
epoch: 59
Epoch [60/20], Step 5, Loss: 0.3695
train accuracy: 0.945
validation accuracy: 0.6333333333333333
Epoch [60/20], Step 10, Loss: 0.4028
train accuracy: 0.9377777777777777
validation accuracy: 0.7000000000000001
epoch: 60
Epoch [61/20], Step 5, Loss: 0.3849
train accuracy: 0.965
validation accuracy: 0.7000000000000001
Epoch [61/20], Step 10, Loss: 0.3200
train accuracy: 0.9711111111111111
validation accuracy: 0.7333333333333334
epoch: 61
Epoch [62/20], Step 5, Loss: 0.3237
train accuracy: 0.975
validation accuracy: 0.6666666666666666
Epoch [62/20], Step 10, Loss: 0.3545
train accuracy: 0.9688888888888888
validation accuracy: 0.7000000000000001
epoch: 62
E

epoch: 86
Epoch [87/20], Step 5, Loss: 0.3335
train accuracy: 0.985
validation accuracy: 0.6333333333333334
Epoch [87/20], Step 10, Loss: 0.3336
train accuracy: 0.9822222222222224
validation accuracy: 0.6333333333333334
epoch: 87
Epoch [88/20], Step 5, Loss: 0.3135
train accuracy: 0.99
validation accuracy: 0.7000000000000001
Epoch [88/20], Step 10, Loss: 0.3332
train accuracy: 0.9888888888888889
validation accuracy: 0.6666666666666666
epoch: 88
Epoch [89/20], Step 5, Loss: 0.3135
train accuracy: 0.99
validation accuracy: 0.6666666666666666
Epoch [89/20], Step 10, Loss: 0.3334
train accuracy: 0.9844444444444443
validation accuracy: 0.7000000000000001
epoch: 89
Epoch [90/20], Step 5, Loss: 0.3334
train accuracy: 0.99
validation accuracy: 0.6666666666666666
Epoch [90/20], Step 10, Loss: 0.3134
train accuracy: 0.9822222222222222
validation accuracy: 0.6666666666666666
epoch: 90
Epoch [91/20], Step 5, Loss: 0.3183
train accuracy: 0.985
validation accuracy: 0.6333333333333334
Epoch [91/20], 