In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import numpy as np
from sklearn import metrics
import gc
import matplotlib.pyplot as plt
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
class data():
    def __init__(self, seq_length, batch_size):
        np.random.seed(100)
        self.seq_length = seq_length
        self.acc_num = 334
        self.no_acc_num = 392
        self.split = 0.95
        self.train_index = 0 
        self.train_batch_size = batch_size
        self.valid_index = 0
        self.valid_batch_size = 12
        self.read_annotation()
        self.shuffle_data()
        
    def shuffle_data(self):
        #img_range = np.arange(0,300-self.seq_length-60,60)
        acc_list = np.arange(1,self.acc_num+1)
        no_acc_list = np.arange(1,self.no_acc_num+1)
        list1 = np.array(np.meshgrid(1,acc_list)).T.reshape(-1,2)
        list2 = np.array(np.meshgrid(0,no_acc_list)).T.reshape(-1,2)
        shuffle_list = np.concatenate([list1, list2], axis=0)
        np.random.shuffle(shuffle_list)
        self.train = shuffle_list[:int(shuffle_list.shape[0]*self.split)]
        self.valid = shuffle_list[int(shuffle_list.shape[0]*self.split):] 
        
    def read_annotation(self):
        annotation_file = '/media/user/Hard_Disk/Dataset/child_accident_2/annotation/accident_frame.txt'
        w = open(annotation_file, "r")
        ann = w.read()
        annotation_data = []
        for i in ann.split("\n"):
            b = i.split(" ")
            if (len(b) > 1):
                annotation_data.append(b[1])
        self.annotation = np.array(annotation_data).astype("int32")
        
    def read_data(self, is_accident, dir_index):
        data = []
        label = []
        img_path = '/media/user/Hard_Disk/Dataset/child_accident_2/image feature/' 
        act_path = '/media/user/Hard_Disk/Dataset/child_accident_2/action feature/' 
        if (is_accident):
            acc_dir = "accident/"
            range_start = self.annotation[dir_index-1] - self.seq_length - 10
            range_end = self.annotation[dir_index-1] - 10
            label.append([0,1])
            
        else:
            acc_dir = "no_accident/"
            range_start = 0
            range_end = self.seq_length
            label.append([1,0])
            
        dir_name = "%04d"%dir_index
        img_npy = []
        act_npy = []
        for j in range(range_start, range_end):
            img_feature = np.load(img_path + acc_dir + dir_name + "/" + str(j) + ".npy")
            img_npy.append(img_feature)
            act_feature = np.load(act_path + acc_dir + dir_name + "/" + str(j) + ".npy")
            act_npy.append(act_feature)
        
            
        return np.array(img_npy), np.array(act_npy) ,np.array(label)
        
    def next_batch(self, mode="train"):
        batch_img = []
        batch_act = []
        batch_y = []
        if (mode == "valid"):
            batch_size = self.valid_batch_size
        elif (mode == "train"):
            batch_size = self.train_batch_size
        for i in range(batch_size):
            if (mode == "train"):
                img, act, label = self.read_data(self.train[self.train_index+i][0], self.train[self.train_index+i][1])
            elif (mode == "valid"):
                img, act, label = self.read_data(self.valid[self.valid_index+i][0], self.valid[self.valid_index+i][1])
            batch_img.append(img)
            batch_act.append(act)
            batch_y.append(label)
        if (mode == "valid"):
            self.valid_index += self.valid_batch_size
        elif (mode == "train"):
            self.train_index += self.train_batch_size
        return np.array(batch_img), np.array(batch_act), np.squeeze(np.array(batch_y))
    
    def has_next(self, mode="train"):
        if (mode == "train"):
            if (self.train_index + self.train_batch_size >= self.train.shape[0]):
                return False
        elif (mode == "valid"):
            if (self.valid_index + self.valid_batch_size >= self.valid.shape[0]):
                return False
        return True
    def display_shape(self):
        print("train shape:",self.train.shape, " valid shape:",self.valid.shape)
        
    def reset_batch(self, mode="train"):
        if (mode == "train"):
            self.train_index = 0
            np.random.shuffle(self.train)
        elif (mode == "valid"):
            self.valid_index = 0
        
   

In [3]:
# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    def __init__(self, img_input_size, act_input_size, hidden_size, embedding_size, embedding_size2, num_layers, num_classes):
        super(RNN, self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.img = nn.Sequential(
            nn.Linear(img_input_size, embedding_size),
            nn.Dropout(0.25),
            nn.ReLU(),
            nn.Linear(embedding_size, embedding_size2)
        )
        self.act = nn.Sequential(
            nn.Linear(act_input_size, embedding_size),
            nn.Dropout(0.25),
            nn.ReLU(),
            nn.Linear(embedding_size, embedding_size2)
        )
            
        
        self.img_fc1 = nn.Linear(img_input_size, embedding_size)
        self.act_fc1 = nn.Linear(act_input_size, embedding_size)
        self.img_fc2 = nn.Linear(embedding_size, embedding_size2)
        self.act_fc2 = nn.Linear(embedding_size, embedding_size2)
        self.lstm1 = nn.LSTM(embedding_size2*2, hidden_size, num_layers,dropout=0.25, batch_first=True)
        self.lstm2 = nn.LSTM(embedding_size2*2, hidden_size, num_layers,dropout=0.25, batch_first=True)
        self.lstm3 = nn.LSTM(embedding_size2*2, hidden_size, num_layers,dropout=0.25, batch_first=True)
        self.st3_state_h = nn.Linear(hidden_size*3, hidden_size)
        self.st3_state_c = nn.Linear(hidden_size*3, hidden_size)
        self.st2_state_h = nn.Linear(hidden_size*2, hidden_size)
        self.st2_state_c = nn.Linear(hidden_size*2, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x, y):
        # Set initial hidden and cell states 
        x1 = self.img_fc1(x)
        x1 = self.img_fc2(x1)
        x2 = self.act_fc1(y)
        x2 = self.act_fc2(x2)
#         x1 = self.img(x)
#         x = self.act(y)
        x = torch.cat((x1, x2), 2)
#         print(x.shape)
        
        st1_h = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        st1_c = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        
        st2_h = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        st2_c = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        
        st3_h = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        st3_c = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        
        
        for i in range(x.size(1)):
        # Forward propagate LSTM
            stack1_out, (st1_h, st1_c) = self.lstm1(x[:,i].unsqueeze(1), (st1_h, st1_c))  # out: tensor of shape (batch_size, seq_length, hidden_size)
            stack2_out, (st2_h, st2_c) = self.lstm2(x[:,int(i/10)].unsqueeze(1), (st2_h, st2_c))
            stack3_out, (st3_h, st3_c) = self.lstm3(x[:,int(i/20)].unsqueeze(1), (st3_h, st3_c))
           
            
            st3_h = torch.cat((st1_h, st2_h, st3_h), 2)
            st3_h = self.st3_state_h(st3_h)
            
            st3_c = torch.cat((st1_c, st2_c, st3_c), 2)
            st3_c = self.st3_state_c(st3_c)
            
            
            st2_h = torch.cat((st1_h, st2_h), 2)
            st2_h = self.st2_state_h(st2_h)
            
            st2_c = torch.cat((st1_c, st2_c), 2)
            st2_c = self.st2_state_c(st2_c)
            
            
        
        # Decode the hidden state of the last time step
        
        out = self.fc2(stack3_out[:,-1,:])
        
#         out = self.fc2(out[:, -1, :])
        return out

In [4]:
model = RNN(img_input_size=20*4096, act_input_size=1024, hidden_size=1024, embedding_size=1024, embedding_size2=512, num_layers=1, num_classes=2).to(device)

  "num_layers={}".format(dropout, num_layers))


In [5]:
w = torch.from_numpy(np.array([1,1])).type(torch.FloatTensor).to(device)
criterion = nn.CrossEntropyLoss(weight=w)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [6]:
sequence_length = 100
num_epochs = 20
train_data = data(seq_length=sequence_length, batch_size=50)
train_data.display_shape()

train shape: (689, 2)  valid shape: (37, 2)


In [7]:
# out of memory
def test():
    model = torch.load('pytorch - 06/model-90.ckpt')
    while(train_data.has_next("valid")):
        img, act, labels = train_data.next_batch("valid")
        labels = torch.from_numpy(labels).to(device)
        img = torch.from_numpy(img.reshape(-1, sequence_length, 20*4096)).to(device)
        act = torch.from_numpy(np.squeeze(act)).to(device)
        outputs = model(img, act)
        outputs = F.softmax(outputs)
        predict = torch.max(outputs, 1)[1]
        target = torch.max(labels, 1)[1]
        recall(predict, target)
        torch.cuda.empty_cache()

In [8]:
def metric(pred, y):
    pred = np.reshape(np.array(pred.cpu().numpy()), (-1,1))
    y = np.reshape(np.array(y.cpu().numpy()), (-1, 1))
    print(pred.reshape(1,-1))
    print(y.reshape(1,-1))
    print("=========================================")
    print("precision")
    print(metrics.precision_score(y, pred, average='macro'))
    
    print("Accuracy")
    print(metrics.accuracy_score(y, pred))
    
    print("Recall")
    print(metrics.recall_score(y, pred, average='macro'))
    
    print("F1_score")
    print(metrics.f1_score(y, pred, average='weighted'))
    
    print("roc_auc_score")
    fpr, tpr, thresholds = metrics.roc_curve(y, pred)
    roc_auc = metrics.auc(fpr, tpr)
    plt.title('Receiver Operating Characteristic')
    plt.plot(fpr, tpr, 'b',
    label='AUC = %0.2f'% roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0,1],[0,1],'r--')
    plt.xlim([0,1.0])
    plt.ylim([0,1.0])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()

In [9]:
def evaluation(model):
    with torch.no_grad():
        i = 0
        acc = 0
        while(train_data.has_next("valid")):
            img, act, labels = train_data.next_batch("valid")
            labels = torch.from_numpy(labels).to(device)
            img = torch.from_numpy(img.reshape(-1, sequence_length, 20*4096)).to(device)
            act = torch.from_numpy(np.squeeze(act)).to(device)
            outputs = model(img, act)
            outputs = F.softmax(outputs)
            predict = torch.max(outputs, 1)[1]
            target = torch.max(labels, 1)[1]
            loss = criterion(outputs, target)
            correct = (predict == target).squeeze()
            acc += torch.nonzero(correct).size(0) / predict.shape[0]
            i += 1
#             metric(predict, target)
        train_data.reset_batch("valid")
        print("validation accuracy:",acc/i)

In [10]:

# Loss and optimizer

# Train the model

for epoch in range(1,101):
    i = 0
    acc = 0
    print("epoch:",epoch)
    while (train_data.has_next("train")):
        img, act, labels = train_data.next_batch("train")
        labels = torch.from_numpy(labels).to(device)
        
        img = torch.from_numpy(img.reshape(-1, sequence_length, 20*4096)).to(device)
        act = torch.from_numpy(np.squeeze(act)).to(device)
        outputs = model(img, act)
        outputs = F.softmax(outputs)
#         print(outputs.shape)
#         outputs, torch.max(labels, 1)[1]
        
        predict = torch.max(outputs, 1)[1]
        target = torch.max(labels, 1)[1]
        loss = criterion(outputs, target)
        
        
        correct = (predict == target).squeeze()
#         print(predict)
#         print(target)
#         print("================")
#         print(correct)
        acc += torch.nonzero(correct).size(0) / predict.shape[0]
        
    
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        i += 1
        if (i+1) % 5 == 0:
            
            print ('Epoch [{}/{}], Step {}, Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, loss.item()))
            print ('train accuracy:',acc/i)
            evaluation(model)
            print("====================================")
    if (epoch % 10 == 0):
        path = "pytorch - 10/model-"+str(epoch)+".ckpt"
        torch.save(model, path)
        
    train_data.reset_batch("train")
# # Test the model
# with torch.no_grad():
#     correct = 0
#     total = 0
#     for images, labels in test_loader:
#         images = images.reshape(-1, sequence_length, input_size).to(device)
#         labels = labels.to(device)
#         outputs = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

#     print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

# # Save the model checkpoint
# torch.save(model.state_dict(), 'model.ckpt')

epoch: 1




Epoch [2/20], Step 5, Loss: 0.6911
train accuracy: 0.51


  # This is added back by InteractiveShellApp.init_path()


validation accuracy: 0.5277777777777778
Epoch [2/20], Step 10, Loss: 0.6879
train accuracy: 0.5155555555555557
validation accuracy: 0.5277777777777778
epoch: 2
Epoch [3/20], Step 5, Loss: 0.6880
train accuracy: 0.635
validation accuracy: 0.6666666666666666
Epoch [3/20], Step 10, Loss: 0.6873
train accuracy: 0.6488888888888888
validation accuracy: 0.6944444444444445
epoch: 3
Epoch [4/20], Step 5, Loss: 0.6260
train accuracy: 0.6950000000000001
validation accuracy: 0.6666666666666666
Epoch [4/20], Step 10, Loss: 0.6026
train accuracy: 0.6777777777777777
validation accuracy: 0.7777777777777778
epoch: 4
Epoch [5/20], Step 5, Loss: 0.6714
train accuracy: 0.6950000000000001
validation accuracy: 0.7222222222222222
Epoch [5/20], Step 10, Loss: 0.5364
train accuracy: 0.708888888888889
validation accuracy: 0.8055555555555555
epoch: 5
Epoch [6/20], Step 5, Loss: 0.6112
train accuracy: 0.715
validation accuracy: 0.7777777777777778
Epoch [6/20], Step 10, Loss: 0.5231
train accuracy: 0.76
validation

  "type " + obj.__name__ + ". It won't be checked "


epoch: 11
Epoch [12/20], Step 5, Loss: 0.3644
train accuracy: 0.95
validation accuracy: 0.6944444444444445
Epoch [12/20], Step 10, Loss: 0.3591
train accuracy: 0.9333333333333331
validation accuracy: 0.7222222222222222
epoch: 12
Epoch [13/20], Step 5, Loss: 0.4040
train accuracy: 0.9349999999999999
validation accuracy: 0.75
Epoch [13/20], Step 10, Loss: 0.4024
train accuracy: 0.9222222222222223
validation accuracy: 0.7222222222222223
epoch: 13
Epoch [14/20], Step 5, Loss: 0.3588
train accuracy: 0.925
validation accuracy: 0.7222222222222223
Epoch [14/20], Step 10, Loss: 0.3390
train accuracy: 0.9422222222222225
validation accuracy: 0.75
epoch: 14
Epoch [15/20], Step 5, Loss: 0.3174
train accuracy: 0.98
validation accuracy: 0.7222222222222223
Epoch [15/20], Step 10, Loss: 0.3287
train accuracy: 0.9733333333333333
validation accuracy: 0.7777777777777778
epoch: 15
Epoch [16/20], Step 5, Loss: 0.3870
train accuracy: 0.96
validation accuracy: 0.6666666666666666
Epoch [16/20], Step 10, Loss: 

Epoch [41/20], Step 5, Loss: 0.3133
train accuracy: 0.99
validation accuracy: 0.7777777777777778
Epoch [41/20], Step 10, Loss: 0.3133
train accuracy: 0.9866666666666668
validation accuracy: 0.7777777777777778
epoch: 41
Epoch [42/20], Step 5, Loss: 0.3133
train accuracy: 0.99
validation accuracy: 0.7777777777777778
Epoch [42/20], Step 10, Loss: 0.3133
train accuracy: 0.9888888888888887
validation accuracy: 0.7777777777777778
epoch: 42
Epoch [43/20], Step 5, Loss: 0.3133
train accuracy: 0.985
validation accuracy: 0.7777777777777778
Epoch [43/20], Step 10, Loss: 0.3333
train accuracy: 0.9844444444444443
validation accuracy: 0.7777777777777778
epoch: 43
Epoch [44/20], Step 5, Loss: 0.3533
train accuracy: 0.98
validation accuracy: 0.7777777777777778
Epoch [44/20], Step 10, Loss: 0.3333
train accuracy: 0.9866666666666668
validation accuracy: 0.7777777777777778
epoch: 44
Epoch [45/20], Step 5, Loss: 0.3133
train accuracy: 0.99
validation accuracy: 0.7777777777777778
Epoch [45/20], Step 10, Lo

validation accuracy: 0.75
Epoch [70/20], Step 10, Loss: 0.3533
train accuracy: 0.9844444444444443
validation accuracy: 0.75
epoch: 70
Epoch [71/20], Step 5, Loss: 0.3133
train accuracy: 1.0
validation accuracy: 0.75
Epoch [71/20], Step 10, Loss: 0.3133
train accuracy: 0.9955555555555556
validation accuracy: 0.75
epoch: 71
Epoch [72/20], Step 5, Loss: 0.3733
train accuracy: 0.975
validation accuracy: 0.75
Epoch [72/20], Step 10, Loss: 0.3733
train accuracy: 0.9777777777777776
validation accuracy: 0.75
epoch: 72
Epoch [73/20], Step 5, Loss: 0.3333
train accuracy: 0.98
validation accuracy: 0.7222222222222222
Epoch [73/20], Step 10, Loss: 0.3333
train accuracy: 0.9777777777777776
validation accuracy: 0.7222222222222222
epoch: 73
Epoch [74/20], Step 5, Loss: 0.3533
train accuracy: 0.985
validation accuracy: 0.7222222222222222
Epoch [74/20], Step 10, Loss: 0.3133
train accuracy: 0.991111111111111
validation accuracy: 0.7222222222222222
epoch: 74
Epoch [75/20], Step 5, Loss: 0.3533
train accu

Epoch [100/20], Step 10, Loss: 0.3133
train accuracy: 0.9822222222222222
validation accuracy: 0.7777777777777778
epoch: 100
Epoch [101/20], Step 5, Loss: 0.3133
train accuracy: 0.98
validation accuracy: 0.7777777777777778
Epoch [101/20], Step 10, Loss: 0.3333
train accuracy: 0.9822222222222224
validation accuracy: 0.7777777777777778
