In [1]:
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np
from PIL import Image
import torch.utils.data
import torchvision.transforms as transforms

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
class data():
    def __init__(self, seq_length, batch_size):
        np.random.seed(100)
        self.seq_length = seq_length
        self.acc_num = 334
        self.no_acc_num = 392
        self.split = 0.95
        self.train_index = 0 
        self.train_batch_size = batch_size
        self.valid_index = 0
        self.valid_batch_size = 10
        self.read_annotation()
        self.shuffle_data()
        
    def shuffle_data(self):
        img_range = np.arange(0,300-self.seq_length-30,30)
        acc_list = np.arange(1,self.acc_num+1)
        no_acc_list = np.arange(1,self.no_acc_num+1)
        list1 = np.array(np.meshgrid(1,acc_list,img_range)).T.reshape(-1,3)
        list2 = np.array(np.meshgrid(0,no_acc_list,img_range)).T.reshape(-1,3)
        shuffle_list = np.concatenate([list1, list2], axis=0)
        np.random.shuffle(shuffle_list)
        self.train = shuffle_list[:int(shuffle_list.shape[0]*self.split)]
        self.valid = shuffle_list[int(shuffle_list.shape[0]*self.split):] 
        
    def read_annotation(self):
        annotation_file = '/media/user/Hard_Disk/Dataset/child_accident_2/annotation/accident_frame.txt'
        w = open(annotation_file, "r")
        ann = w.read()
        annotation_data = []
        for i in ann.split("\n"):
            b = i.split(" ")
            if (len(b) > 1):
                annotation_data.append(b[1])
        self.annotation = np.array(annotation_data).astype("int32")
        
    def read_data(self, is_accident, dir_index, image_range):
        data = []
        label = []
        img_path = '/media/user/Hard_Disk/Dataset/child_accident_2/image feature/' 
        act_path = '/media/user/Hard_Disk/Dataset/child_accident_2/action feature/' 
        if (is_accident):
            acc_dir = "accident/"
            
            
        else:
            acc_dir = "no_accident/"

            
        range_start = image_range
        range_end = image_range+self.seq_length   
        dir_name = "%04d"%dir_index
        img_npy = []
        act_npy = []
        for j in range(range_start, range_end):
            img_feature = np.load(img_path + acc_dir + dir_name + "/" + str(j) + ".npy")
            img_npy.append(img_feature)
            act_feature = np.load(act_path + acc_dir + dir_name + "/" + str(j) + ".npy")
            act_npy.append(act_feature)
        if (is_accident == True and range_end + 30 > self.annotation[dir_index-1]):
            label.append([0,1])
        else:
            label.append([1,0])
            
        return np.array(img_npy), np.array(act_npy) ,np.array(label)
        
    def next_batch(self, mode="train"):
        batch_img = []
        batch_act = []
        batch_y = []
        if (mode == "valid"):
            batch_size = self.valid_batch_size
        elif (mode == "train"):
            batch_size = self.train_batch_size
        for i in range(batch_size):
            if (mode == "train"):
                img, act, label = self.read_data(self.train[self.train_index+i][0], self.train[self.train_index+i][1], self.train[self.train_index+i][2])
            elif (mode == "valid"):
                img, act, label = self.read_data(self.valid[self.valid_index+i][0], self.valid[self.valid_index+i][1], self.valid[self.valid_index+i][2])
            batch_img.append(img)
            batch_act.append(act)
            batch_y.append(label)
        if (mode == "valid"):
            self.valid_index += self.valid_batch_size
        elif (mode == "train"):
            self.train_index += self.train_batch_size
        return np.array(batch_img), np.array(batch_act), np.squeeze(np.array(batch_y))
    
    def has_next(self, mode="train"):
        if (mode == "train"):
            if (self.train_index + self.train_batch_size >= self.train.shape[0]):
                return False
        elif (mode == "valid"):
            if (self.valid_index + self.valid_batch_size >= self.valid.shape[0]):
                return False
        return True
    def display_shape(self):
        print("train shape:",self.train.shape, " valid shape:",self.valid.shape)
        
    def reset_batch(self, mode="train"):
        if (mode == "train"):
            self.train_index = 0
            np.random.shuffle(self.train)
        elif (mode == "valid"):
            self.valid_index = 0
        
   

In [3]:
# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    def __init__(self, img_input_size, act_input_size, hidden_size, embedding_size, embedding_size2, num_layers, num_classes):
        super(RNN, self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.img = nn.Sequential(
            nn.Linear(img_input_size, embedding_size),
            nn.Dropout(0.25),
            nn.ReLU(),
            nn.Linear(embedding_size, embedding_size2)
        )
        self.act = nn.Sequential(
            nn.Linear(act_input_size, embedding_size),
            nn.Dropout(0.25),
            nn.ReLU(),
            nn.Linear(embedding_size, embedding_size2)
        )
            
        
        self.img_fc1 = nn.Linear(img_input_size, embedding_size)
        self.act_fc1 = nn.Linear(act_input_size, embedding_size)
        self.img_fc2 = nn.Linear(embedding_size, embedding_size2)
        self.act_fc2 = nn.Linear(embedding_size, embedding_size2)
        self.lstm = nn.LSTM(embedding_size2*2, hidden_size, num_layers,dropout=0.25, batch_first=True)
        self.fc2 = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x, y):
        # Set initial hidden and cell states 
        x1 = self.img_fc1(x)
        x1 = self.img_fc2(x1)
        x2 = self.act_fc1(y)
        x2 = self.act_fc2(x2)
#         x1 = self.img(x)
#         x = self.act(y)
        x = torch.cat((x1, x2), 2)
#         print(x.shape)
        
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)
        
        # Decode the hidden state of the last time step
        out = self.fc2(out[:, -1, :])
        return out

In [4]:
model = RNN(img_input_size=20*4096, act_input_size=1024, hidden_size=1024, embedding_size=1024, embedding_size2=512, num_layers=3, num_classes=2).to(device)

In [5]:
w = torch.from_numpy(np.array([0.15,1])).type(torch.FloatTensor).to(device)
criterion = nn.CrossEntropyLoss(weight=w)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [6]:
sequence_length = 100
num_epochs = 20
train_data = data(seq_length=sequence_length, batch_size=50)
train_data.display_shape()

train shape: (4138, 3)  valid shape: (218, 3)


In [7]:
def evaluation(model):
    i = 0
    acc = 0
    while(train_data.has_next("valid")):
        img, act, labels = train_data.next_batch("valid")
        labels = torch.from_numpy(labels).to(device)
        img = torch.from_numpy(img.reshape(-1, sequence_length, 20*4096)).to(device)
        act = torch.from_numpy(np.squeeze(act)).to(device)
        outputs = model(img, act)
        predict = torch.max(outputs, 1)[1]
        target = torch.max(labels, 1)[1]
        loss = criterion(outputs, target)
        correct = (predict == target).squeeze()
        acc += torch.nonzero(correct).size(0) / predict.shape[0]
        i += 1
    train_data.reset_batch("valid")
    print("validation accuracy:",acc/i)

In [8]:

# Loss and optimizer

# Train the model

for epoch in range(1,num_epochs):
    i = 0
    acc = 0
    print("epoch:",epoch)
    while (train_data.has_next("train")):
        img, act, labels = train_data.next_batch("train")
        labels = torch.from_numpy(labels).to(device)
        
        img = torch.from_numpy(img.reshape(-1, sequence_length, 20*4096)).to(device)
        act = torch.from_numpy(np.squeeze(act)).to(device)
        outputs = model(img, act)
#         outputs, torch.max(labels, 1)[1]
        
        predict = torch.max(outputs, 1)[1]
        target = torch.max(labels, 1)[1]
        loss = criterion(outputs, target)
        
        
        correct = (predict == target).squeeze()
#         print(predict)
#         print(target)
#         print("================")
#         print(correct)
        acc += torch.nonzero(correct).size(0) / predict.shape[0]
        
    
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        i += 1
        if (i+1) % 5 == 0:
            
            print ('Epoch [{}/{}], Step {}, Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, loss.item()))
            print ('train accuracy:',acc/i)
            print(outputs)
            print(target)
            evaluation(model)
            print("====================================")
    #path = "pytorch/model-"+str(epoch)+".ckpt"
    #torch.save(model, path)
    train_data.reset_batch("train")
# # Test the model
# with torch.no_grad():
#     correct = 0
#     total = 0
#     for images, labels in test_loader:
#         images = images.reshape(-1, sequence_length, input_size).to(device)
#         labels = labels.to(device)
#         outputs = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

#     print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

# # Save the model checkpoint
# torch.save(model.state_dict(), 'model.ckpt')

epoch: 1
Epoch [2/20], Step 5, Loss: 0.6961
train accuracy: 0.455
tensor(1.00000e-02 *
       [[-0.4844, -1.9535],
        [-1.4754, -0.8724],
        [-2.5589, -1.1526],
        [-3.8997, -0.7477],
        [-2.0001, -1.4037],
        [-3.7124,  0.6303],
        [-2.0753,  1.1586],
        [-2.6846,  1.3092],
        [-0.0818, -2.5736],
        [ 0.6230, -2.7119],
        [-2.3661, -0.8675],
        [-0.8387,  0.1883],
        [-5.7544,  2.0514],
        [-1.7303, -0.9253],
        [-0.7842, -2.6336],
        [-2.7045,  0.4635],
        [-2.3739, -0.6307],
        [ 2.1402, -5.9762],
        [-2.0343,  1.7454],
        [-3.3407, -0.3216],
        [-1.4247, -0.2022],
        [-3.3265,  2.0472],
        [-3.2014,  0.2677],
        [-5.3477,  0.5204],
        [-2.2040,  2.1090],
        [ 0.9089, -1.7045],
        [-1.7021,  1.1096],
        [-1.3173,  0.6173],
        [-1.6663, -0.6150],
        [-3.9285,  2.2316],
        [-2.8174, -0.8068],
        [-2.2806,  0.9173],
        [ 0.5635,

validation accuracy: 0.19047619047619052
Epoch [2/20], Step 30, Loss: 0.6663
train accuracy: 0.4075862068965517
tensor([[-0.0624,  0.0392],
        [-0.0499,  0.0328],
        [-0.0344, -0.0024],
        [-0.0652,  0.0286],
        [ 0.0408, -0.0768],
        [-0.0115, -0.0176],
        [ 0.0109, -0.0253],
        [ 0.0335, -0.0704],
        [-0.0045, -0.0265],
        [ 0.0172, -0.0487],
        [ 0.0247, -0.0727],
        [-0.0315, -0.0004],
        [-0.0726,  0.0363],
        [-0.0502,  0.0282],
        [-0.0798,  0.0376],
        [-0.1758,  0.1436],
        [-0.0088, -0.0110],
        [ 0.0238, -0.0479],
        [-0.0805,  0.0442],
        [-0.0877,  0.0718],
        [-0.0779,  0.0240],
        [-0.0038, -0.0169],
        [-0.0198, -0.0097],
        [ 0.0662, -0.0936],
        [-0.0972,  0.0696],
        [-0.0966,  0.0669],
        [ 0.0584, -0.0888],
        [-0.0554,  0.0335],
        [-0.0668,  0.0349],
        [-0.0960,  0.0468],
        [-0.0014, -0.0513],
        [-0.0605,  0

validation accuracy: 0.7285714285714284
Epoch [2/20], Step 55, Loss: 0.6578
train accuracy: 0.5144444444444445
tensor([[ 0.2417, -0.2732],
        [ 0.1511, -0.1785],
        [ 0.0028, -0.0387],
        [ 0.1744, -0.2199],
        [ 0.2013, -0.2625],
        [ 0.1371, -0.1870],
        [ 0.1120, -0.1433],
        [ 0.1565, -0.2096],
        [-0.0410, -0.0294],
        [-0.0466,  0.0157],
        [ 0.1882, -0.2185],
        [ 0.0784, -0.1054],
        [ 0.0662, -0.0886],
        [-0.0067, -0.0391],
        [ 0.2210, -0.2603],
        [ 0.0087, -0.0667],
        [ 0.0105, -0.0283],
        [ 0.1109, -0.1515],
        [ 0.1219, -0.1763],
        [ 0.1832, -0.2256],
        [ 0.1261, -0.1577],
        [ 0.0893, -0.1237],
        [-0.0068, -0.0278],
        [-0.0006, -0.0476],
        [ 0.0732, -0.1111],
        [ 0.1852, -0.2477],
        [ 0.0480, -0.1134],
        [ 0.1133, -0.1615],
        [ 0.1794, -0.2022],
        [ 0.0237, -0.0423],
        [ 0.0792, -0.1189],
        [ 0.0519, -0.

validation accuracy: 0.6904761904761906
Epoch [2/20], Step 80, Loss: 0.6960
train accuracy: 0.5250632911392406
tensor([[-0.0534,  0.0025],
        [-0.1022,  0.0199],
        [-0.0483, -0.0276],
        [-0.1005,  0.0439],
        [-0.0175, -0.0424],
        [-0.1329,  0.0737],
        [-0.1119,  0.0512],
        [-0.0617,  0.0004],
        [-0.0683,  0.0135],
        [-0.0393, -0.0290],
        [-0.0698,  0.0245],
        [-0.0812,  0.0149],
        [-0.1322,  0.0611],
        [-0.0499, -0.0101],
        [-0.1621,  0.1045],
        [-0.0519,  0.0042],
        [-0.0687,  0.0237],
        [-0.0177, -0.0252],
        [-0.0670,  0.0139],
        [-0.1278,  0.0706],
        [-0.0272, -0.0099],
        [-0.0482,  0.0097],
        [-0.0356, -0.0317],
        [-0.1150,  0.0545],
        [-0.0828,  0.0298],
        [-0.0948,  0.0431],
        [-0.0862,  0.0100],
        [-0.0445, -0.0275],
        [-0.1197,  0.0641],
        [ 0.0052, -0.0588],
        [-0.1260,  0.0701],
        [-0.0764,  0.

validation accuracy: 0.5571428571428572
Epoch [3/20], Step 25, Loss: 0.7299
train accuracy: 0.6241666666666666
tensor([[ 0.4679, -0.4845],
        [ 0.4194, -0.4469],
        [ 0.4363, -0.4596],
        [ 0.3053, -0.3421],
        [ 0.4812, -0.5214],
        [ 0.4566, -0.4690],
        [-0.1123,  0.0612],
        [ 0.3672, -0.3799],
        [ 0.1832, -0.1879],
        [ 0.3983, -0.4216],
        [ 0.1083, -0.1206],
        [ 0.3809, -0.4051],
        [ 0.4918, -0.5033],
        [ 0.1881, -0.2031],
        [ 0.2645, -0.2976],
        [ 0.2530, -0.2665],
        [ 0.5481, -0.5637],
        [ 0.5548, -0.5943],
        [ 0.4603, -0.4884],
        [ 0.5066, -0.5332],
        [ 0.3557, -0.4001],
        [-0.1501,  0.0804],
        [ 0.5435, -0.5430],
        [-0.2394,  0.1612],
        [ 0.2226, -0.2457],
        [-0.1493,  0.0901],
        [-0.2243,  0.1574],
        [ 0.4786, -0.4832],
        [-0.1489,  0.0845],
        [ 0.2983, -0.2976],
        [-0.0460,  0.0369],
        [ 0.1248, -0.

validation accuracy: 0.8190476190476191
Epoch [3/20], Step 50, Loss: 0.7017
train accuracy: 0.5767346938775509
tensor([[-0.2677,  0.2206],
        [-0.2104,  0.1629],
        [ 0.2724, -0.2790],
        [ 0.1152, -0.1474],
        [-0.2409,  0.1956],
        [-0.2349,  0.1903],
        [-0.1982,  0.1491],
        [-0.2500,  0.2053],
        [ 0.2486, -0.2770],
        [-0.2013,  0.1635],
        [-0.2557,  0.2070],
        [ 0.0209, -0.0490],
        [-0.2656,  0.2170],
        [-0.2349,  0.1649],
        [-0.0374,  0.0141],
        [ 0.0676, -0.0912],
        [-0.0016, -0.0437],
        [-0.2363,  0.1896],
        [-0.1261,  0.0917],
        [ 0.0504, -0.0914],
        [-0.2626,  0.2141],
        [-0.2376,  0.1676],
        [ 0.3142, -0.3057],
        [ 0.0940, -0.1237],
        [ 0.3227, -0.3476],
        [-0.2778,  0.2247],
        [ 0.1360, -0.1323],
        [ 0.1867, -0.2077],
        [-0.2864,  0.2438],
        [ 0.2291, -0.2324],
        [-0.1950,  0.1593],
        [-0.1011,  0.

validation accuracy: 0.19047619047619052
Epoch [3/20], Step 75, Loss: 0.6967
train accuracy: 0.577027027027027
tensor([[-0.0819,  0.0424],
        [-0.1742,  0.1355],
        [-0.0803,  0.0407],
        [-0.1936,  0.1436],
        [-0.0915,  0.0706],
        [-0.1629,  0.1219],
        [-0.0405,  0.0053],
        [-0.1425,  0.1157],
        [-0.1024,  0.0734],
        [-0.1040,  0.0626],
        [-0.0903,  0.0423],
        [-0.1528,  0.0959],
        [-0.0222, -0.0131],
        [-0.1950,  0.1596],
        [-0.0585,  0.0204],
        [-0.1969,  0.1581],
        [ 0.0148, -0.0369],
        [-0.0804,  0.0602],
        [-0.0853,  0.0380],
        [-0.1537,  0.1131],
        [-0.0979,  0.0539],
        [-0.1071,  0.0797],
        [-0.0739,  0.0479],
        [-0.0514,  0.0330],
        [-0.1794,  0.1370],
        [-0.2141,  0.1658],
        [-0.1659,  0.1210],
        [-0.0827,  0.0560],
        [-0.2345,  0.1843],
        [-0.0570,  0.0288],
        [-0.1815,  0.1400],
        [-0.0743,  0.

KeyboardInterrupt: 