# Vulture deep

In [1]:
from utils import *

import os

import torch
import torch.nn as nn
import torch.utils.data
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler

import pandas as pd
import numpy as np

from sklearn import preprocessing
# from sklearn.model_selection import train_test_split

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [30]:
# Misc
LOG_DIR = './log_pt/'

# Training Parameters
test_split = 0.1
learning_rate = 0.01
num_epochs = 2000
batch_size = 256
display_step = 100

# Network Parameters
num_classes = 6
keep_rate = 1

repeat_size = 82
crop_size = 80
perm_num = 5
channel_swap = True

In [4]:
raw = pd.read_csv('./vultures.csv', header=None)

In [5]:
class acc_dataset (torch.utils.data.Dataset):
    
    def __init__ (self, X, Y, mean):
        super().__init__()
        self.X = X
        self.Y = Y.astype(np.float)
        self.mean = mean
        
    def __len__ (self):
        return len(self.X)
    
    def __getitem__ (self, idx):
        
        x = self.X[idx]
        y = self.Y[idx]
        
        x = (x - self.mean).transpose(1, 0)
            
        return torch.tensor(x), torch.tensor(y).type(torch.LongTensor)

In [6]:
mean = np.array([0. for i in  range(3)])
X = []
Y = []

for i in range(len(raw)):
    
    row = np.array(raw.loc[i][~raw.loc[i].isnull()])
    data = row[0:-1].astype(float)
    label = row[-1]
    
    x_axis = data[0::3]
    y_axis = data[1::3]
    z_axis = data[2::3]
    
    x_axis = repeat_crop_data (x_axis, repeat_size)
    y_axis = repeat_crop_data (y_axis, repeat_size)
    z_axis = repeat_crop_data (z_axis, repeat_size)
    
    mean[0] += x_axis.mean()
    mean[1] += y_axis.mean()
    mean[2] += z_axis.mean()
    
    X.append(np.stack((x_axis, y_axis, z_axis)))
    Y.append(label)

mean = mean/len(raw)
    
X = np.array(X).transpose((0, 2, 1))
Y = np.array(Y)
label_names = np.unique(Y)

le = preprocessing.LabelEncoder()
le.fit(np.unique(Y))
Y = le.transform(Y)

In [7]:
def train_test_split(X, Y, split_rate):
    
    train_idx_overall = np.array([])
    test_idx_overall = np.array([])

    for l in np.unique(Y):

        idx = np.where(Y == l)[0]

        test_size = int(len(idx) * split_rate)

        test_choice = np.random.choice(len(idx), size=test_size, replace=False)

        train_idx = np.delete(idx, test_choice)

        test_idx = idx[test_choice]
        
        train_idx_overall = np.append(train_idx_overall, train_idx)
        
        test_idx_overall = np.append(test_idx_overall, test_idx)
        
        
    return (X[train_idx_overall.astype(int)], Y[train_idx_overall.astype(int)],
            X[test_idx_overall.astype(int)], Y[test_idx_overall.astype(int)],
            train_idx_overall, test_idx_overall)
    

train_x, train_y, test_x, test_y, train_idx, test_idx= train_test_split(X, Y, test_split)

In [35]:
train_x = np.load('./vulture_train_x.npy')
train_y = np.load('./vulture_train_y.npy')
test_x = np.load('./vulture_test_x.npy')
test_y = np.load('./vulture_test_y.npy')

In [36]:
train_set = acc_dataset(train_x, train_y, mean)
test_set = acc_dataset(test_x, test_y, mean)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=True, num_workers=2)

In [37]:
# x, y = next(iter(train_loader))

# x = x.float()

# conv1 = nn.Conv1d(in_channels=3, out_channels=32, kernel_size=3, padding=1, stride=2)
# conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1, stride=2)
# conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1, stride=2)
# conv4 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1, stride=2)
# avg_pool = nn.AvgPool1d(kernel_size=6, stride=1, padding=0)
# fc5 = nn.Linear(256, num_classes)

# x = F.relu(conv1(x))

# x = F.relu(conv2(x))

# x = F.relu(conv3(x))

# x = F.relu(conv4(x))

# x = avg_pool(x)

# x = x.squeeze(2)

# x = fc5(x)

In [38]:
class MyNet (nn.Module):
    
    def __init__ (self, num_classes, input_dim, drop_rate):
        super().__init__()
        
        self.input_dim = input_dim
        
        self.conv1 = nn.Conv1d(in_channels=3, out_channels=32, kernel_size=3, padding=1, stride=2)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1, stride=2)
        self.conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1, stride=2)
        self.conv4 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1, stride=2)
        self.avg_pool = nn.AvgPool1d(kernel_size=6, stride=1, padding=0)
        self.fc5 = nn.Linear(256, num_classes)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.avg_pool(x)
        x = x.squeeze(2)
        x = self.fc5(x)

        return x

In [39]:
net = MyNet(num_classes=num_classes, input_dim=repeat_size, drop_rate=0.3).double()
net.to(device)

MyNet(
  (conv1): Conv1d(3, 32, kernel_size=(3,), stride=(2,), padding=(1,))
  (conv2): Conv1d(32, 64, kernel_size=(3,), stride=(2,), padding=(1,))
  (conv3): Conv1d(64, 128, kernel_size=(3,), stride=(2,), padding=(1,))
  (conv4): Conv1d(128, 256, kernel_size=(3,), stride=(2,), padding=(1,))
  (avg_pool): AvgPool1d(kernel_size=(6,), stride=(1,), padding=(0,))
  (fc5): Linear(in_features=256, out_features=6, bias=True)
)

In [40]:
# Loss function
loss_function = nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)

# Decay LR by a factor of 0.1 every 30 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=300, gamma=0.1)

In [41]:
step = 0

net.train()

for epoch in range(num_epochs):
    
    exp_lr_scheduler.step()
    
    for data in train_loader:
        
        x, y = data
        
        x, y = x.to(device), y.to(device)
        
        optimizer.zero_grad()
        
        logits = net(x)
        
        loss = loss_function(logits, y)
        
        loss.backward()
        
        optimizer.step()
        
        step += 1
        
        _, preds = torch.max(logits, 1)
        
        correct = (preds == y).sum().item()
        
        accuracy = correct/len(x)
        
        if step % display_step == 0:
            print('Epoch: %d, Step: %d, Minibatch loss: %.3f, Minibatch accuracy: %.3f' % 
                  (epoch, step, loss, accuracy))
            
print('Final, Step: %d, Minibatch loss: %.3f, Minibatch accuracy: %.3f' % 
                  (step, loss, accuracy))

Epoch: 49, Step: 100, Minibatch loss: 1.090, Minibatch accuracy: 0.618
Epoch: 99, Step: 200, Minibatch loss: 0.636, Minibatch accuracy: 0.758
Epoch: 149, Step: 300, Minibatch loss: 0.727, Minibatch accuracy: 0.753
Epoch: 199, Step: 400, Minibatch loss: 0.358, Minibatch accuracy: 0.892
Epoch: 249, Step: 500, Minibatch loss: 0.222, Minibatch accuracy: 0.930
Epoch: 299, Step: 600, Minibatch loss: 0.146, Minibatch accuracy: 0.968
Epoch: 349, Step: 700, Minibatch loss: 0.123, Minibatch accuracy: 0.978
Epoch: 399, Step: 800, Minibatch loss: 0.141, Minibatch accuracy: 0.968
Epoch: 449, Step: 900, Minibatch loss: 0.085, Minibatch accuracy: 0.984
Epoch: 499, Step: 1000, Minibatch loss: 0.079, Minibatch accuracy: 0.984
Epoch: 549, Step: 1100, Minibatch loss: 0.092, Minibatch accuracy: 0.984
Epoch: 599, Step: 1200, Minibatch loss: 0.102, Minibatch accuracy: 0.978
Epoch: 649, Step: 1300, Minibatch loss: 0.091, Minibatch accuracy: 0.989
Epoch: 699, Step: 1400, Minibatch loss: 0.056, Minibatch accur

In [42]:
net.eval()

test_correct = 0

with torch.no_grad():
    
    for data in test_loader:

        x, y = data
        
        x, y = x.to(device), y.to(device)

        test_logits = net(x)
        _, test_preds = torch.max(test_logits, 1)

        test_correct_batch = (test_preds == y).sum().item()

        test_correct += test_correct_batch

print('Test accuracy: %.3f' % (test_correct / len(test_set)))

Test accuracy: 0.783


In [184]:
torch.save(net.state_dict(), os.path.join(LOG_DIR, 'model_%s_checkpoint.pth.tar' 
                                          % 'vulter_plain_ep_500_step_50_SGD_acc_91'))

In [None]:
train_x, train_y, test_x, test_y

In [128]:
np.save('./vulture_train_x.npy', train_x)
np.save('./vulture_train_y.npy', train_y)
np.save('./vulture_test_x.npy', test_x)
np.save('./vulture_test_y.npy', test_y)
np.save('./vulture_train_idx.npy', train_idx)
np.save('./vulture_test_idx.npy', test_idx)

In [53]:
# train_x = np.load('./vulture_train_x.npy')
# train_y = np.load('./vulture_train_y.npy')
# test_x = np.load('./vulture_test_x.npy')
# test_y = np.load('./vulture_test_y.npy')

In [42]:
a.shape

(442, 82, 3)