# Vulture class_aware

In [1]:
from utils import *

import os

import torch
import torch.nn as nn
import torch.utils.data
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler

import pandas as pd
import numpy as np

from sklearn import preprocessing
# from sklearn.model_selection import train_test_split

In [2]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [47]:
# --- Output locations ---
LOG_DIR = './log_pt/'    # directory the model checkpoint is written to

# --- Optimisation schedule ---
num_epochs = 200         # number of passes over train_loader
batch_size = 256         # minibatch size used by both data loaders
learning_rate = 0.01     # initial learning rate handed to SGD
display_step = 100       # print minibatch metrics every `display_step` optimizer steps
test_split = 0.1         # per-class fraction held out by train_test_split

# --- Label space / regularisation ---
num_classes = 6          # size of the classifier head; also passed to ClassAwareSampler
keep_rate = 1            # NOTE(review): not referenced by any visible cell (MyNet is built with drop_rate=0.5)

# --- Input windowing / augmentation ---
repeat_size = 82         # length passed to repeat_crop_data; also MyNet's input_dim
crop_size = 80           # NOTE(review): not referenced by any visible cell — confirm whether still needed
perm_num = 5             # NOTE(review): not referenced by any visible cell — confirm whether still needed
channel_swap = True      # NOTE(review): not referenced by any visible cell — confirm whether still needed

In [4]:
# Read the recordings table. header=None makes pandas treat the first line as data
# and label the columns 0..N-1; null cells are dropped per row in a later cell.
raw = pd.read_csv(filepath_or_buffer='./vultures.csv', header=None)

In [5]:
class acc_dataset(torch.utils.data.Dataset):
    """Map-style dataset that mean-centres each sample and yields (x, label) tensors.

    Args:
        X: indexable collection of 2-D NumPy samples; ``len(X)`` defines the
            dataset size. Presumably each sample is (time, channel) — confirm
            against the code that builds X.
        Y: NumPy array of numeric class labels aligned with ``X``.
        mean: value subtracted from every sample; it must broadcast against a
            single sample (e.g. one entry per channel on the last axis).
    """

    def __init__(self, X, Y, mean):
        super().__init__()
        self.X = X
        # `np.float` was only an alias of the builtin `float`; it was deprecated in
        # NumPy 1.20 and removed in 1.24, so use the builtin to keep the same
        # float64 result on every NumPy version.
        self.Y = Y.astype(float)
        self.mean = mean

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for sample ``idx``.

        ``x`` is the mean-centred sample with its two axes swapped (the layout
        ``nn.Conv1d`` consumes when samples are stored time-major); ``y`` is
        the label as a 0-dim int64 tensor, as ``nn.CrossEntropyLoss`` expects.
        """
        centred = self.X[idx] - self.mean
        x = centred.transpose(1, 0)
        y = self.Y[idx]

        return torch.tensor(x), torch.tensor(y).type(torch.LongTensor)

In [6]:
# Running sum of the per-recording means, one slot per axis (x, y, z).
mean = np.zeros(3)
X = []
Y = []

for i in range(len(raw)):

    # Drop the null cells of this row; the last remaining cell is the label and
    # everything before it is the interleaved x, y, z, x, y, z, ... signal.
    record = raw.loc[i]
    row = np.array(record[record.notnull()])
    data = row[:-1].astype(float)
    label = row[-1]

    # De-interleave the three axes and pass each through repeat_crop_data
    # (helper from utils) with repeat_size, in x, y, z order.
    axes = [repeat_crop_data(data[offset::3], repeat_size) for offset in range(3)]

    for k, axis_values in enumerate(axes):
        mean[k] += axis_values.mean()

    X.append(np.stack(axes))
    Y.append(label)

# Average of the per-recording means for each axis.
mean = mean / len(raw)

# Samples were stacked axis-first; swap the last two axes so the axis index is last.
X = np.array(X).swapaxes(1, 2)
Y = np.array(Y)
label_names = np.unique(Y)

# Encode the raw labels as integer ids 0..K-1 (sorted order of the unique labels).
le = preprocessing.LabelEncoder().fit(label_names)
Y = le.transform(Y)

In [7]:
def train_test_split(X, Y, split_rate, rng=None):
    """Stratified hold-out split: sample a test subset from every class separately.

    Args:
        X: NumPy array of samples, indexed along axis 0.
        Y: NumPy array of labels aligned with ``X``.
        split_rate: fraction of each class moved to the test set; the per-class
            test size is ``int(n_class * split_rate)`` (rounded down).
        rng: optional object exposing ``choice(a, size=..., replace=...)``, e.g.
            ``np.random.RandomState`` or ``np.random.Generator``, for a
            reproducible split. Defaults to NumPy's global ``np.random`` state.

    Returns:
        Tuple ``(train_x, train_y, test_x, test_y, train_idx, test_idx)``. The
        two index arrays are integer positions into ``X``/``Y``, grouped by
        class in sorted label order.
    """
    sampler = np.random if rng is None else rng

    # Start from integer-typed empties. Appending onto a plain `np.array([])`
    # promotes everything to float64, giving indices that cannot index an array.
    train_idx_overall = np.array([], dtype=np.intp)
    test_idx_overall = np.array([], dtype=np.intp)

    for l in np.unique(Y):

        idx = np.where(Y == l)[0]

        test_size = int(len(idx) * split_rate)

        # `test_choice` holds positions *within* idx, not dataset indices.
        test_choice = sampler.choice(len(idx), size=test_size, replace=False)

        train_idx = np.delete(idx, test_choice)
        test_idx = idx[test_choice]

        train_idx_overall = np.append(train_idx_overall, train_idx)
        test_idx_overall = np.append(test_idx_overall, test_idx)

    return (X[train_idx_overall], Y[train_idx_overall],
            X[test_idx_overall], Y[test_idx_overall],
            train_idx_overall, test_idx_overall)
    

# Hold out `test_split` of every class; keep the index arrays so the split can be saved.
(train_x, train_y,
 test_x, test_y,
 train_idx, test_idx) = train_test_split(X, Y, test_split)

In [33]:
# Reload a previously saved split from disk. This replaces the train/test arrays
# produced by the train_test_split call above, so later cells use the cached split.
train_x, train_y, test_x, test_y = [
    np.load(path)
    for path in ('./vulture_train_x.npy',
                 './vulture_train_y.npy',
                 './vulture_test_x.npy',
                 './vulture_test_y.npy')
]

In [34]:
# Wrap both splits in acc_dataset; the same `mean` is used to centre train and test samples.
train_set = acc_dataset(train_x, train_y, mean)
test_set = acc_dataset(test_x, test_y, mean)

# Training batches are drawn through ClassAwareSampler (from utils). A DataLoader
# cannot combine a custom sampler with shuffle=True, hence shuffle=False here.
class_aware_sampler = ClassAwareSampler(data_source=train_set, num_classes=num_classes)
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
    sampler=class_aware_sampler,
)

# Plain shuffled alternative without class-aware sampling:
# train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)

test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

In [35]:
class MyNet(nn.Module):
    """Small 1-D CNN classifier: two conv layers followed by two fully connected layers.

    Args:
        num_classes: number of output logits.
        input_dim: length of the input sequence. Both convolutions use
            kernel_size=3 with padding=1, so the length is preserved and the
            flattened feature size is ``input_dim * 64``.
        drop_rate: dropout probability applied after the first fully connected layer.

    Input to ``forward`` is ``(batch, 3, input_dim)``; output is ``(batch, num_classes)``
    raw logits (no softmax).
    """

    def __init__(self, num_classes, input_dim, drop_rate):
        super().__init__()

        self.input_dim = input_dim

        self.conv1 = nn.Conv1d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.fc3 = nn.Linear(self.input_dim*64, 1024)
        self.drop = nn.Dropout(p=drop_rate)
        self.fc4 = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Compute class logits for a batch shaped ``(batch, 3, input_dim)``."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))

        # Flatten per sample and keep the batch dimension fixed. The previous
        # `view(-1, input_dim*64)` could silently fold extra time steps into the
        # batch dimension when the sequence length did not match input_dim; now
        # such a mismatch raises a shape error in fc3 instead.
        x = x.view(x.shape[0], -1)

        x = F.relu(self.fc3(x))
        x = self.drop(x)
        x = self.fc4(x)

        return x

In [48]:
# Build the classifier for sequences of length repeat_size, with dropout 0.5.
net = MyNet(num_classes=num_classes, input_dim=repeat_size, drop_rate=0.5)
# Cast parameters to float64 so they match the float64 tensors produced by the dataset.
net.double()
# Module.to moves parameters in place and returns the module, so the cell displays its repr.
net.to(device)

MyNet(
  (conv1): Conv1d(3, 32, kernel_size=(3,), stride=(1,), padding=(1,))
  (conv2): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (fc3): Linear(in_features=5248, out_features=1024, bias=True)
  (drop): Dropout(p=0.5)
  (fc4): Linear(in_features=1024, out_features=6, bias=True)
)

In [49]:
# Multi-class cross-entropy computed on the raw logits returned by the network.
loss_function = nn.CrossEntropyLoss()

# SGD with momentum over every parameter of the network.
optimizer = optim.SGD(params=net.parameters(), lr=learning_rate, momentum=0.9)

# Multiply the learning rate by 0.1 every 50 scheduler steps (stepped once per epoch).
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=50, gamma=0.1)

In [50]:
# Global count of optimizer updates across all epochs.
step = 0

# Training mode enables dropout.
net.train()

for epoch in range(num_epochs):

    for data in train_loader:

        x, y = data

        x, y = x.to(device), y.to(device)

        optimizer.zero_grad()

        logits = net(x)

        loss = loss_function(logits, y)

        loss.backward()

        optimizer.step()

        step += 1

        # Minibatch accuracy: fraction of samples whose arg-max logit equals the label.
        _, preds = torch.max(logits, 1)

        correct = (preds == y).sum().item()

        accuracy = correct/len(x)

        if step % display_step == 0:
            print('Epoch: %d, Step: %d, Minibatch loss: %.3f, Minibatch accuracy: %.3f' % 
                  (epoch, step, loss.item(), accuracy))

    # Advance the LR schedule once per epoch, *after* that epoch's optimizer updates.
    # Since PyTorch 1.1, stepping the scheduler before optimizer.step() skips the
    # first value of the schedule and emits a UserWarning.
    exp_lr_scheduler.step()

# Metrics of the very last minibatch processed.
print('Final, Step: %d, Minibatch loss: %.3f, Minibatch accuracy: %.3f' % 
                  (step, loss.item(), accuracy))

Epoch: 19, Step: 100, Minibatch loss: 0.147, Minibatch accuracy: 0.945
Epoch: 39, Step: 200, Minibatch loss: 0.115, Minibatch accuracy: 0.961
Epoch: 59, Step: 300, Minibatch loss: 0.049, Minibatch accuracy: 0.992
Epoch: 79, Step: 400, Minibatch loss: 0.060, Minibatch accuracy: 0.984
Epoch: 99, Step: 500, Minibatch loss: 0.042, Minibatch accuracy: 0.992
Epoch: 119, Step: 600, Minibatch loss: 0.068, Minibatch accuracy: 0.961
Epoch: 139, Step: 700, Minibatch loss: 0.062, Minibatch accuracy: 0.984
Epoch: 159, Step: 800, Minibatch loss: 0.052, Minibatch accuracy: 0.992
Epoch: 179, Step: 900, Minibatch loss: 0.056, Minibatch accuracy: 0.969
Epoch: 199, Step: 1000, Minibatch loss: 0.055, Minibatch accuracy: 0.984
Final, Step: 1000, Minibatch loss: 0.055, Minibatch accuracy: 0.984


In [51]:
# Evaluation mode disables dropout.
net.eval()

test_correct = 0

# Gradients are not needed to measure accuracy.
with torch.no_grad():

    for x, y in test_loader:

        x = x.to(device)
        y = y.to(device)

        test_logits = net(x)
        # Index of the largest logit per sample is the predicted class.
        test_preds = torch.max(test_logits, 1)[1]

        # Accumulate the number of correct predictions in this batch.
        test_correct += (test_preds == y).sum().item()

# Fraction of test samples classified correctly.
print('Test accuracy: %.3f' % (test_correct / len(test_set)))

Test accuracy: 0.891


In [184]:
# torch.save does not create missing parent directories, so make sure LOG_DIR
# exists first; otherwise saving fails on a fresh checkout.
os.makedirs(LOG_DIR, exist_ok=True)

# Store only the parameters (state_dict), not the pickled module.
# NOTE(review): the tag in the file name ('ep_500', 'acc_91', 'vulter') does not match
# num_epochs = 200 or the 0.891 test accuracy printed above — confirm the intended name.
torch.save(net.state_dict(), os.path.join(LOG_DIR, 'model_%s_checkpoint.pth.tar' 
                                          % 'vulter_plain_ep_500_step_50_SGD_acc_91'))

In [None]:
# Display the four split arrays as the cell output (arrays are shown in full, not just shapes).
(train_x, train_y, test_x, test_y)

In [128]:
# Persist the current split (arrays and their index arrays) so later runs can reload it.
for path, array in (
    ('./vulture_train_x.npy', train_x),
    ('./vulture_train_y.npy', train_y),
    ('./vulture_test_x.npy', test_x),
    ('./vulture_test_y.npy', test_y),
    ('./vulture_train_idx.npy', train_idx),
    ('./vulture_test_idx.npy', test_idx),
):
    np.save(path, array)

In [53]:
# train_x = np.load('./vulture_train_x.npy')
# train_y = np.load('./vulture_train_y.npy')
# test_x = np.load('./vulture_test_x.npy')
# test_y = np.load('./vulture_test_y.npy')

In [42]:
# NOTE(review): `a` is not assigned in any cell visible in this notebook, so unless
# `from utils import *` provides it this raises NameError on a fresh kernel
# (Restart & Run All) — bind it to the intended array or remove this cell.
a.shape

(442, 82, 3)