In [1]:
VERSION = "20200325"  #@param ["1.5" , "20200325", "nightly"]
!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
!python pytorch-xla-env-setup.py --version $VERSION

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  3727  100  3727    0     0  67763      0 --:--:-- --:--:-- --:--:-- 67763
Updating TPU and VM. This may take around 2 minutes.
Updating TPU runtime to pytorch-dev20200325 ...
Uninstalling torch-1.5.0a0+d6149a7:
  Successfully uninstalled torch-1.5.0a0+d6149a7
Uninstalling torchvision-0.6.0a0+3c254fb:
  Successfully uninstalled torchvision-0.6.0a0+3c254fb
Copying gs://tpu-pytorch/wheels/torch-nightly+20200325-cp36-cp36m-linux_x86_64.whl...
/ [1 files][ 83.4 MiB/ 83.4 MiB]                                                
Operation completed over 1 objects/83.4 MiB.                                     
Copying gs://tpu-pytorch/wheels/torch_xla-nightly+20200325-cp36-cp36m-linux_x86_64.whl...
- [1 files][114.5 MiB/114.5 MiB]                                                
Operation completed over 1 objects/114.5 MiB.               

In [2]:
from google.colab import drive
drive.mount('/content/drive')
%cd drive/My Drive/Colab

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Colab


In [None]:
import joblib, time, os, copy, datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch_xla
import torch_xla.core.xla_model as xm
import torch_xla.debug.metrics as met
import torch_xla.distributed.parallel_loader as pl
import torch_xla.distributed.xla_multiprocessing as xmp
import torch_xla.utils.utils as xu

### Functions for Data Preparation

In [None]:
def flatten(ls):
     for item in ls:
            for child in list(item):
                yield child

def get_segment_positions(x):
    ps = x.segment.split()
    ls = []
    for i in range(len(ps)-1):
        if i == 0:
            ls.append([int(ps[i]), int(ps[i+1])])
        else:
            ls.append([int(ps[i])+1, int(ps[i+1])])        
    return ls

def get_segment_features(x):
    ls = []
    for rg in x.positions:
        ls.append(x.feature[rg[0]:rg[1]])            
    return ls

def splitDataFrameList(df,target_column):
    def splitListToRows(row,row_accumulator,target_column):
        split_row = row[target_column]
        for s in split_row:
            new_row = row.to_dict()
            new_row[target_column] = s
            row_accumulator.append(new_row)
    new_rows = []
    df.apply(splitListToRows,axis=1,args = (new_rows,target_column))
    new_df = pd.DataFrame(new_rows)
    return new_df
                
def get_train_data():
    training_segment = pd.read_csv('training_segment.txt', header=None, names = ['segment'])
    training_segment['feature'] = joblib.load('train_feature.joblib')
    training_segment['positions'] = training_segment.apply(lambda x: get_segment_positions(x), axis=1)
    training_segment['feature'] = training_segment.apply(lambda x: get_segment_features(x), axis=1)
    training_segment = splitDataFrameList(training_segment, 'feature')
    training_segment['label'] = list(flatten(joblib.load('train_label.joblib')))
    training_segment = training_segment.drop(['segment','positions'], axis = 1)
    # Further collapse the data from each segment to each single frame
    training_segment = splitDataFrameList(training_segment, 'feature')
    return training_segment

def get_test_data():
    test_segment = pd.read_csv('test_segment.txt', header=None, names = ['segment'])
    test_segment['feature'] = joblib.load('test_feature.joblib')
    test_segment['positions'] = test_segment.apply(lambda x: get_segment_positions(x), axis=1)
    test_segment['feature'] = test_segment.apply(lambda x: get_segment_features(x), axis=1)
    test_segment = splitDataFrameList(test_segment, 'feature')
    test_segment['ID'] = test_segment.index
    test_segment = test_segment.drop(['segment','positions'], axis = 1)
    # Further collapse the data from each segment to each single frame
    test_segment = splitDataFrameList(test_segment, 'feature')
    return test_segment

In [None]:
%%time
train_df = get_train_data()
print(train_df.feature[0].shape)
train_df

In [None]:
%%time
train_sample_ratio = 1
train_data = train_df.sample(frac=train_sample_ratio)
train_data['feature'] = train_data.apply(lambda x : x["feature"].view(1, 20, 20), axis = 1)
train_data['label'] = train_data.apply(lambda x : x["label"] - 1, axis = 1)
train_data

### Functions for Model Training

In [None]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        ms = time.time()
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)           

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)            
            print('{} Loss: {:.4f}, {} Acc: {:.4f}'.format(phase, epoch_loss, phase, epoch_acc))
        
        time_taken = str(datetime.timedelta(seconds=time.time() - ms))
        print('time taken: {}'.format(time_taken))
            

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [8]:
%%time

# from models.VGGNet import VGGNet

def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False


def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    print("train model starts.....")
    
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        ms = time.time()
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            para_loader = pl.ParallelLoader(dataloaders[phase], [device])
            loader = para_loader.per_device_loader(device)
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in loader:
                # inputs = inputs.to(device)
                # labels = labels.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    if phase == 'train':
                        loss.backward()
                        # optimizer.step()
                        xm.optimizer_step(optimizer)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)           

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)
            
            time_taken = str(datetime.timedelta(seconds=time.time() - ms))
            xm.master_print('{} Loss: {:.4f}, {} Acc: {:.4f}, time taken: {}'.format(phase, epoch_loss, phase, epoch_acc, time_taken))
            

    time_elapsed = time.time() - since
    xm.master_print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    xm.master_print('Best val Acc: {:4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    return model, val_acc_history


CPU times: user 8 µs, sys: 0 ns, total: 8 µs
Wall time: 12.2 µs


### NN Model

In [None]:
class VGGNet(nn.Module):
    def __init__(self):
        super(VGGNet, self).__init__()
        self.conv11 = nn.Conv2d(1, 6, 3, stride=1, padding=2)
        self.conv12 = nn.Conv2d(6, 6, 3, stride=1, padding=2)
        self.pool = nn.MaxPool2d(2, 2)

        self.conv21 = nn.Conv2d(6, 16, 3, stride=1, padding=2)
        self.conv22 = nn.Conv2d(16, 16, 3, stride=1, padding=2)
        self.pool = nn.MaxPool2d(2, 2)

        self.conv31 = nn.Conv2d(16, 64, 3, stride=1, padding=2)
        self.conv32 = nn.Conv2d(64, 64, 3, stride=1, padding=2)
        self.conv33 = nn.Conv2d(64, 64, 3, stride=1, padding=2)
        self.pool = nn.MaxPool2d(2, 2)

        self.conv41 = nn.Conv2d(64, 128, 3, stride=1, padding=2)
        self.conv42 = nn.Conv2d(128, 128, 3, stride=1, padding=2)
        self.conv43 = nn.Conv2d(128, 128, 3, stride=1, padding=2)
        self.pool = nn.MaxPool2d(2, 2)

        self.conv51 = nn.Conv2d(128, 256, 3, stride=1, padding=2)
        self.conv52 = nn.Conv2d(256, 256, 3, stride=1, padding=2)
        self.conv53 = nn.Conv2d(256, 256, 3, stride=1, padding=2)
        self.pool = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(256*6*6, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 47)

    def forward(self, x):
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = self.pool(x)

        x = F.relu(self.conv21(x))
        x = F.relu(self.conv22(x))
        x = self.pool(x)

        x = F.relu(self.conv31(x))
        x = F.relu(self.conv32(x))
        x = F.relu(self.conv33(x))
        x = self.pool(x)

        x = F.relu(self.conv41(x))
        x = F.relu(self.conv42(x))
        x = F.relu(self.conv43(x))
        x = self.pool(x)

        x = F.relu(self.conv51(x))
        x = F.relu(self.conv52(x))
        x = F.relu(self.conv53(x))
        x = self.pool(x)

        x = x.view(-1, 256*6*6)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x)

### Setup for Training

In [None]:

train_feature = torch.stack(train_data['feature'].tolist())
train_label = torch.tensor(train_data['label'].values.astype(np.long))

train_dataset = torch.utils.data.TensorDataset(train_feature, train_label)

train_size = int(0.98 * len(train_data))
val_size = int(0.01 * len(train_data))
test_size = len(train_data) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size, test_size])

train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset, num_replicas=xm.xrt_world_size(), rank=xm.get_ordinal(), shuffle=True)
val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, num_replicas=xm.xrt_world_size(), rank=xm.get_ordinal(), shuffle=True)
test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset, num_replicas=xm.xrt_world_size(), rank=xm.get_ordinal(), shuffle=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=256, sampler=train_sampler, num_workers=4, drop_last=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=256, sampler=val_sampler, num_workers=4, drop_last=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=256, sampler=test_sampler, num_workers=4, drop_last=True)

dataloaders_dict = {'train':train_loader, 'val':val_loader}
print("len(train_loader.dataset) = ", len(train_loader.dataset))
print("len(val_loader.dataset) = ", len(val_loader.dataset))
print("len(test_loader.dataset) = ", len(test_loader.dataset))


In [10]:

num_classes = 47
batch_size = 256
num_epochs = 10
feature_extract = True
# num_epochs = 10


device = xm.xla_device()
model = VGGNet().to(device)
print(VGGNet())
# model_ft = MyNet().double()
# print(model_ft)
# model_ft = model_ft.to(device)

params_to_update = model.parameters()
print("Params to learn:")
if feature_extract:
    params_to_update = []
    for name,param in model.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t",name)
else:
    for name,param in model.named_parameters():
        if param.requires_grad == True:
            print("\t",name)


VGGNet(
  (conv11): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv12): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv21): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv22): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv31): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv32): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv33): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv41): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv42): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv43): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv51): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv52): Conv2d(256, 256, kernel_size=(3, 3)

In [12]:

# def _mp_fn(index):
torch.set_default_tensor_type('torch.FloatTensor')
optimizer_ft = torch.optim.SGD(params_to_update, lr=0.001, momentum=0.9)
criterion = torch.nn.CrossEntropyLoss()
model_ft, hist = train_model(model, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=False)

# xmp.spawn(_mp_fn, nprocs=, start_method='fork')

VGGNet(
  (conv11): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv12): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv21): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv22): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv31): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv32): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv33): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv41): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv42): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv43): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv51): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv52): Conv2d(256, 256, kernel_size=(3, 3)



train Loss: 3.2577, train Acc: 0.1267, time taken: 0:06:16.408039
val Loss: 3.2176, val Acc: 0.1227, time taken: 0:06:25.396365
Epoch 1/9
----------
train Loss: 3.2238, train Acc: 0.1275, time taken: 0:06:12.062088
val Loss: 3.2175, val Acc: 0.1227, time taken: 0:06:15.858170
Epoch 2/9
----------
train Loss: 3.2238, train Acc: 0.1275, time taken: 0:06:08.373956
val Loss: 3.2175, val Acc: 0.1227, time taken: 0:06:12.149060
Epoch 3/9
----------
train Loss: 3.2237, train Acc: 0.1275, time taken: 0:06:09.385173
val Loss: 3.2174, val Acc: 0.1227, time taken: 0:06:13.191269
Epoch 4/9
----------
train Loss: 3.2237, train Acc: 0.1275, time taken: 0:06:09.395568
val Loss: 3.2174, val Acc: 0.1227, time taken: 0:06:13.229347
Epoch 5/9
----------
train Loss: 3.2236, train Acc: 0.1275, time taken: 0:06:09.974035
val Loss: 3.2172, val Acc: 0.1227, time taken: 0:06:13.806440
Epoch 6/9
----------
train Loss: 3.2208, train Acc: 0.1275, time taken: 0:06:08.456035
val Loss: 3.1963, val Acc: 0.1227, time 

In [13]:
%%time
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data[0].to(device), data[1].to(device)
        outputs = model_ft(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of test data: {0}'.format(100 * correct / total))



Accuracy of test data: 44.64738175675676
CPU times: user 44.4 s, sys: 2.46 s, total: 46.8 s
Wall time: 6.7 s


### Save and Load Model

In [None]:
%%time

## Save the model. Run this if need to save.
model_name = "model_"  + str(train_sample_ratio) + ".model"
torch.save(model_ft.state_dict(), model_name)
# upload_files([model_name])

In [11]:
%%time

## Load the model from previously saved.
model_name = "model_1"  +  ".model"
model = VGGNet().double().to(device)
model.load_state_dict(torch.load(model_name, map_location=device)) 

CPU times: user 9.24 s, sys: 228 ms, total: 9.47 s
Wall time: 730 ms


### Apply on Test dataset

In [17]:
print(model)

VGGNet(
  (conv11): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv12): Conv2d(6, 6, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv21): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv22): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv31): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv32): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv33): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv41): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv42): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv43): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv51): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (conv52): Conv2d(256, 256, kernel_size=(3, 3)

In [None]:
%%time

test_df = get_test_data()
# test_data = test_df.sample(frac = 1.0)
test_df['feature'] = test_df.apply(lambda x : x["feature"].view(1, 20, 20), axis = 1)
test_feature = torch.stack(test_df['feature'].tolist())
test_dataset = torch.utils.data.TensorDataset(test_feature)
predict_loader = torch.utils.data.DataLoader(test_dataset, batch_size=256, shuffle=False)

result = []
with torch.no_grad():
  para_loader = pl.ParallelLoader(predict_loader, [device])
  loader = para_loader.per_device_loader(device)
  for data in loader:
    ouputs = model_ft(data)
    _, predicted = torch.max(outputs.data, 1)
    result = np.append(result, predicted)

test_df['label'] = np.transpose(result).astype('int') 
test_df



In [None]:
%%time
test_result = pd.DataFrame(result.reshape(-1,47))
test_result['ID'] = test_data['ID'].values
test_data_mean = test_result.groupby('ID')[list(range(47))].agg(['mean'])
test_data_mean.columns = list(range(47))
test_data_mean["Category"] = test_data_mean.idxmax(axis = 1)
test_data_mean['Category'] = test_data_mean.apply(lambda x : x["Category"] + 1, axis = 1).astype('int') 
test_data_mean["ID"] = test_data_mean.index
test_data_group = test_data_mean[['ID','Category']]
print(test_data_group.info())

In [None]:
submission_name = "submission_"  + str(train_sample_ratio) + "_mean.csv"
test_data_group.to_csv(submission_name, index=False)