In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so that files
# stored on Drive can be reached through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install pytorchvideo

Collecting pytorchvideo
  Downloading pytorchvideo-0.1.3.tar.gz (128 kB)
[K     |████████████████████████████████| 128 kB 5.2 MB/s 
[?25hCollecting fvcore
  Downloading fvcore-0.1.5.post20211023.tar.gz (49 kB)
[K     |████████████████████████████████| 49 kB 6.0 MB/s 
[?25hCollecting av
  Downloading av-8.0.3-cp37-cp37m-manylinux2010_x86_64.whl (37.2 MB)
[K     |████████████████████████████████| 37.2 MB 32 kB/s 
[?25hCollecting parameterized
  Downloading parameterized-0.8.1-py2.py3-none-any.whl (26 kB)
Collecting iopath
  Downloading iopath-0.1.9-py3-none-any.whl (27 kB)
Collecting yacs>=0.1.6
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 47.7 MB/s 
Collecting portalocker
  Downloading portalocker-2.3.2-py2.py3-none-any.whl (15 kB)
Building wheels for collected packages: pyto

In [None]:
import os
# Print the working directory, switch to the 3DCNN folder on the mounted
# Drive, and print it again to confirm the change.  Relative paths used later
# in the notebook are resolved against this folder.
print(os.getcwd())
os.chdir('/content/drive/My Drive/Google Colab/3DCNN')
print(os.getcwd())

/content
/content/drive/My Drive/Google Colab/3DCNN


In [None]:
import os
import random
from glob import glob

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.utils.data as data
from torch import optim
from tqdm.notebook import tqdm
from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import CenterCropVideo, NormalizeVideo
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.transforms import ApplyTransformToKey, ShortSideScale, UniformTemporalSubsample

from models import generate_model
from circle_loss import convert_label_to_similarity, CircleLoss

In [None]:
def load_net(path):
    """Build the feature-extraction network from a saved checkpoint.

    A model is created with ``generate_model(18)``, its ``fc`` layer is
    replaced by ``nn.Linear(512, 700)`` (presumably so that the layer shapes
    match the checkpoint -- confirm against the weights file), the
    checkpoint's ``'state_dict'`` entry is loaded, and the model's last two
    child modules are dropped.

    Args:
        path: checkpoint file readable by ``torch.load``; it must contain a
            ``'state_dict'`` entry.

    Returns:
        nn.Sequential: every child module of the loaded model except the
        last two.
    """
    # A plain 'cpu' target places every stored tensor on the CPU.  A dict such
    # as {'cuda': 'cpu'} would not do that: dict keys are compared with the
    # saved location tags, which have the form 'cuda:0', so nothing matches
    # and loading a GPU-saved checkpoint fails on a CPU-only runtime.
    trained_weights = torch.load(path, map_location='cpu')
    model = generate_model(18)
    model.fc = nn.Linear(512, 700)
    model.load_state_dict(trained_weights['state_dict'])
    # Leave out the last two children so that avgpool3d is not included.
    net = nn.Sequential(*list(model.children())[:-2])

    return net

In [None]:
# n = load_net('r3d18_K_200ep.pth')
# i = torch.zeros(1,3,64,128,128)


In [None]:
def make_data_path():
    """Collect every ``.mp4`` file one directory level below ``./HandWashDataset``.

    ``glob`` gives no ordering guarantee (it depends on the filesystem), so
    both the sub-directories and the files inside them are sorted.  With a
    stable starting order, a seeded ``random.shuffle`` of the result yields
    the same train/val/test split on every run.

    Returns:
        list[str]: video paths, grouped by sub-directory in sorted order.
    """
    data_path = [
        video_path
        for dir_path in sorted(glob('./HandWashDataset/*'))
        for video_path in sorted(glob(os.path.join(dir_path, '*.mp4')))
    ]
    print('DATA NUM: ', len(data_path))

    return data_path

In [None]:
def data_split(data, train_rate=0.7, val_rate=0.2):
    """Cut ``data`` into consecutive train / validation / test slices.

    Args:
        data: sliceable sequence to partition (order is kept).
        train_rate: fraction of the items that go to the first slice.
        val_rate: fraction of the items that go to the second slice.

    Returns:
        tuple: ``(train, val, test)``; the test slice is whatever remains
        after the first two.  Slice sizes are truncated to whole items.
    """
    total = len(data)
    train_end = int(total * train_rate)
    val_end = train_end + int(total * val_rate)

    train_part = data[:train_end]
    val_part = data[train_end:val_end]
    test_part = data[val_end:]
    return train_part, val_part, test_part

In [None]:
class LoadDataset(data.Dataset):
    """Dataset yielding one transformed video clip and its label per file path.

    Attributes are set straight from the constructor arguments:
    ``data_path`` (indexable collection of video paths), ``transform``
    (callable applied to the decoded clip dict) and ``video_time`` (clip end
    time in seconds).
    """

    def __init__(self, data_path, transform, video_time):
        self.data_path, self.transform, self.video_time = data_path, transform, video_time

    def __len__(self):
        """Number of video paths held by the dataset."""
        return len(self.data_path)

    def __getitem__(self, index):
        """Decode, transform and label the video at ``index``.

        Returns:
            tuple: ``(clip['video'], label)`` where ``label`` is the integer
            read from characters ``[-11:-9]`` of the path, minus one.
        """
        path = self.data_path[index]
        # The clip always starts at second 0 and ends at ``video_time``.
        clip = EncodedVideo.from_path(path).get_clip(start_sec=0, end_sec=self.video_time)
        clip = self.transform(clip)
        # The class number is taken from a fixed position in the path and
        # shifted so that labels start at 0.
        class_number = int(path[-11:-9])

        return clip['video'], class_number - 1

In [None]:
def make_dataloader(batch_size=1):
    """Create the train / validation / test data loaders for the video dataset.

    The paths returned by ``make_data_path`` are shuffled in place with the
    global ``random`` generator, split with ``data_split`` (default ratios)
    and wrapped in ``LoadDataset`` objects that share one transform pipeline:
    temporal subsampling to 64 frames, scaling to [0, 1], normalisation,
    short-side resize to 256 and a 256x256 centre crop.

    Args:
        batch_size: number of clips per batch for all three loaders.  The
            default of 1 keeps the previous behaviour.
            NOTE(review): a loss built from pairs inside a batch has nothing
            to compare when each batch holds a single clip -- confirm the
            intended batch size for training.

    Returns:
        dict: ``{'train': DataLoader, 'val': DataLoader, 'test': DataLoader}``;
        only the training loader shuffles.
    """
    data_path = make_data_path()
    random.shuffle(data_path)
    train_data_path, val_data_path, test_data_path = data_split(data_path)

    num_frames = 64
    mean = [0.45, 0.45, 0.45]
    std = [0.225, 0.225, 0.225]
    side_size = 256
    crop_size = 256
    sampling_rate = 32
    frames_per_second = 30
    # Length in seconds of the clip decoded from the start of each video.
    video_time = (num_frames * sampling_rate) / frames_per_second
    transform=ApplyTransformToKey(
        key='video',
        transform=Compose([
                UniformTemporalSubsample(num_frames),
                Lambda(lambda x: x/255.0),
                NormalizeVideo(mean, std),
                ShortSideScale(size=side_size),
                CenterCropVideo(crop_size=(crop_size, crop_size))
        ])
    )

    train_dataset = LoadDataset(train_data_path, transform, video_time)
    val_dataset = LoadDataset(val_data_path, transform, video_time)
    test_dataset = LoadDataset(test_data_path, transform, video_time)

    train_dataloader = data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    val_dataloader = data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
    test_dataloader = data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    return {'train': train_dataloader, 'val': val_dataloader, 'test': test_dataloader}

In [None]:
# Build the train/val/test loaders once so they can be inspected interactively.
d = make_dataloader()

DATA NUM:  300


In [None]:
# Take the first clip of the first training batch for inspection.  `t` keeps
# the zero placeholder if the loader yields nothing.
t = torch.zeros(3,64,256,256)
for v, l in d['train']:
    t = v[0]
    break

In [None]:
def train(net, dataloader, criterion, optimizer, epochs):
    """Run the train/validation loop and keep the best checkpoint on disk.

    Every epoch runs a 'train' phase (gradients enabled, optimizer stepped)
    followed by a 'val' phase (gradients disabled).  The mean loss per sample
    of each phase is recorded, and whenever the validation loss is lower than
    the best value seen so far the network's ``state_dict`` is written to
    ``./3DResNet.pth``.

    Args:
        net: module applied to each batch of videos; its output is passed to
            ``convert_label_to_similarity`` together with the labels.
            NOTE(review): that helper's first parameter is named
            ``normed_feature`` and is multiplied with its own transpose, so
            the output is presumably expected to be a 2-D, L2-normalised
            (batch, dim) embedding -- confirm the network pools and
            normalises its output.
        dataloader: dict with 'train' and 'val' entries; each entry is
            iterated for ``(videos, labels)`` batches and must expose
            ``.dataset`` for the per-sample average.
        criterion: module called with the values returned by
            ``convert_label_to_similarity``.
        optimizer: optimizer holding the parameters of ``net``.
        epochs: number of epochs to run.

    Returns:
        dict: ``{'train': [...], 'val': [...]}`` with one mean loss per epoch.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('train on ', device)

    net = net.to(device)
    if device == 'cuda':
        # From here on the saved state_dict keys carry DataParallel's
        # 'module.' prefix.
        net = torch.nn.DataParallel(net) # make parallel
        torch.backends.cudnn.benchmark = True

    criterion = criterion.to(device)

    save_path = './3DResNet.pth'
    loss_dic = {'train': [], 'val': []}
    # Start from +inf so that the first epoch already counts as an
    # improvement.  Seeding the tracker with the first epoch's own loss and
    # then comparing with '<' would skip that epoch, leaving no checkpoint
    # when it turns out to be the best (or only) one.
    min_val_loss = float('inf')

    for epoch in range(epochs):
        print('-------------')
        print('Epoch {}/{}'.format(epoch+1, epochs))

        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()
            else:
                net.eval()

            epoch_loss = 0.0

            for videos, labels in tqdm(dataloader[phase]):
                videos = videos.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are tracked only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = net(videos)

                    loss = criterion(*convert_label_to_similarity(outputs, labels))

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so that the epoch
                # value below is a per-sample mean.
                epoch_loss += loss.item() * videos.size(0)

            epoch_loss = epoch_loss / len(dataloader[phase].dataset)
            loss_dic[phase].append(epoch_loss)
            print(f'{phase} loss: {epoch_loss}')

            if phase == 'val' and epoch_loss < min_val_loss:
                min_val_loss = epoch_loss
                torch.save(net.state_dict(), save_path)
                print('::::: model is saved :::::')

    return loss_dic

In [None]:
def plot_history_loss(loss):
    """Draw the training and validation loss curves on one figure.

    Args:
        loss: mapping with 'train' and 'val' entries, each a sequence of
            per-epoch loss values.
    """
    curves = (
        ('train', "loss for training"),
        ('val', "loss for validation"),
    )
    for phase, caption in curves:
        plt.plot(loss[phase], label=caption)

    # Figure decoration, then render.
    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(loc='best')

    plt.show()

In [None]:
def main():
    """Train the network end to end and plot the resulting loss curves.

    Builds the data loaders, loads the network from ``r3d18_K_200ep.pth``,
    trains it for 50 epochs with ``CircleLoss`` and SGD, then hands the loss
    history to ``plot_history_loss``.
    """
    loaders = make_dataloader()

    backbone = load_net('r3d18_K_200ep.pth')

    circle = CircleLoss(m=0.25, gamma=80)
    sgd = optim.SGD(backbone.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-5)

    history = train(backbone, loaders, circle, sgd, 50)

    plot_history_loss(history)

In [None]:
if __name__=='__main__':
    # Seed PyTorch's and Python's random number generators before the run;
    # the NumPy seed is left disabled.
    torch.manual_seed(1234)
    # np.random.seed(1234)
    random.seed(1234)
    

    main()

DATA NUM:  300


NameError: ignored

In [None]:
import os
from typing import Tuple
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn import manifold
 
import torch
from torch import nn, Tensor
from torch import optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.optim.lr_scheduler as lr_scheduler
from torchvision.transforms import ToTensor
def convert_label_to_similarity(normed_feature: Tensor, label: Tensor) -> Tuple[Tensor, Tensor]:
    """Split pairwise similarities into same-label and different-label groups.

    The similarity of two samples is the dot product of their feature rows.
    Only pairs above the diagonal are used, so each unordered pair appears
    once and no sample is paired with itself.

    Args:
        normed_feature: feature matrix with one row per sample.
        label: label of each sample.

    Returns:
        Tuple[Tensor, Tensor]: ``(sp, sn)`` -- flat tensors holding the
        similarities of same-label pairs and of different-label pairs.
    """
    pairwise = torch.matmul(normed_feature, normed_feature.transpose(0, 1))
    same_label = label[:, None] == label[None, :]

    # Strict upper triangle: drops the diagonal and the mirrored duplicates.
    positive_mask = same_label.triu(diagonal=1).view(-1)
    negative_mask = same_label.logical_not().triu(diagonal=1).view(-1)

    flat_similarity = pairwise.view(-1)
    return flat_similarity[positive_mask], flat_similarity[negative_mask]
 
 
class CircleLoss(nn.Module):
    """Loss computed from same-label (``sp``) and different-label (``sn``) similarities.

    Args:
        m: margin; it sets the weighting offsets and the targets ``1 - m``
            (for ``sp``) and ``m`` (for ``sn``).
        gamma: scale factor applied to both groups of logits.
    """

    def __init__(self, m: float, gamma: float) -> None:
        super().__init__()
        self.m = m
        self.gamma = gamma
        self.soft_plus = nn.Softplus()

    def forward(self, sp: Tensor, sn: Tensor) -> Tensor:
        """Return the scalar loss for the given similarity tensors."""
        # The weights are computed from detached similarities, so no gradient
        # flows through them, and they are clamped at zero.
        weight_p = torch.clamp(- sp.detach() + 1 + self.m, min=0.)
        weight_n = torch.clamp(sn.detach() + self.m, min=0.)

        target_p = 1 - self.m
        target_n = self.m

        scaled_p = - weight_p * (sp - target_p) * self.gamma
        scaled_n = weight_n * (sn - target_n) * self.gamma

        combined = scaled_n.logsumexp(dim=0) + scaled_p.logsumexp(dim=0)
        return self.soft_plus(combined)

class Model(nn.Module):
    """Small 2-D CNN mapping a single-channel image to a normalised 32-value embedding."""

    def __init__(self) -> None:
        super(Model, self).__init__()
        # Three conv -> max-pool -> ReLU stages: 1 -> 8 -> 16 -> 32 channels.
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
        )

    def forward(self, input: Tensor) -> Tensor:
        """Return one normalised embedding row per input image.

        Args:
            input: batch of single-channel images, shaped (batch, 1, H, W).

        Returns:
            Tensor: (batch, 32) embeddings, normalised with
            ``nn.functional.normalize`` defaults.
        """
        feature_map = self.feature_extractor(input)
        # Average over the two spatial axes, leaving one value per channel.
        # nn.AdaptiveAvgPool3d((1, 1, 1)) is not a substitute here: on this
        # 4-D tensor it also averages across the channel axis.
        embedding = feature_map.mean(dim=[2, 3])
        return nn.functional.normalize(embedding)

In [None]:
# Dummy all-zero 256x256 single-channel input and a freshly initialised toy
# model for a shape check.
i = torch.zeros(1,1,256,256)
m = Model()

In [None]:
# Run the toy model once on the CPU with cleared gradients; `p` holds the
# embedding it returns.
m = m.to('cpu')
m.zero_grad()
p = m(i)

feature  torch.Size([1, 32, 29, 29])
mean  torch.Size([1, 32])
avg pool  torch.Size([1, 1, 1, 1])
==  tensor([[[[False, False, False, False, False, False, False, False, False, False,
           False, False, False, False, False, False, False, False, False, False,
           False, False, False, False, False, False, False, False, False, False,
           False, False]]]])


In [None]:
# Display the shape of the embedding returned by the toy model.
p.shape

torch.Size([1, 32])