In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
import metrics as metric
from warnings import filterwarnings
import pdb
import os 
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision.models import *
import torch
import torchvision
from tqdm import tqdm

import torch.nn as nn
import time
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error, recall_score

In [2]:
label_options = ['arousal', 'valence']
partition_info = pd.read_csv('/media/sven/New Volume/features/meta/processed_tasks/metadata/partition.csv')
classes = [0, 1, 2]
feature_set = 'egemaps'


feat_conf = {'egemaps': (88, 1, ',', 'infer'),
             'deepspectrum': (4096, 1, ',', 'infer'),
             'vggface': (512, 1, ',', 'infer'),
             'fasttext': (300, 1, ',', 'infer'),
             'xception': (2048, 1, ',', 'infer'),
             'openpose': (54, 1, ',', 'infer')
            }
num_feat = feat_conf[feature_set][0]
ind_off  = feat_conf[feature_set][1]
sep      = feat_conf[feature_set][2]
header   = feat_conf[feature_set][3]                                 

In [3]:
def pad_if_need(paths, size):
    diff = len(paths) - size
    if diff < 0:
        if abs(diff) > len(paths): # lon hon nhieu hon 2 lan so voi paths nen co the lap lai path
            up_sampling = paths[np.random.choice(paths.shape[0], abs(diff), replace=True)]
        else: # lon hon khong qua 2 lan so voi paths nen de khong lap lai path
            up_sampling = paths[np.random.choice(paths.shape[0], abs(diff), replace=False)]
        paths = np.concatenate([paths, up_sampling])
    return paths

def pooling_segment(video_features: np.array, stride=2, pool_out=16):
    if len(video_features) < pool_out:
        video_features = pad_if_need(video_features, pool_out)
        return [video_features]
    else:
        ret = []
        i = 0

        while (i < int(len(video_features)) - pool_out):
            ret.append(video_features[i:i + pool_out])
            i += stride
        return ret

In [4]:
def pooling_segments(feature_df: pd.DataFrame, label_df:pd.DataFrame, stride=2, pool_out=16):
    col_features = [col for col in feature_df.columns if not col in ['timestamp', 'segment_id']]
    all_segments = feature_df['segment_id'].unique()
    all_segment_features = []
    all_segment_labels = []
    all_segment_ids = []
    for segment in all_segments:
        segment_df = feature_df[feature_df['segment_id'] == segment]
        segment_features = segment_df[col_features].values
        label = label_df[label_df['segment_id'] == segment]['class_id'].values
        pooled_feature = pooling_segment(segment_features, stride, pool_out)
        pooled_label = [label] * len(pooled_feature)
        segment = [segment] * len(pooled_feature)
        all_segment_features += pooled_feature
        all_segment_labels += pooled_label
        all_segment_ids += segment
    all_segment_features = np.asarray(all_segment_features)
    all_segment_labels = np.asarray(all_segment_labels)
    all_segment_ids = np.asarray(all_segment_ids)
    return all_segment_features, all_segment_labels, all_segment_ids
   

In [48]:
def prepare_data():
    for label in ['arousal']:
        if not os.path.exists('./data_csv/'+label):
            os.makedirs('./data_csv/'+label)
        train_lab, train_feat, train_id, devel_lab, devel_feat, devel_id, test_lab, test_feat , test_id= [], [], [], [], [], [], [] ,[], []

        feature_folder = '/media/sven/New Volume/features/c2_muse_topic/feature_segments/egemaps_aligned/'
        label_folder = '/media/sven/New Volume/features/c2_muse_topic/label_segments/' + label + '/'

        print('\n ' + feature_set + ': ' + label)

        print('\n Preparing Partitions')
        for index, row in tqdm(partition_info.iterrows()):
            filename_id = str(row['Id']) + '.csv'
            row_partition = row['Proposal']

            label_df = pd.read_csv(label_folder + filename_id, index_col=None, dtype=np.float64)
            feature_df = pd.read_csv(feature_folder + feature_set + '/' + filename_id, index_col=None, dtype=np.float64)
            features, labels, id_segment = pooling_segments(feature_df, label_df)

            if row_partition == 'train':
                train_lab.append(labels)
                train_feat.append(features)
                train_id.append(id_segment)
            if row_partition == 'devel':    
                devel_lab.append(labels)
                devel_feat.append(features)
                devel_id.append(id_segment)
            if row_partition == 'test':
                test_lab.append(labels)
                test_feat.append(features)
                test_id.append(id_segment)
            
            
            list_value = [train_lab, train_feat, train_id, devel_lab, devel_feat, devel_id, test_lab, test_feat , test_id]
            list_key = ['train_lab', 'train_feat', 'train_id', 'devel_lab', 'devel_feat', 'devel_id', 'test_lab', 'test_feat' , 'test_id']
            dict_feature = dict(zip(list_key, list_value))
        for key, item in dict_feature.items():
            item = np.asarray(item)
            dict_feature[key] = np.concatenate(item, axis =0)
    return dict_feature
            
            
        

In [52]:
dict_feature = prepare_data()

3it [00:00, 19.89it/s]


 egemaps: arousal

 Preparing Partitions


295it [00:13, 22.29it/s]


In [54]:
list_feature['train_lab'].shape

(137646, 1)

In [39]:
train_feat = np.concatenate(train_feat, axis=0)
train_lab = np.concatenate(train_lab, axis=0)
train_id = np.concatenate(train_id, axis=0)
devel_lab = np.concatenate(devel_lab, axis=0)
devel_feat = np.concatenate(devel_feat, axis=0)
devel_id = np.concatenate(devel_id, axis=0)
test_lab = np.concatenate(test_lab, axis=0)
test_feat = np.concatenate(test_feat, axis=0)
test_id = np.concatenate(test_id, axis=0)

In [8]:
class Muse_Dataset(Dataset):
    def __init__(self, features, labels):
        super().__init__()
        self.features = torch.from_numpy(features)
        self.labels = torch.from_numpy(labels)
        
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, index):
        feature = self.features[index]
        label = self.labels[index]
        return feature, label

In [9]:
torch.manual_seed(0)

learning_rate = 0.01
batch_size = 256
num_epochs = 10
num_classes = 3
device = "cuda:0"
feature_set = 'egemaps'

In [10]:
train_dataset = Muse_Dataset(train_feat, train_lab)
train_loader = DataLoader(dataset=train_dataset, 
                          batch_size=batch_size,
                          num_workers=4,
                          shuffle=True)

valid_dataset = Muse_Dataset(devel_feat, devel_lab)
valid_loader = DataLoader(dataset=valid_dataset, 
                          batch_size=batch_size,
                          num_workers=4,
                          shuffle=False)

In [11]:
class Wave_Block(nn.Module):

    def __init__(self, in_channels, out_channels, dilation_rates, kernel_size):
        super(Wave_Block, self).__init__()
        self.num_rates = dilation_rates
        self.convs = nn.ModuleList()
        self.filter_convs = nn.ModuleList()
        self.gate_convs = nn.ModuleList()

        self.convs.append(nn.Conv1d(in_channels, out_channels, kernel_size=1))
        dilation_rates = [2 ** i for i in range(dilation_rates)]
        for dilation_rate in dilation_rates:
            self.filter_convs.append(
                nn.Conv1d(out_channels, out_channels, kernel_size=kernel_size, padding=int((dilation_rate*(kernel_size-1))/2), dilation=dilation_rate))
            self.gate_convs.append(
                nn.Conv1d(out_channels, out_channels, kernel_size=kernel_size, padding=int((dilation_rate*(kernel_size-1))/2), dilation=dilation_rate))
            self.convs.append(nn.Conv1d(out_channels, out_channels, kernel_size=1))
        
    def forward(self, x):
        x = self.convs[0](x)
        res = x
        for i in range(self.num_rates):
            x = torch.tanh(self.filter_convs[i](x)) * torch.sigmoid(self.gate_convs[i](x))
            x = self.convs[i + 1](x)
            res = res + x
        return res
# detail 
class wavenet(nn.Module):
    def __init__(self, inch=16, kernel_size=3, num_classes =3):
        super().__init__()

        self.wave_block1 = Wave_Block(inch, 16, 4, kernel_size)
        self.wave_block2 = Wave_Block(16, 32, 4, kernel_size)
        self.wave_block3 = Wave_Block(32, 64, 2, kernel_size)
        self.wave_block4 = Wave_Block(64, 128, 1, kernel_size)
        self.pool = nn.AdaptiveAvgPool1d(1)
        
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.wave_block1(x)
        x = self.wave_block2(x)
        x = self.wave_block3(x)
        x = self.wave_block4(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

In [12]:
model = wavenet()
criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-4)
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

In [13]:
def f1(y_true, y_pred):
    return round(f1_score(y_true, y_pred, average='micro') * 100, 3)


def uar(y_true, y_pred):
    return round(recall_score(y_true, y_pred, average='macro') * 100, 3)


def eval_metric(predicts, targets, partition_name):
    results = {}
    results['f1'] = f1(targets, predicts)
    results['uar'] = uar(targets, predicts)
    results['combine'] = round((0.66 * results['f1'] + 0.34 * results['uar']), 3)
    print(f'Results in {partition_name}:\n')
    print("  - f1: ", results['f1'])
    print("  - uar: ", results['uar'])
    print("  - combined:", results['combine'])

In [14]:
model.to(device)
print('done')

done


In [15]:
start_time = time.time()

cost_list=[]
schedule = [
    (0, 10, 1e-4),
    (10, 20, 1e-5),
    (20, 30, 1e-6)]

for start, end, lr in schedule:

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    for epoch in range(start, end):
        model.train()
        for batch_idx, (features, targets) in enumerate(train_loader):

            features = features.to(device).float()
            targets = targets.to(device).long()
            targets = targets.squeeze(1)

            ### FORWARD AND BACK PROP
            out = model(features)
            
            loss = criterion(out, targets)
  
            optimizer.zero_grad()

            loss.backward()

            ### UPDATE MODEL PARAMETERS
            optimizer.step()

            #################################################
            ### CODE ONLY FOR LOGGING BEYOND THIS POINT
            ################################################
            cost_list.append(loss.item())
            if  not batch_idx % 20:
                print (f'Epoch: {epoch+1:03d}/{end:03d} | '
                       f'Batch {batch_idx:03d}/{len(train_loader):03d} |' 
                       f' Cost: {loss:.4f}')

        model.eval()
        with torch.no_grad(): 
            pred=[]
            targ=[]
            for batch_idx, (features, targets) in enumerate(valid_loader):
                features = features.to(device).float()
                targets = targets.long()
                out = model(features)
                _, predicts = torch.max(out, 1)
                predicts = predicts.cpu().detach().numpy()
                targets = targets.cpu().detach().numpy()
                pred.append(predicts)
                targ.append(np.concatenate(targets))
            targ = np.concatenate(targ)
            pred = np.concatenate(pred)
            eval_metric(pred, targ, 'arousal')
        elapsed = (time.time() - start_time)/60
        print(f'Time elapsed: {elapsed:.2f} min')

    elapsed = (time.time() - start_time)/60
    print(f'Total Training Time: {elapsed:.2f} min')

Epoch: 001/010 | Batch 000/538 | Cost: 4.9995
Epoch: 001/010 | Batch 020/538 | Cost: 1.4185
Epoch: 001/010 | Batch 040/538 | Cost: 1.1394
Epoch: 001/010 | Batch 060/538 | Cost: 1.1009
Epoch: 001/010 | Batch 080/538 | Cost: 1.1289
Epoch: 001/010 | Batch 100/538 | Cost: 1.1450
Epoch: 001/010 | Batch 120/538 | Cost: 1.2835
Epoch: 001/010 | Batch 140/538 | Cost: 1.1172
Epoch: 001/010 | Batch 160/538 | Cost: 1.0844
Epoch: 001/010 | Batch 180/538 | Cost: 1.1102
Epoch: 001/010 | Batch 200/538 | Cost: 1.1039
Epoch: 001/010 | Batch 220/538 | Cost: 1.1172
Epoch: 001/010 | Batch 240/538 | Cost: 1.1284
Epoch: 001/010 | Batch 260/538 | Cost: 1.1183
Epoch: 001/010 | Batch 280/538 | Cost: 1.2351
Epoch: 001/010 | Batch 300/538 | Cost: 1.1250
Epoch: 001/010 | Batch 320/538 | Cost: 1.2180
Epoch: 001/010 | Batch 340/538 | Cost: 1.1065
Epoch: 001/010 | Batch 360/538 | Cost: 1.3170
Epoch: 001/010 | Batch 380/538 | Cost: 1.0850
Epoch: 001/010 | Batch 400/538 | Cost: 1.0982
Epoch: 001/010 | Batch 420/538 | C

In [17]:
targ

array([1, 1, 1, ..., 0, 0, 0])