In [None]:
import os
from datetime import datetime
from enum import Enum

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as TF
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import config
from landmarksDataset import LandmarksDataset
from net import Net

In [None]:
class Modes(Enum):
    """Dataset split selector.

    The member values are passed around as the ``mode`` argument and end up
    as the sub-directory name (under ``config.samples_path``) that the CSV
    files are read from and written to.
    """
    TRAIN = 'train'
    TEST = 'test'

In [None]:
def normalizeData(data):
    """Min-max scale every column of ``data`` into the [0, 1] range.

    Column ``i`` (``data[:, i]``) is rescaled as ``(column - min) / (max - min)``.
    A constant column would give a zero denominator, so ``0.001`` is used
    instead and that column normalizes to all zeros.

    The input tensor is not modified; a new tensor is returned. (Writing the
    result back into ``data`` in place raises a RuntimeError when ``data`` is
    a leaf tensor that requires grad, and silently clobbers the caller's
    tensor otherwise.)

    Args:
        data: tensor with at least two dimensions; columns are taken along
            dimension 1.

    Returns:
        A tuple ``(normalized, (min_history, max_history))``. The two history
        lists hold the per-column minimum and maximum (0-dim tensors) in
        column order, in the form expected by ``denormalizeData``.
    """
    min_history = []
    max_history = []
    normalized_cols = []

    for i in range(data.shape[1]):
        column = data[:, i]

        # Range of the column, used for min-max scaling
        min_val = torch.min(column)
        max_val = torch.max(column)

        # Guard against division by zero on constant columns
        denominator = max_val - min_val
        denominator = 0.001 if denominator == 0 else denominator

        # Save the min and max so the scaling can be undone later
        min_history.append(min_val)
        max_history.append(max_val)

        normalized_cols.append((column - min_val) / denominator)

    if normalized_cols:
        normalized = torch.stack(normalized_cols, dim=1)
    else:
        # No columns to scale: still hand back a tensor distinct from the input
        normalized = data.clone()

    history = (min_history, max_history)
    return normalized, history

In [None]:
def getDataFromCSV(mode, file, start, end):
    """Load a contiguous range of columns from a CSV file in the samples folder.

    The file is read from ``<config.samples_path>/<mode>/<file>``.

    Args:
        mode: name of the sub-directory holding the file.
        file: CSV file name.
        start: index of the first column to load (inclusive).
        end: index one past the last column to load (exclusive).

    Returns:
        A pandas DataFrame with the selected columns.
    """
    # os.path.join uses the separator of the running platform; the previously
    # hard-coded backslashes only resolved to a nested path on Windows.
    csv_path = os.path.join(config.samples_path, mode, file)
    return pd.read_csv(csv_path, usecols=range(start, end))

In [None]:
def getData(mode):
    """Load the input and target CSV data for ``mode`` as normalized tensors.

    The column ranges and file names come from ``config``. Inputs and targets
    are each min-max normalized with statistics computed from the data loaded
    here (see ``normalizeData``).

    Args:
        mode: name of the sub-directory to read the CSV files from.

    Returns:
        ``(norm_x_tensor, norm_y_tensor, history)`` where ``history`` is the
        ``(min_history, max_history)`` pair of the *target* columns, needed to
        map predictions back to the original value range.
    """
    # Get data from csv files
    x = getDataFromCSV(mode, config.x_data_file,
                       config.x_cols_start_index, config.x_cols_end_index)

    y = getDataFromCSV(mode, config.y_data_file,
                       config.y_cols_start_index, config.y_cols_end_index)

    # Transform the data to float32 tensors. Dataset tensors are plain data:
    # marking them with requires_grad would make autograd record the
    # normalization and every batch built from it, and torch.tensor() rejects
    # requires_grad=True outright for integer-valued columns.
    x_tensor = torch.tensor(x.values, dtype=torch.float32)
    y_tensor = torch.tensor(y.values, dtype=torch.float32)

    # Normalize the data
    norm_x_tensor, _ = normalizeData(x_tensor)
    norm_y_tensor, history = normalizeData(y_tensor)

    return norm_x_tensor, norm_y_tensor, history

In [None]:
def getDataset(mode):
    """Build the LandmarksDataset for ``mode``.

    Returns:
        ``(dataset, history)`` - the dataset wrapping the normalized inputs and
        targets returned by ``getData``, and the target normalization history
        that ``getData`` returns alongside them.
    """
    features, targets, target_history = getData(mode)
    dataset = LandmarksDataset(features, targets)
    return dataset, target_history

In [None]:
def train(epochs, train_loader, net, optimizer, criterion, log_interval):
    """Run the training loop and return the trained network.

    Args:
        epochs: number of passes over ``train_loader``.
        train_loader: DataLoader yielding ``(data, target)`` batches.
        net: the ``nn.Module`` to optimize.
        optimizer: optimizer already bound to ``net``'s parameters.
        criterion: loss callable taking ``(prediction, target)``.
        log_interval: print the running loss every ``log_interval`` batches;
            ``0`` (or ``None``) disables the progress output.

    Returns:
        ``net`` (the same object, updated in place by the optimizer).
    """
    # Make sure layers such as dropout / batch-norm are in training mode
    net.train()

    for epoch in range(epochs):
        for batch_idx, (data, target) in enumerate(train_loader):
            # Tensors are used directly: torch.autograd.Variable is a
            # deprecated no-op wrapper.
            optimizer.zero_grad()
            net_out = net(data)
            loss = criterion(net_out, target)
            loss.backward()
            optimizer.step()

            # A falsy log_interval would otherwise divide by zero below
            if log_interval and batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                           100. * batch_idx / len(train_loader), loss.item()))

    return net

In [None]:
def test(test_loader, net, criterion):
    test_loss = 0
    # correct = 0
    results = torch.tensor([]).float()

    for data, target in test_loader:
        data, target = Variable(data), Variable(target)
        net_out = net(data)
        results = torch.cat((results, net_out))
        # sum up batch loss
        test_loss += criterion(net_out, target).item()
        # pred = net_out.data.max(1)[1]  # get the index of the max log-probability
        # correct += pred.eq(target.data).sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}'.format(
        test_loss))

    return results, test_loss

In [None]:
def getFeatureSize(data, target):
    """Return the leading-dimension sizes of one input sample and one target sample.

    The sizes are read straight from ``.shape``; no conversion to numpy is
    needed, so this works for tensors on any device (and for numpy arrays).

    Args:
        data: a single input sample exposing ``.shape``.
        target: a single target sample exposing ``.shape``.

    Returns:
        ``(data.shape[0], target.shape[0])``.
    """
    return data.shape[0], target.shape[0]

In [None]:
def denormalizeData(data, min_history, max_history):
    """Undo min-max scaling column by column.

    Column ``i`` of ``data`` is mapped back with
    ``value * (max_history[i] - min_history[i]) + min_history[i]``.

    Note: ``data`` is modified in place and the same object is returned.

    Args:
        data: tensor whose columns (dimension 1) hold normalized values.
        min_history: per-column minimums, indexable by column number.
        max_history: per-column maximums, indexable by column number.

    Returns:
        ``data`` with every column rescaled.
    """
    for col in range(data.shape[1]):
        low = min_history[col]
        high = max_history[col]
        span = high - low
        data[:, col] = (data[:, col] * span) + low

    return data

In [None]:
def generateClipAndFrameCols(results, frame_num=None):
    """Prepend 'clip' and 'frame' index columns to a 2-D results array.

    Rows are grouped into consecutive clips of ``frame_num`` frames each:
    row ``r`` gets clip index ``r // frame_num`` and frame index
    ``r % frame_num``. A trailing partial clip (when the row count is not a
    multiple of ``frame_num``) simply gets the next clip index.

    Previously the clip column was built with a comprehension whose variable
    shadowed the clip loop index, so it repeated the frame numbers instead of
    holding the clip index.

    Args:
        results: numpy array with one row per frame.
        frame_num: frames per clip; defaults to ``config.frame_num``.

    Returns:
        A new array (same dtype as ``results``) with the clip index inserted
        as column 0 and the frame index as column 1.

    Raises:
        ValueError: if ``frame_num`` is not positive.
    """
    if frame_num is None:
        frame_num = config.frame_num
    if frame_num <= 0:
        raise ValueError('frame_num must be a positive integer')

    row_ids = np.arange(results.shape[0])
    clips = row_ids // frame_num
    frames = row_ids % frame_num

    # Insert the clip & frame data to the results (np.insert keeps the dtype
    # of ``results`` and casts the integer indices to it)
    results = np.insert(results, 0, clips, axis=1)
    results = np.insert(results, 1, frames, axis=1)

    return results

In [None]:
def saveResultsToOutputFile(mode, filename, results):
    """Write the network predictions to a CSV file in the samples folder.

    The output has the columns ``clip``, ``frame``, the four quaternion
    components and one ``Blendshape_<i>`` column for every ``i`` in
    ``range(config.blend_range[0], config.blend_range[1])``. Quaternion and
    blendshape values are clipped to ``config.quat_domain`` and
    ``config.blend_domain`` respectively before saving.

    Args:
        mode: name of the sub-directory (under ``config.samples_path``) to
            write into.
        filename: name of the CSV file to create.
        results: tensor of predictions, one row per frame.
    """
    # Create the output csv columns
    clip_and_frame_cols = ['clip', 'frame']
    quat_cols = ['Quaternion_x', 'Quaternion_y', 'Quaternion_z', 'Quaternion_w']
    blend_cols = ['Blendshape_{0}'.format(i)
                  for i in range(config.blend_range[0], config.blend_range[1])]
    output_cols = clip_and_frame_cols + quat_cols + blend_cols

    # Convert the results from tensor to numpy
    results = results.detach().numpy()

    # Generate and insert clip & frame columns
    results = generateClipAndFrameCols(results)

    # Convert the results to data frame
    results = pd.DataFrame(results, columns=output_cols)

    # Convert columns data type from float to int
    results['clip'] = results['clip'].astype(int)
    results['frame'] = results['frame'].astype(int)

    # Assign values outside boundary to boundary values
    results.loc[:, quat_cols] = results.loc[:, quat_cols].clip(config.quat_domain[0], config.quat_domain[1])
    results.loc[:, blend_cols] = results.loc[:, blend_cols].clip(config.blend_domain[0], config.blend_domain[1])

    # Save the results to the output csv file. os.path.join uses the
    # separator of the running platform instead of hard-coded backslashes,
    # which only formed a nested path on Windows.
    file_path = os.path.join(config.samples_path, mode, filename)
    results.to_csv(file_path, index=False)
    print(f'[{datetime.now()}] Successfully saved the results to {file_path}')

In [None]:
def create_nn(batch_size=50, learning_rate=0.001, epochs=10,
              log_interval=10):
    """Train the network, evaluate it on the test split and export the predictions.

    Steps: build the train / test data loaders, size the network from the
    first training sample, train with Adam and a mean-reduced MSE loss, run
    the test loop, de-normalize the predictions and write them to
    ``config.output_filename`` in the test folder.

    Args:
        batch_size: batch size for both data loaders.
        learning_rate: learning rate handed to the Adam optimizer.
        epochs: number of training epochs.
        log_interval: how often (in batches) the training loss is printed.
    """
    train_mode = Modes.TRAIN.value
    test_mode = Modes.TEST.value

    # Training data is reshuffled every epoch
    train_dataset, _ = getDataset(train_mode)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Test data keeps its file order so the exported rows line up with it.
    # NOTE(review): `history` holds the min/max of the *test* targets, and the
    # train and test splits are each normalized with their own statistics -
    # consider reusing the training statistics for the test split.
    test_dataset, history = getDataset(test_mode)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # The first training sample determines the network's input / output sizes
    in_num_of_features, out_num_of_features = getFeatureSize(*train_dataset[0])
    net = Net(in_num_of_features, out_num_of_features)

    # Optimizer and loss function
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    criterion = nn.MSELoss(reduction='mean')

    # Main training loop followed by the evaluation pass
    net = train(epochs, train_loader, net, optimizer, criterion, log_interval)
    results, test_loss = test(test_loader, net, criterion)

    # Map the predictions back to the original value domains and save them
    results = denormalizeData(results, *history)
    saveResultsToOutputFile(test_mode, config.output_filename, results)

In [None]:
# Entry point: run the whole train / test / export pipeline with the default
# hyper-parameters of create_nn when __name__ is "__main__".
if __name__ == "__main__":
    create_nn()