**Dependencies**

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os, sys, random, argparse, time
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
#No module named keras OR cannot import name 'np_utils' if tensorflow.keras
#from keras.utils import np_utils
#from keras.models import load_model
#from keras.models import model_from_json

**Determine Whether Train or Sample** 

Run the block below when working in a Jupyter Notebook (it stands in for the command-line flags).

In [93]:
class Args():
    """Notebook stand-in for the flags the argparse cell reads from the command line.

    Parameters
    ----------
    train : bool
        True to train on the dataset, False to sample with a trained model.
    stateful : bool
        True to build the stateful LSTM variant, False for the regular one.
    """
    def __init__(self, train=True, stateful=False):
        # Defaults reproduce the previously hard-coded notebook settings.
        self.train = train
        self.stateful = stateful
args = Args()

For script runs only (do NOT run this in a Jupyter Notebook): read `-train` / `-stateful` from the command line.

In [None]:
parser = argparse.ArgumentParser()

parser.add_argument('-train', action="store_true",
                   help='True: Train on dataset, False: Sample with trained model')
parser.add_argument('-stateful', action="store_true",
                   help='True: Remeber previous state during training, False: Feed forward model')

args = parser.parse_args()

**Variables**

In [80]:
# For local run only
csv_data_dir = "../../../data/CSV/Raw"   # raw *_worldpos.csv / *_rotations.csv files
np_data_dir = "../../../data/Numpy"      # cached X/Y numpy arrays
save_dir = "../../../logs"               # weights and model checkpoints
dances = []
BATCH_SIZE = 20
STEPS = 1          # stride between the starts of consecutive sequences
N_TIMESTEPS = 20 # Sequence length
N_EPOCHS = 15
N_NODES = 256      # LSTM units per layer; the model builders read this, and it
                   # was previously only defined in the server configuration

In [15]:
# For server run
csv_data_dir = "/Akamai/MLDance/data/CSV/Raw"   # raw *_worldpos.csv / *_rotations.csv files
np_data_dir = "/Akamai/MLDance/data/Numpy"      # cached X/Y numpy arrays
save_dir = "/Akamai/MLDance/logs"               # weights and model checkpoints
dances = []
BATCH_SIZE = 1
STEPS = 1          # stride between sequence starts; get_sample_data reads this, and
                   # it was previously only defined in the local configuration
N_TIMESTEPS = 20 # Sequence length
N_EPOCHS = 15
N_NODES = 256      # LSTM units per layer

**Pull Names of Dance Data**

In [7]:
def getFileNames(data_dir=None):
    """Return the set of dance names found in a directory of CSV exports.

    A dance name is a CSV file name with its last underscore-separated piece
    removed (e.g. ``Foo_1_worldpos.csv`` and ``Foo_1_rotations.csv`` both give
    ``Foo_1``), so each dance appears once however many CSVs belong to it.

    Parameters
    ----------
    data_dir : str, optional
        Directory to scan. Defaults to the module-level ``csv_data_dir``.

    Returns
    -------
    set of str
        Unique dance names. CSV files without an underscore are skipped,
        because they would otherwise produce an empty name.
    """
    if data_dir is None:
        data_dir = csv_data_dir

    names = {
        f.rsplit("_", 1)[0]
        for f in os.listdir(data_dir)
        if f.endswith('.csv') and "_" in f
    }
    # A file such as "_x.csv" still yields an empty prefix; drop it as well.
    names.discard("")
    return names

**Pre-Process Data**

In [58]:
def pre_process_data(filename):
    """Load one dance from its CSV exports and return the feature table.

    Reads ``<filename>_rotations.csv`` and ``<filename>_worldpos.csv`` from
    ``csv_data_dir``, scales the rotation columns, appends the hip position
    relative to the first frame, and drops the ``End`` and ``Time`` columns.

    Parameters
    ----------
    filename : str
        Dance name without the ``_worldpos.csv`` / ``_rotations.csv`` suffix.
        It is joined onto ``csv_data_dir`` here, so callers pass the bare name.

    Returns
    -------
    pandas.DataFrame
        One row per frame: the remaining rotation columns followed by
        ``Hips.Pos.X``, ``Hips.Pos.Y`` and ``Hips.Pos.Z``.
    """
    filename = os.path.join(csv_data_dir, filename)
    print(filename)

    pos_data = pd.read_csv(filename+"_worldpos.csv")
    rot_data = pd.read_csv(filename+"_rotations.csv")

    # Drop the time stamp (maybe switch to a per-frame time delta later to
    # indicate speed) and the 'End' columns, which are all zero.
    dropped = ['Time'] + [column for column in rot_data.columns if 'End' in column]

    # Standardize rotation; presumably the values are degrees in [-180, 180],
    # which makes this a -1..1 range -- TODO confirm against the exporter.
    data = rot_data.drop(columns=dropped) / 180.0

    # Add the root (hip) position for spatial movement, shifted so every dance
    # starts at the origin (0, 0, 0) for consistency between dances.
    for axis in ('X', 'Y', 'Z'):
        hips = pos_data['Hips.' + axis]
        # .iloc[0] takes the first frame by position, whatever the index labels are.
        data['Hips.Pos.' + axis] = hips - hips.iloc[0]

    return data

**Load Data and Separate Into Samples**

Generate the sequences

In [None]:
def sequence_by_frame(dataX, dataY, i, data=None, n_timesteps=None):
    """Append one (input window, next frame) training pair starting at frame ``i``.

    Parameters
    ----------
    dataX, dataY : list
        Accumulators, mutated in place: the window goes to ``dataX`` and the
        single frame that follows it goes to ``dataY``.
    i : int
        Index of the first frame of the window.
    data : sequence, optional
        Frames to slice. The original version read a variable named ``data``
        that is not defined in this function's scope; pass it explicitly.
        When omitted, a module-level ``data`` is used if one exists.
    n_timesteps : int, optional
        Window length. Defaults to the module-level ``N_TIMESTEPS``.

    Raises
    ------
    NameError
        If ``data`` is neither passed nor defined at module level.
    """
    if data is None:
        try:
            data = globals()["data"]  # legacy behaviour: rely on a global
        except KeyError:
            raise NameError("sequence_by_frame: pass `data` explicitly; "
                            "no module-level `data` is defined")
    if n_timesteps is None:
        n_timesteps = N_TIMESTEPS

    dataX.append(data[i: i+n_timesteps])
    dataY.append(data[i+n_timesteps])
        
def sequence_by_frames(dataX, dataY, i, data=None, n_timesteps=None):
    """Append one (input window, following slice) training pair starting at ``i``.

    Unlike a plain index, the target is taken as a length-one slice, so it
    keeps a leading axis of size 1.

    Parameters
    ----------
    dataX, dataY : list
        Accumulators, mutated in place.
    i : int
        Index of the first frame of the window.
    data : sequence, optional
        Frames to slice. The original version read a variable named ``data``
        that is not defined in this function's scope; pass it explicitly.
        When omitted, a module-level ``data`` is used if one exists.
    n_timesteps : int, optional
        Window length. Defaults to the module-level ``N_TIMESTEPS``.

    Raises
    ------
    NameError
        If ``data`` is neither passed nor defined at module level.
    """
    if data is None:
        try:
            data = globals()["data"]  # legacy behaviour: rely on a global
        except KeyError:
            raise NameError("sequence_by_frames: pass `data` explicitly; "
                            "no module-level `data` is defined")
    if n_timesteps is None:
        n_timesteps = N_TIMESTEPS

    window_end = i + n_timesteps
    dataX.append(data[i: window_end])
    dataY.append(data[window_end : window_end+1])

In [69]:
def get_sample_data(filename):
    """Return the (X, Y) training arrays for one dance, building them if needed.

    The arrays are cached as ``<filename>X-<N_TIMESTEPS>.npy`` and
    ``<filename>Y-<N_TIMESTEPS>.npy`` under ``np_data_dir``. When either file
    is missing, the dance is pre-processed from CSV, cut into sliding windows
    of ``N_TIMESTEPS`` frames taken every ``STEPS`` frames, and saved.

    Parameters
    ----------
    filename : str
        Dance name (no directory, no ``_worldpos`` / ``_rotations`` suffix).

    Returns
    -------
    X : numpy.ndarray, shape [samples, N_TIMESTEPS, features]
        Input windows.
    Y : numpy.ndarray, shape [samples, features]
        The frame that follows each window.
    """
    # Establish filenames (X is for input, Y is for expected output)
    loadedX = os.path.join(np_data_dir, filename+"X-"+str(N_TIMESTEPS))
    loadedY = os.path.join(np_data_dir, filename+"Y-"+str(N_TIMESTEPS))

    if os.path.exists(loadedX+".npy") and os.path.exists(loadedY+".npy"):
        return np.load(loadedX+".npy"), np.load(loadedY+".npy")

    # Print statement for status update
    print("create")
    # pre_process_data joins csv_data_dir itself, so pass the bare name. Joining
    # here as well doubled the prefix and broke relative data directories.
    frames = pre_process_data(filename).values
    n_rows, n_columns = frames.shape

    # Window starts: every STEPS frames, leaving room for the target frame.
    starts = range(0, n_rows - N_TIMESTEPS, STEPS)
    X = np.array([frames[i: i+N_TIMESTEPS] for i in starts])
    Y = np.array([frames[i+N_TIMESTEPS] for i in starts])

    # Explicit reshape keeps the documented shapes even when the dance is too
    # short to produce any window (zero samples).
    X = np.reshape(X, (len(starts), N_TIMESTEPS, n_columns))
    Y = np.reshape(Y, (len(starts), n_columns))

    print("saving")
    os.makedirs(np_data_dir, exist_ok=True)  # np.save does not create directories
    np.save(loadedX, X)
    np.save(loadedY, Y)

    return X, Y

**Set-Up Model**

In [91]:
def establish_stateful_model(N_COLOMNS):
    """Build the uncompiled stateful network: LSTM -> Dropout -> LSTM -> Dropout -> Dense.

    Both LSTM layers are stateful, so the first layer declares a fixed batch
    size through ``batch_input_shape``. ``N_COLOMNS`` is the number of features
    per frame and also the width of the ``tanh`` output layer.
    """
    layers = keras.layers
    input_lstm = layers.LSTM(
        N_NODES,
        activation='relu',
        batch_input_shape=(BATCH_SIZE, N_TIMESTEPS, N_COLOMNS),
        return_sequences=True,  # hand the full sequence to the second LSTM
        stateful=True,
    )
    return keras.Sequential([
        input_lstm,
        layers.Dropout(0.2),
        layers.LSTM(N_NODES, activation='relu', stateful=True),
        layers.Dropout(0.2),
        layers.Dense(N_COLOMNS, activation='tanh'),
    ])

def establish_non_stateful_model(N_COLOMNS):
    """Build the uncompiled stateless network: LSTM -> Dropout -> LSTM -> Dropout -> Dense.

    The first layer only fixes ``(N_TIMESTEPS, N_COLOMNS)``; the batch size is
    left open. ``N_COLOMNS`` is the number of features per frame and also the
    width of the ``tanh`` output layer.
    """
    layers = keras.layers
    input_lstm = layers.LSTM(
        N_NODES,
        activation='relu',
        input_shape=(N_TIMESTEPS, N_COLOMNS),
        return_sequences=True,  # hand the full sequence to the second LSTM
    )
    return keras.Sequential([
        input_lstm,
        layers.Dropout(0.2),
        layers.LSTM(N_NODES, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(N_COLOMNS, activation='tanh'),
    ])

In [95]:
def create_model(n_columns=165):
    """Build and compile the network selected by ``args.stateful``.

    Parameters
    ----------
    n_columns : int, optional
        Number of features per frame (input and output width). Defaults to
        165, the value that was previously hard-coded here.

    Returns
    -------
    keras.Sequential
        Model compiled with the Adam optimizer and mean-squared-error loss.
    """
    if args.stateful:
        model = establish_stateful_model(n_columns)
    else:
        model = establish_non_stateful_model(n_columns)
    model.compile(optimizer='adam', loss='mse') #metrics=['accuracy']
    # summary() prints the table itself and returns None; wrapping it in
    # print() added a stray "None" line to the output.
    model.summary()
    return model

**Train Model**

In [None]:
class MyCustomCallback(tf.keras.callbacks.Callback):
    """Checkpoint callback: saves weights every epoch and the full model on even epochs.

    Files are written to ``save_dir`` as ``weights-<N_TIMESTEPS>_<loss>.h5``
    and ``model-<N_TIMESTEPS>_<epoch>.h5``.
    """
    def on_epoch_end(self, epoch, logs=None):
        """Save checkpoints for the epoch that just finished.

        Parameters
        ----------
        epoch : int
            Epoch index supplied by Keras.
        logs : dict, optional
            Metrics supplied by Keras; ``logs['loss']`` is used in the weights
            file name (``nan`` if it is missing).
        """
        logs = logs or {}  # avoid a shared mutable default argument
        loss = logs.get("loss", float("nan"))
        # The "{loss:.4f}" placeholder was never formatted before, so the file
        # was literally named "weights-20_{loss:.4f}.h5".
        savefile_weights = "weights-"+str(N_TIMESTEPS)+"_{loss:.4f}.h5".format(loss=loss)
        savefile_model = "model-"+str(N_TIMESTEPS)+"_{}.h5".format(epoch)
        os.makedirs(save_dir, exist_ok=True)  # saving fails if the directory is missing
        self.model.save_weights(os.path.join(save_dir, savefile_weights)) #weights
        if epoch%2==0:
            self.model.save(os.path.join(save_dir, savefile_model))
            

In [97]:
def train_model():
    """Train a fresh model for ``N_EPOCHS`` passes over every dance.

    Each pass fits the model on one dance at a time, then shuffles the dance
    order for the next pass. Checkpoints are written by ``MyCustomCallback``.
    """
    model = create_model()

    dances = list(getFileNames())
    callbacks_list = [MyCustomCallback()]

    for i in range(N_EPOCHS):
        print(str(i)+"/"+str(N_EPOCHS))

        for dance in dances:
            print(str(i)+"/"+str(N_EPOCHS)+": on dance", dance)
            X, Y = get_sample_data(dance)

            print ("X Shape:", X.shape)
            print ("Y Shape:", Y.shape)

            if args.stateful:
                # A stateful model is built for a fixed batch size, so drop the
                # trailing samples that would not fill a whole batch.
                usable = (len(X) // BATCH_SIZE) * BATCH_SIZE
                X, Y = X[:usable], Y[:usable]

            if len(X) == 0:
                print("Skipping", dance, "- not enough frames for one batch")
                continue

            #train/fit the model
            # epochs / initial_epoch make this call run exactly one epoch while
            # telling Keras (and the callbacks) that it is epoch `i`; before,
            # every call reported epoch 0, so checkpoints overwrote each other.
            # Stateful training keeps the sample order (shuffle=False).
            model.fit(X, Y, batch_size = BATCH_SIZE, callbacks=callbacks_list,
                      epochs=i+1, initial_epoch=i,
                      shuffle=not args.stateful)

            if args.stateful:
                # Do not carry LSTM state from one dance into the next.
                model.reset_states()
        random.shuffle(dances)

    print("Done Training")

**Sample Model**

In [98]:
def sample(preds, temperature=1.0):
    """Draw one index from ``preds`` after temperature re-weighting.

    ``preds`` is converted to float64, its logarithm is divided by
    ``temperature``, and the result is exponentiated and normalised to sum to
    one. A single multinomial draw is made from that distribution and the
    index of the drawn entry is returned.
    """
    scaled_log = np.log(np.asarray(preds).astype('float64')) / temperature
    weights = np.exp(scaled_log)
    distribution = weights / np.sum(weights)
    draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(draw)



**Run Script**

In [96]:
# Entry point: train when args.train is set and report the wall-clock time in
# hours; sampling is not implemented yet.
if not args.train:
    print("Will Sample in the Future")
else:
    start_time = time.time()
    train_model()
    print("--- %s hours ---" % ((time.time() - start_time) / 3600))

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_32 (LSTM)               (None, 20, 256)           432128    
_________________________________________________________________
dropout_32 (Dropout)         (None, 20, 256)           0         
_________________________________________________________________
lstm_33 (LSTM)               (None, 256)               525312    
_________________________________________________________________
dropout_33 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_14 (Dense)             (None, 165)               42405     
Total params: 999,845
Trainable params: 999,845
Non-trainable params: 0
_________________________________________________________________
None
0/15
0/15: on dance Theodora_Satisfied_1
X Shape: (730, 20, 165)
Y Shape: (730, 165)

Epoch 00001: loss impr

KeyboardInterrupt: 