# Dependancies

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os, sys, random, argparse, time
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
#No module named keras OR cannot import name 'np_utils' if tensorflow.keras
#from keras.utils import np_utils
#from keras.models import load_model
#from keras.models import model_from_json

# Variables

**Determine Whether Train or Sample** 

Run below block in Jupyter Notebook

In [41]:
class Args():
    def __init__(self):
        self.train = False
        self.stateful = True
        self.n_timesteps = 20
        self.n_epochs = 50
        self.n_units = 256
        self.n_frames = 500
args = Args()

Do NOT run this in Jupyter Notebook

In [40]:
parser = argparse.ArgumentParser()

#store_true sets the default to False
#store_false sets the default to True
parser.add_argument('-train', action="store_true",
                   help='True: Train on dataset, False: Sample with trained model')
parser.add_argument('-stateful', action="store_false",
                   help='True: Remeber previous state during training, False: Feed forward model')
parser.add_argument('-n_timesteps', type=int, const=1, default=args.n_timesteps
                   help='How many dance frames should be used as history during prediction')
parser.add_argument('-n_epochs', type=int, const=1, default=args.n_epochs
                   help='Number of repititions the data is fed into the model')
parser.add_argument('-n_units', type=int, const=1, default=args.n_units
                   help='The number of dance frames the AI should generate')

parser.add_argument('-n_frames', type=int, const=1, default=args.n_frames
                   help='The number of dance frames the AI should generate')

args = parser.parse_args()

SyntaxError: invalid syntax (<ipython-input-40-23cb9e155ac8>, line 10)

**Variables**

In [4]:
dances = []
BATCH_SIZE = 1
STEPS = 1
N_TIMESTEPS = args.n_timesteps # Sequence length
N_EPOCHS = args.n_epochs
N_NODES = args.n_units

In [16]:
# For local run only
csv_data_dir = "../../../data/CSV/Raw"
np_data_dir = "../../../data/Numpy"
save_dir = "../../../logs"

In [5]:
# For server run
csv_data_dir = "/Akamai/MLDance/data/CSV/Raw"
np_data_dir = "/Akamai/MLDance/data/Numpy"
save_dir = "/Akamai/MLDance/logs"

# Helper Functions

**General**

In [27]:
def progressbar(it, prefix="", size=60, file=sys.stdout):
    count = len(it)
    def show(j):
        x = int(size*j/count)
        file.write("%s[%s%s] %i/%i\r" % (prefix, "#"*x, "."*(size-x), j, count))
        file.flush()        
    show(0)
    for i, item in enumerate(it):
        yield item
        show(i+1)
    file.write("\n")
    file.flush()

def getFileNames():
    filenames = [f for f in os.listdir(csv_data_dir) if f.endswith('.csv')]
    for file in enumerate(filenames): #enumerating creates an array where 0 corresponds to the index of the file in filenames and 1 corresponds to the filename
        filenames[file[0]] = '_'.join(file[1].split("_")[:-1])
    return list(set(filenames))

### Data Related

**Load and Pre-Process Data**

In [29]:
def pre_process_data(filename):
    filename = os.path.join(csv_data_dir, filename)
    
    pos_data = pd.read_csv(filename+"_worldpos.csv")
    rot_data = pd.read_csv(filename+"_rotations.csv")
    data = rot_data.copy()

    #standardize rotation (force values from -1 to 1)
    data = data/180.0

    #Add the root (hip) position data for spacial movement to the rotational data
    data['Hips.Pos.X'] = pos_data.pop('Hips.X')
    data['Hips.Pos.Y'] = pos_data.pop('Hips.Y')
    data['Hips.Pos.Z'] = pos_data.pop('Hips.Z')

    #Normalize the starting positions of the given dance
    #Making movement relative to an origin of 0,0,0 for consistancy within different dances
    data['Hips.Pos.X'] = data['Hips.Pos.X'] + (-1*data['Hips.Pos.X'][0])
    data['Hips.Pos.Y'] = data['Hips.Pos.Y'] + (-1*data['Hips.Pos.Y'][0])
    data['Hips.Pos.Z'] = data['Hips.Pos.Z'] + (-1*data['Hips.Pos.Z'][0])
    
    #Remove the all the columns were it's all zeroed (End ones)
    zeroed_columns = [column for column in data.columns if 'End' in column]
    for column in zeroed_columns:
        data.pop(column)

    #remove the time variable from the dataset
    time = data.pop('Time') #maybe change to time change value instead? To indicate speed
    return data

def get_processed_data(filename):
    #Establish filenames (X is for input, Y is for expected output)
    loaded = os.path.join(np_data_dir, filename)
    
    #If the corresponding numpy file doesn't yet exist, create and save it
    if not (os.path.exists(loaded+".npy")):
        #Print statement for status update
        print("Creating pre-processed datafile:", filename)
        #load the csv file and establish the number of rows and columns
        data = pre_process_data(filename)
        data = data.iloc[:].values #Enables selection/edit of cells in the dataset
        print("Saving pre-processed data")
        np.save(loaded, data)

    return np.load(loaded+".npy")

**Sequence the Data (Separate Into Samples)**

In [42]:
#n_frames predict the next frame
def sequence_individual_frame(data, dataX, dataY, i):
    seqIn = data[i: i+N_TIMESTEPS]
    seqOut = data[i+N_TIMESTEPS]
    dataX.append(seqIn)
    dataY.append(seqOut)
        
#n_frames predict the next n_frames
def sequence_multiple_frames(data, dataX, dataY, i):
    seqIn = data[i: i+N_TIMESTEPS]
    seqOut = data[i+N_TIMESTEPS : i+N_TIMESTEPS+1]
    dataX.append(seqIn)
    dataY.append(seqOut)

In [31]:
def get_sequenced_data(filename):
    #Establish filenames (X is for input, Y is for expected output)
    loadedX = os.path.join(np_data_dir, filename+"X-"+str(N_TIMESTEPS))
    loadedY = os.path.join(np_data_dir, filename+"Y-"+str(N_TIMESTEPS))
    
    #If the corresponding numpy file doesn't yet exist, create and save it
    if not (os.path.exists(loadedX+".npy") and os.path.exists(loadedY+".npy")):
        #Print statement for status update
        print("Creating the sequenced data")
        #load the csv file and establish the number of rows and columns
        data = get_processed_data(filename)
        N_ROWS = data.values.shape[0]
        N_COLOMNS = data.values.shape[1]

        #data = data.iloc[:].values #Enables selection/edit of cells in the dataset
        dataX = []
        dataY = []
        
        #Generate the sequences
        for i in range(0, N_ROWS - N_TIMESTEPS, STEPS): #range(start, stop, step) 
            sequence_multiple_frames(data, dataX, dataY, i)

        #X shape [samples, timesteps, features]
        #Y shape [samples, 1, features]
        X, Y = np.array(dataX), np.array(dataY)

        N_SAMPLES = len(dataX)
        Y = np.reshape(Y, (N_SAMPLES, N_COLOMNS))
        print("Saving the sequenced data")
        np.save(loadedX, X)
        np.save(loadedY, Y)

    return np.load(loadedX+".npy"), np.load(loadedY+".npy")

# Functions related to the Model

**Set-Up Model**

In [9]:
def establish_stateful_model(N_COLOMNS):
    model = keras.Sequential()
    model.add(keras.layers.LSTM(N_NODES, activation='tanh', 
                                batch_input_shape = (BATCH_SIZE, N_TIMESTEPS, N_COLOMNS),
                                return_sequences=True, 
                                stateful=True))
    #model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.LSTM(N_NODES, activation='tanh', return_sequences=True, stateful=True))
    #model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.LSTM(N_NODES, activation='tanh', return_sequences=False, stateful=True))
    #model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Dense(N_COLOMNS, activation='tanh')) #Setting it stateful is the same as TimeDistributedDense(Dense()) --> (#, None, #)
    return model

def establish_non_stateful_model(N_COLOMNS):
    model = keras.Sequential()
    model.add(keras.layers.LSTM(N_NODES, activation='tanh', 
                                input_shape = (N_TIMESTEPS, N_COLOMNS), 
                                #batch_size = BATCH_SIZE, 
                                return_sequences=True))
    #model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.LSTM(N_NODES, activation='tanh'))
    #model.add(keras.layers.Dropout(0.2))
    model.add(keras.layers.Dense(N_COLOMNS, activation='tanh'))
    return model

In [10]:
def create_model():
    if args.stateful:
        model = establish_stateful_model(165)
    else:
        model = establish_non_stateful_model(165)
    model.compile(optimizer='adam', loss='mse',metrics=['accuracy']) #
    print(model.summary())
    return model

**Train Model**

In [11]:
def save_model(epoch):
    temp_path = os.path.join(save_dir, "units-{}_timesteps-{}".format(N_NODES, N_TIMESTEPS))
    if not os.path.exists(temp_path):
        os.makedirs(temp_path)
    savefile_weights = "weights-"+str(N_TIMESTEPS)+"_loss-{:.4f}_acc-{:.4f}.h5".format(logs["loss"], logs["accuracy"])
    savefile_model = "model_epoch-{}.h5".format(epoch)
    #json_string = model.to_jason() #architecture
    model.save_weights(os.path.join(temp_path, savefile_weights)) #weights
    if epoch%3==0:
        model.save(os.path.join(temp_path, savefile_model))
            
class MyCustomCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None): #changed from (epoch, _) was None
        save_model(epoch)

In [12]:
def train_model():
    model = create_model()
    
    dances = getFileNames()
    
    for i in range(N_EPOCHS):
        #callbacks_list = [MyCustomCallback()]
        callbacks_list = [tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)]
        
        for dance in dances:
            print("Epoch ",str(i)+"/"+str(N_EPOCHS)+":", dance)
            X, Y = get_sample_data(dance)
            
            print ("X Shape:", X.shape)
            print ("Y Shape:", Y.shape)
            #train/fit the model
            model.fit(X, Y, 
                      batch_size = BATCH_SIZE, 
                      callbacks=callbacks_list, 
                      shuffle=False, 
                      validation_split=0.2, 
                      epochs=1, 
                      verbose=1)
        save_model(i)
        random.shuffle(dances)
    
    print("Done Training")

**Sample Model**

In [36]:
def load_model(filename):
    return keras.models.load_model(os.path.join(save_dir, filename), compile=True)

def temperature(preds, temperature=1.0): #Not yet altered
    print(preds)
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    print (preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

def save_to_csv(generated_data, original_file, save_file):    
    rotation = generated_data[:,:162] #get the first 162 columns
    position = generated_data[:,[162, 163, 164]] #get the last 3 columns
    hierarchy = pd.read_csv(os.path.join(csv_data_dir, "hierarchy/"+original_file.split('_')[0]+"_hierarchy.csv"))
    
    data = pd.read_csv(os.path.join(csv_data_dir, original_file+"_rotations.csv"), nrows=0)
    c_headers = [c for c in data.columns if 'End' not in c ][1:]
    rotation_df = pd.DataFrame(rotation, columns=c_headers)
    position_df = pd.DataFrame(position, columns=c_headers[:3])

    #undo the normalization and standardization of the data
    rotation_df = rotation_df*180
    position_df['Hips.X'] = position_df['Hips.X'] + hierarchy['offset.x'][0]
    position_df['Hips.Y'] = position_df['Hips.Y'] + hierarchy['offset.y'][0]
    position_df['Hips.Z'] = position_df['Hips.Z'] + hierarchy['offset.z'][0]
    
    new_headers = []
    joints = [j for j in hierarchy['joint'].to_numpy() if "End" not in j]
    for j in joints:
        new_headers.append(j+".Z")
        new_headers.append(j+".X")
        new_headers.append(j+".Y")

    rotation_df = rotation_df.reindex(columns=new_headers)  
    
    rotation_df.insert(0, 'time', np.arange(0.0, 0.03333333*len(rotation_df), 0.03333333))
    position_df.insert(0, 'time', np.arange(0.0, 0.03333333*len(position_df), 0.03333333))
    
    rotation_df.to_csv(save_file+"_rot.csv", index=False)
    position_df.to_csv(save_file+"_pos.csv", index=False)
    
def benchmark(n_frames, units=N_NODES, timesteps=N_TIMESTEPS):
    #loads the most recent saved model
    temp_path = os.path.join(save_dir, "units-{}_timesteps-{}".format(units, timesteps))
    filename = [f for f in os.listdir(temp_path) if "model" in f][-1]
    model = load_model(os.path.join(temp_path, filename))
    print("Model loaded")
    
    #select random dance for seed
    dances = getFileNames()
    seed_index = random.randint(0, len(dances) - 1)
    seed = get_processed_data(dances[seed_index])[:N_TIMESTEPS]
    
    print("Generating dance with seed from", dances[seed_index])
    #for diversity in [0.2, 0.5, 1.0, 1.2]:
    for diversity in [1.0]:
        start_time = time.time()
        generated = seed
        for i in progressbar(range(n_frames),"{} Progress: ".format(diversity)):
            preds = model.predict(np.array([generated[-N_TIMESTEPS:]]), verbose=0)[0]
            generated = np.vstack((generated, preds))
        save_location = os.path.join(temp_path, "generated_dance_{}-frames_{}-diversity".format(n_frames, diversity))
        save_to_csv(generated, dances[seed_index], save_location)
        print("\tTotal Elapsed time (in sec.):", time.time()-start_time)
        print("\tSaved to", save_location)

# Run Script

In [35]:
if(args.train):
    start_time = time.time()
    train_model()
    print("--- %s hours ---" % ((time.time() - start_time)/3600))
else:
    benchmark(args.n_frames, args.n_units, args.n_timesteps)

Model loaded
Creating pre-processed datafile: Andria_Bored_v1_0
Saving pre-processed data
Generating dance with seed from Andria_Bored_v1_0
1.0 Progress: [############################################################] 500/500
	Total Elapsed time (in sec.): 27.179787635803223
Saved the generated dance to /Akamai/MLDance/logs/units-256_timesteps-20/generated_dance_500-frames_1.0-diversity
