The API I built for processing data and interacting with the model.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.python.data import Dataset
from functools import reduce
from matplotlib import pyplot as plt

In [16]:
def normalize_features(features):
    """
    Min-max scales every feature into the range [0, 1]
    :param features: map of feature names to numeric array-likes
    :return: new map of feature names to scaled numpy arrays (the input map is not modified)
    """

    norm_features = {}
    for key, val in features.items():
        # accept lists/Series as well as ndarrays
        val = np.asarray(val)
        min_val = np.amin(val)
        diff = np.amax(val) - min_val

        # a constant feature has zero range; dividing by it would fill the
        # column with NaN, so scale by 1 instead and let it come out as zeros
        scale = diff if diff != 0 else 1
        norm_features[key] = (val - min_val) / scale

    return norm_features

In [2]:
def select_features(features, dataframe):
    """
    Extracts selected features from data
    :param features: list of column names
    :param dataframe: the dataframe to select features from
    :return: map of selected features to numpy arrays
    :raises KeyError: if a requested column is not in the dataframe
    """

    feature_frame = {}

    # add features to feature_frame
    for feature in features:
        try:
            column = dataframe[feature]
        except KeyError:
            # report which column is missing, then let the caller see the original error
            print("feature not in dataframe: {!r}".format(feature))
            raise

        # convert values to numpy array
        feature_frame[feature] = np.array(column)

    return feature_frame

def process_categorical_data(feature,delimiter=None):
    """
    convert categorical data to usable format
    :param feature: iterable of raw values (e.g. a pandas series) to be processed
    :param delimiter: to separate multiple values for a sample's feature
                      (None splits on runs of whitespace, as str.split does)
    :return: list with one list of categories per sample
    """

    values = []
    for value in feature:
        try:
            values.append(value.split(delimiter))
        except AttributeError:
            # entries without a split() method (e.g. numbers) are kept
            # as a single category; any other error is a real problem
            # (such as a bad delimiter) and is allowed to propagate
            values.append([value])

    return values

In [3]:
def encode_feature(feature):
    """
    Multihot-encodes categorical samples into a numerical array
    :param feature: sized sequence of samples, each an iterable of hashable categories
    :return: float32 numpy array of shape (num samples, num distinct categories),
             and the vocab mapping each category to its column
    """

    # columns are handed out in order of first appearance
    vocab = {}
    columns_per_sample = []
    for sample in feature:
        hot_columns = []
        for category in sample:
            # setdefault only stores len(vocab) when the category is new
            hot_columns.append(vocab.setdefault(category, len(vocab)))
        columns_per_sample.append(hot_columns)

    # start from all zeros and switch on the recorded columns
    num_data = np.zeros((len(feature), len(vocab)), dtype=np.float32)
    for row, hot_columns in enumerate(columns_per_sample):
        for column in hot_columns:
            num_data[row, column] = 1

    return num_data, vocab

def integrate_features(features):
    """
    concatenates features column-wise into a big matrix
    :param features: iterable of arrays (1-D arrays become single columns)
    :return: float32 np array; a lone feature is returned with its own shape
    :raises TypeError: if no features are given
    """
    features = list(features)

    if not features:
        # same exception type reduce() raised for an empty sequence
        raise TypeError("integrate_features() needs at least one feature")

    if len(features) == 1:
        # nothing to stack: keep the feature's shape, only cast it
        return np.asarray(features[0]).astype(np.float32)

    # stack everything in one call instead of re-copying the growing
    # matrix once per feature
    return np.column_stack(features).astype(np.float32)

In [4]:
def select_labels(targets,dataframe):
    """
    Extracts selected labels from data
    :param targets: list of target column names
    :param dataframe: the dataframe to select targets from
    :return: new dataframe holding only the target columns, in the order requested
    :raises KeyError: if a requested target is not in the dataframe
    """

    target_frame = pd.DataFrame()

    # add targets to target_frame
    for target in targets:
        try:
            column = dataframe[target]
        except KeyError:
            # report which column is missing, then let the caller see the original error
            print("target not in dataframe: {!r}".format(target))
            raise
        target_frame[target] = column

    return target_frame

In [5]:
def multihot_binarycolumns(labels,binary_values):
    """
    Multihot encoding of multiple columns with binary values
    :param labels: dataframe to be encoded
    :param binary_values: dictionary mapping dataset binary values to a 1 or 0
    :return: float32 numpy array with the same shape as labels
    :raises KeyError: if a cell holds a value that is not in binary_values
    """

    # create encoded array
    data = np.zeros((labels.shape),dtype=np.float32)

    # Walk the rows by POSITION. The dataframe's index labels are not
    # guaranteed to be 0..n-1 (e.g. after filtering or a train/test split),
    # so they must not be used to address rows of the output array.
    for position, row in enumerate(labels.values):
        data[position] = [binary_values[value] for value in row]

    return data

In [6]:
def create_batches(features,targets,batch_size = None, shuffle_buffer_size = 1000):
    """
    create batches to be fed into model
    :param features: tensor of features
    :param targets: tensor of targets
    :param batch_size: desired size of batches (required; may be an int or an int64 tensor)
    :param shuffle_buffer_size: number of samples the shuffle buffer holds
    :return: initializable batch iterator (its initializer must be run before get_next())
    :raises ValueError: if batch_size is None
    """

    # Dataset.batch cannot work with None, so fail early with a clear message
    if batch_size is None:
        raise ValueError("batch_size must be provided to create_batches")

    # slice the data into samples, shuffle them, then group them into batches
    ds = Dataset.from_tensor_slices((features,targets))
    ds = ds.shuffle(shuffle_buffer_size).batch(batch_size)

    return ds.make_initializable_iterator()

Special note: In TensorFlow, a Graph contains the components of the model (the optimizer, the weights, the loss function, etc.) in the form of "Tensors" and "Operations". Later on, these objects are run in a separate step, called a Session, to train the model.

In [15]:
def create_linear_classifier(num_features,num_labels,optimizer_name, pred_type):
    """
    creates a linear model (logits = inputs * weights + biases)
    :param num_features: number of features to train on (NOT number of samples)
    :param num_labels: number of target labels (NOT number of samples)
    :param optimizer_name: name of optimizer
    :param pred_type: the type of predictions being made (multi-class/multi-label)
    :return: tensorflow graph of net, and a map from short keys to the names of the
             graph's tensors/ops ('x_data', 'y_data', 'batch_size', 'iterator', 'loss',
             'back prop', 'learning_rate', 'accuracy', 'predictions')
    """

    # construct graph
    graph = tf.Graph()

    # map of graph vars to run in session -- predictions, loss, optimizer, accuracy
    graph_vars = {}

    # invoke tensorflow dataflow context
    with graph.as_default():

        # set up placeholder variables for input and output data
        x_data = tf.placeholder(tf.float32,name="x_data")
        y_data = tf.placeholder(tf.float32,name="y_data")
        graph_vars['x_data'] = "x_data:0"
        graph_vars['y_data'] = "y_data:0"

        # set up batching
        batch_size = tf.placeholder(tf.int64,name="batch_size")
        iterator = create_batches(x_data,y_data,batch_size=batch_size)

        # Named op that (re)initializes the iterator; it is run by name at the
        # start of each epoch. An iterator is not a variable, so group its own
        # initializer rather than going through tf.variables_initializer.
        tf.group(iterator.initializer,name="iterator")
        graph_vars['iterator'] = "iterator"

        # get next batch
        batch_x_train, batch_y_train = iterator.get_next()
        graph_vars['batch_size'] = "batch_size:0"

        # set up weights, biases, logits
        weights = tf.Variable(tf.truncated_normal([num_features,num_labels]))
        biases = tf.Variable(tf.zeros([num_labels]))
        logits = tf.add(tf.matmul(batch_x_train,weights), biases)

        # apply appropriate transformation to logits and get loss
        error_fn = select_error(pred_type)
        loss = tf.reduce_mean(error_fn(logits = logits,labels = batch_y_train),name="loss")
        graph_vars['loss'] = "loss:0"

        # optimizer + backpropagation
        learning_rate = tf.placeholder(tf.float32,name="learning_rate")
        optimizer = select_optimizer(optimizer_name,learning_rate)
        optimizer.minimize(loss,name="back_propagation")
        graph_vars['back prop'] = "back_propagation"
        graph_vars['learning_rate'] = "learning_rate:0"

        # calculate accuracy as the fraction of individual label entries predicted correctly
        predictions = tf.identity(get_predictions(pred_type,logits),name="predictions")
        equality = tf.equal(predictions,batch_y_train)
        tf.reduce_mean(tf.cast(equality,tf.float32),name="accuracy")
        graph_vars['accuracy'] = "accuracy:0"
        graph_vars['predictions'] = "predictions:0"

    return graph, graph_vars

In [8]:
def create_vanilla_nn(num_features, num_labels, optimizer_name, pred_type, layers = None):
    """
    creates a vanilla (aka fully connected feed forward) neural network
    :param num_features: number of features to train on (NOT number of samples)
    :param num_labels: number of target labels (NOT number of samples)
    :param optimizer_name: name of optimizer
    :param pred_type: the type of predictions being made (multi-class/multi-label)
    :param layers: list of tuples for each layer (num hidden nodes, name of activation func). None = linear classifier
    :return: tensorflow graph of net, and a map from short keys to the names of the
             graph's tensors/ops ('x_data', 'y_data', 'batch_size', 'iterator', 'loss',
             'back prop', 'learning_rate', 'accuracy', 'predictions'); the
             'hidden <n>' entries hold the hidden layer tensors themselves, not names
    """

    # identity check: "== None" misbehaves for array-like arguments
    if layers is None:
        return create_linear_classifier(num_features,num_labels,optimizer_name, pred_type)

    # construct graph
    graph = tf.Graph()

    # construct map of graph vars to run in session -- predictions, loss, optimizer, accuracy, data, etc.
    graph_vars = {}

    # invoke tensorflow dataflow context
    with graph.as_default():

        # set up placeholder variables for input and output data
        x_data = tf.placeholder(tf.float32,name="x_data")
        y_data = tf.placeholder(tf.float32,name="y_data")
        graph_vars['x_data'] = "x_data:0"
        graph_vars['y_data'] = "y_data:0"

        # set up batching
        batch_size = tf.placeholder(tf.int64,name="batch_size")
        iterator = create_batches(x_data,y_data,batch_size=batch_size)

        # Named op that (re)initializes the iterator; it is run by name at the
        # start of each epoch. An iterator is not a variable, so group its own
        # initializer rather than going through tf.variables_initializer.
        tf.group(iterator.initializer,name="iterator")
        graph_vars['iterator'] = "iterator"

        # get next batch
        batch_x_train, batch_y_train = iterator.get_next()
        graph_vars['batch_size'] = "batch_size:0"

        # width and output of the layer feeding the one being built
        prev_rows = num_features
        prev_layer = batch_x_train

        # create hidden layers
        for num_layer, layer in enumerate(layers, 1):

            # extract number of nodes in hidden layer and activation func
            num_nodes = layer[0]
            activation_func = select_activation_func(layer[1])

            # create layer
            weights = tf.Variable(tf.truncated_normal([prev_rows,num_nodes]))
            biases = tf.Variable(tf.zeros([num_nodes]))
            hidden = activation_func(tf.add(tf.matmul(prev_layer,weights),biases))
            graph_vars["hidden "+str(num_layer)] = hidden

            # this layer feeds the next one
            prev_layer = hidden
            prev_rows = num_nodes

        # final layer
        final = tf.Variable(tf.truncated_normal([prev_rows,num_labels]))

        # set up biases and logits
        biases = tf.Variable(tf.zeros([num_labels]))
        logits = tf.add(tf.matmul(prev_layer,final), biases)

        # apply appropriate transformation to logits and get loss
        error_fn = select_error(pred_type)
        loss = tf.reduce_mean(error_fn(logits = logits,labels = batch_y_train),name="loss")
        graph_vars['loss'] = "loss:0"

        # optimizer + backpropagation
        learning_rate = tf.placeholder(tf.float32,name="learning_rate")
        optimizer = select_optimizer(optimizer_name,learning_rate)
        optimizer.minimize(loss,name="back_propagation")
        graph_vars['back prop'] = "back_propagation"
        graph_vars['learning_rate'] = "learning_rate:0"

        # calculate accuracy as the fraction of individual label entries predicted correctly
        predictions = tf.identity(get_predictions(pred_type,logits),name="predictions")
        equality = tf.equal(predictions,batch_y_train)
        tf.reduce_mean(tf.cast(equality,tf.float32),name="accuracy")
        graph_vars['accuracy'] = "accuracy:0"
        graph_vars['predictions'] = "predictions:0"

    return graph, graph_vars

In [9]:
def select_optimizer(name,learning_rate):
    """
    Select user specified optimizer
    :param name: name of optimizer, case-insensitive ("adam", "gd", "adagrad" or "adadelta")
    :param learning_rate: optimizer's learning rate
    :return: optimizer object
    :raises ValueError: if name is not a supported optimizer
    """

    name = name.lower()

    # validate with a real exception: asserts are stripped under "python -O"
    supported = ("adam","gd","adagrad","adadelta")
    if name not in supported:
        raise ValueError("unknown optimizer {!r}; expected one of {}".format(name, supported))

    # map names to classes so that only the requested optimizer is constructed
    optimizer_classes = {
        "gd" : tf.train.GradientDescentOptimizer,
        "adam" : tf.train.AdamOptimizer,
        "adagrad" : tf.train.AdagradOptimizer,
        "adadelta": tf.train.AdadeltaOptimizer
    }
    return optimizer_classes[name](learning_rate=learning_rate)

def select_activation_func(name):
    """
    Selects user specified activation fn
    :param name: name of activation fn, case-insensitive ("sigmoid", "relu" or "tanh")
    :return: activation fn
    :raises ValueError: if name is not a supported activation fn
    """

    name = name.lower()

    # validate with a real exception: asserts are stripped under "python -O"
    supported = ("sigmoid","relu","tanh")
    if name not in supported:
        raise ValueError("unknown activation function {!r}; expected one of {}".format(name, supported))

    return {
        "sigmoid" : tf.sigmoid,
        "relu" : tf.nn.relu,
        "tanh" : tf.tanh
    }[name]


def select_error(pred_type):
    """
    Select type of error based on classification task
    :param pred_type: type of predictions being made ("multi-class" or "multi-label", case-sensitive)
    :return: error op (softmax cross entropy for multi-class, sigmoid cross entropy for multi-label)
    :raises ValueError: if pred_type is not a supported classification type
    """

    # validate with a real exception: asserts are stripped under "python -O"
    supported = ("multi-class","multi-label")
    if pred_type not in supported:
        raise ValueError("unknown prediction type {!r}; expected one of {}".format(pred_type, supported))

    return {
        "multi-class" : tf.nn.softmax_cross_entropy_with_logits_v2,
        "multi-label" : tf.nn.sigmoid_cross_entropy_with_logits
    }[pred_type]

def get_predictions(pred_type, logits):
    """
    Calculate predictions based on classification task
    :param pred_type: type of prediction being made ("multi-class" or "multi-label", case-sensitive)
    :param logits: unscaled predictions to be transformed
    :return: predictions (probabilities rounded to 0 or 1)
    :raises ValueError: if pred_type is not a supported classification type
    """

    # validate with a real exception: asserts are stripped under "python -O"
    supported = ("multi-class","multi-label")
    if pred_type not in supported:
        raise ValueError("unknown prediction type {!r}; expected one of {}".format(pred_type, supported))

    # build only the ops for the requested task instead of adding both
    # transformations to the graph and discarding one
    if pred_type == "multi-class":
        # NOTE(review): rounding softmax output marks a class only when its
        # probability exceeds 0.5, so a sample whose probabilities are all
        # below that gets an all-zero prediction -- confirm this is intended
        probabilities = tf.nn.softmax(logits)
    else:
        probabilities = tf.nn.sigmoid(logits)

    return tf.round(probabilities)

Special note: In low-level TensorFlow, training the model (which is contained in a Graph) occurs in a Session. Here I also plot the accuracy and error of the model.

In [10]:
def train_model(x_data, y_data, model,model_vars, learning_rate = 0.1, epochs = 1, batch_size = None, save_path = None):
    """
    train model and plot accuracy
    :param x_data: input data (numpy array; its first dimension is the number of samples)
    :param y_data: ground truth data
    :param model: graph object containing net
    :param model_vars: map of tensors/ops such as optimizer, loss, iterator etc. to run in session
    :param learning_rate: the optimizer's learning rate in the model
    :param epochs: number of epochs to train
    :param batch_size: number of samples in each batch (None or 0 = all samples in one batch)
    :param save_path: file-path to save model if specified
    :return: None (the accuracy and loss curves are plotted)
    :raises ValueError: if an epoch yields no batches at all
    """

    # lists of average accuracy and error for each epoch
    accuracy_points = []
    error_points = []

    tf.reset_default_graph()
    # start session
    with tf.Session(graph=model) as session:

        # initialize global variables
        tf.global_variables_initializer().run()

        # load data
        feed_dict = {
                        model_vars['x_data']: x_data,
                        model_vars['y_data']: y_data,
                        model_vars['batch_size']: batch_size if batch_size else x_data.shape[0],
                    }
        learning_rate_feed = {model_vars['learning_rate']: learning_rate}

        # session variables
        sess_vars = [model_vars['loss'],model_vars['back prop'],model_vars['accuracy']]
        for _ in range(epochs):

            # reset for each epoch
            epoch_acc = 0.0
            epoch_err = 0.0
            denom = 0
            session.run([model_vars['iterator']],feed_dict=feed_dict) # feed data into model

            # get batch until no batches left
            while True:
                try:
                    l, _, acc = session.run(sess_vars,feed_dict=learning_rate_feed)
                except tf.errors.OutOfRangeError:
                    # the iterator is exhausted: the epoch is over. Any other
                    # error is a genuine failure and must not be hidden.
                    break
                epoch_acc+=acc
                epoch_err+=l
                denom+=1

            if denom == 0:
                raise ValueError("no batches were produced; is the training data empty?")

            accuracy_points.append(epoch_acc/denom)
            error_points.append(epoch_err/denom)

        # save if specified (must happen while the session is still open)
        if save_path:
            save_model(session,save_path)

    # the with-block has closed the session; only plotting is left
    _plot_training_curves(accuracy_points, error_points, epochs)


def _plot_training_curves(accuracy_points, error_points, epochs):
    """
    plots the per-epoch average accuracy and loss side by side
    :param accuracy_points: average accuracy for each epoch
    :param error_points: average loss for each epoch
    :param epochs: number of epochs that were trained
    :return: None
    """

    x_values = np.arange(1,epochs+1)
    fig, axes = plt.subplots(ncols=2)
    fig.set_figheight(5)
    fig.set_figwidth(10)

    axes[0].plot(x_values, accuracy_points, 'b-')
    axes[0].set_ylim(0.0,1.0)
    axes[0].set_title("Average Accuracy/epoch")
    axes[0].set_xlabel("Epochs")
    axes[0].set_ylabel("Accuracy (%)")

    axes[1].plot(x_values,error_points,'r-')
    axes[1].set_title("Loss/epoch")
    axes[1].set_xlabel("Epochs")
    axes[1].set_ylabel("Loss")
    fig.tight_layout()

    plt.show()

In [11]:
def test_model(x_data,y_data, model_vars, restore_paths,vocab=None):
    """
    tests model on test data
    :param x_data: new feature data to perform predictions (numpy array; its first
                   dimension is the number of samples)
    :param y_data: ground truth data
    :param model_vars: map of tensors/ops such as optimizer, loss, iterator etc. to run in session
    :param restore_paths: map with the keys 'metagraph' and 'checkpoint' pointing to the saved model files
    :param vocab: optional parameter to decode predictions if necessary (currently unused)
    :return: predictions, accuracy, and loss
    """

    tf.reset_default_graph()
    with tf.Session() as session:

        # restore model in this session
        restore_model(session,restore_paths['metagraph'], restore_paths['checkpoint'])

        # load data; the whole test set is evaluated as a single batch
        feed_dict = {
                        model_vars['x_data']: x_data,
                        model_vars['y_data']: y_data,
                        model_vars['batch_size']: x_data.shape[0],
                    }

        # feed in test data and extract results
        session.run(model_vars['iterator'],feed_dict=feed_dict)
        sess_vars = [model_vars['accuracy'],model_vars['loss'],model_vars['predictions']]
        acc, loss, preds = session.run(sess_vars, feed_dict=feed_dict)

    # the with-block has already closed the session
    return preds, acc, loss

In [12]:
def save_model(sess, save_path):
    """
    Writes the session's model to disk
    :param sess: session object holding the graph variables in their trained state
    :param save_path: file-path to save the session graph under
    :return:
    """

    # a Saver built without arguments; presumably it must be created while
    # the session's graph is the default graph -- verify against callers
    tf.train.Saver().save(sess,save_path)

In [17]:
def restore_model(sess, meta_graph_path, checkpoint_path):
    """
    Loads a saved model into the given session
    :param sess: session object to restore the model under
    :param meta_graph_path: path to the meta graph file created when saving the model
    :param checkpoint_path: path to the checkpoint file for the current notebook
    :return:
    """

    # importing the meta graph yields the saver used to restore the checkpoint
    tf.train.import_meta_graph(meta_graph_path).restore(sess,checkpoint_path)