In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
RANDOM_SEED = 0
tf.set_random_seed(RANDOM_SEED)
%run initialize_interactivity

### Import and pre-process data

In [2]:
# create dataframe
raw_data = pd.read_excel('./nation_data.xlsx')

# drop unwated columns
raw_data.drop(['8 - Structure Number'],axis=1,inplace=True)

# drop columns with nan
raw_data.dropna(axis=1,how='all',inplace=True)

# fill nan values with 0
raw_data.fillna(value=0, inplace=True)

# replace 0 values with error code
replace = {'58 - Deck': {0 : '99 - Null Value'}}
raw_data.replace(to_replace=replace, inplace=True)

# factorize categorical variables and create dict of preprocessed data
categorical_data = {}
data_dict = {}
for i,x in enumerate(raw_data.dtypes.values): 
    col = raw_data.columns[i] 
    if x == object:        
        categorical_data[col] = pd.factorize(raw_data[col])
        data_dict[col] = categorical_data[col][0]
    else:
        data_dict[col] = raw_data[col]
        
# create dataframe of preprocessed data        
data = pd.DataFrame(data_dict,dtype=np.float32)

In [3]:
# extract features and targets
features = data.drop(['58 - Deck'],axis=1)
target = data['58 - Deck']

# add bias term to features
features.insert(0,'bias',np.float32(1))

# convert targets into one-hot vectors
num_labels = len(target.unique())
target = pd.get_dummies(target)

# convert to type: ndarray
features = features.as_matrix()
target = target.as_matrix()
train_X, test_X, train_y, test_y = train_test_split(features, target, test_size=0.25, random_state=RANDOM_SEED)

# get shape values
shape_X = train_X.shape
shape_y = train_y.shape

In [4]:
# constants
N_INPUTS = shape_X[1]
N_OUTPUTS = shape_y[1]

# tuning parameters
hidden_layers = 1
hidden_size = 10
learning_rate = 0.05

# initialize placeholders for inputs, outputs, and weight tensors
X = tf.placeholder(tf.float32, shape = [None, N_INPUTS], name = "X")
y = tf.placeholder(tf.float32, shape = [None, N_OUTPUTS], name = "y")
w1 = tf.Variable(tf.random_normal((N_INPUTS, hidden_size), stddev=0.1))
w2 = tf.Variable(tf.random_normal((hidden_size, N_OUTPUTS), stddev=0.1))

In [5]:
# forward propogation using the soigmoid activation function
z1 = tf.nn.sigmoid(tf.matmul(X, w1))  
z2 = tf.matmul(z1, w2)  
predictions = tf.argmax(z2, axis=1) # get column with largest activation value

In [6]:
# back propogation: reducing the mean error of softmax function
error = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=z2))
optm = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)

In [13]:
# initialize the session and global variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# run the gradient decent to optimize the 
for steps in range(500):
    
    # Train with each example
    for n in range(shape_X[0]):    
        sess.run(optm, feed_dict = {X: train_X[n: n+1], y: train_y[n: n+1]})

    train_accuracy = np.mean(tf.argmax(train_y, axis=1) == sess.run(predictions, feed_dict={X: train_X, y: train_y}))
    test_accuracy  = np.mean(tf.argmax(test_y, axis=1) == sess.run(predictions, feed_dict={X: test_X, y: test_y}))

    if steps % 10 == 0:
        print(f"Step = {steps}, train accuracy = {round(100*train_accuracy,4)}%, test accuracy = {round(100*test_accuracy,4)}%")

sess.close()


Step = 0, train accuracy = 0.0%, test accuracy = 0.0%
Step = 10, train accuracy = 0.0%, test accuracy = 0.0%
Step = 20, train accuracy = 0.0%, test accuracy = 0.0%
Step = 30, train accuracy = 0.0%, test accuracy = 0.0%
Step = 40, train accuracy = 0.0%, test accuracy = 0.0%
Step = 50, train accuracy = 0.0%, test accuracy = 0.0%
Step = 60, train accuracy = 0.0%, test accuracy = 0.0%
Step = 70, train accuracy = 0.0%, test accuracy = 0.0%
Step = 80, train accuracy = 0.0%, test accuracy = 0.0%
Step = 90, train accuracy = 0.0%, test accuracy = 0.0%
Step = 100, train accuracy = 0.0%, test accuracy = 0.0%
Step = 110, train accuracy = 0.0%, test accuracy = 0.0%
Step = 120, train accuracy = 0.0%, test accuracy = 0.0%
Step = 130, train accuracy = 0.0%, test accuracy = 0.0%
Step = 140, train accuracy = 0.0%, test accuracy = 0.0%
Step = 150, train accuracy = 0.0%, test accuracy = 0.0%
Step = 160, train accuracy = 0.0%, test accuracy = 0.0%
Step = 170, train accuracy = 0.0%, test accuracy = 0.0%
Ste