### This notebook demonstrates a basic example of how to create a vocabulary from input texts and transform text into vector format.
The purpose is to point out some of the problems that traditional methods face in sequence modeling.

In [1]:
import tensorflow as tf
import os
import numpy as np
# Expose no CUDA devices to this process, so this session does not use a GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

### Data Transformation
- Create a vocab from input texts
- Create a binary (1/0) bag-of-words vector for the input text

In [2]:
# Toy corpus. The first two sentences contain exactly the same words in a
# different order, yet they carry different labels.
sentences = [
    "The food was good not bad at all",
    "The food was bad not good at all",
    "I love the coldness",
]
# One label per sentence, aligned by position with `sentences`.
labels = [
    "happy",
    "sad",
    "happy",
]

In [3]:
### create vocabulary
def create_vocab(ss):
    """Collect the distinct whitespace-separated tokens of ``ss``.

    Args:
        ss: Iterable of strings (one sentence per item).

    Returns:
        list: Every distinct token, ordered by first appearance. Tokens are
        compared exactly, so "The" and "the" are two separate entries.
    """
    vocab = []
    # Set used only for O(1) "already seen?" checks; `vocab` keeps the order.
    seen = set()
    for s in ss:
        for w in s.split():
            if w not in seen:
                seen.add(w)
                vocab.append(w)
    return vocab

In [4]:
# Build the vocabulary once from the whole corpus; later cells index into it.
vocab = create_vocab(sentences)

In [5]:
### create BoW vector
def create_vector(vocab, s):
    """Encode ``s`` as a binary bag-of-words vector over ``vocab``.

    Args:
        vocab: Sequence of tokens; position ``i`` of the result corresponds
            to ``vocab[i]``. If a token is repeated in ``vocab``, its first
            position is the one that gets set.
        s: Text to encode; it is split on whitespace.

    Returns:
        numpy.ndarray: 1-D float array of length ``len(vocab)`` holding 1 at
        the position of every vocabulary token that occurs in ``s`` and 0
        elsewhere. Word order and repeat counts are not represented, and
        tokens that are not in ``vocab`` are ignored.
    """
    # Build the token -> position map once instead of scanning the list for
    # every word; setdefault keeps the first position of a repeated token.
    index_of = {}
    for position, token in enumerate(vocab):
        index_of.setdefault(token, position)

    vector = np.zeros(len(vocab))
    for w in s.split():
        position = index_of.get(w)
        # Out-of-vocabulary words have no slot in the vector; skip them.
        if position is not None:
            vector[position] = 1
    return vector

In [6]:
# Sanity check: encode the third sentence and display the resulting vector.
create_vector(vocab, sentences[2])

array([0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1.])

### Feed into a 2-layer neural network (one hidden layer followed by an output layer)

In [7]:
# Fixed seed so the randomly initialised weights (and therefore the printed
# outputs) are the same each time the notebook is run on a fresh kernel.
SEED = 42

n_input = len(vocab)
# One output unit per distinct label, so the width of Y always matches the
# number of classes present in `labels` instead of a hard-coded 2.
n_classes = len(set(labels))

# Graph-level seed; the tf.random_normal initialisers below derive from it.
tf.set_random_seed(SEED)

# X takes bag-of-words rows of width n_input; Y takes label rows of width n_classes.
X = tf.placeholder(tf.float32, [None, n_input])
Y = tf.placeholder(tf.float32, [None, n_classes])

### TODO: can be refactored to dictionary format
# First layer: affine map from the vocabulary-sized input to n_classes units.
W1 = tf.Variable(tf.random_normal(shape=[n_input, n_classes], dtype=tf.float32))
b1 = tf.Variable(tf.random_normal(shape=[n_classes], dtype=tf.float32))
hidden_1 = tf.matmul(X, W1) + b1

# Second layer: affine map from the hidden units to the n_classes outputs.
# No activation sits between the two layers, so as written the network is a
# composition of two affine maps.
W2 = tf.Variable(tf.random_normal(shape=[n_classes, n_classes], dtype=tf.float32))
b2 = tf.Variable(tf.random_normal(shape=[n_classes], dtype=tf.float32))

### TODO: can apply activation on output
output = tf.matmul(hidden_1, W2) + b2

In [8]:
# Fix the label order once. Iterating a set of strings can give a different
# order from one interpreter run to the next, which would change which column
# of the label vector stands for which label.
label_vocab = sorted(set(labels))
assert len(sentences) == len(labels), "each sentence needs exactly one label"

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for sentence, label in zip(sentences, labels):
        # Reshape to a single-row batch to match the [None, width] placeholders.
        input_x = create_vector(vocab, sentence).reshape((1, -1))
        input_y = create_vector(label_vocab, label).reshape((1, -1))
        # Y is fed here, but `output` is computed from X only.
        # `hidden` receives a one-element list holding the output-layer values.
        hidden = sess.run([output], feed_dict={X: input_x, Y: input_y})
        print(hidden)

[array([[2.3146973, 1.5333501]], dtype=float32)]
[array([[2.3146973, 1.5333501]], dtype=float32)]
[array([[-9.740375 , -5.6059237]], dtype=float32)]
