## References

- https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/01_Simple_Linear_Model.ipynb

In [5]:
# OP
import numpy as np
import pandas as pd 
from sklearn import datasets
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

# DL 
import tensorflow as tf 

In [25]:
# GENERATE TOY DATA
# Synthetic multi-class problem; the sizes are named so later cells and
# readers do not have to rediscover them from the call below.
N_SAMPLES = 10000
N_FEATURES = 20
N_CLASSES = 4
SEED = 42  # fixed so that Restart & Run All regenerates the same X and y

# X: (N_SAMPLES, N_FEATURES) float matrix, y: (N_SAMPLES,) integer class ids.
X, y = datasets.make_classification(n_samples=N_SAMPLES, n_features=N_FEATURES,
                                    n_informative=2, n_redundant=5,
                                    n_clusters_per_class=1, n_classes=N_CLASSES,
                                    random_state=SEED)

In [160]:
# Display the generated feature matrix (one row per sample).
X

array([[ 0.70488674,  0.86190855, -0.54545458, ..., -0.97611583,
        -0.72269921, -0.3918303 ],
       [ 1.02894295,  0.36738741, -0.11840193, ...,  1.10361663,
        -0.96512791, -0.0288549 ],
       [-0.22908098, -1.54314282,  1.07098599, ..., -0.47356331,
        -1.43360698, -2.35546839],
       ..., 
       [-0.58256902, -0.32832111,  0.16417028, ..., -0.1385447 ,
        -2.39407493,  1.29910305],
       [ 0.31383021, -1.03369215, -0.68900052, ...,  0.35693534,
        -1.83667937,  0.05052402],
       [-1.47401046, -2.00451064,  0.24261489, ...,  1.48395664,
        -0.52222599, -1.02889809]])

In [161]:
# Display the generated integer class labels (one entry per sample).
y

array([0, 2, 0, ..., 2, 2, 2])

In [34]:
# TRAIN-TEST SPLIT
# The leading 90% of the rows are used for training; the remaining tail is
# held out for evaluation.

train_split = 0.9
num_data = X.shape[0]
num_train = int(num_data * train_split)
num_test = num_data - num_train

# Features and labels are cut at the same row so they stay aligned.
x_train, x_test = X[:num_train], X[num_train:]
y_train, y_test = y[:num_train], y[num_train:]

train_shape = x_train.shape
test_shape = x_test.shape

print("Size of:")
print("- Training-set:\t\t{}".format(train_shape))
print("- Test-set:\t\t{}".format(test_shape))


Size of:
- Training-set:		(9000, 20)
- Test-set:		(1000, 20)


In [39]:
# Number of input features per sample, i.e. the width of one row of X.
# This has to be X.shape[1]: X.shape[0] is the number of samples, and using it
# would make the input placeholder and the weight matrix incompatible with
# the (n_samples, n_features) batches that are fed to the graph.
data_size_flat = X.shape[1]

# Per-sample shape. For tabular data this is simply the feature count; the
# name is kept from the image tutorial this notebook is adapted from.
data_shape = X.shape[1]

# Number of distinct class labels present in y.
num_classes = len(np.unique(y))

In [46]:
# INPUT PLACEHOLDER: x
# Shape is [None, data_size_flat]. The leading None lets the tensor hold an
# arbitrary number of samples; each sample is a vector of length
# data_size_flat.

x = tf.placeholder(dtype=tf.float32, shape=(None, data_size_flat))

In [47]:
# LABEL PLACEHOLDER: y_true
# Shape is [None, num_classes]: an arbitrary number of labels, each one a
# vector of length num_classes.

y_true = tf.placeholder(dtype=tf.float32, shape=(None, num_classes))

In [49]:
# CLASS-ID PLACEHOLDER: y_true_cls
# Shape is [None]: a one-dimensional integer vector of arbitrary length
# holding the true class number of each sample.

y_true_cls = tf.placeholder(dtype=tf.int64, shape=(None,))

In [52]:
# TRAINABLE VARIABLES
# The model is LOGITS = X*w + b, so two variables are optimised:
#   w -> weights
#   b -> biases

# 1) weights
# A 2-D tensor (matrix) with data_size_flat rows and num_classes columns,
# starting out as all zeros.
weights_init = tf.zeros(shape=(data_size_flat, num_classes))
weights = tf.Variable(initial_value=weights_init)

# 2) biases
# A 1-D tensor (vector) of length num_classes, also starting out as zeros.
biases_init = tf.zeros(shape=(num_classes,))
biases = tf.Variable(initial_value=biases_init)




In [53]:
# LINEAR MODEL
# LOGITS = X*w + b
#
# x      : [num_samples, data_size_flat]
# result : [num_samples, num_classes]; the bias vector is added to every row.

weighted_inputs = tf.matmul(x, weights)
logits = tf.add(weighted_inputs, biases)

In [54]:
# PREDICTIONS
# Softmax turns each row of logits into class probabilities; the predicted
# class is the column index holding the largest probability.
y_pred = tf.nn.softmax(logits=logits)
y_pred_cls = tf.argmax(input=y_pred, axis=1)

In [56]:
# COST FUNCTION: the scalar that the optimizer minimises.
# Cross-entropy is computed per sample from the raw logits and then averaged
# over the batch.

cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true,
                                                           logits=logits)
cost = tf.reduce_mean(input_tensor=cross_entropy)


In [57]:
# OPTIMIZER
# Plain gradient descent; `optimizer` is the training op that performs one
# update step on the variables each time it is run.

gradient_descent = tf.train.GradientDescentOptimizer(learning_rate=0.5)
optimizer = gradient_descent.minimize(cost)

In [58]:
# PERFORMANCE MEASURE
# A boolean per sample (predicted class equals true class), cast to 0.0/1.0
# and averaged to give the fraction classified correctly.

correct_prediction = tf.equal(x=y_pred_cls, y=y_true_cls)
correct_as_float = tf.cast(correct_prediction, dtype=tf.float32)
accuracy = tf.reduce_mean(correct_as_float)

In [61]:
# TENSORFLOW RUN

# Hyper-parameter: number of samples used for each gradient step.
batch_size = 100

# Create the session that executes the graph.
session = tf.Session()

# Initialize all variables (weights and biases) before they are used.
init_op = tf.global_variables_initializer()
session.run(init_op)

In [79]:
# Shape of the full training matrix.
x_train.shape

(9000, 20)

In [83]:
# Shape of a window of ten batches that starts at the second batch.
x_train[batch_size:batch_size + 10 * batch_size, :].shape

(1000, 20)

In [117]:
# Pick one of the first ten batch-sized windows at random and check the shape
# of the feature rows it selects.
random_int = np.random.randint(low=0, high=10, size=1)[0]
batch_start = random_int * batch_size
batch_stop = (random_int + 1) * batch_size
x_train[batch_start:batch_stop, :].shape

(100, 20)

In [150]:
# Same random window as for the features, applied to the 1-D label vector.
random_int = np.random.randint(low=0, high=10, size=1)[0]
batch_start = random_int * batch_size
batch_stop = (random_int + 1) * batch_size
y_train[batch_start:batch_stop].shape

(100,)

In [104]:
# Draw a single random integer from 0..9.
np.random.randint(low=0, high=10, size=1)[0]

0

In [154]:
### HELPER FUNCTIONS TO TRAIN AND EVALUATE THE MODEL

def optimize(num_iterations):
    """Run `num_iterations` steps of mini-batch gradient descent.

    Every step draws `batch_size` distinct rows at random from the whole
    training set, one-hot encodes their integer labels so they fit the
    [None, num_classes] `y_true` placeholder, and runs the `optimizer` op once
    in the global `session`.

    Args:
        num_iterations: number of optimisation steps to perform.
    """
    num_train_rows = len(x_train)
    # Never ask for more distinct rows than the training set contains.
    rows_per_step = min(batch_size, num_train_rows)
    # Row i of the identity matrix is the one-hot vector for class i.
    one_hot_lookup = np.eye(num_classes, dtype=np.float32)

    for _ in range(num_iterations):
        # Sample row indices from the entire training set so every example can
        # contribute, with no repeats inside a single batch.
        batch_idx = np.random.choice(num_train_rows, size=rows_per_step,
                                     replace=False)

        x_batch = x_train[batch_idx]
        # y_train is a 1-D vector of class ids: index it along its single axis
        # and expand the ids into one-hot rows.
        y_true_batch = one_hot_lookup[y_train[batch_idx]]

        # Map placeholders to this batch. y_true_cls is not fed because the
        # cost minimised by the optimizer does not depend on it.
        feed_dict_train = {x: x_batch,
                           y_true: y_true_batch}

        # One gradient-descent update using this batch.
        session.run(optimizer, feed_dict=feed_dict_train)
        
        
def print_accuracy():
    """Evaluate the `accuracy` op on `feed_dict_test` and print it as a percentage."""
    # Run the accuracy op in the global session with the test feed.
    test_accuracy = session.run(fetches=accuracy, feed_dict=feed_dict_test)

    # Report the result with one decimal place.
    message = "Accuracy on test-set: {0:.1%}".format(test_accuracy)
    print(message)


In [158]:
# Evaluation feed for the held-out test rows.
# - y_true is a [None, num_classes] placeholder, so the integer labels are
#   expanded to one-hot rows (row i of the identity matrix encodes class i).
# - y_true_cls must be fed as well: the accuracy op compares y_pred_cls with
#   it, and running accuracy without it fails on the unfed placeholder.
feed_dict_test = {x: x_test,
                  y_true: np.eye(num_classes, dtype=np.float32)[y_test],
                  y_true_cls: y_test}

In [None]:
# RUN 
# Report test-set accuracy for the current values of weights and biases.
# NOTE(review): no call to optimize() is visible before this cell, so this
# appears to measure the zero-initialised model — confirm that is intended.

print_accuracy()