In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


## Tensor in numpy array

In [None]:
# Build a rank-3 tensor as a NumPy array: 2 blocks x 1 row x 3 values
a = np.array([
    [[1.0, 2.0, 3.0]],
    [[7.0, 8.0, 9.0]],
])

In [15]:
# Inspect the dimensions of the array, reading the brackets from the outside in:
#   - the outermost [] holds TWO elements,
#   - each of those holds ONE element (a single inner []),
#   - the innermost [] holds THREE scalar numbers.
a.shape

(2, 1, 3)

## Tensorflow constant definition and session

In [18]:
# Build two scalar constants and their sum, then look at what the resulting
# Python objects are.
a = tf.constant(3.0, dtype=tf.float32)
b = tf.constant(4.0)
total = a + b
# Printing shows only the schema of each tensor (op name, shape, dtype),
# not a numeric value.
for tensor in (a, b, total):
    print(tensor)

Tensor("Const_3:0", shape=(), dtype=float32)
Tensor("Const_4:0", shape=(), dtype=float32)
Tensor("add_1:0", shape=(), dtype=float32)


In [17]:
# Write the default computational graph to an event file in the current
# directory so it can be analysed with TensorBoard.
writer = tf.summary.FileWriter('.')
writer.add_graph(tf.get_default_graph())
# Flush pending events and close the writer so the event file is completely
# written to disk and the file handle is released.
writer.close()
# Run the command 'tensorboard --logdir .' to get the visualization

In [19]:
# A SESSION is what performs the actual evaluation of the graph.
# Create one here; the following cells run tensors through it.
session1 = tf.Session()

In [24]:
# Show the session's type and its repr, one per line
print(type(session1), session1, sep='\n')

<class 'tensorflow.python.client.session.Session'>
<tensorflow.python.client.session.Session object at 0x7f5754f5c990>


In [25]:
# Evaluate `total` in the session to obtain its concrete numeric result
total_value = session1.run(total)
print(total_value)

7.0


In [26]:
# tf.Session.run() also accepts structured fetches such as tuples and
# dictionaries; the result comes back in the same structure
fetches = {'ab': (a, b), 'total': total}
print(session1.run(fetches))

{'total': 7.0, 'ab': (3.0, 4.0)}


In [28]:
# A larger tensor: a vector of 3 uniformly distributed random values,
# plus two tensors derived from it
vec = tf.random_uniform(shape=(3,))
out1, out2 = vec + 1, vec + 2

In [31]:
# First evaluation of the random vector
first_draw = session1.run(vec)
print(first_draw)

[0.7223319  0.41441822 0.11913502]


In [30]:
# Second evaluation of the random vector
second_draw = session1.run(vec)
print(second_draw)

[0.6073792  0.2517655  0.56504536]


In [33]:
# Fetching a tuple of tensors in one run() call returns a tuple of results
shifted = session1.run((out1, out2))
print(shifted)

(array([1.3413564, 1.8592318, 1.8292767], dtype=float32), array([2.3413563, 2.859232 , 2.8292766], dtype=float32))


### Placeholders for parameterizing the run inputs

In [34]:
# FEEDING: placeholders stand for external inputs that are supplied at run
# time, instead of constants fixed in the graph.
# Only the data type is declared here.
x, y = tf.placeholder(tf.float32), tf.placeholder(tf.float32)
z = x + y

In [35]:
# Supply the placeholder values through the feed_dict parameter of run()
scalar_feed = {x: 1, y: 2.4}
print(session1.run(z, feed_dict=scalar_feed))

3.4


In [36]:
# The same graph can be fed lists instead of scalars
vector_feed = {x: [1, 3], y: [2.4, 5]}
print(session1.run(z, feed_dict=vector_feed))

[3.4 8. ]


## Datasets for streaming data into model

In [42]:
# Pipeline: raw data -> tf.data.Dataset -> tf.data.Iterator -> Iterator.get_next()
# Draw the sample data from a seeded generator so that it (and every output
# derived from it) is identical on each Restart & Run All.
data_rng = np.random.RandomState(0)
my_data = data_rng.randn(4, 2)
print(my_data)

[[-0.43345846  0.34598637]
 [-0.76853845 -0.20668458]
 [ 0.23660165 -1.63582336]
 [-2.04334704 -0.74773724]]


In [44]:
# Wrap the array in a Dataset, create a one-shot iterator over it, and keep
# the tensor returned by get_next()
slices = tf.data.Dataset.from_tensor_slices(my_data)
one_shot_iterator = slices.make_one_shot_iterator()
next_item = one_shot_iterator.get_next()

##### https://www.tensorflow.org/versions/master/programmers_guide/datasets

- make_one_shot_iterator()

- A one-shot iterator is the simplest form of iterator, which only supports iterating once through a dataset, with no need for explicit initialization

- get_next()
- A tf.data.Iterator provides the main way to extract elements from a dataset. The operation returned by Iterator.get_next() yields the next element of a Dataset when executed, and typically acts as the interface between input pipeline code and your model

In [45]:
# Keep fetching items until the iterator is exhausted; running next_item past
# the end raises OutOfRangeError, which ends the loop
try:
    while True:
        print(session1.run(next_item))
except tf.errors.OutOfRangeError:
    pass

[-0.43345846  0.34598637]
[-0.76853845 -0.20668458]
[ 0.23660165 -1.63582336]
[-2.04334704 -0.74773724]


## Creating layers and initializing

In [59]:
# Placeholder for the input: a variable number of rows, 3 features per row
x = tf.placeholder(tf.float32, shape=[None, 3])
# One linear (dense) layer; `units` is the dimensionality of the output
# space, so this layer has 1 output. The input size is not known yet.
linear_model = tf.layers.Dense(units=1)
# Calling the layer on the placeholder supplies the input specification
y = linear_model(x)

In [48]:
# Initialize the layer variables:
# build the global initializer op, then run it in the session
init = tf.global_variables_initializer()
session1.run(init)

### Executing layers

In [51]:
# Feed two sample rows through the linear model and print its output
sample_rows = [[1, 2, 3], [4, 5, 6]]
print(session1.run(y, feed_dict={x: sample_rows}))

[[-4.5577044]
 [-8.845448 ]]


### A faster way to do it

In [62]:
# Shortcut: declare the input placeholder and pass it straight to
# tf.layers.dense. NOTE: lower-case `dense` is a different API from the
# `Dense` class used above.
x = tf.placeholder(tf.float32, shape=[None, 3])
y = tf.layers.dense(x, units=1)

# Run the global initializer again before evaluating the new layer
init = tf.global_variables_initializer()
session1.run(init)

batch = [[1, 2, 3], [4, 5, 6]]
print(session1.run(y, feed_dict={x: batch}))

[[2.6116967]
 [6.3482356]]


## Feature columns
- Feature columns describe the 'layout' or 'schema' of the features that are fed into the network

##### An example of input data with categorical and numerical features
- For the categorical data: convert it to a 'categorical_column', then wrap that in an 'indicator_column' to get the final 'dense' numeric input
- Merge the numerical columns and the converted categorical columns together
- Key API: tf.feature_column.xxx

In [67]:
# Raw features (dict): a numeric 'sales' entry and a string 'department' entry
features = dict(
    sales=[[5], [10], [8], [9]],
    department=['sports', 'sports', 'gardening', 'gardening'],
)

In [68]:
# Display the raw feature dictionary
features

{'department': ['sports', 'sports', 'gardening', 'gardening'],
 'sales': [[5], [10], [8], [9]]}

In [75]:
# 'Schema' definition
# Feature transform for the categorical data: first a categorical column
# over the known vocabulary, then an indicator column wrapped around it

department_categorical = tf.feature_column.categorical_column_with_vocabulary_list(
    'department', vocabulary_list=['sports', 'gardening'])

department_column = tf.feature_column.indicator_column(department_categorical)
department_column

_IndicatorColumn(categorical_column=_VocabularyListCategoricalColumn(key='department', vocabulary_list=('sports', 'gardening'), dtype=tf.string, default_value=-1, num_oov_buckets=0))

In [73]:
# Feature transform for the numeric data, collected in one list together
# with the department indicator column

sales_column = tf.feature_column.numeric_column('sales')
columns = [sales_column, department_column]
columns

[_NumericColumn(key='sales', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _IndicatorColumn(categorical_column=_VocabularyListCategoricalColumn(key='department', vocabulary_list=('sports', 'gardening'), dtype=tf.string, default_value=-1, num_oov_buckets=0))]

In [76]:
# Combine the 'raw' data with the 'schema' (the feature columns) into a
# single input tensor

inputs = tf.feature_column.input_layer(features=features, feature_columns=columns)
inputs

<tf.Tensor 'input_layer/concat:0' shape=(4, 3) dtype=float32>

In [77]:
# Build the variable initializer and the table initializer, then run both in
# one call
var_init = tf.global_variables_initializer()
table_init = tf.tables_initializer()
initializers = (var_init, table_init)
session1.run(initializers)

(None, None)

In [79]:
# Evaluate the input layer to see the processed features
processed = session1.run(inputs)
print(processed)

[[ 1.  0.  5.]
 [ 1.  0. 10.]
 [ 0.  1.  8.]
 [ 0.  1.  9.]]


## A complete program

In [80]:
# Training data: the inputs and the target values to fit
x = tf.constant([[1], [2], [3], [4]], dtype=tf.float32)
y_true = tf.constant([[-11], [-22], [-33], [-44]], dtype=tf.float32)

In [81]:
# Model: a dense layer with a single output unit, applied to the inputs
linear_model = tf.layers.Dense(units=1)
y_pred = linear_model(x)

In [83]:
# Loss: mean squared error between the true values and the predictions
loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred)

# Optimizer: gradient descent. The optimizer needs a loss to minimize;
# minimize(loss) yields the training op that is run in the training loop.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train_prog = optimizer.minimize(loss)

In [84]:
# Build the initializer op, open a second session, and initialize the
# variables in it
init = tf.global_variables_initializer()
session2 = tf.Session()
session2.run(init)

In [85]:
# Run the training op 199 times and print the loss at every step.
# Fetching (train_prog, loss) as a tuple returns both results from a single
# run() call; the training op's result is not needed, so it is discarded.
for step in range(199):
    _, loss_val = session2.run((train_prog, loss))
    print(loss_val)

1193.4536
828.85364
575.8608
400.31018
278.49487
193.96547
135.30795
94.60246
66.3535
46.747883
33.139748
23.693127
17.13416
12.578892
9.413944
7.2137647
5.6830263
4.616831
3.8729935
3.352855
2.9879692
2.7308295
2.5484662
2.4180255
2.3236272
2.2542696
2.202303
2.1624308
2.1309707
2.105371
2.0838625
2.0652132
2.0485713
2.033343
2.0191157
2.0056105
1.9926219
1.9800162
1.9677012
1.9556015
1.9436787
1.9319006
1.9202386
1.9086812
1.8972154
1.8858322
1.8745325
1.863308
1.8521557
1.8410742
1.8300608
1.8191121
1.8082331
1.7974205
1.7866749
1.7759907
1.765372
1.7548146
1.7443225
1.7338965
1.7235274
1.7132232
1.7029787
1.6927996
1.6826787
1.672616
1.6626168
1.6526766
1.6427946
1.6329725
1.6232108
1.6135049
1.6038581
1.59427
1.5847363
1.5752629
1.5658444
1.5564815
1.5471768
1.537925
1.5287313
1.5195906
1.5105042
1.5014741
1.4924968
1.4835731
1.4747033
1.4658858
1.4571211
1.4484109
1.43975
1.4311423
1.4225844
1.4140803
1.4056256
1.3972228
1.3888685
1.3805639
1.3723085
1.3641045
1.3559486
1.3478408

In [86]:
# Evaluate the model's predictions after training
session2.run(y_pred)

array([[-12.397317],
       [-22.677095],
       [-32.956875],
       [-43.236656]], dtype=float32)

## Tensorflow recap

- Define the TensorFlow variables
- Write the computation around them
- Initialize these tensors with constant values, placeholders, or the global initializer
- Create the session
- Call the session to run the wanted outputs/operators. Input data is supplied here through feed_dict or comes from the model definition