# https://docs.google.com/presentation/d/1SJMsa4BdOFVRCPD9uwaAqDBYYfgbQcbOm7HaxRVpaaY/edit#slide=id.g2f28335886_0_17

# Name scope

TensorFlow doesn’t know what nodes should be grouped together, unless you tell it to

Group nodes together with tf.name_scope(name)


```
with tf.name_scope(name_of_that_scope):
	# declare op_1
	# declare op_2
```







```
with tf.name_scope('data'):
    iterator = dataset.make_initializable_iterator()
    center_words, target_words = iterator.get_next()
with tf.name_scope('embed'):
    embed_matrix = tf.get_variable('embed_matrix', 
                                    shape=[VOCAB_SIZE, EMBED_SIZE], ...)
    embed = tf.nn.embedding_lookup(embed_matrix, center_words)
with tf.name_scope('loss'):
    nce_weight = tf.get_variable('nce_weight', shape=[VOCAB_SIZE, EMBED_SIZE], ...)
    nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE]))
    loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weight, biases=nce_bias, ...))
with tf.name_scope('optimizer'):
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)

```



## Name scope vs variable scope

tf.name_scope() vs tf.variable_scope()

Variable scope facilitates variable sharing



```
def two_hidden_layers(x):
    """Build a two-layer fully connected network on top of ``x``.

    Every call creates a brand-new set of weights and biases with
    ``tf.Variable``, so calling this function twice yields two independent
    sets of parameters rather than one shared set.

    Args:
        x: 2-D input tensor with 100 columns (it is multiplied by the
            [100, 50] weight matrix ``w1``).

    Returns:
        The logits tensor, with 10 columns.
    """
    # First layer: 100 -> 50.
    w1 = tf.Variable(tf.random_normal([100, 50]), name='h1_weights')
    b1 = tf.Variable(tf.zeros([50]), name='h1_biases')
    h1 = tf.matmul(x, w1) + b1

    # Second layer: 50 -> 10. The bias is named 'h2_biases' so that it
    # follows the same 'h<layer>_<kind>' pattern as the other variables.
    w2 = tf.Variable(tf.random_normal([50, 10]), name='h2_weights')
    b2 = tf.Variable(tf.zeros([10]), name='h2_biases')
    logits = tf.matmul(h1, w2) + b2
    return logits

logits1 = two_hidden_layers(x1)
logits2 = two_hidden_layers(x2)

```
## What will happen if we make these two calls?

Sharing Variable: The problem

Two sets of variables are created.

You want all your inputs to use the same weights and biases!


### solution
```
 tf.get_variable( <name>, <shape>, <initializer>)
```

- If a variable with `<name>` already exists, reuse it (this requires the enclosing variable scope to allow reuse)
- If not, create it with `<shape>` and initialize it using `<initializer>`
  
```
def two_hidden_layers(x):
    """Two-layer network whose parameters come from ``tf.get_variable``.

    The variables are requested by name ("h1_weights", "h1_biases",
    "h2_weights", "h2_biases") in whatever variable scope is active when
    the function is called.

    Args:
        x: input tensor; its static shape must be [200, 100].

    Returns:
        The logits tensor ``h1 @ w2 + b2``.
    """
    assert x.shape.as_list() == [200, 100]

    # First layer: 100 -> 50.
    w1 = tf.get_variable(
        name="h1_weights",
        shape=[100, 50],
        initializer=tf.random_normal_initializer(),
    )
    b1 = tf.get_variable(
        name="h1_biases",
        shape=[50],
        initializer=tf.constant_initializer(0.0),
    )
    h1 = tf.matmul(x, w1) + b1
    assert h1.shape.as_list() == [200, 50]

    # Second layer: 50 -> 10.
    w2 = tf.get_variable(
        name="h2_weights",
        shape=[50, 10],
        initializer=tf.random_normal_initializer(),
    )
    b2 = tf.get_variable(
        name="h2_biases",
        shape=[10],
        initializer=tf.constant_initializer(0.0),
    )
    return tf.matmul(h1, w2) + b2
logits1 = two_hidden_layers(x1)
logits2 = two_hidden_layers(x2)

```

## problem
ValueError: Variable h1_weights already exists, disallowed. Did you mean to set reuse=True in VarScope?

```
with tf.variable_scope('two_layers') as scope:
    logits1 = two_hidden_layers(x1)
    scope.reuse_variables()
    logits2 = two_hidden_layers(x2)

```
Put your variables within a scope and reuse all variables within that scope

Only one set of variables, all within the variable scope “two_layers”

They take in two different inputs

tf.variable_scope implicitly creates a name scope

## Reusable code?

```
def two_hidden_layers(x):
    """Two-layer network whose parameters come from ``tf.get_variable``.

    Each layer spells out its own weight and bias lookup; the variables are
    requested by name in whatever variable scope is active at call time.

    Args:
        x: input tensor; its static shape must be [200, 100].

    Returns:
        The logits tensor ``h1 @ w2 + b2``.
    """
    assert x.shape.as_list() == [200, 100]

    # First layer: 100 -> 50.
    w1 = tf.get_variable(
        name="h1_weights",
        shape=[100, 50],
        initializer=tf.random_normal_initializer(),
    )
    b1 = tf.get_variable(
        name="h1_biases",
        shape=[50],
        initializer=tf.constant_initializer(0.0),
    )
    h1 = tf.matmul(x, w1) + b1
    assert h1.shape.as_list() == [200, 50]

    # Second layer: 50 -> 10.
    w2 = tf.get_variable(
        name="h2_weights",
        shape=[50, 10],
        initializer=tf.random_normal_initializer(),
    )
    b2 = tf.get_variable(
        name="h2_biases",
        shape=[10],
        initializer=tf.constant_initializer(0.0),
    )
    return tf.matmul(h1, w2) + b2
with tf.variable_scope('two_layers') as scope:
    logits1 = two_hidden_layers(x1)
    scope.reuse_variables()
    logits2 = two_hidden_layers(x2)

```
## layer'em up
Fetch variables if they already exist 

Else, create them

```
def fully_connected(x, output_dim, scope):
    """Fully connected layer ``x @ w + b`` whose variables live in ``scope``.

    The variable scope is opened with ``reuse=tf.AUTO_REUSE``, so the
    "weights" and "biases" variables are fetched if they already exist in
    that scope and created otherwise.

    Args:
        x: 2-D input tensor; ``x.shape[1]`` is used as the input width.
        output_dim: number of output units.
        scope: variable scope (as passed to ``tf.variable_scope``) that
            holds this layer's variables.

    Returns:
        The tensor ``tf.matmul(x, w) + b``.
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        weights = tf.get_variable(
            name="weights",
            shape=[x.shape[1], output_dim],
            initializer=tf.random_normal_initializer(),
        )
        biases = tf.get_variable(
            name="biases",
            shape=[output_dim],
            initializer=tf.constant_initializer(0.0),
        )
        return tf.matmul(x, weights) + biases

def two_hidden_layers(x):
    """Stack two fully connected layers (50 units, then 10 units) on ``x``.

    Args:
        x: input tensor passed to the first ``fully_connected`` layer.

    Returns:
        The output of the second layer, i.e. the logits. Callers assign the
        result (``logits1 = two_hidden_layers(x1)``), so it must be returned.
    """
    h1 = fully_connected(x, 50, 'h1')
    h2 = fully_connected(h1, 10, 'h2')
    return h2

with tf.variable_scope('two_layers') as scope:
    logits1 = two_hidden_layers(x1)
    logits2 = two_hidden_layers(x2)
```


# Graph collections
As you create a model, you might put your variables in different parts of the graph,
so you’d want an easy way to access them

```

tf.get_collection(
    key,
    scope=None
)

```
## tf.get_collection lets you access a certain collection of variables: `key` is the name of the collection and `scope` is the scope of the variables.

By default, all variables are placed in tf.GraphKeys.GLOBAL_VARIABLES. 

To get all variables in scope “my_scope”, simply make the call below. It returns a list of the variables in “my_scope”.

```
tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='my_scope')
```

If you set trainable=True (which is the default) when you create your variable, 

that variable will be in the collection tf.GraphKeys.TRAINABLE_VARIABLES. 



# Manage Experiments

tf.train.Saver
saves the graph’s variables in binary files

Saves sessions, not graphs!

```
tf.train.Saver.save(sess, save_path, global_step=None...)
tf.train.Saver.restore(sess, save_path)

```
## Save parameters after 1000 steps
```
# define model
model = SkipGramModel(params)

# create a saver object
saver = tf.train.Saver()

with tf.Session() as sess:
	for step in range(training_steps): 
		sess.run([optimizer])
		
		# save model every 1000 steps
		if (step + 1) % 1000 == 0:
			saver.save(sess,  'checkpoint_directory/model_name', global_step=step)

```
## Specify the step at which the model is saved
```
# define model
model = SkipGramModel(params)

# create a saver object
saver = tf.train.Saver()

with tf.Session() as sess:
	for step in range(training_steps): 
		sess.run([optimizer])
		
		# save model every 1000 steps
		if (step + 1) % 1000 == 0:
			saver.save(sess,  'checkpoint_directory/model_name', global_step=step)

```
## Global step
Very common in TensorFlow programs

```
# Non-trainable counter of training steps; defined once so that the graph
# contains a single variable named 'global_step'.
global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

# Passing global_step to minimize() makes the optimizer increment it on
# every training step.
optimizer = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)

```
Need to tell optimizer to increment global step

This can also help your optimizer know when to decay learning rate

### tf.train.Saver
Only saves variables, not the graph

Checkpoints map variable names to tensors

```
# Can also choose to save certain variables
v1 = tf.Variable(..., name='v1') 
v2 = tf.Variable(..., name='v2') 

# You can save your variables in one of three ways:
saver = tf.train.Saver({'v1': v1, 'v2': v2})
saver = tf.train.Saver([v1, v2])
saver = tf.train.Saver({v.op.name: v for v in [v1, v2]}) # similar to a dict

```
## Restore variables

Still need to first build graph

```
# Load the variable values stored in the given checkpoint into the session.
saver.restore(sess, 'checkpoints/name_of_the_checkpoint')

# e.g. saver.restore(sess, 'checkpoints/skip-gram-99999')

```
### Restore the latest checkpoint


```
# check if there is checkpoint

ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))

# check if there is a valid checkpoint path

if ckpt and ckpt.model_checkpoint_path:
     saver.restore(sess, ckpt.model_checkpoint_path)


```
1. The checkpoint file keeps track of the latest checkpoint

2. Restore checkpoints only when there is a valid checkpoint path






# tf.summary

Why matplotlib when you can summarize?

Visualize our summary statistics during our training

tf.summary.scalar

tf.summary.histogram

tf.summary.image


---

## Step 1: create summaries


```
with tf.name_scope("summaries"):
    tf.summary.scalar("loss", self.loss)
    tf.summary.scalar("accuracy", self.accuracy)            
    tf.summary.histogram("histogram loss", self.loss)
    summary_op = tf.summary.merge_all()

```
merge them all into one summary op to make managing them easier


---
## Step 2: run them
Like everything else in TF, summaries are ops. 
For the summaries to be built, you have to run them in a session

```
loss_batch, _, summary = sess.run([loss, optimizer, summary_op])

```


---

## Step 3: write summaries to file
Need global step here so the model knows what summary corresponds to what step

```
writer.add_summary(summary, global_step=step)
```
---

## Putting it together
```
tf.summary.scalar("loss", self.loss)
tf.summary.histogram("histogram loss", self.loss)
summary_op = tf.summary.merge_all()

saver = tf.train.Saver() # defaults to saving all variables

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)

    writer = tf.summary.FileWriter('./graphs', sess.graph)
    for index in range(10000):
        ...
        loss_batch, _, summary = sess.run([loss, optimizer, summary_op])
        writer.add_summary(summary, global_step=index)

        if (index + 1) % 1000 == 0:
            saver.save(sess, 'checkpoints/skip-gram', index)

```







# Control Randomization

## Op level random seed

```
my_var = tf.Variable(tf.truncated_normal((-1.0,1.0), stddev=0.1, seed=0)) # seed
```
## Sessions keep track of random state
Each new session restarts the random state

```
c = tf.random_uniform([], -10, 10, seed=2)

with tf.Session() as sess:
	print(sess.run(c)) # >> 3.57493
	print(sess.run(c)) # >> -5.97319

# --------------------------------------------
c = tf.random_uniform([], -10, 10, seed=2)

with tf.Session() as sess:
	print(sess.run(c)) # >> 3.57493

with tf.Session() as sess:
	print(sess.run(c)) # >> 3.57493

```
## Op level seed: each op keeps its own seed

```
# c and d are two separate ops created with the same op-level seed.
c = tf.random_uniform([], -10, 10, seed=2)
d = tf.random_uniform([], -10, 10, seed=2)

with tf.Session() as sess:
	print(sess.run(c)) # >> 3.57493
	print(sess.run(d)) # >> 3.57493

```
## Graph level seed
Note that the result is different from op-level seed

```
tf.set_random_seed(2) # graph-level seed; c and d below are given no op-level seed
c = tf.random_uniform([], -10, 10)
d = tf.random_uniform([], -10, 10)

with tf.Session() as sess:
    print(sess.run(c)) # >> -4.00752
    print(sess.run(d)) # >> -2.98339

```





# Autodiff

## Where are the gradients?
## TensorFlow builds the backward path for you!
## Reverse mode automatic differentiation
The computation graph makes computing symbolic gradients straightforward

Chain rule

# tf.gradients(y, [xs])
Take derivative of y with respect to each tensor in the list [xs]

```
x = tf.Variable(2.0)
y = 2.0 * (x ** 3)
z = 3.0 + y ** 2
grad_z = tf.gradients(z, [x, y])
with tf.Session() as sess:
	sess.run(x.initializer)
	print(sess.run(grad_z)) # >> [768.0, 32.0]
# 768 is the gradient of z with respect to x, 32 with respect to y

```
## Gradient Computation

```
tf.gradients(ys, xs, grad_ys=None, ...)
tf.stop_gradient(input, name=None)

# tf.stop_gradient (above) prevents the contribution of its inputs from being taken into account when computing gradients
tf.clip_by_value(t, clip_value_min, clip_value_max, name=None)
tf.clip_by_norm(t, clip_norm, axes=None, name=None)

```
## Should I still learn to take gradients?
