# 1- Linear Regression:Normal Equation

In [1]:
import numpy as np
import tensorflow as tf

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [2]:
# Data load: 
from sklearn.datasets import fetch_california_housing
housing=fetch_california_housing()
housing["data"].shape # equal: housing.data.shape

(20640, 8)

In [3]:
housing.DESCR

'.. _california_housing_dataset:\n\nCalifornia Housing dataset\n--------------------------\n\n**Data Set Characteristics:**\n\n    :Number of Instances: 20640\n\n    :Number of Attributes: 8 numeric, predictive attributes and the target\n\n    :Attribute Information:\n        - MedInc        median income in block\n        - HouseAge      median house age in block\n        - AveRooms      average number of rooms\n        - AveBedrms     average number of bedrooms\n        - Population    block population\n        - AveOccup      average house occupancy\n        - Latitude      house block latitude\n        - Longitude     house block longitude\n\n    :Missing Attribute Values: None\n\nThis dataset was obtained from the StatLib repository.\nhttp://lib.stat.cmu.edu/datasets/\n\nThe target variable is the median house value for California districts.\n\nThis dataset was derived from the 1990 U.S. census, using one row per census\nblock group. A block group is the smallest geographical unit

In [4]:
housing.target
#!: tableau a 1 seule dim:=> le transformer en 1! vecteur colonne

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

In [5]:
m,n=housing.data.shape
housing_data_plus_bias=np.c_[np.ones((m,1)),housing.data] #Ajout de B0 sur ttes les lignes

X=tf.constant(housing_data_plus_bias,dtype=tf.float32, name="X")
y=tf.constant(housing.target.reshape(-1,1),dtype=tf.float32,name="y")
XT=tf.transpose(X)
theta=tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT,X)),XT),y)
with tf.Session() as sess:
    theta_val=theta.eval()
print(theta_val)

[[-3.7185181e+01]
 [ 4.3633747e-01]
 [ 9.3952334e-03]
 [-1.0711310e-01]
 [ 6.4479220e-01]
 [-4.0338000e-06]
 [-3.7813708e-03]
 [-4.2348403e-01]
 [-4.3721911e-01]]


# 2 - Batch Gradient Descent 

In [12]:
# normalization of data:
from sklearn.preprocessing import StandardScaler
Scaler=StandardScaler()
scaled_housing_data=Scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias=np.c_[np.ones((m,1)),scaled_housing_data]

In [7]:
reset_graph()

n_epochs=1000
learning_rate=0.01

# Variables Build
X=tf.constant(scaled_housing_data_plus_bias,dtype=tf.float32,name="X")
y=tf.constant(housing.target.reshape(-1,1),dtype=tf.float32,name="y")
theta=tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0),name="theta") # cree 1 node

#instruction Build
y_pred=tf.matmul(X,theta,name="predictions")
error=y_pred - y 
mse=tf.reduce_mean(tf.square(error),name="mse")
gradients=2/m * tf.matmul(tf.transpose(X),error)# Gradients calcules!!!!
training_op=tf.assign(theta,theta - learning_rate * gradients) #assign function cree 1 node

#passons a l'execution:
init=tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch: ",epoch,"MSE: ",mse.eval())
        training_op.eval()# OR, sess.run(training_op)
    best_theta=theta.eval()

Epoch:  0 MSE:  12.408011
Epoch:  100 MSE:  0.755197
Epoch:  200 MSE:  0.5420873
Epoch:  300 MSE:  0.5331699
Epoch:  400 MSE:  0.5305383
Epoch:  500 MSE:  0.5287961
Epoch:  600 MSE:  0.52754897
Epoch:  700 MSE:  0.5266499
Epoch:  800 MSE:  0.52600086
Epoch:  900 MSE:  0.5255331


In [8]:
best_theta

array([[ 2.0685523e+00],
       [ 8.1063598e-01],
       [ 1.2685776e-01],
       [-2.0784086e-01],
       [ 2.4839850e-01],
       [-1.3083885e-03],
       [-3.9607048e-02],
       [-8.5861266e-01],
       [-8.2600272e-01]], dtype=float32)

# 3 - Automatic Differentiation:

In [9]:
reset_graph()

n_epochs=1000
learning_rate=0.01

# Variables Build
X=tf.constant(scaled_housing_data_plus_bias,dtype=tf.float32,name="X")
y=tf.constant(housing.target.reshape(-1,1),dtype=tf.float32,name="y")
theta=tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0),name="theta") # cree 1 node

#instruction Build
y_pred=tf.matmul(X,theta,name="predictions")
error=y_pred - y 
mse=tf.reduce_mean(tf.square(error),name="mse")

In [10]:
#Gradient: fourni par tensorflow, vecteur a 9 composantes
gradients=tf.gradients(mse,[theta])[0]

In [11]:
training_op=tf.assign(theta,theta - learning_rate * gradients)
#passons a l'execution:
init=tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch: ",epoch,"MSE: ",mse.eval())
        training_op.eval()# OR, sess.run(training_op)
    best_theta=theta.eval()
print("Best theta:")
print(best_theta)

Epoch:  0 MSE:  12.408011
Epoch:  100 MSE:  0.75519687
Epoch:  200 MSE:  0.5420873
Epoch:  300 MSE:  0.5331699
Epoch:  400 MSE:  0.5305383
Epoch:  500 MSE:  0.5287961
Epoch:  600 MSE:  0.52754897
Epoch:  700 MSE:  0.52664983
Epoch:  800 MSE:  0.52600086
Epoch:  900 MSE:  0.5255331
Best theta:
[[ 2.0685525e+00]
 [ 8.1063598e-01]
 [ 1.2685777e-01]
 [-2.0784083e-01]
 [ 2.4839847e-01]
 [-1.3083883e-03]
 [-3.9607048e-02]
 [-8.5861266e-01]
 [-8.2600272e-01]]


# 4 - tensorflow GD Optimizer:

In [12]:
reset_graph()

n_epochs=1000
learning_rate=0.01

# Variables Build
X=tf.constant(scaled_housing_data_plus_bias,dtype=tf.float32,name="X")
y=tf.constant(housing.target.reshape(-1,1),dtype=tf.float32,name="y")
theta=tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0),name="theta") # cree 1 node

#instruction Build
y_pred=tf.matmul(X,theta,name="predictions")
error=y_pred - y 
mse=tf.reduce_mean(tf.square(error),name="mse")

In [13]:
# Definition de l'objet fonction d'optimization:
optimizer=tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
# minimize objectif function: 
training_op=optimizer.minimize(mse)

In [14]:
#passons a l'execution:
init=tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch: ",epoch,"MSE: ",mse.eval())
        sess.run(training_op) #!! plus de fonction eval().
    best_theta=theta.eval()
print("Best theta:")
print(best_theta)

Epoch:  0 MSE:  12.408011
Epoch:  100 MSE:  0.75519687
Epoch:  200 MSE:  0.5420873
Epoch:  300 MSE:  0.5331699
Epoch:  400 MSE:  0.5305383
Epoch:  500 MSE:  0.5287961
Epoch:  600 MSE:  0.52754897
Epoch:  700 MSE:  0.52664983
Epoch:  800 MSE:  0.52600086
Epoch:  900 MSE:  0.5255331
Best theta:
[[ 2.0685525e+00]
 [ 8.1063598e-01]
 [ 1.2685777e-01]
 [-2.0784083e-01]
 [ 2.4839847e-01]
 [-1.3083883e-03]
 [-3.9607048e-02]
 [-8.5861266e-01]
 [-8.2600272e-01]]


# 5 - tensorflow momentum optimizer:

In [15]:
reset_graph()

n_epochs=1000
learning_rate=0.01

# Variables Build
X=tf.constant(scaled_housing_data_plus_bias,dtype=tf.float32,name="X")
y=tf.constant(housing.target.reshape(-1,1),dtype=tf.float32,name="y")
theta=tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0),name="theta") # cree 1 node

#instruction Build
y_pred=tf.matmul(X,theta,name="predictions")
error=y_pred - y 
mse=tf.reduce_mean(tf.square(error),name="mse")

In [16]:
# Definition de l'objet fonction d'optimization avec l'inertie:
optimizer=tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
# minimize objectif function: 
training_op=optimizer.minimize(mse)

In [17]:
#passons a l'execution:
init=tf.global_variables_initializer()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        if epoch % 100 == 0:
            print("Epoch: ",epoch,"MSE: ",mse.eval())
        sess.run(training_op) #!! plus de fonction eval().
    best_theta=theta.eval()
print("Best theta:")
print(best_theta)

Epoch:  0 MSE:  12.408011
Epoch:  100 MSE:  0.5252007
Epoch:  200 MSE:  0.5243332
Epoch:  300 MSE:  0.5243213
Epoch:  400 MSE:  0.52432084
Epoch:  500 MSE:  0.524321
Epoch:  600 MSE:  0.5243211
Epoch:  700 MSE:  0.52432096
Epoch:  800 MSE:  0.52432096
Epoch:  900 MSE:  0.52432096
Best theta:
[[ 2.068558  ]
 [ 0.8296182 ]
 [ 0.11875144]
 [-0.26552498]
 [ 0.3056947 ]
 [-0.00450307]
 [-0.03932622]
 [-0.89988816]
 [-0.8705434 ]]


# 6 - Substitution nodes:

In [18]:
# call placeholder function:
A = tf.placeholder(tf.float32,shape=(None,3))
B = A + 5
# pour l'evaluation: on passe a eval() de B, un feed_dict qui precise la valeur de A
with tf.Session():
    B_val1=B.eval(feed_dict={A:[[1,2,3]]})
    B_val2=B.eval(feed_dict={A:[[4,5,6],[7,8,9]]})
print("B_val1: ")
print(B_val1)
print("\n")
print("B_val2: ")
print(B_val2)

B_val1: 
[[6. 7. 8.]]


B_val2: 
[[ 9. 10. 11.]
 [12. 13. 14.]]


# 7 - Mini-batch Gradient Descent:

In [19]:
reset_graph()

learning_rate=0.01
n_epochs=10
batch_size=100 #ce qui fait epochs total:1000
n_batches=int(np.ceil(m/batch_size))

# Variables Build
X=tf.placeholder(tf.float32,shape=(None,n+1),name="X")
y=tf.placeholder(tf.float32,shape=(None,1),name="y")
theta=tf.Variable(tf.random_uniform([n+1,1],-1.0,1.0),name="theta") # cree 1 node

#instruction Build
y_pred=tf.matmul(X,theta,name="predictions")
error=y_pred - y 
mse=tf.reduce_mean(tf.square(error),name="mse")
optimizer=tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op=optimizer.minimize(mse)
init=tf.global_variables_initializer()

In [20]:
def fetch_batch(epoch,batch_index,batch_size):
    np.random.seed(epoch * n_batches + batch_index) #eviter les memes lignes
    indices=np.random.randint(m,size=batch_size)
    X_batch=scaled_housing_data_plus_bias[indices]
    y_batch=housing.target.reshape(-1,1)[indices]
    return X_batch, y_batch
#Execution: 
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch,y_batch=fetch_batch(epoch,batch_index,batch_size) #input
            sess.run(training_op,feed_dict={X:X_batch,y:y_batch}) #feed_dict: passer les inputs
    best_theta=theta.eval() #theta n'a besoin de rien pour son evaluation;
    
print("Best_theta: ")
print(best_theta)   

Best_theta: 
[[ 2.0701346 ]
 [ 0.8322434 ]
 [ 0.11718354]
 [-0.25360975]
 [ 0.33584762]
 [ 0.00311589]
 [-0.01102609]
 [-0.90628386]
 [-0.8712015 ]]


# 8 - Save/restore models

In [21]:
reset_graph()

n_epochs = 1000                                                                      
learning_rate = 0.01                                                                  

X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")            
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")            
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")                                      
error = y_pred - y                                                                    
mse = tf.reduce_mean(tf.square(error), name="mse")                                    
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)           
training_op = optimizer.minimize(mse)

init=tf.global_variables_initializer()
# Creation du Noeud Saver a la fin de la phase de construction
# Apres la creation de tous les noeuds et variables !!!
saver=tf.train.Saver() #ici, toutes vars et nodes sont sauvegardes:

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        if epoch % 100 == 0 :
            print("Epoch: ",epoch,"MSE: ",mse.eval())
            save_path=saver.save(sess,"./tmp/my_regmodel.ckpt") #save every 100 iters
        sess.run(training_op)
    best_theta=theta.eval()
    save_path=saver.save(sess,"./tmp/my_final_model.ckpt")

Epoch:  0 MSE:  9.161543
Epoch:  100 MSE:  0.7145006
Epoch:  200 MSE:  0.566705
Epoch:  300 MSE:  0.5555719
Epoch:  400 MSE:  0.5488112
Epoch:  500 MSE:  0.5436362
Epoch:  600 MSE:  0.5396294
Epoch:  700 MSE:  0.5365092
Epoch:  800 MSE:  0.5340678
Epoch:  900 MSE:  0.5321474


In [22]:
best_theta

array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.6614528 ],
       [-0.6375277 ]], dtype=float32)

In [23]:
# Restore the model:

with tf.Session() as sess:
    saver.restore(sess,"./tmp/my_final_model.ckpt")
    best_theta_restored=theta.eval()

INFO:tensorflow:Restoring parameters from ./tmp/my_final_model.ckpt


In [24]:
np.allclose(best_theta,best_theta_restored)

True

On peut preciser la variable a sauvegarder avec son nom:

In [25]:
saver=tf.train.Saver({"weights":theta})

L'objet Saver enregistre aussi un graphe qui a le nom du fichier sauvegarder + l'extension .meta
on peut le charger avec le graphe par defaut, comme suit: 

In [26]:
reset_graph() # graph vide mntnt
saver=tf.train.import_meta_graph("./tmp/my_final_model.ckpt.meta") # on load la structure du graph
theta=tf.get_default_graph().get_tensor_by_name("theta:0")

with tf.Session() as sess: 
    saver.restore(sess,"./tmp/my_final_model.ckpt") #restore l'etat du graph
    best_theta_restored=theta.eval()

best_theta_restored

INFO:tensorflow:Restoring parameters from ./tmp/my_final_model.ckpt


array([[ 2.0685525 ],
       [ 0.8874027 ],
       [ 0.14401658],
       [-0.34770882],
       [ 0.36178368],
       [ 0.00393811],
       [-0.04269556],
       [-0.6614528 ],
       [-0.6375277 ]], dtype=float32)

In [27]:
np.allclose(best_theta,best_theta_restored)
#on peut tjrs loader un model sans pour autant le code qui le build

True

# 9 -  Visualisation du Graph:

## Using Tensorboard 

In [9]:
from datetime import datetime
now=datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir="tf_logs"
logdir="{}/run-{}".format(root_logdir,now)

                                                              
learning_rate = 0.01    
n_epochs = 10
batch_size = 100
m,n=housing.data.shape
n_batches = int(np.ceil(m / batch_size))
housing_data_plus_bias=np.c_[np.ones((m,1)),housing.data]

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")            
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")                                      
error = y_pred - y                                                                    
mse = tf.reduce_mean(tf.square(error), name="mse")                                    
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)           
training_op = optimizer.minimize(mse)

init=tf.global_variables_initializer()

In [10]:
mse_summary=tf.summary.scalar("MSE",mse)
file_writer=tf.summary.FileWriter(logdir,tf.get_default_graph())

In [17]:
def fetch_batch(epoch,batch_index,batch_size):
    np.random.seed(epoch * n_batches + batch_index) #eviter les memes lignes
    indices=np.random.randint(m,size=batch_size)
    X_batch=scaled_housing_data_plus_bias[indices]
    y_batch=housing.target.reshape(-1,1)[indices]
    return X_batch, y_batch
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch,y_batch=fetch_batch(epoch,batch_index,batch_size) #input
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X:X_batch,y:y_batch})
                step=epoch * n_batches + batch_index
                file_writer.add_summary(summary_str,step)
            sess.run(training_op,feed_dict={X:X_batch,y:y_batch}) #feed_dict: passer les inputs
    best_theta=theta.eval() #theta n'a besoin de rien pour son evaluation;
    
print("Best_theta: ")
print(best_theta)

Best_theta: 
[[ 2.070016  ]
 [ 0.8204561 ]
 [ 0.1173173 ]
 [-0.22739051]
 [ 0.3113402 ]
 [ 0.00353193]
 [-0.01126994]
 [-0.91643935]
 [-0.8795008 ]]


In [16]:
file_writer.close()

## name scope: 

In [14]:
learning_rate = 0.01    
n_epochs = 10
batch_size = 100
m,n=housing.data.shape
n_batches = int(np.ceil(m / batch_size))
housing_data_plus_bias=np.c_[np.ones((m,1)),housing.data]
# normalization of data:
from sklearn.preprocessing import StandardScaler
Scaler=StandardScaler()
scaled_housing_data=Scaler.fit_transform(housing.data)
scaled_housing_data_plus_bias=np.c_[np.ones((m,1)),scaled_housing_data]

In [15]:
reset_graph()
from datetime import datetime
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

n_epochs = 1000
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

In [16]:
# Definition du name scope:
with tf.name_scope("loss") as scope:
    error = y_pred - y                                                                    
    mse = tf.reduce_mean(tf.square(error), name="mse") 

In [17]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)           
training_op = optimizer.minimize(mse)

init=tf.global_variables_initializer()
mse_summary=tf.summary.scalar("MSE",mse)
file_writer=tf.summary.FileWriter(logdir,tf.get_default_graph())
def fetch_batch(epoch,batch_index,batch_size):
    np.random.seed(epoch * n_batches + batch_index) #eviter les memes lignes
    indices=np.random.randint(m,size=batch_size)
    X_batch=scaled_housing_data_plus_bias[indices]
    y_batch=housing.target.reshape(-1,1)[indices]
    return X_batch, y_batch
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for batch_index in range(n_batches):
            X_batch,y_batch=fetch_batch(epoch,batch_index,batch_size) #input
            if batch_index % 10 == 0:
                summary_str = mse_summary.eval(feed_dict={X:X_batch,y:y_batch})
                step=epoch * n_batches + batch_index
                file_writer.add_summary(summary_str,step)
            sess.run(training_op,feed_dict={X:X_batch,y:y_batch}) #feed_dict: passer les inputs
    best_theta=theta.eval() #theta n'a besoin de rien pour son evaluation;
    
print("Best_theta: ")
print(best_theta)

Best_theta: 
[[ 2.0714476 ]
 [ 0.8462012 ]
 [ 0.11558535]
 [-0.26835832]
 [ 0.32982782]
 [ 0.00608358]
 [ 0.07052915]
 [-0.87988573]
 [-0.8634251 ]]


In [18]:
print(error.op.name)
print(mse.op.name)

loss/sub
loss/mse


## modularite: 

In [19]:
#Penser au fonctions pour automatiser: 
# fonction de somme des tensors:
reset_graph()
def relu(X):
    w_shape=(int(X.get_shape()[1]),1)
    w=tf.Variable(tf.random_normal(w_shape),name="weights")
    b=tf.Variable(0.0,name="bias")
    z=tf.add(tf.matmul(X,w),b,name="z")
    return tf.maximum(z,0.,name="relu")

n_features=3
X=tf.placeholder(tf.float32,shape=(None,n_features),name="X")
relus=[relu(X) for i in range(5)]
output=tf.add_n(relus,name="output")

In [21]:
file_writer=tf.summary.FileWriter("logs/relu1",tf.get_default_graph())

In [24]:
#USING Name Scope:
reset_graph()
def relu(X):
    with tf.name_scope("relu") as scope:
        w_shape=(int(X.get_shape()[1]),1)
        w=tf.Variable(tf.random_normal(w_shape),name="weights")
        b=tf.Variable(0.0,name="bias")
        z=tf.add(tf.matmul(X,w),b,name="z")
        return tf.maximum(z,0.,name="relu")

n_features=3
X=tf.placeholder(tf.float32,shape=(None,n_features),name="X")
relus=[relu(X) for i in range(5)]
output=tf.add_n(relus,name="output")
file_writer=tf.summary.FileWriter("logs/relu2",tf.get_default_graph())

## Shared Variables: 

In [33]:
def relu(X):
    with tf.variable_scope("relu",reuse=True):

        #reutiliser la var existante
        threshold=tf.get_variable("threshold")
        w_shape=(int(X.get_shape()[1]),1)
        w=tf.Variable(tf.random_normal(w_shape),name="weights")
        b=tf.Variable(0.0,name="bias")
        z=tf.add(tf.matmul(X,w),b,name="z")
        return tf.maximum(z,0.,name="relu")

n_features=3
X=tf.placeholder(tf.float32,shape=(None,n_features),name="X")
with tf.variable_scope("relu",reuse=True):                                #Creation de la variable
    threshold = tf.get_variable("threshold",initializer=tf.constant(0.0))

relus=[relu(X) for relu_index in range(5)]
output=tf.add_n(relus,name="output")
file_writer=tf.summary.FileWriter("logs/relu3",tf.get_default_graph())
file_writer.close()

In [34]:
## Definir la variable thershold 1!!! seule fois dans la fonction ReLU:

In [43]:
reset_graph()
def relu(X):
    with tf.variable_scope("relu",reuse=True):
        threshold=tf.get_variable("threshold",initializer=tf.constant(0.0))
        w_shape=(int(X.get_shape()[1]),1)
        w=tf.Variable(tf.random_normal(w_shape),name="weights")
        b=tf.Variable(0.0,name="bias")
        z=tf.add(tf.matmul(X,w),b,name="z")
        return tf.maximum(z,0.,name="relu")

n_features=3
X=tf.placeholder(tf.float32,shape=(None,n_features),name="X")
relus = []
for relu_index in range(5):
    with tf.variable_scope("relu", reuse=(relu_index >= 1)) as scope:
        relus.append(relu(X))
output = tf.add_n(relus, name="output")
file_writer=tf.summary.FileWriter("logs/relu5",tf.get_default_graph())
file_writer.close()

ValueError: If initializer is a constant, do not specify shape.