@@ -276,7 +276,7 @@ def apply_momentum(updates, params=None, momentum=0.9):
276276 for param in params :
277277 value = param .get_value (borrow = True )
278278 velocity = aesara .shared (
279- np .zeros (value .shape , dtype = value .dtype ), broadcastable = param .broadcastable
279+ np .zeros (value .shape , dtype = value .dtype ), shape = param .broadcastable
280280 )
281281 x = momentum * velocity + updates [param ]
282282 updates [velocity ] = x - param
@@ -391,7 +391,7 @@ def apply_nesterov_momentum(updates, params=None, momentum=0.9):
391391 for param in params :
392392 value = param .get_value (borrow = True )
393393 velocity = aesara .shared (
394- np .zeros (value .shape , dtype = value .dtype ), broadcastable = param .broadcastable
394+ np .zeros (value .shape , dtype = value .dtype ), shape = param .broadcastable
395395 )
396396 x = momentum * velocity + updates [param ] - param
397397 updates [velocity ] = x
@@ -534,9 +534,7 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6):
534534
535535 for param , grad in zip (params , grads ):
536536 value = param .get_value (borrow = True )
537- accu = aesara .shared (
538- np .zeros (value .shape , dtype = value .dtype ), broadcastable = param .broadcastable
539- )
537+ accu = aesara .shared (np .zeros (value .shape , dtype = value .dtype ), shape = param .broadcastable )
540538 accu_new = accu + grad ** 2
541539 updates [accu ] = accu_new
542540 updates [param ] = param - (learning_rate * grad / at .sqrt (accu_new + epsilon ))
@@ -662,9 +660,7 @@ def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon
662660
663661 for param , grad in zip (params , grads ):
664662 value = param .get_value (borrow = True )
665- accu = aesara .shared (
666- np .zeros (value .shape , dtype = value .dtype ), broadcastable = param .broadcastable
667- )
663+ accu = aesara .shared (np .zeros (value .shape , dtype = value .dtype ), shape = param .broadcastable )
668664 accu_new = rho * accu + (one - rho ) * grad ** 2
669665 updates [accu ] = accu_new
670666 updates [param ] = param - (learning_rate * grad / at .sqrt (accu_new + epsilon ))
@@ -755,12 +751,10 @@ def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsil
755751 for param , grad in zip (params , grads ):
756752 value = param .get_value (borrow = True )
757753 # accu: accumulate gradient magnitudes
758- accu = aesara .shared (
759- np .zeros (value .shape , dtype = value .dtype ), broadcastable = param .broadcastable
760- )
754+ accu = aesara .shared (np .zeros (value .shape , dtype = value .dtype ), shape = param .broadcastable )
761755 # delta_accu: accumulate update magnitudes (recursively!)
762756 delta_accu = aesara .shared (
763- np .zeros (value .shape , dtype = value .dtype ), broadcastable = param .broadcastable
757+ np .zeros (value .shape , dtype = value .dtype ), shape = param .broadcastable
764758 )
765759
766760 # update accu (as in rmsprop)
@@ -850,12 +844,8 @@ def adam(
850844
851845 for param , g_t in zip (params , all_grads ):
852846 value = param .get_value (borrow = True )
853- m_prev = aesara .shared (
854- np .zeros (value .shape , dtype = value .dtype ), broadcastable = param .broadcastable
855- )
856- v_prev = aesara .shared (
857- np .zeros (value .shape , dtype = value .dtype ), broadcastable = param .broadcastable
858- )
847+ m_prev = aesara .shared (np .zeros (value .shape , dtype = value .dtype ), shape = param .broadcastable )
848+ v_prev = aesara .shared (np .zeros (value .shape , dtype = value .dtype ), shape = param .broadcastable )
859849
860850 m_t = beta1 * m_prev + (one - beta1 ) * g_t
861851 v_t = beta2 * v_prev + (one - beta2 ) * g_t ** 2
@@ -938,12 +928,8 @@ def adamax(
938928
939929 for param , g_t in zip (params , all_grads ):
940930 value = param .get_value (borrow = True )
941- m_prev = aesara .shared (
942- np .zeros (value .shape , dtype = value .dtype ), broadcastable = param .broadcastable
943- )
944- u_prev = aesara .shared (
945- np .zeros (value .shape , dtype = value .dtype ), broadcastable = param .broadcastable
946- )
931+ m_prev = aesara .shared (np .zeros (value .shape , dtype = value .dtype ), shape = param .broadcastable )
932+ u_prev = aesara .shared (np .zeros (value .shape , dtype = value .dtype ), shape = param .broadcastable )
947933
948934 m_t = beta1 * m_prev + (one - beta1 ) * g_t
949935 u_t = at .maximum (beta2 * u_prev , abs (g_t ))
0 commit comments