# Training a NN for metric on CICY with homog

## Import the required packages/functions

In [1]:
import numpy as np
import gc
import sys
import os
import re
import logging
import pickle
import sys
#sys.path.append("/Users/kit/Documents/Phys_Working/MF metric")
#sys.path.append("/home/f/fraser-talientec/PhysicalYukawas")

logging.basicConfig(stream=sys.stdout)

from cymetric.pointgen.pointgen import PointGenerator
from cymetric.pointgen.nphelper import prepare_dataset, prepare_basis_pickle

import tensorflow as tf
import tensorflow.keras as tfk

tf.get_logger().setLevel('ERROR')


from cymetric.models.tfmodels import PhiFSModel, MultFSModel, FreeModel, MatrixFSModel, AddFSModel, PhiFSModelToric, MatrixFSModelToric
from cymetric.models.tfhelper import prepare_tf_basis, train_model
from cymetric.models.callbacks import SigmaCallback, KaehlerCallback, TransitionCallback, RicciCallback, VolkCallback, AlphaCallback
from cymetric.models.metrics import SigmaLoss, KaehlerLoss, TransitionLoss, RicciLoss, VolkLoss, TotalLoss

from NewCustomMetrics import *
from HarmonicFormModel import *
from BetaModel import *
from laplacian_funcs import *
from OneAndTwoFormsForLineBundles import *
from generate_and_train_all_nnsHOLO import *
from custom_networks import *
import sys
import importlib
from AlphaPrimeModel import *



## Point Cloud Generation

Set the properties of the defining polynomial and the point in Kähler moduli space.

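If correct, this should correspond to the following defining polynomial. (Note: the polynomial below is written in the coordinates $x_{i,0}, x_{i,1}$ of four $\mathbb{P}^1$ factors, i.e. it is a tetraquadric, whereas the code cell that follows sets up the Fermat quintic in $\mathbb{P}^4$; the expression appears to be left over from an earlier version of this notebook.)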
$0.44 x_{1,0}^2 x_{3,0}^2 x_{4,0}^2 x_{2,0}^2+0.88 x_{1,1}^2 x_{3,0}^2 x_{4,0}^2 x_{2,0}^2+0.88 x_{1,0}^2 x_{3,1}^2 x_{4,0}^2 x_{2,0}^2+0.88 x_{1,1}^2 x_{3,1}^2 x_{4,0}^2 x_{2,0}^2-0.03 x_{1,0} x_{1,1} x_{3,0} x_{3,1} x_{4,0}^2 x_{2,0}^2+0.44 x_{1,0}^2 x_{3,0}^2 x_{4,1}^2 x_{2,0}^2+0.44 x_{1,1}^2 x_{3,0}^2 x_{4,1}^2 x_{2,0}^2+0.88 x_{1,0}^2 x_{3,1}^2 x_{4,1}^2 x_{2,0}^2+0.44 x_{1,1}^2 x_{3,1}^2 x_{4,1}^2 x_{2,0}^2-0.41 x_{1,0} x_{1,1} x_{3,0} x_{3,1} x_{4,1}^2 x_{2,0}^2-0.41 x_{1,0} x_{1,1} x_{3,0}^2 x_{4,0} x_{4,1} x_{2,0}^2-0.03 x_{1,0} x_{1,1} x_{3,1}^2 x_{4,0} x_{4,1} x_{2,0}^2+0.62 x_{1,0}^2 x_{3,0} x_{3,1} x_{4,0} x_{4,1} x_{2,0}^2+0.62 x_{1,1}^2 x_{3,0} x_{3,1} x_{4,0} x_{4,1} x_{2,0}^2-0.62 x_{1,0} x_{1,1} x_{2,1} x_{3,0}^2 x_{4,0}^2 x_{2,0}-0.62 x_{1,0} x_{1,1} x_{2,1} x_{3,1}^2 x_{4,0}^2 x_{2,0}+0.41 x_{1,0}^2 x_{2,1} x_{3,0} x_{3,1} x_{4,0}^2 x_{2,0}+0.03 x_{1,1}^2 x_{2,1} x_{3,0} x_{3,1} x_{4,0}^2 x_{2,0}-0.62 x_{1,0} x_{1,1} x_{2,1} x_{3,0}^2 x_{4,1}^2 x_{2,0}-0.62 x_{1,0} x_{1,1} x_{2,1} x_{3,1}^2 x_{4,1}^2 x_{2,0}+0.03 x_{1,0}^2 x_{2,1} x_{3,0} x_{3,1} x_{4,1}^2 x_{2,0}+0.41 x_{1,1}^2 x_{2,1} x_{3,0} x_{3,1} x_{4,1}^2 x_{2,0}+0.41 x_{1,0}^2 x_{2,1} x_{3,0}^2 x_{4,0} x_{4,1} x_{2,0}+0.03 x_{1,1}^2 x_{2,1} x_{3,0}^2 x_{4,0} x_{4,1} x_{2,0}+0.03 x_{1,0}^2 x_{2,1} x_{3,1}^2 x_{4,0} x_{4,1} x_{2,0}+0.41 x_{1,1}^2 x_{2,1} x_{3,1}^2 x_{4,0} x_{4,1} x_{2,0}+0.9 x_{1,0} x_{1,1} x_{2,1} x_{3,0} x_{3,1} x_{4,0} x_{4,1} x_{2,0}+0.44 x_{1,0}^2 x_{2,1}^2 x_{3,0}^2 x_{4,0}^2+0.88 x_{1,1}^2 x_{2,1}^2 x_{3,0}^2 x_{4,0}^2+0.44 x_{1,0}^2 x_{2,1}^2 x_{3,1}^2 x_{4,0}^2+0.44 x_{1,1}^2 x_{2,1}^2 x_{3,1}^2 x_{4,0}^2-0.41 x_{1,0} x_{1,1} x_{2,1}^2 x_{3,0} x_{3,1} x_{4,0}^2+0.88 x_{1,0}^2 x_{2,1}^2 x_{3,0}^2 x_{4,1}^2+0.88 x_{1,1}^2 x_{2,1}^2 x_{3,0}^2 x_{4,1}^2+0.88 x_{1,0}^2 x_{2,1}^2 x_{3,1}^2 x_{4,1}^2+0.44 x_{1,1}^2 x_{2,1}^2 x_{3,1}^2 x_{4,1}^2-0.03 x_{1,0} x_{1,1} x_{2,1}^2 x_{3,0} x_{3,1} x_{4,1}^2-0.03 x_{1,0} x_{1,1} x_{2,1}^2 x_{3,0}^2 x_{4,0} x_{4,1}-0.41 x_{1,0} 
x_{1,1} x_{2,1}^2 x_{3,1}^2 x_{4,0} x_{4,1}+0.62 x_{1,0}^2 x_{2,1}^2 x_{3,0} x_{3,1} x_{4,0} x_{4,1}+0.62 x_{1,1}^2 x_{2,1}^2 x_{3,0} x_{3,1} x_{4,0} x_{4,1}$

In [2]:
# Defining data handed to cymetric's PointGenerator further down.
# The settings below describe the Fermat quintic: one degree-5 monomial per
# homogeneous coordinate of a single P^4 factor, each with unit coefficient.
nameofmanifold = "Quintic"            # used as a prefix for all data directories
ambientTQ = np.array([4])             # ambient space: one projective factor of dimension 4
kmoduliTQ = np.ones(1)                # one Kaehler parameter, set to 1
monomialsTQ = np.eye(5, dtype=np.int64) * 5
coefficientsTQ = np.ones(monomialsTQ.shape[0])




def generate_points_and_save_using_defaults(free_coefficient,number_points,force_generate=False,seed_set=0):
    """Make sure a point-cloud dataset and basis exist on disk for this run.

    The manifold is described by the module-level ``monomialsTQ``,
    ``coefficientsTQ``, ``kmoduliTQ`` and ``ambientTQ``. ``free_coefficient``
    is only used to build the directory name, so that different runs do not
    overwrite each other; it does not enter the polynomial.

    Args:
        free_coefficient: label appended to the data directory name.
        number_points: number of points requested. An existing dataset is
            reused only if ``len(X_train) + len(X_val)`` equals this value.
        force_generate: regenerate even if the directory already exists.
        seed_set: seed passed to the point generator.

    Returns:
        None. The dataset and basis are written into the data directory by
        the point generator.
    """
    pg = PointGenerator(monomialsTQ, coefficientsTQ, kmoduliTQ, ambientTQ)
    pg._set_seed(seed_set)
    dirname = 'dataAlphaP/'+nameofmanifold+ "_pg_with_" + str(free_coefficient)
    print("dirname: " + dirname)

    def _generate():
        # Write the dataset first; the basis needs the kappa it returns.
        kappa = pg.prepare_dataset(number_points, dirname)
        pg.prepare_basis(dirname, kappa=kappa)

    if force_generate or (not os.path.exists(dirname)):
        print("Generating: forced? " + str(force_generate))
        _generate()
        return

    # The directory exists: reuse its dataset only if it can be read and has
    # the requested size. Only the *loading* is guarded, so that a failure (or
    # a Ctrl-C) during generation is not mistaken for a load error and retried.
    try:
        print("loading prexisting dataset")
        with np.load(os.path.join(dirname, 'dataset.npz')) as data:
            existing_points = len(data['X_train']) + len(data['X_val'])
    except Exception:
        print("error loading - generating anyway")
        _generate()
        return

    if existing_points != number_points:
        print("wrong length - generating anyway")
        _generate()
   

def getcallbacksandmetrics(data):
    """Build the callbacks and custom metrics used when training the metric model.

    Args:
        data: mapping providing the validation arrays under the keys
            'X_val' and 'y_val'.

    Returns:
        ``(cb_list, cmetrics)``: the callbacks in the order sigma, Kaehler,
        transition, volk, and the metric objects in the order total, sigma,
        Kaehler, transition, volk. The Ricci callback and loss are left out.
    """
    validation_pair = (data['X_val'], data['y_val'])
    callback_types = (SigmaCallback, KaehlerCallback, TransitionCallback, VolkCallback)
    cb_list = [make_callback(validation_pair) for make_callback in callback_types]

    metric_types = (TotalLoss, SigmaLoss, KaehlerLoss, TransitionLoss, VolkLoss)
    cmetrics = [make_metric() for make_metric in metric_types]
    return cb_list, cmetrics

def train_and_save_nn(free_coefficient,nlayer=3,nHidden=128,nEpochs=50,stddev=0.1,bSizes=[192,50000],lRate=0.001,use_zero_network=False):
   """Train a PhiFSModel on the saved point cloud, save it, and print a loss report.

   The dataset and basis are read from the directory
   'dataAlphaP/<nameofmanifold>_pg_with_<free_coefficient>'. Two networks of
   identical shape are built: the one that is trained, and an all-zero
   reference network that is only evaluated. Validation losses are computed
   before training (zero and untrained network) and after training, and their
   ratios are printed.

   Args:
      free_coefficient: label of the run; only used in the directory name.
      nlayer: number of hidden layers.
      nHidden: width of each hidden layer.
      nEpochs: number of epochs passed to train_model.
      stddev: passed to BiholoModelFuncGENERAL for the trained network.
      bSizes: two batch sizes, passed to train_model and used in the save name.
      lRate: learning rate of the Adam optimizer.
      use_zero_network: passed to BiholoModelFuncGENERAL for the trained network.

   Returns:
      (phimodel, training_history) as returned by train_model.

   Side effects:
      Saves the trained Keras model to <dirname>/<name> and the training
      history to <dirname>/trainingHistory-<name>.npz.
   """
   dirname = 'dataAlphaP/'+nameofmanifold+ "_pg_with_" + str(free_coefficient) 
   # The save name encodes epochs, both batch sizes, depth and width (not free_coefficient).
   name = 'phimodel_for_' + str(nEpochs) + '_' + str(bSizes[0]) + '_'+ str(bSizes[1]) + 's' + str(nlayer) + 'x' +str(nHidden)
   print('dirname: ' + dirname)
   print('name: ' + name)
   
   data = np.load(os.path.join(dirname, 'dataset.npz'))
   # NOTE(review): basis.pickle is unpickled (allow_pickle=True); only load files you generated yourself.
   BASIS = prepare_tf_basis(np.load(os.path.join(dirname, 'basis.pickle'), allow_pickle=True))

   cb_list, cmetrics = getcallbacksandmetrics(data)

   # `act` is not used below; it is only referenced by the commented-out make_nn calls.
   act = 'gelu'
   # Loss weights handed to PhiFSModel. Presumably ordered as
   # (sigma, Kaehler, transition, Ricci, volk) - TODO confirm against cymetric.
   alpha = [1., 1., 30., 1., 2.] # 1 AND 3??
   # Absolute value is taken before the integer cast (load_nn_phimodel casts directly).
   ambient=tf.cast(tf.math.abs(BASIS['AMBIENT']),tf.int32)

   #nfirstlayer=tf.reduce_prod(2*(np.array(ambient)+1)).numpy().item()
   # Width of the first layer: product over ambient factors of (dimension+1)^2.
   nfirstlayer=tf.reduce_prod((np.array(ambient)+1)**2).numpy().item()
   shapeofinternalnetwork=[nHidden]*nlayer
   shapeofnetwork=[nfirstlayer]+shapeofinternalnetwork+[1]

   print("network shape: " + str(shapeofnetwork))
   # Network to be trained, and a zero reference network of the same shape.
   nn_phi = BiholoModelFuncGENERAL(shapeofnetwork,BASIS,stddev=stddev,use_zero_network=use_zero_network)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
   #nn_phi=make_nn(10,1,nlayer,nHidden,act,use_zero_network=use_zero_network)
   nn_phi_zero =BiholoModelFuncGENERAL(shapeofnetwork,BASIS,use_zero_network=True)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
   # print(nn_phi_zero(tf.cast(data['X_val'][0:2],tf.float32)))
   #nn_phi_zero=make_nn(10,1,nlayer,nHidden,act,use_zero_network=True)
   phimodel = PhiFSModel(nn_phi, BASIS, alpha=alpha)
   phimodelzero = PhiFSModel(nn_phi_zero, BASIS, alpha=alpha)

   # The non-legacy Adam optimizer is used here, while train_and_save_nn_Alpha
   # uses the legacy one; the legacy variant is kept commented out below.
   opt = tfk.optimizers.Adam(learning_rate=lRate)
   #opt = tfk.optimizers.legacy.Adam(learning_rate=lRate)
   # compile so we can test on validation set before training
   phimodel.compile(custom_metrics=cmetrics)
   phimodelzero.compile(custom_metrics=cmetrics)

   ## compare validation loss before training for zero network and nonzero network
   datacasted=[tf.cast(data['X_val'],tf.float32),tf.cast(data['y_val'],tf.float32)]
   # Set the loss switches explicitly before the pre-training evaluation.
   # NOTE(review): learn_transition is switched OFF here but ON for the
   # post-training evaluation below (and in load_nn_phimodel), so the
   # transition terms of valzero/valraw and valfinal are presumably not
   # computed under the same setting - confirm this is intended.
   phimodel.learn_transition = False
   phimodelzero.learn_transition = False
   phimodel.learn_volk = True
   phimodelzero.learn_volk = True
   #phimodel.learn_ricci_val= True
   #phimodelzero.learn_ricci_val= True
   valzero=phimodelzero.test_step(datacasted)
   valraw=phimodel.test_step(datacasted)
   # phimodel.learn_ricci_val=False 
   # phimodelzero.learn_ricci_val=False 
   # Convert the returned tensors to plain numpy values for printing and ratios.
   valzero = {key: value.numpy() for key, value in valzero.items()}
   valraw = {key: value.numpy() for key, value in valraw.items()}

   phimodel, training_history = train_model(phimodel, data, optimizer=opt, epochs=nEpochs, batch_sizes=bSizes, 
                                       verbose=1, custom_metrics=cmetrics, callbacks=cb_list)
   print("finished training\n")
   # Persist the inner Keras model and the training history next to the dataset.
   phimodel.model.save(os.path.join(dirname, name))
   np.savez_compressed(os.path.join(dirname, 'trainingHistory-' + name),training_history)
   #now print the initial losses and final losses for each metric
   # first_metrics = {key: value[0] for key, value in training_history.items()}
   # lastometrics = {key: value[-1] for key, value in training_history.items()}
   # Post-training evaluation, with both the transition and volk terms switched on.
   phimodel.learn_transition = True
   phimodel.learn_volk = True
   #phimodel.learn_ricci_val= True
   valfinal=phimodel.test_step(datacasted)
   valfinal = {key: value.numpy() for key, value in valfinal.items()}
   #phimodel.learn_ricci_val=False 
   print("zero network validation loss: ")
   print(valzero)
   print("validation loss for raw network: ")
   print(valraw)
   print("validation loss for final network: ")
   print(valfinal)
   # The 1e-8 in the denominators guards against division by an exactly zero loss.
   print("ratio of final to zero: " + str({key + " ratio": value/(valzero[key]+1e-8) for key, value in valfinal.items()}))
   print("ratio of final to raw: " + str({key + " ratio": value/(valraw[key]+1e-8) for key, value in valfinal.items()}))

   # Only the first value returned by compute_transition_pointwise_measure is reported.
   averagediscrepancyinstdevs,_=compute_transition_pointwise_measure(phimodel,tf.cast(data["X_val"],tf.float32))
   print("average transition discrepancy in standard deviations: " + str(averagediscrepancyinstdevs))
   #IMPLEMENT THE FOLLOWING
   #meanfailuretosolveequation,_,_=measure_laplacian_failure(phimodel,data)
   print("\n\n")
   return phimodel,training_history

def load_nn_phimodel(free_coefficient,nlayer=3,nHidden=128,nEpochs=50,bSizes=[192,50000],stddev=0.1,lRate=0.001,set_weights_to_zero=False):
    """Rebuild the phi model, restore its saved weights, and print validation losses.

    The model is rebuilt with the same layer shape that train_and_save_nn
    uses, and the saved Keras model and training history are loaded from the
    run directory. A zero reference network of the same shape is evaluated
    alongside it and the ratio of the two sets of losses is printed.

    Args:
        free_coefficient: label of the run; only used in the directory name.
        nlayer, nHidden: depth and width of the hidden layers.
        nEpochs, bSizes: only used to reconstruct the name of the saved model.
        stddev: passed to BiholoModelFuncGENERAL for the placeholder network.
        lRate: accepted for signature parity with train_and_save_nn; unused here.
        set_weights_to_zero: if True nothing is loaded from disk; the
            placeholder network is kept and the history is returned as 0.

    Returns:
        (phimodel, training_history)
    """
    dirname = f"dataAlphaP/{nameofmanifold}_pg_with_{free_coefficient!s}"
    name = f"phimodel_for_{nEpochs!s}_{bSizes[0]!s}_{bSizes[1]!s}s{nlayer!s}x{nHidden!s}"
    print(dirname)
    print(name)

    dataset = np.load(os.path.join(dirname, 'dataset.npz'))
    pickled_basis = np.load(os.path.join(dirname, 'basis.pickle'), allow_pickle=True)
    BASIS = prepare_tf_basis(pickled_basis)

    # Only the metric objects are needed for evaluation; the callbacks are discarded.
    _, cmetrics = getcallbacksandmetrics(dataset)

    loss_weights = [1., 1., 30., 1., 2.]

    # First-layer width: product over ambient factors of (dimension+1)^2.
    ambient_dims = tf.cast(BASIS['AMBIENT'], tf.int32)
    first_width = tf.reduce_prod((np.array(ambient_dims) + 1) ** 2).numpy().item()
    layer_widths = [first_width] + [nHidden] * nlayer + [1]
    print("network shape: " + str(layer_widths))

    # Both networks start as zero networks; the first one is only a placeholder
    # whose Keras model is swapped for the saved one below.
    placeholder_net = BiholoModelFuncGENERAL(layer_widths, BASIS, stddev=stddev, use_zero_network=True)
    zero_net = BiholoModelFuncGENERAL(layer_widths, BASIS, use_zero_network=True)
    phimodel = PhiFSModel(placeholder_net, BASIS, alpha=loss_weights)
    phimodelzero = PhiFSModel(zero_net, BASIS, alpha=loss_weights)

    training_history = 0
    if not set_weights_to_zero:
        phimodel.model = tf.keras.models.load_model(os.path.join(dirname, name))
        history_file = os.path.join(dirname, 'trainingHistory-' + name + '.npz')
        training_history = np.load(history_file, allow_pickle=True)['arr_0'].item()

    for model in (phimodel, phimodelzero):
        model.compile(custom_metrics=cmetrics)

    x_val = tf.cast(dataset['X_val'], tf.float32)
    y_val = tf.cast(dataset['y_val'], tf.float32)

    # Switch the transition and volk terms on for both models before evaluating.
    for model in (phimodel, phimodelzero):
        model.learn_transition = True
    for model in (phimodel, phimodelzero):
        model.learn_volk = True

    zero_losses = phimodelzero.evaluate(x_val, y_val)
    trained_losses = phimodel.evaluate(x_val, y_val)
    metricsnames = phimodel.metrics_names

    # evaluate() returns the values in the order of metrics_names; key them by name.
    valzero = {metricsnames[pos]: loss for pos, loss in enumerate(zero_losses)}
    valtrained = {metricsnames[pos]: loss for pos, loss in enumerate(trained_losses)}

    phimodel.learn_transition = True
    phimodel.learn_volk = True

    print("zero network validation loss: ")
    print(valzero)
    print("validation loss for final network: ")
    print(valtrained)
    # The 1e-8 keeps the ratio finite when a reference loss is exactly zero.
    ratios = {key + " ratio": value/(valzero[key]+1e-8) for key, value in valtrained.items()}
    print("ratio of trained to zero: " + str(ratios))

    averagediscrepancyinstdevs, _ = compute_transition_pointwise_measure(phimodel, x_val)
    print("average transition discrepancy in standard deviations: " + str(averagediscrepancyinstdevs))
    print("\n\n")
    return phimodel, training_history

In [4]:

def generate_points_and_save_using_defaultsAlpha(free_coefficient,phimodel,euler_char,force_generate=False,seed_set=0):
    """Make sure the dataset for the alpha' network exists on disk.

    The alpha dataset is derived from the already generated metric dataset and
    basis (directory '..._pg_with_<free_coefficient>') and is written to the
    sibling directory '...Alpha_pg_with_<free_coefficient>'.

    Args:
        free_coefficient: label of the run; only used in the directory names.
        phimodel: trained metric model, handed to prepare_dataset_Alpha.
        euler_char: Euler characteristic, handed to prepare_dataset_Alpha.
        force_generate: regenerate even if the alpha directory already exists.
        seed_set: seed passed to the point generator.

    Returns:
        None. The alpha dataset is written by prepare_dataset_Alpha.

    Raises:
        Whatever np.load raises if the metric dataset or basis is missing.
    """
    pg = PointGenerator(monomialsTQ, coefficientsTQ, kmoduliTQ, ambientTQ)
    pg._set_seed(seed_set)

    dirnameForMetric = 'dataAlphaP/'+nameofmanifold+ "_pg_with_" + str(free_coefficient)
    dirnameAlpha= 'dataAlphaP/'+nameofmanifold+ "Alpha_pg_with_" + str(free_coefficient)
    # The first label used to read "alpha" as well, which made the two log
    # lines indistinguishable.
    print("dirname for metric: " + dirnameForMetric)
    print("dirname for alpha: " + dirnameAlpha)

    # NOTE(review): basis.pickle is unpickled (allow_pickle=True); only load files you generated yourself.
    BASIS = prepare_tf_basis(np.load(os.path.join(dirnameForMetric, 'basis.pickle'), allow_pickle=True))
    data = np.load(os.path.join(dirnameForMetric, 'dataset.npz'))

    def _generate():
        prepare_dataset_Alpha(pg, data, dirnameAlpha, phimodel, euler_char, BASIS, normalize_to_vol_j=True)

    if force_generate or (not os.path.exists(dirnameAlpha)):
        print("Generating: forced? " + str(force_generate))
        _generate()
        return

    # The directory exists: check that its dataset can be opened, without
    # clobbering `data` (the metric dataset is still needed if we regenerate).
    try:
        print("loading prexisting dataset")
        with np.load(os.path.join(dirnameAlpha, 'dataset.npz')):
            pass
    except Exception:
        print("problem loading data - generating anyway")
        _generate()
      
   

def getcallbacksandmetricsAlpha(dataalpha):
    """Build the callbacks and custom metrics used when training the alpha' model.

    Args:
        dataalpha: mapping with the alpha dataset (e.g. the object returned by
            np.load on 'dataset.npz'). It must contain 'X_val' and 'y_val'.
            The second half of its keys, in iteration order, is taken to be
            the validation part - this relies on how the dataset was saved.

    Returns:
        ``(cb_list, cmetrics)``: callbacks in the order Laplacian, transition,
        and metric objects in the order total, Laplacian, transition.
    """
    # Materialise the mapping once: for a lazily loaded .npz every dict()
    # conversion reads all arrays from disk again.
    entries = list(dict(dataalpha).items())
    dataalpha_val_dict = dict(entries[len(entries)//2:])

    tcb = TransitionCallback((dataalpha['X_val'], dataalpha['y_val']))
    lplcb = LaplacianCallback(dataalpha_val_dict)
    cb_list = [lplcb, tcb]
    cmetrics = [TotalLoss(), LaplacianLoss(), TransitionLoss()]
    return cb_list, cmetrics

   
def train_and_save_nn_Alpha(free_coefficient,phimodel,euler_char,alphaprime,nlayer=3,nHidden=128,nEpochs=30,bSizes=[192,50000],stddev=0.1,lRate=0.001,use_zero_network=False,alpha=[1,1],load_network=False):
   """Train an AlphaPrimeModel on the saved alpha dataset, save it, and print a loss report.

   The basis is read from the metric directory and the alpha dataset from the
   'Alpha' directory of the same run. A trainable network and an all-zero
   reference network of the same shape are built; validation losses are
   computed before training (zero and untrained network) and after training.

   Args:
      free_coefficient: label of the run; only used in the directory names.
      phimodel: trained metric model, passed to AlphaPrimeModel.
      euler_char: passed to AlphaPrimeModel.
      alphaprime: passed to AlphaPrimeModel.
      nlayer: number of hidden layers.
      nHidden: width of each hidden layer.
      nEpochs: number of epochs passed to train_modelalpha.
      bSizes: batch sizes; bSizes[0] batches the training set and is part of
         the save name, the full list is passed to train_modelalpha.
      stddev: passed to BiholoModelFuncGENERAL for the trained network.
      lRate: learning rate of the (legacy) Adam optimizer.
      use_zero_network: not used by the active code (the trained network is
         always built with use_zero_network=False).
      alpha: loss weights passed to AlphaPrimeModel.
      load_network: if True, replace the freshly built Keras model with the
         one previously saved under the same name before training.

   Returns:
      (alphamodel, training_historyAlpha) as returned by train_modelalpha.

   Side effects:
      Saves the trained Keras model and the training history into the alpha
      directory and clears the Keras session before returning.
   """
   
   dirnameForMetric = 'dataAlphaP/'+nameofmanifold+ "_pg_with_" + str(free_coefficient) 
   dirnameAlpha= 'dataAlphaP/'+nameofmanifold+ "Alpha_pg_with_" + str(free_coefficient) 

   #data = np.load(os.path.join(dirname, 'dataset.npz'))
   BASIS = prepare_tf_basis(np.load(os.path.join(dirnameForMetric, 'basis.pickle'), allow_pickle=True))


   dataalpha = np.load(os.path.join(dirnameAlpha, 'dataset.npz'))
   # The dataset is split by key order: the first half of the keys is the
   # training part, the second half the validation part.
   dataalpha_train=tf.data.Dataset.from_tensor_slices(dict(list(dict(dataalpha).items())[:len(dict(dataalpha))//2]))
   dataalpha_val_dict=dict(list(dict(dataalpha).items())[len(dict(dataalpha))//2:])
   # dataalpha_val is built but not used below; validation uses dataalpha_val_dict.
   dataalpha_val=tf.data.Dataset.from_tensor_slices(dataalpha_val_dict)
   # batch_sizes=[64,10000]
   dataalpha_train=dataalpha_train.shuffle(buffer_size=1024).batch(bSizes[0])

   cb_list, cmetrics = getcallbacksandmetricsAlpha(dataalpha)

   #nlayer = 3
   #nHidden = 128
   # act, nfold, n_in and n_out are not used by the active code; they are only
   # referenced by the commented-out make_nn calls.
   act = 'gelu'
   #nEpochs = 30
   #bSizes = [192, 150000]
   #alpha = [1., 1.] # 1 AND 3??
   nfold = 3
   n_in = 2*8
   n_out = 1
   #lRate = 0.001
   # Unlike the phi model name, this one does not include bSizes[1].
   name = 'alphamodel_for_' + str(nEpochs) + '_' + str(bSizes[0]) + '_'+ str(nlayer) + 'x' +str(nHidden)
   print("name: " + name)

   ambient=tf.cast(BASIS['AMBIENT'],tf.int32)

   # Width of the first layer: product over ambient factors of (dimension+1)^2.
   nfirstlayer=tf.reduce_prod((np.array(ambient)+1)**2).numpy().item()
   #nfirstlayer=tf.reduce_sum(((np.array(ambient)+1)**2)).numpy().item()
   shapeofinternalnetwork=[nHidden]*nlayer
   shapeofnetwork=[nfirstlayer]+shapeofinternalnetwork+[1]

   print("network shape: " + str(shapeofnetwork))
   #initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=stddev)
   #nn_alpha = BiholoModelFuncGENERAL(shapeofnetwork,BASIS,stddev=stddev,use_zero_network=use_zero_network)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
   #nn_alpha_zero = BiholoModelFuncGENERAL(shapeofnetwork,BASIS,use_zero_network=True)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
   # activ is only used by the commented-out BiholoModelFuncGENERALforHYMinv3 variants.
   activ=tf.square
   #activ=tfk.activations.gelu
   #nn_alpha = BiholoModelFuncGENERALforHYMinv3(shapeofnetwork,BASIS,activation=activ,stddev=stddev,use_zero_network=use_zero_network)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
   #nn_alpha_zero = BiholoModelFuncGENERALforHYMinv3(shapeofnetwork,BASIS,activation=activ,use_zero_network=True)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
   # Network to be trained, and a zero reference network of the same shape.
   nn_alpha = BiholoModelFuncGENERAL(shapeofnetwork,BASIS,stddev=stddev,use_zero_network=False)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
   nn_alpha_zero =BiholoModelFuncGENERAL(shapeofnetwork,BASIS,use_zero_network=True)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
   #copie from phi above
   #nn_alpha = BiholoModelFuncGENERAL(shapeofnetwork,BASIS,stddev=stddev,use_zero_network=True)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
   #nn_alpha_zero =BiholoModelFuncGENERAL(shapeofnetwork,BASIS,use_zero_network=True)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
   #nn_phi_zero = make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=True)

   #initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=0.2)
   #nn_alpha = make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)#note we don't need a last bias (flat direction)
   #nn_alpha = make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network,kernel_initializer=initializer)#note we don't need a last bias (flat direction)
   #nn_alpha_zero = make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=True)#note we don't need a last bias (flat direction)
   
   alphamodel= AlphaPrimeModel(nn_alpha,BASIS, phimodel,alphaprime,euler_char,alpha=alpha,norm = [1. for _ in range(2)])
   # Optionally continue from a previously saved network of the same name.
   if load_network:
      print("loading network")
      alphamodel.model=tf.keras.models.load_model(os.path.join(dirnameAlpha,name))
      print("network loaded")

   alphamodelzero= AlphaPrimeModel(nn_alpha_zero,BASIS,phimodel, alphaprime,euler_char,alpha=alpha,norm = [1. for _ in range(2)])

   # The legacy Adam optimizer is used here; the original author's note refers
   # to a TensorFlow issue on M1/M2 machines. The non-legacy variant is kept
   # commented out for other machines.
   #opt = tfk.optimizers.Adam(learning_rate=lRate)
   opt = tfk.optimizers.legacy.Adam(learning_rate=lRate)
   # compile so we can test on validation set before training
   alphamodel.compile(custom_metrics=cmetrics)
   alphamodelzero.compile(custom_metrics=cmetrics)
   
   #datacasted=[tf.cast(data['X_val'],tf.float32),tf.cast(data['y_val'],tf.float32)]
   # Pre-training validation losses for the zero network and the untrained network.
   valzero=alphamodelzero.test_step(dataalpha_val_dict)
   valraw=alphamodel.test_step(dataalpha_val_dict)
   valzero = {key: value.numpy() for key, value in valzero.items()}
   valraw = {key: value.numpy() for key, value in valraw.items()}
   
   # Placeholder history; it is overwritten by the first call to train_modelalpha.
   training_historyAlpha={'transition_loss': [10**(-8)],'laplacian_loss': [1000000000000000]}
   i=0
   newLR=lRate
   #while (training_historyAlpha['transition_loss'][-1]<10**(-5)) or (training_historyAlpha['laplacian_loss'][-1]>1.):
   # continue looping if >10 or is nan
   # NOTE(review): with the condition `i==0` and `i+=1` at the end of the body
   # this loop runs exactly once, so the `if i >0` retry branch (fresh network,
   # halved learning rate) is never reached. The loss-based retry condition
   # is kept in the trailing comment.
   while i==0:#(training_historyAlpha['laplacian_loss'][-1]>10000000000000.) or (np.isnan( training_historyAlpha['laplacian_loss'][-1])):
      print("trying iteration of training "+str(i))
      if i >0:

         print('trying again laplacian_loss too big')
         #initializer = tf.keras.initializers.RandomNormal(mean=0., stddev=0.2)
         #nn_alpha = make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network,kernel_initializer=initializer)#note we don't need a last bias (flat direction)
         #nn_alpha = BiholoModelFuncGENERAL(shapeofnetwork,BASIS,stddev=stddev,use_zero_network=use_zero_network)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
         #nn_alpha = BiholoModelFuncGENERALforAlphainv2(shapeofnetwork,BASIS,activation=tfk.activations.gelu,stddev=stddev,use_zero_network=use_zero_network)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
         #nn_alpha = BiholoModelFuncGENERALforHYMinv3(shapeofnetwork,BASIS,activation=activ,stddev=stddev,use_zero_network=use_zero_network)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
         nn_alpha = BiholoModelFuncGENERAL(shapeofnetwork,BASIS,stddev=stddev,use_zero_network=False)#make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)
         #nn_alpha = make_nn(n_in,n_out,nlayer,nHidden,act,use_zero_network=use_zero_network)#note we don't need a last bias (flat direction)
         # Halve the learning rate on each retry, but only while it is above 2e-4.
         if newLR>0.0002:
             newLR=newLR/2
             print("new LR " + str(newLR))
         opt = tfk.optimizers.legacy.Adam(learning_rate=newLR)
         alphamodel= AlphaPrimeModel(nn_alpha,BASIS, phimodel,alphaprime,euler_char,alpha=alpha,norm = [1. for _ in range(2)])
         cb_list, cmetrics = getcallbacksandmetricsAlpha(dataalpha)
         alphamodel.compile(custom_metrics=cmetrics)
      alphamodel, training_historyAlpha= train_modelalpha(alphamodel, dataalpha_train, optimizer=opt, epochs=nEpochs, batch_sizes=bSizes, 
                                        verbose=1, custom_metrics=cmetrics, callbacks=cb_list)
      i+=1
   print("finished training\n")
   # Persist the inner Keras model and the training history in the alpha directory.
   alphamodel.model.save(os.path.join(dirnameAlpha, name))
   np.savez_compressed(os.path.join(dirnameAlpha, 'trainingHistory-' + name),training_historyAlpha)
   # Post-training validation losses on the same validation dictionary.
   valfinal =alphamodel.test_step(dataalpha_val_dict)
   valfinal = {key: value.numpy() for key, value in valfinal.items()}
   #return training_historyAlpha
   #now print the initial losses and final losses for each metric, by taking the first element of each key in the dictionary
   #first_metrics = {key: value[0] for key, value in training_historyAlpha.items()}
   #last_metrics = {key: value[-1] for key, value in training_historyAlpha.items()}

   #print("initial losses")
   #print(first_metrics)
   #print("final losses")
   #print(last_metrics)


   print("zero network validation loss: ")
   print(valzero)
   print("validation loss for raw network: ")
   print(valraw)
   print("validation loss for final network: ")
   print(valfinal)
   # The 1e-8 in the denominators guards against division by an exactly zero loss.
   print("ratio of final to zero: " + str({key + " ratio": value/(valzero[key]+1e-8) for key, value in valfinal.items()}))
   print("ratio of final to raw: " + str({key + " ratio": value/(valraw[key]+1e-8) for key, value in valfinal.items()}))


   # Only the first value returned by each of the two diagnostics is reported.
   averagediscrepancyinstdevs,_=compute_transition_pointwise_measure(alphamodel,tf.cast(dataalpha["X_val"],tf.float32))
   print("average transition discrepancy in standard deviations: " + str(averagediscrepancyinstdevs))
   meanfailuretosolveequation,_,_=HYM_measure_val(alphamodel,dataalpha)
   print("mean of difference/mean of absolute value of source, weighted by sqrt(g): " + str(meanfailuretosolveequation))
   print("\n\n")
   # NOTE(review): the Keras session is cleared right before the trained model
   # is returned - confirm the returned model is still usable by callers.
   tf.keras.backend.clear_session()
   return alphamodel,training_historyAlpha

def load_nn_Alpha(free_coefficient,phimodel,euler_char,alphaprime,nlayer=3,nHidden=128,nEpochs=30,bSizes=[192,50000],stddev=0.1,lRate=0.001,use_zero_network=False,alpha=[1,1],load_network=False,set_weights_to_zero=False):
    """Rebuild the alpha' model, restore its saved weights, and print validation losses.

    Counterpart of load_nn_phimodel for the AlphaPrimeModel. The saved Keras
    model and training history are read from the alpha directory of the run,
    evaluated on the validation half of the alpha dataset, and compared with
    a zero reference network.

    Args:
        free_coefficient: label of the run; only used in the directory names.
        phimodel: trained metric model, passed to AlphaPrimeModel.
        euler_char: passed to AlphaPrimeModel.
        alphaprime: passed to AlphaPrimeModel.
        nlayer, nHidden: depth and width of the hidden layers.
        nEpochs, bSizes: only used to reconstruct the name of the saved model
            (bSizes[0] enters the name).
        stddev: passed to the placeholder network.
        lRate, use_zero_network, load_network: accepted for signature parity
            with train_and_save_nn_Alpha; unused here.
        alpha: loss weights passed to AlphaPrimeModel. This argument used to
            be silently overwritten with [1., 1.] inside the function.
        set_weights_to_zero: if True nothing is loaded from disk and the
            history is returned as 0. The function body used to read this
            name without it being a parameter, which raised NameError.

    Returns:
        (alphamodel, training_historyAlpha)
    """
    dirnameForMetric = 'dataAlphaP/'+nameofmanifold+ "_pg_with_" + str(free_coefficient)
    dirnameAlpha= 'dataAlphaP/'+nameofmanifold+ "Alpha_pg_with_" + str(free_coefficient)

    name = 'alphamodel_for_' + str(nEpochs) + '_' + str(bSizes[0]) + '_'+ str(nlayer) + 'x' +str(nHidden)
    print("name: " + name)

    # NOTE(review): basis.pickle is unpickled (allow_pickle=True); only load files you generated yourself.
    BASIS = prepare_tf_basis(np.load(os.path.join(dirnameForMetric, 'basis.pickle'), allow_pickle=True))

    dataalpha = np.load(os.path.join(dirnameAlpha, 'dataset.npz'))
    # Validation part = second half of the keys in iteration order. The
    # mapping is materialised once; no training dataset is built because
    # nothing is trained here.
    entries = list(dict(dataalpha).items())
    dataalpha_val_dict = dict(entries[len(entries)//2:])

    # Only the metric objects are needed for evaluation.
    _, cmetrics = getcallbacksandmetricsAlpha(dataalpha)

    # First-layer width: product over ambient factors of (dimension+1)^2.
    ambient = tf.cast(BASIS['AMBIENT'], tf.int32)
    nfirstlayer = tf.reduce_prod((np.array(ambient)+1)**2).numpy().item()
    shapeofnetwork = [nfirstlayer] + [nHidden]*nlayer + [1]
    print("network shape: " + str(shapeofnetwork))

    # NOTE(review): train_and_save_nn_Alpha builds its networks with
    # BiholoModelFuncGENERAL, while HYMinv3 is used here. For the loaded model
    # this does not matter (its Keras model is replaced below), but the zero
    # baseline may differ from the one printed at training time - confirm.
    activ = tf.square
    # When no weights are loaded, the placeholder is requested as a zero
    # network (presumably all-zero weights, as for the phi model - confirm).
    nn_alpha = BiholoModelFuncGENERALforHYMinv3(shapeofnetwork,BASIS,activation=activ,stddev=stddev,use_zero_network=set_weights_to_zero)
    nn_alpha_zero = BiholoModelFuncGENERALforHYMinv3(shapeofnetwork,BASIS,activation=activ,use_zero_network=True)

    alphamodel = AlphaPrimeModel(nn_alpha,BASIS,phimodel,alphaprime,euler_char,alpha=alpha,norm=[1. for _ in range(2)])
    alphamodelzero = AlphaPrimeModel(nn_alpha_zero,BASIS,phimodel,alphaprime,euler_char,alpha=alpha,norm=[1. for _ in range(2)])

    if set_weights_to_zero:
        training_historyAlpha = 0
    else:
        alphamodel.model = tf.keras.models.load_model(os.path.join(dirnameAlpha,name))
        training_historyAlpha = np.load(os.path.join(dirnameAlpha, 'trainingHistory-' + name +'.npz'),allow_pickle=True)['arr_0'].item()

    alphamodel.compile(custom_metrics=cmetrics)
    alphamodelzero.compile(custom_metrics=cmetrics)

    zero_losses = alphamodelzero.evaluate(dataalpha_val_dict)
    trained_losses = alphamodel.evaluate(dataalpha_val_dict)

    # evaluate() returns the values in the order of metrics_names; key them by name.
    metricsnames = alphamodel.metrics_names
    valzero = dict(zip(metricsnames, zero_losses))
    valtrained = dict(zip(metricsnames, trained_losses))

    print("zero network validation loss: ")
    print(valzero)
    print("validation loss for trained network: ")
    print(valtrained)
    # The 1e-8 keeps the ratio finite when a reference loss is exactly zero.
    print("ratio of trained to zero: " + str({key + " ratio": value/(valzero[key]+1e-8) for key, value in valtrained.items()}))

    averagediscrepancyinstdevs,_ = compute_transition_pointwise_measure(alphamodel,tf.cast(dataalpha["X_val"],tf.float32))
    print("average transition discrepancy in standard deviations: " + str(averagediscrepancyinstdevs))
    meanfailuretosolveequation,_,_ = HYM_measure_val(alphamodel,dataalpha)
    print("mean of difference/mean of absolute value of source, weighted by sqrt(g): " + str(meanfailuretosolveequation))
    print("\n\n")
    return alphamodel,training_historyAlpha



Now generate example points with a point generator

Generate the point cloud for our NN training - note that this will take a few minutes.


Note that "free_coefficient" is just a label for this particular quintic - for the TQ it was psi. Here, it just lets you have different runs not overwrite each other.


In [5]:


# --- run configuration for the metric (phi) network ---

# Label of this run; it only ends up in the data directory name.
# Earlier runs used 1.9 (originally float(sys.argv[1])) and 2.342351.
# If the label is 1, write it as 1. (not 1) so the directory name stays the same.
free_coefficient = 2.342343234

nPoints = 100000

# Network size and training length (previous settings: 100 epochs, width 128).
nEpochsPhi = 10
depthPhi = 3
widthPhi = 64

train_phi = False

# Creates the point cloud on first use; later runs reuse the saved dataset.
generate_points_and_save_using_defaults(free_coefficient, nPoints)


dirname: dataAlphaP/Quintic_pg_with_2.342343234
loading prexisting dataset


## Training the NN

Now we can start preparation for training the NN

Begin by loading in the required data

In [6]:

# Retrain the phi network only when train_phi is set; otherwise load the
# network stored for the same coefficient / depth / width / epochs / batch
# sizes.  The branch used to be hard-coded as `if False:` even though the
# notebook defines the train_phi flag for exactly this choice.
if train_phi:
    phimodel1, training_history = train_and_save_nn(
        free_coefficient, depthPhi, widthPhi, nEpochsPhi,
        stddev=0.05, bSizes=[64, 50000], lRate=0.001,
    )
else:
    phimodel1, training_history = load_nn_phimodel(
        free_coefficient, depthPhi, widthPhi, nEpochsPhi,
        [64, 50000], set_weights_to_zero=False,
    )


dataAlphaP/Quintic_pg_with_2.342343234
phimodel_for_10_64_50000s3x64
network shape: [25, 64, 64, 64, 1]
zero network validation loss: 
{'loss': 0.5023784041404724, 'sigma_loss': 0.502377986907959, 'kaehler_loss': 0.0, 'transition_loss': 0.0, 'volk_loss': 2.811732144891721e-07}
validation loss for final network: 
{'loss': 0.5453054904937744, 'sigma_loss': 0.00666390173137188, 'kaehler_loss': 0.0, 'transition_loss': 8.115599303692989e-10, 'volk_loss': 0.2693207859992981}
ratio of trained to zero: {'loss ratio': 1.0854476927054013, 'sigma_loss ratio': 0.013264716552848506, 'kaehler_loss ratio': 0.0, 'transition_loss ratio': 0.08115599303692989, 'volk_loss ratio': 924950.4164446876}
average transition discrepancy in standard deviations: tf.Tensor(2.3270998e-06, shape=(), dtype=float32)





In [7]:
# Prepare the point dataset for the alpha model from the phi model; with
# force_generate=False an existing dataset is loaded instead of regenerated.
# NOTE(review): -200. presumably is the Euler characteristic (compare the
# euler_char value used for the alpha network) - confirm against the signature.
generate_points_and_save_using_defaultsAlpha(
    free_coefficient,
    phimodel1,
    -200.,
    force_generate=False,
    seed_set=0,
)


dirname for alpha: dataAlphaP/Quintic_pg_with_2.342343234
dirname for alpha: dataAlphaP/QuinticAlpha_pg_with_2.342343234
loading prexisting dataset


In [17]:
# Settings for the alpha network.
alphaprime = 1
euler_char = -200
depthAlpha = 3
widthAlpha = 128
nEpochsAlpha = 100

# Set to False to load a stored alpha network instead of training one.
train_alpha = True
if train_alpha:
    AlphaModel1, training_historyAlpha = train_and_save_nn_Alpha(
        free_coefficient, phimodel1, euler_char, alphaprime,
        depthAlpha, widthAlpha, nEpochsAlpha,
        bSizes=[64, 50000], stddev=0.05, lRate=0.1,
        use_zero_network=False, alpha=[1., 1.], load_network=False,
    )
else:
    # Load using the alpha network's own depth / width / epochs.  The call
    # previously passed depthPhi, widthPhi and nEpochsPhi, which describe the
    # phi network and therefore do not match the network trained above.
    AlphaModel1, training_historyAlpha = load_nn_Alpha(
        free_coefficient, phimodel1, euler_char, alphaprime,
        depthAlpha, widthAlpha, nEpochsAlpha,
        [64, 50000], set_weights_to_zero=False,
    )

#

name: alphamodel_for_100_64_3x128
network shape: [25, 128, 128, 128, 1]
trying iteration of training 0

Epoch  1/100
 - Transition measure val: 4.1232e-07

Epoch  2/100
 - Transition measure val: 5.4331e-07

Epoch  3/100
 - Transition measure val: 5.2767e-07

Epoch  4/100
 - Transition measure val: 5.2376e-07

Epoch  5/100
 - Transition measure val: 5.4407e-07

Epoch  6/100
 - Transition measure val: 5.3329e-07

Epoch  7/100
 - Transition measure val: 5.3635e-07

Epoch  8/100
 - Transition measure val: 5.1861e-07

Epoch  9/100
 - Transition measure val: 5.5342e-07

Epoch 10/100
 - Transition measure val: 5.5180e-07

Epoch 11/100
 - Transition measure val: 5.4312e-07
cutting LR, multiplying by 0.1 - new LR: <tf.Variable 'Adam/learning_rate:0' shape=() dtype=float32, numpy=0.010000001>

Epoch 12/100
 - Transition measure val: 5.0907e-07

Epoch 13/100
 - Transition measure val: 5.2481e-07

Epoch 14/100
 - Transition measure val: 5.3854e-07

Epoch 15/100
 - Transition measure val: 5.2948e-

InvalidArgumentError: Graph execution error:

Detected at node gradients/AddN_2 defined at (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/runpy.py", line 86, in _run_code

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 736, in start

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 516, in dispatch_queue

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 505, in process_one

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 412, in dispatch_shell

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 740, in execute_request

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 422, in do_execute

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 546, in run_cell

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3024, in run_cell

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3079, in _run_cell

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3284, in run_cell_async

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3466, in run_ast_nodes

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3526, in run_code

  File "/var/folders/n5/4xlf9f2j41z1r42htkz4vw0m0000gn/T/ipykernel_91769/2454447764.py", line 7, in <module>

  File "/var/folders/n5/4xlf9f2j41z1r42htkz4vw0m0000gn/T/ipykernel_91769/2703670363.py", line 157, in train_and_save_nn_Alpha

  File "/Users/kit/Documents/Phys_Working/PhysicalYukawas/AlphaPrimeModel.py", line 587, in train_modelalpha

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/keras/src/engine/training.py", line 1807, in fit

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/opt/homebrew/Caskroom/miniforge/base/envs/cymetric/lib/python3.10/site-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/Users/kit/Documents/Phys_Working/PhysicalYukawas/AlphaPrimeModel.py", line 330, in train_step

Inputs to operation PartitionedCall_5/gradients/AddN_2 of type AddN must have the same size and shape.  Input 0: [10,64,5] != input 1: [0]
	 [[{{node gradients/AddN_2}}]] [Op:__inference_train_function_1724362]

In [25]:
# Display which quantities were recorded in the phi model's training history.
training_history.keys()

dict_keys(['sigma_val', 'volk_loss', 'transition_val', 'kaehler_val', 'kaehler_loss', 'loss', 'transition_loss', 'sigma_loss', 'volk_val', 'epochs'])

In [48]:
import numpy as np
from sklearn.model_selection import ParameterGrid

# Settings read by the learning-rate sweep below.
alphaprime, euler_char = 1, -200

# Alpha-network depth, width and number of training epochs for each sweep run.
depthAlpha, widthAlpha, nEpochsAlpha = 3, 64, 10

def train_model_with_lr(lr):
    """Train an alpha network at learning rate ``lr`` and return its score.

    Training is delegated to ``train_and_save_nn_Alpha`` with the
    notebook-level settings (``free_coefficient``, ``phimodel1``,
    ``euler_char``, ``alphaprime``, ``depthAlpha``, ``widthAlpha``,
    ``nEpochsAlpha``); only the learning rate changes between calls.

    Returns:
        The last entry of the ``'laplacian_loss'`` series in the training
        history, used as the quantity to minimise across learning rates.
    """
    print("Training with learning rate: ", lr)

    # The trained model itself is not needed here, only its history.
    _, training_log = train_and_save_nn_Alpha(
        free_coefficient, phimodel1, euler_char, alphaprime,
        depthAlpha, widthAlpha, nEpochsAlpha,
        bSizes=[64, 50000],
        stddev=0.05,
        lRate=lr,
        use_zero_network=False,
        alpha=[1., 1.],
        load_network=False,
    )

    laplacian_series = training_log['laplacian_loss']
    return laplacian_series[-1]

# Learning rates to try: 10 log-spaced values between 10^-4 and 10^-1.
lr_space = {
    'lr': np.logspace(-4, -1, 10)
}

# Best learning rate found so far and its score (lower is better).
best_lr = None
best_performance = float('inf')

# One (learning rate, score) pair per training run, in the order trained.
results = []
for params in ParameterGrid(lr_space):
    lr = params['lr']
    performance = train_model_with_lr(lr)
    results.append((lr, performance))
    # Track the running best: these two variables used to be initialised
    # above but were never updated by the sweep.  A NaN score compares False
    # and is therefore never selected.
    if performance < best_performance:
        best_lr = lr
        best_performance = performance








Training with learning rate:  0.0001
name: alphamodel_for_10_64_3x64
network shape: [25, 64, 64, 64, 1]
trying iteration of training 0

Epoch  1/10
 - Transition measure val: 6.3086e-08

Epoch  2/10
 - Transition measure val: 2.7872e-08

Epoch  3/10
 - Transition measure val: 1.5282e-08

Epoch  4/10
 - Transition measure val: 1.7444e-08

Epoch  5/10
 - Transition measure val: 3.7340e-08

Epoch  6/10
 - Transition measure val: 2.1039e-08

Epoch  7/10
 - Transition measure val: 8.8781e-09

Epoch  8/10
 - Transition measure val: 1.6047e-08

Epoch  9/10
 - Transition measure val: 1.8965e-08

Epoch 10/10
 - Transition measure val: 1.8771e-08
finished training

zero network validation loss: 
{'loss': 0.0, 'laplacian_loss': 176.30818, 'transition_loss': 0.0, 'sigma_loss': 0.0, 'kaehler_loss': 0.0, 'volk_loss': 0.0}
validation loss for raw network: 
{'loss': 0.0, 'laplacian_loss': 176.29546, 'transition_loss': 0.0, 'sigma_loss': 0.0, 'kaehler_loss': 0.0, 'volk_loss': 0.0}
validation loss for f

: 

In [None]:
# NOTE(review): this cell relies on `plt` (presumably matplotlib.pyplot) and on
# `results` from the learning-rate sweep already being in scope - confirm.


def _start_figure():
    """Open a new 10x6 figure for the next plot."""
    plt.figure(figsize=(10, 6))


def _label_and_show(x_label, y_label, title):
    """Label the current figure, switch the grid on and display it."""
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.grid(True)
    plt.show()


# Order the runs by learning rate (in place), then split them into two
# parallel tuples for plotting.
results.sort(key=lambda run: run[0])
learning_rates, performances = zip(*results)

# Line plot of score against learning rate on a logarithmic x-axis.
_start_figure()
plt.semilogx(learning_rates, performances, 'bo-')
_label_and_show('Learning Rate', 'Validation Loss',
                'Learning Rate vs Validation Loss')

# The same data as a scatter plot.
_start_figure()
plt.scatter(learning_rates, performances)
plt.xscale('log')
_label_and_show('Learning Rate', 'Validation Loss',
                'Learning Rate vs Validation Loss (Scatter)')

# Histogram of the scores over all runs.
_start_figure()
plt.hist(performances, bins=20)
_label_and_show('Validation Loss', 'Frequency',
                'Distribution of Validation Loss')

# The run with the lowest score, unpacked from a single lookup.
best_lr, best_performance = min(results, key=lambda run: run[1])

# Line plot again, with the best run marked by a red star and annotated.
_start_figure()
plt.semilogx(learning_rates, performances, 'bo-')
plt.plot(best_lr, best_performance, 'r*', markersize=15)
plt.annotate(f'Best: {best_lr:.2e}', (best_lr, best_performance), xytext=(5, 5),
             textcoords='offset points')
_label_and_show('Learning Rate', 'Validation Loss',
                'Learning Rate vs Validation Loss (Best Highlighted)')

In [16]:
# Re-execute the already-imported AlphaPrimeModel module so that changes to
# its source are picked up without restarting the kernel, then repeat the
# wildcard import so the notebook's names refer to the reloaded definitions.
importlib.reload(sys.modules['AlphaPrimeModel'])
from AlphaPrimeModel import *