In [1]:
import os
import time
import pkg_resources
import math
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.optim as optim

In [2]:
# !pip install --upgrade git+https://github.com/vallis/TrueBayes.git

In [3]:
import numpy as np
##geometry.py
def setgeometry(q):
    global qdim, xmin, xmax, xstops, xmid, xwid

    # bins
    qdim = q

    # prior range for x (will be uniform)
    xmin, xmax = 0, 1

    # definition of quantization bins
    xstops = np.linspace(xmin, xmax, qdim + 1)

    # to plot histograms
    xmid = 0.5 * (xstops[:-1] + xstops[1:])
    xwid = xstops[1] - xstops[0]

setgeometry(64)

In [4]:
#utils.py
import torch

def numpy2cuda(array, single=True):
  array = torch.from_numpy(array)
  
  if single:
    array = array.float()
    
  if torch.cuda.is_available():
    array = array.cuda()
    
  return array


def cuda2numpy(tensor):
  return tensor.detach().cpu().numpy()

In [5]:
##network.py
import torch
import torch.nn as nn
import torch.nn.functional as F
# import torch.optim as optim


def makenet(dims, softmax=True, single=True):
  """Make a fully connected DNN with layer widths described by `dims`.
  CUDA is always enabled, and double precision is set with `single=False`.
  The output layer applies a softmax transformation,
  disabled by setting `softmax=False`."""

  ndims = len(dims)

  class Net(nn.Module):
    def __init__(self):
      super(Net, self).__init__()

      # the weights must be set explicitly as attributes in the class
      # (i.e., we can't collect them in a single list)
      for l in range(ndims - 1):
        layer = nn.Linear(dims[l], dims[l+1])
        
        if not single:
          layer = layer.double()
        
        if torch.cuda.is_available():
          layer = layer.cuda()
        
        setattr(self, f'fc{l}', layer)
                
    def forward(self, x):
      # per Alvin's recipe, apply relu everywhere but last layer
      for l in range(ndims - 2):
        x = F.leaky_relu(getattr(self, f'fc{l}')(x), negative_slope=0.2)

      x = getattr(self, f'fc{ndims - 2}')(x)

      if softmax:
        return F.softmax(x, dim=1)
      else:
        return x
  
  return Net


def makenetbn(dims, softmax=True, single=True):
  """A batch-normalizing version of makenet. Experimental."""

  ndims = len(dims)

  class Net(nn.Module):
    def __init__(self):
      super(Net, self).__init__()

      # the weights must be set explicitly as attributes in the class
      # (i.e., we can't collect them in a single list)
      for l in range(ndims - 1):
        layer = nn.Linear(dims[l], dims[l+1])
        bn = nn.BatchNorm1d(num_features=dims[l+1])
        
        if not single:
          layer = layer.double()
          bn = bn.double()
        
        if torch.cuda.is_available():
          layer = layer.cuda()
          bn = bn.cuda()
        
        setattr(self, f'fc{l}', layer)
        setattr(self, f'bn{l}', bn)
                
    def forward(self, x):
      # per Alvin's recipe, apply relu everywhere but last layer
      for l in range(ndims - 2):
        x = getattr(self, f'bn{l}')(F.leaky_relu(getattr(self, f'fc{l}')(x), negative_slope=0.2))

      x = getattr(self, f'fc{ndims - 2}')(x)

      if softmax:
        return F.softmax(x, dim=1)
      else:
        return x
  
  return Net

In [6]:
##loss.py
import math

import numpy as np
import torch

from truebayes.geometry import qdim
from truebayes.utils import numpy2cuda, cuda2numpy

def lossfunction(o, l: 'indicator'):
  """MSE loss for DNN histogram output, labels represented as indicator arrays."""

  return torch.mean(torch.sum(o**2,dim=1) - 2*torch.sum(o*l,dim=1))


def kllossfunction(o, l: 'indicator'):
  """KL loss for DNN histogram output, labels represented as indicator arrays."""

  return -torch.mean(2*torch.sum(torch.log(o)*l, dim=1))


def lossG1(o, l: 'xtrue'):
  """MSE loss for normal-PDF output (represented as a mean/variance pair)."""

  # since int N^2(x;x0,s) dx = 1/(2 sqrt(pi) s)
  # the sqerr loss is 1/(2 sqrt(pi) s) - 2 * e^{-(x_tr - x0)^2/2 s^2} / sqrt(2 pi s^2)
  # multiplying by 2 sqrt(pi)
  
  return torch.mean((1 - 2*math.sqrt(2)*torch.exp(-0.5*(l - o[:,0])**2/o[:,1]**2)) / o[:,1])


def kllossGn(o, l: 'xtrue'):
  """KL loss for Gaussian-mixture output (represented as a vector of concatenated mean/variance/weight triples)."""

  x0 = o[:,0::3]
  std = o[:,1::3]
  weight = torch.softmax(o[:,2::3], dim=1)

  # numerically unstable
  # return -torch.mean(2*torch.log(torch.sum(weight * torch.exp(-0.5*(x0 - l[:,np.newaxis])**2/std**2) / torch.sqrt(2 * math.pi * std**2),dim=1)))
  
  return -torch.mean(torch.logsumexp(torch.log(weight) - 0.5*(x0 - l[:,np.newaxis])**2/std**2 - 0.5*torch.log(2 * math.pi * std**2), dim=1))


def netmeanGn(inputs, net=None, single=True):
  if isinstance(inputs, np.ndarray):
    inputs = numpy2cuda(inputs, single)
    
  pars = cuda2numpy(net(inputs))

  dx  = pars[:,0::3] 
  std = pars[:,1::3]
  pweight = torch.softmax(torch.from_numpy(pars[:,2::3]),dim=1).numpy()

  # see https://en.wikipedia.org/wiki/Mixture_distribution
  xmean = np.sum(pweight * dx, axis=1)
  xerr  = np.sqrt(np.sum(pweight * (dx**2 + std**2), axis=1) - xmean**2)

  return xmean, xerr


def kllossfunction2(o, l: 'indicator'):
  """KL loss over 2-D histogram."""

  q = o.reshape((o.shape[0], qdim, qdim))

  return torch.mean(-torch.sum(torch.log(q)*l, dim=[1,2]))


def kllossGn2(o, l: 'xtrue'):
  """KL loss for Gaussian-mixture output, 2D, precision-matrix parameters."""

  dx  = o[:,0::6] - l[:,0,np.newaxis]
  dy  = o[:,2::6] - l[:,1,np.newaxis]
  
  # precision matrix is positive definite, so has positive diagonal terms
  Fxx = o[:,1::6]**2
  Fyy = o[:,3::6]**2
  
  # precision matrix is positive definite, so has positive 
  Fxy = torch.atan(o[:,4::6]) / (0.5*math.pi) * o[:,1::6] * o[:,3::6]
  
  weight = torch.softmax(o[:,5::6], dim=1)
   
  # omitting the sqrt(4*math*pi) since it's common to all templates
  return -torch.mean(torch.logsumexp(torch.log(weight) - 0.5*(Fxx*dx*dx + Fyy*dy*dy + 2*Fxy*dx*dy) + 0.5*torch.log(Fxx*Fyy - Fxy*Fxy), dim=1))


def netmeanGn2(inputs, net=None, single=True):
  if isinstance(inputs, np.ndarray):
    inputs = numpy2cuda(inputs, single)
    
  pars = cuda2numpy(net(inputs))

  dx, dy = pars[:,0::6], pars[:,2::6] 
  
  Fxx, Fyy = pars[:,1::6]**2, pars[:,3::6]**2
  Fxy = np.arctan(pars[:,4::6]) / (0.5*math.pi) * pars[:,1::6] * pars[:,3::6]

  det = Fxx*Fyy - Fxy*Fxy
  Cxx, Cyy, Cxy = Fyy/det, Fxx/det, -Fxy/det

  pweight = torch.softmax(torch.from_numpy(pars[:,5::6]),dim=1).numpy()

  xmean, ymean = np.sum(pweight * dx, axis=1), np.sum(pweight * dy, axis=1)
  xerr,  yerr  = np.sqrt(np.sum(pweight * (dx**2 + Cxx), axis=1) - xmean**2), np.sqrt(np.sum(pweight * (dy**2 + Cyy), axis=1) - ymean**2) 
  xycov        = np.sum(pweight * (dx*dy + Cxy), axis=1) - xmean*ymean

  return np.vstack((xmean, ymean)).T, np.vstack((xerr, yerr)).T, xycov


def sqerr(o, l: 'xtrue'):
  """Squared error loss for estimator output."""

  return torch.mean((o - l)**2)

In [7]:
from scipy import constants
m_sun = 1.98840987e30 * constants.G / (constants.c**3)                    #solar mass in seconds
chirp_min = 32.9 #see notes for 4/11/21 of explanation.
chirp_max = 42.9 #guess?
from GWFunctions import h_func_f
from GWNoise import noise_f

In [8]:
torch.cuda.is_available()

True

In [18]:
h_func_f(39)

array([-5.02630673e-14-5.79518299e-13j,  1.07659106e-13-5.70428824e-13j,
        2.57213039e-13-5.19068634e-13j,  3.87264114e-13-4.29218054e-13j,
        4.88084630e-13-3.07547446e-13j,  5.52093515e-13-1.63140299e-13j,
        5.74443114e-13-6.82955928e-15j,  5.53406535e-13+1.49605562e-13j,
        4.90533252e-13+2.94323597e-13j,  3.90557681e-13+4.16319745e-13j,
        2.61063956e-13+5.06271436e-13j,  1.11928857e-13+5.57265063e-13j,
       -4.54177577e-14+5.65346305e-13j, -1.98860830e-13+5.29848956e-13j,
       -3.36531199e-13+4.53473430e-13j, -4.47728654e-13+3.42104996e-13j,
       -5.23764963e-13+2.04381833e-13j, -5.58659747e-13+5.10427690e-14j,
       -5.49632323e-13-1.05897603e-13j, -4.97347582e-13-2.54084229e-13j,
       -4.05892578e-13-3.81795163e-13j, -2.82481403e-13-4.78876813e-13j,
       -1.36907487e-13-5.37565801e-13j,  1.92169807e-14-5.53129956e-13j,
        1.73376946e-13-5.24274113e-13j,  3.13154497e-13-4.53274222e-13j,
        4.27233540e-13-3.45824493e-13j,  5.06327737

In [15]:
def syntrain(size,  region=[[chirp_min, chirp_max]], snrs=[8,16], varx='Mc', varall=False, seed=None,
             single=True, noi=1):
    """Makes a training set using the ROMAN NN. It returns labels (for `varx`,
        or for all if `varall=True`), indicator vectors, and ROM coefficients
        (with `snr` and `noise`). Note that the coefficients are kept on the GPU.
        Parameters are sampled randomly within `region`."""
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu:0'
    
    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)
    
    with torch.no_grad():
        xs = torch.zeros((size, 1), dtype=torch.float, device=device)

        for i, r in enumerate(region):
            xs[:,0] = r[0] + (r[1] - r[0]) * torch.rand((size,), dtype=torch.float, device=device)

        xs_1 = xs.detach().cpu().double().numpy()
        
        #generating signal and noise
        signal = np.apply_along_axis(h_func_f, 1, xs_1)[:,:,0]
        signal_r, signal_i = numpy2cuda(signal.real), numpy2cuda(signal.imag)
        
        #noise = np.apply_along_axis(noise_f, 1, f)[:,:,0]
        #print(noise)
        #noise_r, noise_i = numpy2cuda(noise.real), numpy2cuda(noise.imag)
        
        alphas = torch.zeros((size, 200*2), dtype=torch.float if single else torch.double, device=device)
        
        ##Normalize the vector basis 
        normalize1 = torch.sqrt(torch.sum(signal_r*signal_r + signal_i*signal_i, dim=1))
        #normalize2 = torch.sqrt(torch.sum(noise_r*noise_r + noise_i*noise_i, dim=1))
        
        ##Add noise and normalise.
        alphas[:,0::2] = 10 * signal_r /normalize1[:, np.newaxis] + noi* torch.randn((size,200), device=device)
        alphas[:,1::2] = 10 * signal_i /normalize1[:, np.newaxis] + noi* torch.randn((size,200), device=device)

        xr = np.zeros((size, 1), 'd')
        xr = xs.detach().cpu().double().numpy()

    del xs

    for r in region:
        xr[:,0] = (xr[:,0] - r[0]) / (r[1] - r[0])
    
    
    ##i is index of the bins for each value in the input array
    i = np.digitize(xr[:,0], xstops, False) - 1
    i[i == -1] = 0; i[i == qdim] = qdim - 1
    px = np.zeros((size, qdim), 'd'); px[range(size), i] = 1

    if varall:
        return xr, px, alphas
    else:
        return xr[:,0], px, alphas

In [10]:
%%time
syntrain(1)

[[0.0001    ]
 [0.00010018]
 [0.00010036]
 [0.00010053]
 [0.00010072]
 [0.0001009 ]
 [0.00010108]
 [0.00010126]
 [0.00010145]
 [0.00010164]
 [0.00010182]
 [0.00010201]
 [0.0001022 ]
 [0.0001024 ]
 [0.00010259]
 [0.00010278]
 [0.00010298]
 [0.00010318]
 [0.00010338]
 [0.00010358]
 [0.00010378]
 [0.00010398]
 [0.00010418]
 [0.00010439]
 [0.0001046 ]
 [0.00010481]
 [0.00010502]
 [0.00010523]
 [0.00010544]
 [0.00010566]
 [0.00010587]
 [0.00010609]
 [0.00010631]
 [0.00010653]
 [0.00010676]
 [0.00010698]
 [0.00010721]
 [0.00010744]
 [0.00010767]
 [0.0001079 ]
 [0.00010814]
 [0.00010837]
 [0.00010861]
 [0.00010885]
 [0.00010909]
 [0.00010934]
 [0.00010958]
 [0.00010983]
 [0.00011008]
 [0.00011033]
 [0.00011059]
 [0.00011084]
 [0.0001111 ]
 [0.00011136]
 [0.00011162]
 [0.00011189]
 [0.00011216]
 [0.00011243]
 [0.0001127 ]
 [0.00011297]
 [0.00011325]
 [0.00011353]
 [0.00011381]
 [0.0001141 ]
 [0.00011439]
 [0.00011468]
 [0.00011497]
 [0.00011526]
 [0.00011556]
 [0.00011586]
 [0.00011617]
 [0.00

NameError: name 'f' is not defined

In [None]:
# def syntrain(snr=[8,12], size=100000, varx='Mc', nets=(ar, ai), seed=None, noise=1, varall=False,
#              region=[[0.2,0.5], [0.2,0.25], [-1,1], [-1,1]], single=True):
#   """Makes a training set using the ROMAN NN. It returns labels (for `varx`,
#   or for all if `varall=True`), indicator vectors, and ROM coefficients
#   (with `snr` and `noise`). Note that the coefficients are kept on the GPU.
#   Parameters are sampled randomly within `region`."""

#   device = 'cuda:0' if torch.cuda.is_available() else 'cpu:0'

#   if seed is not None:
#     np.random.seed(seed)
#     torch.manual_seed(seed)

#   with torch.no_grad():
#     xs = torch.zeros((size,4), dtype=torch.float, device=device)
    
#     for i,r in enumerate(region):
#       xs[:,i] = r[0] + (r[1] - r[0])*torch.rand((size,), dtype=torch.float, device=device)
    
#     # handle banks with reduced dimensionality 
#     for i in range(len(region),4):
#       xs[:,i] = 0.0

#     snrs = numpy2cuda(np.random.uniform(*snr,size=size))
      
#     alphas = torch.zeros((size, 241*2), dtype=torch.float if single else torch.double, device=device)

#     alphar, alphai = nets[0](xs), nets[1](xs)
#     norm = torch.sqrt(torch.sum(alphar*alphar + alphai*alphai,dim=1))
 
#     alphas[:,0::2] = snrs[:,np.newaxis] * alphar / norm[:,np.newaxis] + noise * torch.randn((size,241), device=device)
#     alphas[:,1::2] = snrs[:,np.newaxis] * alphai / norm[:,np.newaxis] + noise * torch.randn((size,241), device=device)
  
#   xr = np.zeros((size, 5),'d')
#   xr[:,:4] = xs.detach().cpu().double().numpy()
#   xr[:,4] = snrs.detach().cpu()
  
#   del xs, alphar, alphai, norm

#   # normalize (for provided regions)
#   for i, r in enumerate(region):
#     xr[:,i] = (xr[:,i] - r[0]) / (r[1] - r[0])

#   if isinstance(varx, list):
#     ix = ['Mc','nu','chi1','chi2'].index(varx[0])
#     jx = ['Mc','nu','chi1','chi2'].index(varx[1])    

#     i = np.digitize(xr[:,ix], xstops, False) - 1
#     i[i == -1] = 0; i[i == qdim] = qdim - 1
#     px = np.zeros((size, qdim), 'd'); px[range(size), i] = 1

#     j = np.digitize(xr[:,jx], xstops, False) - 1
#     j[j == -1] = 0; j[j == qdim] = qdim - 1
#     py = np.zeros((size, qdim), 'd'); py[range(size), j] = 1

#     if varall:
#       return xr, np.einsum('ij,ik->ijk', px, py), alphas
#     else:
#       return xr[:,[ix,jx]], np.einsum('ij,ik->ijk', px, py), alphas    
#   else:
#     ix = ['Mc','nu','chi1','chi2'].index(varx)
  
#     i = np.digitize(xr[:,ix], xstops, False) - 1
#     i[i == -1] = 0; i[i == qdim] = qdim - 1
#     px = np.zeros((size, qdim), 'd'); px[range(size), i] = 1
  
#     if varall:
#       return xr, px, alphas
#     else:
#       return xr[:,ix], px, alphas

In [11]:
def syntrainer(net, syntrain, lossfunction=None, iterations=300, 
               batchsize=None, initstep=1e-3, finalv=1e-5, clipgradient=None, validation=None,
               seed=None, single=True):
  """Trains network NN against training sets obtained from `syntrain`,
  iterating at most `iterations`; stops if the derivative of loss
  (averaged over 20 epochs) becomes less than `finalv`."""

  if seed is not None:
    np.random.seed(seed)
    torch.manual_seed(seed)

  indicatorloss = 'l' in lossfunction.__annotations__ and lossfunction.__annotations__['l'] == 'indicator'  
  
  if validation is not None:
    raise NotImplementedError
    
    vlabels = numpy2cuda(validation[1] if indicatorloss else validation[0], single)
    vinputs = numpy2cuda(validation[2], single)
  
  optimizer = optim.Adam(net.parameters(), lr=initstep)

  training_loss, validation_loss = [], []
  
  for epoch in range(iterations):
    t0 = time.time()

    xtrue, indicator, inputs = syntrain()
    labels = numpy2cuda(indicator if indicatorloss else xtrue, single)

    if batchsize is None:
      batchsize = inputs.shape[0]
    batches = inputs.shape[0] // batchsize

    averaged_loss = 0.0    
    
    for i in range(batches):
      # zero the parameter gradients
      optimizer.zero_grad()

      # forward + backward + optimize
      outputs = net(inputs[i*batchsize:(i+1)*batchsize])
      loss = lossfunction(outputs, labels[i*batchsize:(i+1)*batchsize])
      loss.backward()
      
      if clipgradient is not None:
        torch.nn.utils.clip_grad_norm_(net.parameters(), clipgradient)
      
      optimizer.step()

      # print statistics
      averaged_loss += loss.item()

    training_loss.append(averaged_loss/batches)

    if validation is not None:
      loss = lossfunction(net(vinputs), vlabels)
      validation_loss.append(loss.detach().cpu().item())

    if epoch == 1:
      print("One epoch = {:.1f} seconds.".format(time.time() - t0))

    if epoch % 50 == 0:
      print(epoch,training_loss[-1],validation_loss[-1] if validation is not None else '')

    try:
      if len(training_loss) > iterations/10:
        training_rate = np.polyfit(range(20), training_loss[-20:], deg=1)[0]
        if training_rate < 0 and training_rate > -finalv:
          print(f"Terminating at epoch {epoch} because training loss stopped improving sufficiently: rate = {training_rate}")
          break

      if len(validation_loss) > iterations/10:
        validation_rate = np.polyfit(range(20), validation_loss[-20:], deg=1)[0]        
        if validation_rate > 0:
          print(f"Terminating at epoch {epoch} because validation loss started worsening: rate = {validation_rate}")
          break
    except:
      pass
          
  print("Final",training_loss[-1],validation_loss[-1] if validation is not None else '')
      
  if hasattr(net,'steps'):
    net.steps += iterations
  else:
    net.steps = iterations

In [16]:
dimensions = [200*2] + [1024]*8 + [1*6]
percival_network = makenet(dimensions, softmax=False)

network_to_use = percival_network()

##Training data to pass through Percival network
training_data = lambda: syntrain(size=1000, varx='Mc')

##Train Percival network on above data

In [17]:
%%time
##training the network
syntrainer(network_to_use, training_data, lossfunction=lossG1, iterations=5000,
           initstep=1e-4, finalv=1e-8)

0 109.65727233886719 
One epoch = 0.4 seconds.
50 -3.1678547859191895 
100 -7.168209552764893 
150 -10.986236572265625 
200 -19.550262451171875 
250 -7.834001064300537 
300 -9.823049545288086 
350 -14.58784008026123 
400 -18.5667781829834 
450 -21.263141632080078 
500 -23.868864059448242 
550 -23.86467170715332 
600 -26.513507843017578 
650 -30.551664352416992 
700 -24.40126609802246 
750 -12.856674194335938 
800 -30.910032272338867 
850 -31.389015197753906 
900 -38.95603942871094 
950 -31.47637176513672 
1000 -36.50532150268555 
1050 -7.687808513641357 
1100 -3.475628614425659 
1150 -3.3763017654418945 
1200 -3.388878583908081 
1250 -3.410987377166748 
1300 -3.5615220069885254 
1350 -3.5643470287323 
1400 -3.6366965770721436 
1450 -3.7702858448028564 
1500 -3.783756971359253 
1550 -3.88922119140625 
1600 -4.054741382598877 
1650 -3.949733018875122 
1700 -4.057702541351318 
1750 -4.3162150382995605 
1800 -4.442042350769043 
1850 -4.572079181671143 
1900 -4.877509117126465 
1950 -5.1480

KeyboardInterrupt: 

In [None]:
PATH = "C:\Users\Luke\year-4-project-lisa\Luke"

In [None]:
#torch.save(network_to_use.state_dict(), PATH + '') #insert name for net e.g #percival/Mc-nu_l1024x8_g1_SNR8-16_2d.pt