In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets
import torchvision.transforms as transforms
import numpy as np
import math
import time
import torch.nn.functional as F
from torch.utils.data import DataLoader
from NN_arch import LSTM_sMNIST, LeNet, FCP, Autoencoder
def set_seed(seed):
    """Seed the PyTorch (CPU and CUDA) and NumPy global RNGs with ``seed``.

    Also switches cuDNN to deterministic mode and turns its benchmark
    auto-tuner off.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers the multi-GPU case
        np.random.seed,
    )
    for seed_rng in seeders:
        seed_rng(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
class FourierPathNN(nn.Module):
    """Parametric path between two fixed weight vectors.

    For a path parameter ``t`` the module returns

        (1 - t) * x1 + t * x2 + sum_{n=1..num_terms} b_n * sin(n * pi * t)

    where ``x1`` and ``x2`` are the flattened endpoints and the rows ``b_n``
    are the only learnable parameters. The sine terms are mathematically zero
    at t = 0 and t = 1, so the endpoints stay (up to rounding) at x1 and x2.
    """

    def __init__(self, x1, x2, num_terms=25):
        super().__init__()
        self.num_terms = num_terms
        # Endpoints are registered as buffers: they move with the module but
        # are not returned by parameters().
        self.register_buffer('x1', x1.flatten())
        self.register_buffer('x2', x2.flatten())

        # One row of sine coefficients per harmonic; all zeros means the
        # initial path is the straight line between the endpoints.
        self.b = nn.Parameter(torch.zeros(num_terms, x1.numel()))

    def forward(self, t_values):
        """Evaluate the path at every entry of ``t_values``.

        Returns a tensor with one row per t value and one column per weight.
        """
        t_column = t_values.view(-1, 1)
        # Base term a_0: linear interpolation between the two endpoints.
        straight_line = (1 - t_column) * self.x1 + t_column * self.x2

        correction = torch.zeros_like(straight_line)
        for harmonic in range(1, self.num_terms + 1):
            coefficients = self.b[harmonic - 1].view(1, -1)
            correction += coefficients * torch.sin(harmonic * torch.pi * t_column)

        path_points = straight_line + correction
        return path_points.view(-1, *self.x1.shape)

def loss_fn_simplified(weights,model,model_name, device, images, labels,b_gradients,num_terms,t_values):
    """
    Compute the loss along a weight path by injecting each path point into the model.

    For every point ``weights[i]`` the flat vector is sliced into the model's
    parameters (in ``model.parameters()`` order), the loss on ``images`` is
    evaluated and back-propagated, and the resulting flat gradient is projected
    onto the sine basis: ``b_gradients[n] += grad * sin((n + 1) * pi * t_values[i])``.

    Args:
        weights: Indexable collection of flat weight vectors, one per path point.
        model: Network whose parameters are overwritten with each path point.
        model_name: "AE" selects MSE loss against ``images``; any other value
            selects cross entropy against ``labels``.
        device: Kept for interface compatibility; not used. Gradients are
            gathered on whatever device the model parameters live on.
        images: Input batch fed to ``model``.
        labels: Targets for the cross-entropy case.
        b_gradients: Accumulator indexed by harmonic; updated in place.
        num_terms: Number of sine harmonics to accumulate.
        t_values: Path parameter per point; ``t_values[i]`` must broadcast
            against the flat gradient.

    Returns:
        Tuple ``(total_loss, Data_array, Length, b_gradients, smoothness)``:
        the summed loss over all points (detached from autograd), a NumPy array
        of per-point losses, the path length as a float (sum of segment norms),
        the same ``b_gradients`` object, and ``0.001`` times the sum of squared
        segment norms (still attached to the graph of ``weights``).
    """
    set_seed(42)
    model.train()
    if model_name == "AE": #MSE Loss for Autoencoder, cross entropy for other.
        criterion = nn.MSELoss()
    else:
        criterion = nn.CrossEntropyLoss()

    num_points = len(weights)
    Data_array = np.zeros(num_points)
    total_loss = 0.0
    smoothness_loss = 0.0
    Length = 0.0

    # Each segment norm is computed once and reused for both the squared
    # smoothness penalty and the plain path length.
    for j in range(num_points - 1):
        segment_norm = torch.norm(weights[j+1] - weights[j])
        smoothness_loss += segment_norm**2
        Length += segment_norm.item()

    for i in range(num_points):
        # Only the values are needed here; the data-loss gradient w.r.t. the
        # path coefficients is accumulated by hand below, not through autograd.
        flat_weights = weights[i].detach()
        index = 0
        for param in model.parameters():
            param.data = flat_weights[index:index + param.numel()].reshape(param.shape)
            index += param.numel()
        model.zero_grad()
        outputs = model(images)
        target = images if model_name == "AE" else labels
        loss = criterion(outputs, target)
        loss.backward() #compute gradient

        # A single concatenation keeps the gradient on the parameters' own
        # device instead of depending on a globally probed CUDA device.
        flattened_gradients = torch.cat([param.grad.flatten() for param in model.parameters()])
        for n in range(num_terms):
            b_gradients[n] += flattened_gradients*torch.sin((n+1)*np.pi*t_values[i])

        # Detach so the running total does not keep every per-point graph alive.
        total_loss += loss.detach()
        Data_array[i] = loss.item()
    return total_loss,Data_array,Length,b_gradients,0.001*smoothness_loss



def train_fourier_nn_simplified(path_model,model, model_name, images, labels, num_terms, num_steps=100, lr=0.001, num_points=51):
    """Optimise the sine coefficients of ``path_model`` with Adam.

    At every step the path is sampled on an evenly spaced grid of t values in
    [0, 1]. The smoothness penalty returned by ``loss_fn_simplified`` is
    back-propagated through autograd, and the manually accumulated data-loss
    gradient is added to ``path_model.b.grad`` before the optimiser step.

    Args:
        path_model: Module exposing a learnable ``b`` and mapping t values to
            flat weight vectors.
        model: Network whose loss is evaluated along the path.
        model_name: Forwarded to ``loss_fn_simplified`` to pick the criterion.
        images: Input batch forwarded to ``loss_fn_simplified``.
        labels: Targets forwarded to ``loss_fn_simplified``.
        num_terms: Number of sine harmonics to accumulate gradients for.
        num_steps: Number of optimiser steps.
        lr: Adam learning rate.
        num_points: Number of grid points, endpoints included (default 51).

    Returns:
        ``(Min_array, Min_Length)``: per-point losses and path length of the
        step with the lowest total loss, as measured before that step's
        update. Both are ``None`` when ``num_steps`` is 0.
    """
    # Sample t on the device the path model already lives on, so the grid can
    # never end up on a different device than the path's buffers.
    device = path_model.b.device
    optimizer = optim.Adam(path_model.parameters(), lr=lr) #Can also be SGD or rmsprop
    t_values = torch.linspace(0, 1, num_points).unsqueeze(1).to(device)

    # Pre-initialised so that num_steps == 0 returns cleanly instead of
    # raising UnboundLocalError.
    Min_Loss = None
    Min_array = None
    Min_Length = None
    for step in range(num_steps):
        optimizer.zero_grad()
        path_weights = path_model(t_values)  # Generate weights with time t along the opt path.
        b_gradients = torch.zeros_like(path_model.b)
        loss,Data_array,L,b_gradients,smoothness_loss = loss_fn_simplified(path_weights,model,model_name, device,images, labels,b_gradients,num_terms,t_values)
        # Autograd supplies the smoothness part of b.grad; the data-loss part
        # was computed by hand and is added on top.
        smoothness_loss.backward()
        path_model.b.grad.add_(b_gradients)
        optimizer.step()
        if Min_Loss is None or Min_Loss > loss:
            Min_array = Data_array
            Min_Loss = loss
            Min_Length = L
        print(f"Loss: {loss:.6f}","Length:", L)
    return Min_array,Min_Length

def main_simplified(x1,x2,model_name,data_set):
    """Optimise a Fourier path between two weight vectors on MNIST.

    Args:
        x1: Weight tensor used as the path start point.
        x2: Weight tensor used as the path end point.
        model_name: One of "LN", "FCP", "AE" or "LSTM"; selects the network.
            For "AE" every image is additionally flattened to a vector.
        data_set: "Test" uses the MNIST test split; any other value uses the
            training split.

    Returns:
        ``(Min_path, L)`` exactly as returned by ``train_fourier_nn_simplified``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Fail fast: previously an unknown name only surfaced as a NameError on
    # `model`, after the whole dataset had already been loaded.
    known_models = ("LN", "FCP", "AE", "LSTM")
    if model_name not in known_models:
        raise ValueError(f"Unknown model_name {model_name!r}; expected one of {known_models}")

    # Dataset Preparation
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if model_name == "AE":
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x.view(-1))])
    else:
        transform = transforms.Compose([transforms.ToTensor()])

    # The two splits differ only in the `train` flag.
    use_train_split = data_set != "Test"
    trainset = datasets.MNIST(root='./data', train=use_train_split, download=True, transform=transform)
    # batch_size=len(trainset): the loader yields the whole split as one batch.
    trainloader = DataLoader(trainset, batch_size=len(trainset), shuffle=True)
    images, labels = next(iter(trainloader))
    images, labels = images.to(device), labels.to(device)
    num_terms=10 #10 terms in truncated Fourier series

    #Generate initial path
    fourier_nn = FourierPathNN(x1, x2, num_terms=num_terms).to(device)

    # Landscape used for pathfinding
    if model_name == "LN" :
        model = LeNet()
    elif model_name == "FCP" :
        model = FCP()
    elif model_name == "AE" :
        model = Autoencoder()
    else:  # "LSTM" -- the only remaining validated name
        model = LSTM_sMNIST()
    # Keep the model on the same device as its inputs. Parameters are
    # overwritten from the path during training, but any non-parameter state
    # the architecture may hold would otherwise stay on the CPU.
    model = model.to(device)

    # Train the FourierPathNN
    Min_path,L=train_fourier_nn_simplified(fourier_nn,model,model_name,images, labels,num_terms)
    return Min_path,L

# Available architectures and dataset splits; the indices below pick the run.
model_name_array=["FCP","LN","AE","LSTM"] #Select which architecture
data_set_array=["Test","Train"]
model_name=model_name_array[0]

# Stored weight file for each architecture name.
weight_files = {
    "FCP": "FC_BFGS_Training_best48_weights.npy",
    "LN": "LN_BFGS_Training_best48_weights.npy",
    "AE": "AE_BFGS_Training_best48_weights.npy",
    "LSTM": "LSTM_BFGS_Training_best48_weights.npy",
}
if model_name in weight_files:
    x_array=np.load(weight_files[model_name])

set_seed(0)
#pick start and end points: the first two stored weight vectors
x1, x2 = (torch.from_numpy(x_array[row]) for row in (0, 1))
main_simplified(x1,x2,model_name,data_set_array[1])

Loss: 475.563049 Length: 68.32694578170776
Loss: 357.752014 Length: 68.8485255241394
Loss: 264.756348 Length: 69.38675284385681
Loss: 196.999329 Length: 70.05835568904877
Loss: 150.284332 Length: 70.8364908695221
Loss: 119.987793 Length: 71.72914159297943
Loss: 100.173187 Length: 72.67347288131714
Loss: 86.302536 Length: 73.6369047164917
Loss: 76.050720 Length: 74.61664998531342
Loss: 68.294945 Length: 75.59973192214966
Loss: 62.156147 Length: 76.56736087799072
Loss: 56.861256 Length: 77.51413536071777
Loss: 52.047848 Length: 78.4438978433609
Loss: 47.754478 Length: 79.3578907251358
Loss: 44.029400 Length: 80.25340187549591
Loss: 40.807632 Length: 81.12836050987244
Loss: 37.953770 Length: 81.98033511638641
Loss: 35.342098 Length: 82.8065196275711
Loss: 32.940788 Length: 83.60471057891846
Loss: 30.796051 Length: 84.37524616718292
Loss: 28.948427 Length: 85.11936092376709
Loss: 27.379807 Length: 85.83615911006927
Loss: 26.007095 Length: 86.5222897529602
Loss: 24.739134 Length: 87.1744177

(array([1.08659233e-08, 4.49384970e-07, 4.12673435e-05, 7.63868215e-04,
        2.95695895e-03, 6.45107450e-03, 1.08467238e-02, 1.59296021e-02,
        2.21172329e-02, 3.06826755e-02, 4.29794565e-02, 5.88968471e-02,
        7.62456506e-02, 9.43317041e-02, 1.13942496e-01, 1.34630710e-01,
        1.54949337e-01, 1.76157534e-01, 2.02274844e-01, 2.33991459e-01,
        2.68712372e-01, 3.02353710e-01, 3.32254380e-01, 3.54883075e-01,
        3.72154951e-01, 3.76967698e-01, 3.57264251e-01, 3.24091822e-01,
        2.90929466e-01, 2.60786235e-01, 2.31080890e-01, 2.01969817e-01,
        1.77981094e-01, 1.58683077e-01, 1.41387358e-01, 1.24401800e-01,
        1.07049510e-01, 8.97246450e-02, 7.25202486e-02, 5.60392402e-02,
        4.13657390e-02, 2.97917333e-02, 2.13797204e-02, 1.52749233e-02,
        1.02849100e-02, 5.86445164e-03, 2.36677355e-03, 5.19371824e-04,
        2.80024105e-05, 3.22261769e-07, 1.09354632e-08]),
 102.4889509677887)

In [3]:
# Show the first stored weight vector, the one used above as the path start point x1.
print(x_array[0])

[ 0.01798414 -0.02193643 -0.03122183 ...  0.8760873  -1.2382613
 -0.14082627]


In [3]:
import sys
import torch
import torchvision
import numpy as np

# Report the interpreter and library versions this notebook was run with.
version_report = (
    ("Python:", sys.version.split()[0]),
    ("PyTorch:", torch.__version__),
    ("torchvision:", torchvision.__version__),
    ("NumPy:", np.__version__),
    ("CUDA:", torch.version.cuda),
    ("CUDA available:", torch.cuda.is_available()),
)
for label, value in version_report:
    print(label, value)

Python: 3.10.18
PyTorch: 2.8.0+cpu
torchvision: 0.23.0+cpu
NumPy: 2.1.2
CUDA: None
CUDA available: False
