In [30]:
# Loading training and test data
import torch

data_tr = torch.load("training_data_nonlinear.txt")
target_tr = torch.load("training_targets_nonlinear.txt")
data_test = torch.load("test_data_nonlinear.txt")
target_test = torch.load("test_targets_nonlinear.txt")

num_samples = 512
num_samples_test = 100 # number of test samples
# new_dim1 = 28 * 1 # first dimension
# new_dim2 = 28 * 1 # second dimension
# old_dim = 28 # MNIST original dimension

new_dim1 = 32 # first dimension
new_dim2 = 32 # second dimension
old_dim = 32 # CIFAR original dimension

print(data_tr.shape)


torch.Size([512, 1, 32, 32])


In [48]:
import random
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import math
from matplotlib.colors import LogNorm
import numpy as np
import matplotlib.pyplot as plt

# Two-layer linear convolutional neural network
output_channels = 1
class Net(nn.Module):
    def __init__(self, ker_size1, ker_size2, output_channels):
        super(Net, self).__init__()
        self.ker_size1 = ker_size1
        self.ker_size2 = ker_size2
        self.output_channels = output_channels
        self.conv1 = nn.Conv2d(1, output_channels, kernel_size=(self.ker_size1, self.ker_size2), bias=False) 
        self.fc1 = nn.Linear(int(new_dim1 * new_dim2 * output_channels), 1, bias=True)


    def forward(self, x):
        y1 = F.pad(x, (0,self.ker_size2-1,0,self.ker_size1-1), mode='circular') # Circular padding 
        y1 = self.conv1(y1)
        y1 = F.relu(y1) # ReLU activations
        y1 = y1.reshape(y1.size(0), -1)
        y1 = self.fc1(y1) 
        return y1

    def initialize(self, initialization_scale, ker_size1):
        nn.init.normal_(self.fc1.weight, mean=0.0, std=initialization_scale/np.sqrt(new_dim1))
        nn.init.normal_(self.conv1.weight, mean=0.0, std=initialization_scale/np.sqrt(ker_size1))


output = torch.zeros((num_samples, 1))
output = output.float()
output_test = torch.zeros((num_samples_test, 1))
output_test = output.float()


# Batch gradient descent
def train_minibatch(network, optimizer):
    minibatch_size = 512
    num_batch = int(num_samples/minibatch_size)
    for i in range(num_batch):
        network.train()
        optimizer.zero_grad()
        start_index = i * minibatch_size
        end_index = start_index + minibatch_size
        output = network(data_tr[start_index:end_index])
        loss = torch.sum(torch.exp(-1 * torch.mul(output.flatten(), target_tr[start_index:end_index]))) / minibatch_size
        loss.backward()
        optimizer.step()

# Evaluate training data loss
def train_eval(network):
    network.eval()
    train_loss = 0
    correct = 0
    with torch.no_grad():
        output = network(data_tr)
        train_loss = torch.sum(torch.exp(-1 * torch.mul(output.flatten(), target_tr)))
        pred = output.apply_(lambda x: 1 if x > 0 else -1)
        correct += pred.eq(target_tr.data.view_as(pred)).sum()
    train_loss /= num_samples
    print('\nTrain set: Avg. loss: {:.9f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    train_loss, correct, num_samples,
    100. * correct / num_samples))
    return train_loss

def test(network):
    network.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        output_test = network(data_test)
        test_loss = torch.sum(torch.exp(-1 * torch.mul(output_test.flatten(), target_test)))
        pred = output_test.apply_(lambda x: 1 if x > 0 else -1)
        correct += pred.eq(target_test.data.view_as(pred)).sum()
    test_loss /= num_samples_test
    accuracy = 100. * correct / num_samples_test
    losses = test_loss
    return (accuracy, losses)


# Get the information about beta
def extract_info(network, show_photo): 

  # Compute beta for linear CNNs
    beta_test = np.zeros((new_dim1,new_dim2))
    for i in range(new_dim1):
        for j in range(new_dim2):
            tempimg = torch.zeros((1,1,new_dim1, new_dim2))
            tempimg[0,0,i,j]=1
            beta_test[i,j] = network(tempimg)  

  # Compute margin
    with torch.no_grad():
        network.eval()
        output_np = np.ndarray.flatten(network(data_tr).data.numpy())
        target_np = np.ndarray.flatten(target_tr.data.numpy())
        margins = [target_np[i] * output_np[i] for i in range(num_samples)]
        min_margin = min(margins) # get the minimum margin for any datapoint 


    # Compute R(beta)
    w1 = network.conv1.weight.detach().numpy()
    w2 = network.fc1.weight.detach().numpy()
    w1_norm_sq = np.sum(np.square(w1))
    w2_norm_sq = np.sum(np.square(w2))
    print(w1_norm_sq, w2_norm_sq)
    Rbeta = (np.sum(np.square(w1)) + np.sum(np.square(w2))) * np.sqrt(new_dim1 * new_dim2)


    # Normalize by margin 
    beta_test = beta_test / min_margin # normalize to have margin 1
    hat_beta = np.absolute(np.fft.fft2(beta_test,norm='ortho'))
    Rbeta = Rbeta / min_margin

    print("l2 norm: " + str(2 * np.sqrt(new_dim1 * new_dim2)* np.linalg.norm(beta_test, ord="fro")))
    print("l1 norm: " + str(2 * np.sum(hat_beta)))
    print("Rbeta: " + str(Rbeta))

    if show_photo:
        print("Time domain:")
        plt.imshow(np.absolute(beta_test), cmap='gray')
        plt.show()
        print("Frequency domain:")
        plt.imshow(np.absolute(hat_beta), cmap='gray', norm=LogNorm(vmin=0.0001, vmax=0.08))
        plt.show()
  
    return (Rbeta, beta_test)


In [50]:
# Train and extract info about beta
import seaborn as sns
n_epochs = 100000
learning_rate_start = 0.001
momentum = 0.3
initialization_scale = 0.01


from tqdm import tqdm_notebook as tqdm
def experiment(ker_size1, ker_size2, output_channels):
  # print(class1, class2)
    network = Net(ker_size1, ker_size2, output_channels)
    network.initialize(initialization_scale, ker_size1)
    optimizer =  optim.SGD(network.parameters(), lr=learning_rate_start, momentum=momentum)
    print("Before training:")
    train_eval(network)
    extract_info(network, False)
    # test()
    
    print("Start training:")
    for epoch in tqdm(range(1, n_epochs + 1)):
        train_minibatch(network, optimizer)
        if epoch % 100 == 0:
            loss = train_eval(network)
            if loss <= 0.000001: # stop at 10^-6 loss 
                break
            extract_info(network, False)
        # After enough epochs, change the learning rate to be higher to expedite convergence

        if epoch == 200 == 0:
            optimizer =  optim.SGD(network.parameters(), lr=0.005, momentum=momentum)
            print("Learning rate change")
              # optimizer =  optim.SGD(network.parameters(), lr=0.001, momentum=momentum)

        if epoch == 500:
            optimizer = optim.SGD(network.parameters(), lr=0.01, momentum=momentum)
            print("Learning rate change")
              # optimizer =  optim.SGD(network.parameters(), lr=0.001, momentum=momentum)

#         if epoch == 1000:
#             optimizer = optim.SGD(network.parameters(), lr=0.05, momentum=momentum)
              # print("Learning rate change")
#               optimizer =  optim.SGD(network.parameters(), lr=0.005, momentum=momentum)

#         if epoch == 2000:
#             optimizer = optim.SGD(network.parameters(), lr=0.1, momentum=momentum)
#               # optimizer =  optim.SGD(network.parameters(), lr=0.007, momentum=momentum)

#         if epoch == 1500:
#             optimizer = optim.SGD(network.parameters(), lr=0.5, momentum=momentum)

#         if epoch == 2000:
#             optimizer = optim.SGD(network.parameters(), lr=1, momentum=momentum)
#               # optimizer = optim.SGD(network.parameters(), lr=0.01, momentum=momentum)

#         if epoch == 3000:
#             optimizer = optim.SGD(network.parameters(), lr=2, momentum=momentum)
#               # optimizer = optim.SGD(network.parameters(), lr=0.01, momentum=momentum)

#         if epoch == 4000:
#             optimizer = optim.SGD(network.parameters(), lr=4, momentum=momentum)
#               # optimizer = optim.SGD(network.parameters(), lr=0.01, momentum=momentum)

#         if epoch == 4500:
#             optimizer = optim.SGD(network.parameters(), lr=10, momentum=momentum)

#         if epoch == 5000:
#             optimizer = optim.SGD(network.parameters(), lr=20, momentum=momentum)


        if epoch % 500 == 0:
            print(test(network))

    print("After training:")
    train_eval(network)
    (accuracy, losses) = test(network)
    print(accuracy, losses)

    (rk, beta) = extract_info(network, True)

    return (rk, beta)



In [None]:
# Run experiments and write data to a CSV
import pandas as pd


k_vals = [1, 3, 5, 8]
Cout = [1, 2, 3, 4]

for k in k_vals:
    pairs = []
    for c in Cout:
        pairs.append((k, c))

    betas = []
    rbetas = []

    for (k, output_channels) in pairs:
        print(k, output_channels)
        (Rbeta, beta) = experiment(k, k, output_channels)
        rbetas.append(Rbeta)
        betas.append(beta)

    # Write rbetas 
    name =  str(k) + "rbeta" + str(Cout) + "nonlinear" + ".csv"
    pd.DataFrame(rbetas).to_csv(name, header=False, index=False)

    # Write betas
    for i in range(len(pairs)):
        beta = betas[i]
        beta = beta_array[j]
        name = str(pairs[i]) + "nonlinear" + ".csv"
        print(str(pairs[i]))
        pd.DataFrame(beta).to_csv(name, header=False, index=False)
    

1 1
Before training:

Train set: Avg. loss: 1.000209451, Accuracy: 256/512 (50%)

4.6750563e-05 0.0031192952
l2 norm: 2023.2466891153192
l1 norm: 64.24449565268931
Rbeta: -4.783556924652907
Start training:


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for epoch in tqdm(range(1, n_epochs + 1)):


  0%|          | 0/100000 [00:00<?, ?it/s]


Train set: Avg. loss: 1.000151277, Accuracy: 256/512 (50%)

4.995738e-05 0.00312251
l2 norm: 2013.9042815808575
l1 norm: 64.14333494613582
Rbeta: -5.502757912693426

Train set: Avg. loss: 1.000105858, Accuracy: 256/512 (50%)

5.4352862e-05 0.0031269137
l2 norm: 2001.218697186725
l1 norm: 63.98539045028906
Rbeta: -6.329005123074215

Train set: Avg. loss: 1.000069737, Accuracy: 256/512 (50%)

6.013327e-05 0.0031327023
l2 norm: 1984.0137106057782
l1 norm: 63.744359847568596
Rbeta: -7.270415401498539

Train set: Avg. loss: 1.000040174, Accuracy: 256/512 (50%)

6.7566594e-05 0.003140144
l2 norm: 1959.7542078294694
l1 norm: 63.35322131644038
Rbeta: -8.332544251293397

Train set: Avg. loss: 1.000015020, Accuracy: 256/512 (50%)

7.699416e-05 0.0031495797
l2 norm: 1924.2101613306138
l1 norm: 62.69081513746132
Rbeta: -9.508846595034832
Learning rate change
(tensor(50.), tensor(1.0000))

Train set: Avg. loss: 0.999687374, Accuracy: 256/512 (50%)

0.0004441902 0.0035176426
l2 norm: 569.1232088688


Train set: Avg. loss: 0.269810796, Accuracy: 494/512 (96%)

18.132988 18.139235
l2 norm: 3032.2303955236907
l1 norm: 1747.1689346935145
Rbeta: -1995.852224037218
(tensor(60.), tensor(76.5075))

Train set: Avg. loss: 0.260555655, Accuracy: 495/512 (97%)

18.780527 18.786808
l2 norm: 3196.5261303099187
l1 norm: 1871.7056640662322
Rbeta: -2139.19032608932

Train set: Avg. loss: 0.251731962, Accuracy: 496/512 (97%)

19.424213 19.430527
l2 norm: 3378.8770293834373
l1 norm: 2008.6183321576245
Rbeta: -2296.757646250534

Train set: Avg. loss: 0.243318170, Accuracy: 498/512 (97%)

20.063663 20.07001
l2 norm: 3530.507982416994
l1 norm: 2128.810994647991
Rbeta: -2435.2805496758024

Train set: Avg. loss: 0.235293388, Accuracy: 499/512 (97%)

20.69855 20.70493
l2 norm: 3598.549892761772
l1 norm: 2199.077585279735
Rbeta: -2516.717318279142

Train set: Avg. loss: 0.227637723, Accuracy: 500/512 (98%)

21.328592 21.335
l2 norm: 3669.1305396018447
l1 norm: 2270.6719878421573
Rbeta: -2599.6676978394403



Train set: Avg. loss: 0.077600762, Accuracy: 511/512 (100%)

43.461353 43.468197
l2 norm: 12140.197892515698
l1 norm: 9361.035007392702
Rbeta: -10787.863230912082

Train set: Avg. loss: 0.076223016, Accuracy: 511/512 (100%)

43.85162 43.85847
l2 norm: 12629.223539446288
l1 norm: 9755.958264895195
Rbeta: -11243.66204780843
(tensor(58.), tensor(204116.5938))

Train set: Avg. loss: 0.074885413, Accuracy: 511/512 (100%)

44.237915 44.244766
l2 norm: 13151.917088819382
l1 norm: 10177.790433688959
Rbeta: -11730.515383532655

Train set: Avg. loss: 0.073586419, Accuracy: 511/512 (100%)

44.620296 44.627148
l2 norm: 13711.87317639371
l1 norm: 10629.415410832424
Rbeta: -12251.751165037893

Train set: Avg. loss: 0.072324529, Accuracy: 511/512 (100%)

44.99883 45.00568
l2 norm: 14313.117034852316
l1 norm: 11114.064514460515
Rbeta: -12811.099555860981

Train set: Avg. loss: 0.071098305, Accuracy: 511/512 (100%)

45.37359 45.380424
l2 norm: 14960.425491479124
l1 norm: 11635.56871077598
Rbeta: -1341


Train set: Avg. loss: 0.038918171, Accuracy: 512/512 (100%)

58.890617 58.89747
l2 norm: 28666.13897382272
l1 norm: 23261.317066114556
Rbeta: 26859.401988896396

Train set: Avg. loss: 0.038488962, Accuracy: 512/512 (100%)

59.144478 59.15133
l2 norm: 27284.291112173418
l1 norm: 22152.619212778045
Rbeta: 25579.871768712277

Train set: Avg. loss: 0.038067870, Accuracy: 512/512 (100%)

59.396477 59.40331
l2 norm: 26042.191850001087
l1 norm: 21155.95327226677
Rbeta: 24429.632367462473
(tensor(60.), tensor(20090868.))

Train set: Avg. loss: 0.037654687, Accuracy: 512/512 (100%)

59.64662 59.653446
l2 norm: 24919.677224736683
l1 norm: 20255.156813124577
Rbeta: 23390.030795839157

Train set: Avg. loss: 0.037249193, Accuracy: 512/512 (100%)

59.89493 59.901764
l2 norm: 23900.094313093847
l1 norm: 19436.885427925383
Rbeta: 22445.67271275458

Train set: Avg. loss: 0.036851220, Accuracy: 512/512 (100%)

60.141445 60.14829
l2 norm: 22969.603345172578
l1 norm: 18690.046446009397
Rbeta: 21583.75382


Train set: Avg. loss: 0.024599787, Accuracy: 512/512 (100%)

69.51572 69.52255
l2 norm: 9602.306247018616
l1 norm: 7943.4697680070785
Rbeta: 9180.65623285828

Train set: Avg. loss: 0.024405938, Accuracy: 512/512 (100%)

69.70094 69.70781
l2 norm: 9499.920129207141
l1 norm: 7860.876591880586
Rbeta: 9085.3135426876

Train set: Avg. loss: 0.024214709, Accuracy: 512/512 (100%)

69.88522 69.89203
l2 norm: 9400.460952840529
l1 norm: 7780.635236060232
Rbeta: 8992.68538614374

Train set: Avg. loss: 0.024026034, Accuracy: 512/512 (100%)

70.06859 70.075226
l2 norm: 9303.818135156691
l1 norm: 7702.657288595176
Rbeta: 8902.670273212405
(tensor(60.), tensor(4.7788e+08))

Train set: Avg. loss: 0.023840019, Accuracy: 512/512 (100%)

70.25072 70.25743
l2 norm: 9209.893418341291
l1 norm: 7626.862822191177
Rbeta: 8815.175694594089

Train set: Avg. loss: 0.023656519, Accuracy: 512/512 (100%)

70.431816 70.43864
l2 norm: 9118.660901422345
l1 norm: 7553.2300695730855
Rbeta: 8730.175507014337

Train set: 

In [21]:
print(num_samples)

128


NameError: name 'classes1' is not defined