In [3]:
# Loading training and test data
import torch

data_tr = torch.load("training-test-data/training_data.txt")
target_tr = torch.load("training-test-data/training_targets.txt")
data_test = torch.load("training-test-data/test_data.txt")
target_test = torch.load("training-test-data/test_targets.txt")

num_samples = 128
num_samples_test = 100 # number of test samples
new_dim1 = 28 * 1 # first dimension
new_dim2 = 28 * 1 # second dimension
old_dim = 28 # MNIST original dimension

print(data_tr.shape)


torch.Size([128, 1, 28, 28])


In [4]:
import random
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import math
from matplotlib.colors import LogNorm
import numpy as np
import matplotlib.pyplot as plt

# Two-layer linear convolutional neural network
output_channels = 1
class Net(nn.Module):
    def __init__(self, ker_size1, ker_size2, output_channels):
        super(Net, self).__init__()
        self.ker_size1 = ker_size1
        self.ker_size2 = ker_size2
        self.output_channels = output_channels
        self.conv1 = nn.Conv2d(1, output_channels, kernel_size=(self.ker_size1, self.ker_size2), bias=True) 
        self.fc1 = nn.Linear(int(new_dim1 * new_dim2 * output_channels), 1, bias=True)


    def forward(self, x):
        y1 = F.pad(x, (0,self.ker_size2-1,0,self.ker_size1-1), mode='circular') # Circular padding 
        y1 = self.conv1(y1)
#         print(y1.shape)
        y1 = F.relu(y1) # ReLU activations
        y1 = y1.reshape(y1.size(0), -1)
        y1 = self.fc1(y1) 
        return y1

    def initialize(self, initialization_scale):
        print("random initialization with bias")
        self.fc1.weight.data.mul_(initialization_scale)
        self.conv1.weight.data.mul_(initialization_scale)
        self.fc1.bias.data.mul_(initialization_scale)
        self.conv1.bias.data.mul_(initialization_scale)
#         nn.init.normal_(self.fc1.weight, mean=0.0, std=initialization_scale/np.sqrt(new_dim1 * new_dim1 * self.output_channels))
#         nn.init.normal_(self.conv1.weight, mean=0.0, std=initialization_scale/np.sqrt(self.ker_size1 * self.ker_size1  * self.output_channels))
#         print(self.conv1.weight.shape)
#         print(self.conv1.weight.data)
        
    def initialize_nonrandom(self, Uinit, Vinit):
        print("nonrandom")
        print(self.conv1.weight.data.shape)
        print(self.fc1.weight.data.shape)
        self.conv1.weight = torch.nn.Parameter(Uinit)
        self.fc1.weight = torch.nn.Parameter(Vinit)
#         for c in range(self.output_channels):
#             for i in range(self.ker_size1):
#                 for j in range(self.ker_size2):
#                     self.conv1.weight.data[c][0][i][j] = Uinit[i][j]
#             for i in range(new_dim1):
#                 self.fc1.weight.data[0][i + c * new_dim1] = Vinit[i]
#         nn.init.normal_(self.fc1.weight, mean=0.0, std=initialization_scale/np.sqrt(new_dim1))
#         nn.init.normal_(self.conv1.weight, mean=0.0, std=initialization_scale/np.sqrt(ker_size1))

output = torch.zeros((num_samples, 1))
output = output.float()
output_test = torch.zeros((num_samples_test, 1))
output_test = output.float()


# Batch gradient descent
def train_minibatch(network, optimizer):
    minibatch_size = 32
    num_batch = int(num_samples/minibatch_size)
    for i in range(num_batch):
        network.train()
        optimizer.zero_grad()
        start_index = i * minibatch_size
        end_index = start_index + minibatch_size
        output = network(data_tr[start_index:end_index])
        loss = torch.sum(torch.exp(-1 * torch.mul(output.flatten(), target_tr[start_index:end_index]))) / minibatch_size
        loss.backward()
        optimizer.step()

# Evaluate training data loss
def train_eval(network):
    network.eval()
    train_loss = 0
    correct = 0
    with torch.no_grad():
        output = network(data_tr)
        train_loss = torch.sum(torch.exp(-1 * torch.mul(output.flatten(), target_tr)))
        pred = output.apply_(lambda x: 1 if x > 0 else -1)
        correct += pred.eq(target_tr.data.view_as(pred)).sum()
    train_loss /= num_samples
    print('\nTrain set: Avg. loss: {:.9f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    train_loss, correct, num_samples,
    100. * correct / num_samples))
    return train_loss

def test(network):
    network.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        output_test = network(data_test)
        test_loss = torch.sum(torch.exp(-1 * torch.mul(output_test.flatten(), target_test)))
        pred = output_test.apply_(lambda x: 1 if x > 0 else -1)
        correct += pred.eq(target_test.data.view_as(pred)).sum()
    test_loss /= num_samples_test
    accuracy = 100. * correct / num_samples_test
    losses = test_loss
    return (accuracy, losses)


# Get the information about beta
def extract_info(network, show_photo): 

  # Compute beta for linear CNNs
    beta_test = np.zeros((new_dim1,new_dim2))
    for i in range(new_dim1):
        for j in range(new_dim2):
            tempimg = torch.zeros((1,1,new_dim1, new_dim2))
            tempimg[0,0,i,j]=1
            beta_test[i,j] = network(tempimg)  

  # Compute margin
    with torch.no_grad():
        network.eval()
        output_np = np.ndarray.flatten(network(data_tr).data.numpy())
        target_np = np.ndarray.flatten(target_tr.data.numpy())
        margins = [target_np[i] * output_np[i] for i in range(num_samples)]
        min_margin = min(margins) # get the minimum margin for any datapoint 


    # Compute R(beta)
    w1 = network.conv1.weight.detach().numpy()
    w2 = network.fc1.weight.detach().numpy()
    w1_norm_sq = np.sum(np.square(w1))
    w2_norm_sq = np.sum(np.square(w2))
    print(w1_norm_sq, w2_norm_sq)
    Rbeta = (np.sum(np.square(w1)) + np.sum(np.square(w2))) * np.sqrt(new_dim1 * new_dim2)
    Rbeta2 = 2 * new_dim1 * np.linalg.norm(w1) * np.linalg.norm(w2)

    # Normalize by margin 
    beta_test = beta_test / min_margin # normalize to have margin 1
    hat_beta = np.absolute(np.fft.fft2(beta_test,norm='ortho'))
    Rbeta = Rbeta / min_margin
    Rbeta2 = Rbeta2 / min_margin
    ell1 = 2 * np.sum(hat_beta)
    
    print("l2 norm: " + str(2 * np.sqrt(new_dim1 * new_dim2)* np.linalg.norm(beta_test, ord="fro")))
    print("l1 norm: " + str(2 * np.sum(hat_beta)))
    print("Rbeta: " + str(Rbeta))
    print("Rbeta2: " + str(Rbeta2))

    if show_photo:
        print("Time domain:")
        plt.imshow(np.absolute(beta_test), cmap='gray')
        plt.show()
        print("Frequency domain:")
        plt.imshow(np.absolute(hat_beta), cmap='gray', norm=LogNorm(vmin=0.0001, vmax=0.08))
        plt.show()
  
    return (Rbeta, beta_test, ell1)


In [45]:
# Train and extract info about beta
import seaborn as sns
n_epochs = 100000
learning_rate_start = 0.0001
momentum = 0.3
initialization_scale = 0.1
wd = 1e-5

from tqdm import tqdm_notebook as tqdm
def experiment(ker_size1, ker_size2, output_channels, initialization):
  # print(class1, class2)
    network = Net(ker_size1, ker_size2, output_channels)
    network.initialize(initialization_scale)
    (Uinit, Vinit) = initialization
#     print(Uinit)
#     network.initialize_nonrandom(Uinit, Vinit)
    optimizer =  optim.SGD(network.parameters(), lr=learning_rate_start, momentum=momentum, weight_decay=1e-5)
    print("Before training:")
    train_eval(network)
    extract_info(network, False)
    # test()
    lossarray = []
    rbetavals = []
    ell1s = []
    print("Start training:")
    for epoch in tqdm(range(1, n_epochs + 1)):
        train_minibatch(network, optimizer)
        if epoch % 100 == 0:
            lossv = train_eval(network)
            loss = np.ndarray.flatten(lossv.detach().numpy())[0]
            lossarray.append(loss)
            (Rbeta, beta_test, ell1) = extract_info(network, False)
            rbetavals.append(Rbeta)
            ell1s.append(ell1)
#             if loss <= 1:
            if loss <= 0.000001: # stop at 10^-6 loss 
                break
#         # After enough epochs, change the learning rate to be higher to expedite convergence

        if epoch == 200 == 0:
            optimizer =  optim.SGD(network.parameters(), lr=0.005, momentum=momentum, weight_decay=wd)
            print("Learning rate change")
              # optimizer =  optim.SGD(network.parameters(), lr=0.001, momentum=momentum)

        if epoch == 500:
            optimizer = optim.SGD(network.parameters(), lr=0.01, momentum=momentum, weight_decay=wd)
            print("Learning rate change")
              # optimizer =  optim.SGD(network.parameters(), lr=0.001, momentum=momentum)

        if epoch == 1000:
            optimizer = optim.SGD(network.parameters(), lr=0.05, momentum=momentum, weight_decay=wd)
              # print("Learning rate change")
              # optimizer =  optim.SGD(network.parameters(), lr=0.005, momentum=momentum)

        if epoch == 1200:
            optimizer = optim.SGD(network.parameters(), lr=0.1, momentum=momentum, weight_decay=wd)
              # optimizer =  optim.SGD(network.parameters(), lr=0.007, momentum=momentum)

        if epoch == 1500:
            optimizer = optim.SGD(network.parameters(), lr=0.5, momentum=momentum, weight_decay=wd)

        if epoch == 2000:
            optimizer = optim.SGD(network.parameters(), lr=1, momentum=momentum, weight_decay=wd)
              # optimizer = optim.SGD(network.parameters(), lr=0.01, momentum=momentum)

        if epoch == 3000:
            optimizer = optim.SGD(network.parameters(), lr=2, momentum=momentum, weight_decay=wd)
              # optimizer = optim.SGD(network.parameters(), lr=0.01, momentum=momentum)

#         if epoch == 4000:
#             optimizer = optim.SGD(network.parameters(), lr=4, momentum=momentum, weight_decay=wd)
#               # optimizer = optim.SGD(network.parameters(), lr=0.01, momentum=momentum)

#         if epoch == 4500:
#             optimizer = optim.SGD(network.parameters(), lr=10, momentum=momentum, weight_decay=wd)

#         if epoch == 5000:
#             optimizer = optim.SGD(network.parameters(), lr=20, momentum=momentum, weight_decay=wd)
        
#         if epoch == 7000:
#             optimizer = optim.SGD(network.parameters(), lr=40, momentum=momentum, weight_decay=wd)
        
#         if epoch == 8000:
#             optimizer = optim.SGD(network.parameters(), lr=50, momentum=momentum, weight_decay=wd)
            


#         if epoch % 500 == 0:
#             print(test(network))

    print("After training:")
    train_eval(network)
    (accuracy, losses) = test(network)
    print(accuracy, losses)

    (rk, beta, ell1) = extract_info(network, True)

    return (rk, beta, lossarray, rbetavals, ell1s)


    


In [7]:

k = 28
# Cout = [1,3,5,7]
Cout = [1,2, 4,8]
# Cout = [1, 2, 3, 4]
# Cout = [3]
pairs = []
for c in Cout:
    pairs.append((k, c))

betas = []
rbetas = []
losses_all = []
rbetavals_all = []
ell1s_all = []

Uinitial = np.random.normal(0, initialization_scale/np.sqrt(k), (k, k))
Vinitial = np.random.normal(0, initialization_scale/np.sqrt(new_dim1), new_dim1 * new_dim1)


for (k, output_channels) in pairs:
    print(k, output_channels)
    Uinit_torch = torch.zeros((output_channels, 1, k, k))
    Vinit_torch = torch.zeros((1, new_dim1 * new_dim1 * output_channels))
    for c in range(output_channels):
        for i in range(k):
            for j in range(k):
                Uinit_torch.data[c][0][i][j] = Uinitial[i][j] / np.sqrt(output_channels) # Initialize all channels in the same way
        for i in range(new_dim1 * new_dim1):
            Vinit_torch.data[0][i + c * new_dim1 * new_dim1] = Vinitial[i] / np.sqrt(output_channels)
                
    init = (Uinit_torch, Vinit_torch)
    (Rbeta, beta, losses, rbetavals, ell1s) = experiment(k, k, output_channels, init)
    rbetas.append(Rbeta)
    betas.append(beta)
    rbetavals_all.append(rbetavals)
    ell1s_all.append(ell1s)
    losses_all.append(losses)
    
# Write data to a CSV
import pandas as pd
    
# Write rbetas 
name =  "experiments-data/" + str(k) + "rbeta-2-nonlinear-bias2-wd" + str(Cout) + ".csv"
pd.DataFrame(rbetas).to_csv(name, header=False, index=False)

# Write betas, losses, and ell1s
for i in range(len(pairs)):
    beta = betas[i]
    losses = losses_all[i]
    rbetavals = rbetavals_all[i]
    ell1s = ell1s_all[i]
    print(str(pairs[i]))
    name = "experiments-data/" + "-iid-nonlinear-bias2-wd" + str(pairs[i]) + ".csv"
    pd.DataFrame(beta).to_csv(name, header=False, index=False)
    name = "experiments-data/" + "loss-iid-nonlinear-bias2-wd" + str(pairs[i]) + ".csv"
    pd.DataFrame(losses).to_csv(name, header=False, index=False)
    name = "experiments-data/" + "rbetavals-iid-nonlinear-bias2-wd" + str(pairs[i]) + ".csv"
    pd.DataFrame(rbetavals).to_csv(name, header=False, index=False)
    name = "experiments-data/" + "ell1s-iid-nonlinear-bias2-wd" + str(pairs[i]) + ".csv"
    pd.DataFrame(ell1s).to_csv(name, header=False, index=False)

28 1
random initialization with bias
Before training:

Train set: Avg. loss: 0.999999881, Accuracy: 64/128 (50%)

3.3946372e-07 3.336561e-07
l2 norm: 1562.9221146571585
l1 norm: 56.130754975446095
Rbeta: -0.5686848696773349
Rbeta2: -0.5686637181495952
Start training:


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for epoch in tqdm(range(1, n_epochs + 1)):


  0%|          | 0/100000 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [46]:
def run_experiment(k, T, Cout):
    pairs = []
    for c in Cout:
        pairs.append((k, c))

    betas = []
    rbetas = []
    losses_all = []
    rbetavals_all = []
    ell1s_all = []

    Uinitial = np.random.normal(0, initialization_scale/np.sqrt(k), (k, k))
    Vinitial = np.random.normal(0, initialization_scale/np.sqrt(new_dim1), new_dim1 * new_dim1)


    for (k, output_channels) in pairs:
        for t in range(T):
            print(k, output_channels, t)
            Uinit_torch = torch.zeros((output_channels, 1, k, k))
            Vinit_torch = torch.zeros((1, new_dim1 * new_dim1 * output_channels))
            for c in range(output_channels):
                for i in range(k):
                    for j in range(k):
                        Uinit_torch.data[c][0][i][j] = Uinitial[i][j] / np.sqrt(output_channels) # Initialize all channels in the same way
                for i in range(new_dim1 * new_dim1):
                    Vinit_torch.data[0][i + c * new_dim1 * new_dim1] = Vinitial[i] / np.sqrt(output_channels)

            init = (Uinit_torch, Vinit_torch)
            (Rbeta, beta, losses, rbetavals, ell1s) = experiment(k, k, output_channels, init)
            rbetas.append(Rbeta)
            betas.append(beta)
            rbetavals_all.append(rbetavals)
            ell1s_all.append(ell1s)
            losses_all.append(losses)

    # Write data to a CSV
    import pandas as pd


    # Write betas, losses, and ell1s
    for i in range(len(pairs)):
        # Write rbetas 
        rbetas_to_write = []
        for t in range(T):
            index = i *T + t
            rbetas_to_write.append(rbetas[index])
            beta = betas[index]
            losses = losses_all[index]
            rbetavals = rbetavals_all[index]
            ell1s = ell1s_all[index]
            print(str(pairs[i]))
            name = "experiments-data/" + "-iid-nonlinear-bias-wd" + str(pairs[i]) + str(t) +  ".csv"
            pd.DataFrame(beta).to_csv(name, header=False, index=False)
            name = "experiments-data/" + "loss-iid-nonlinear-bias-wd" + str(pairs[i]) + str(t) + ".csv"
            pd.DataFrame(losses).to_csv(name, header=False, index=False)
            name = "experiments-data/" + "rbetavals-iid-nonlinear-bias-wd" + str(pairs[i]) +  str(t) + ".csv"
            pd.DataFrame(rbetavals).to_csv(name, header=False, index=False)
            name = "experiments-data/" + "ell1s-iid-nonlinear-bias-wd" + str(pairs[i]) + str(t)+  ".csv"
            pd.DataFrame(ell1s).to_csv(name, header=False, index=False)
        name =  "experiments-data/" + str(pairs[i]) + "rbeta-2-nonlinear-bias-wd" + ".csv"
        pd.DataFrame(rbetas_to_write).to_csv(name, header=False, index=False)

In [47]:
# Run experiments 
T = 3
Cout =[1,2,4,8]

for k in [1, 3, 8, 16, 28]:
    run_experiment(k, T, Cout)



1 1 0
random initialization with bias
Before training:

Train set: Avg. loss: 1.000117540, Accuracy: 66/128 (52%)

0.008403225 0.0032239256
l2 norm: 561.6109811688538
l1 norm: 46.01073171600378
Rbeta: -46.771090835368774
Rbeta2: -41.87450529238547
Start training:


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for epoch in tqdm(range(1, n_epochs + 1)):


  0%|          | 0/100000 [00:00<?, ?it/s]


Train set: Avg. loss: 0.987675369, Accuracy: 64/128 (50%)

0.00880862 0.003906818
l2 norm: 960.6017460145578
l1 norm: 45.444760806582394
Rbeta: -19.946986241372226
Rbeta2: -18.40523978950929

Train set: Avg. loss: 0.974147975, Accuracy: 64/128 (50%)

0.009980234 0.005957
l2 norm: 1018.4164765765922
l1 norm: 44.34365896044611
Rbeta: -14.963995729438585
Rbeta2: -14.47933928887067

Train set: Avg. loss: 0.957000136, Accuracy: 64/128 (50%)

0.011933016 0.009569131
l2 norm: 1019.0049052962811
l1 norm: 44.6972568999044
Rbeta: -16.721490721634126
Rbeta2: -16.62013407314884

Train set: Avg. loss: 0.933597684, Accuracy: 96/128 (75%)

0.0147699425 0.015219541
l2 norm: 972.9938584077661
l1 norm: 46.37026034544471
Rbeta: -24.977344653102328
Rbeta2: -24.97453610552876

Train set: Avg. loss: 0.900919139, Accuracy: 124/128 (97%)

0.018657994 0.023679383
l2 norm: 693.5760546223748
l1 norm: 53.80304472927342
Rbeta: -65.6792059115765
Rbeta2: -65.21561868573492
Learning rate change

Train set: Avg. loss


Train set: Avg. loss: 0.000004482, Accuracy: 128/128 (100%)

1.5368769 4.907544
l2 norm: 480.2833716537341
l1 norm: 24.54296239333733
Rbeta: 18.600755103535498
Rbeta2: 15.853615736094692

Train set: Avg. loss: 0.000004476, Accuracy: 128/128 (100%)

1.5365398 4.9064965
l2 norm: 479.60893273822467
l1 norm: 24.524713949737333
Rbeta: 18.5860730678624
Rbeta2: 15.841078516701709

Train set: Avg. loss: 0.000004471, Accuracy: 128/128 (100%)

1.5362297 4.905409
l2 norm: 478.94590545389343
l1 norm: 24.506950708020543
Rbeta: 18.57232457728781
Rbeta2: 15.82944261187247

Train set: Avg. loss: 0.000004468, Accuracy: 128/128 (100%)

1.5359116 4.9043055
l2 norm: 478.2899882375964
l1 norm: 24.489424912681873
Rbeta: 18.559368788125184
Rbeta2: 15.818473020116441

Train set: Avg. loss: 0.000004465, Accuracy: 128/128 (100%)

1.5355897 4.9032044
l2 norm: 477.6400345023373
l1 norm: 24.472076070498666
Rbeta: 18.54711813688819
Rbeta2: 15.808095101080525

Train set: Avg. loss: 0.000004462, Accuracy: 128/128 (1


Train set: Avg. loss: 0.000004434, Accuracy: 128/128 (100%)

1.5269065 4.8793373
l2 norm: 455.17402560118114
l1 norm: 23.858143360009386
Rbeta: 18.332050333764748
Rbeta2: 15.621552950392369

Train set: Avg. loss: 0.000004434, Accuracy: 128/128 (100%)

1.5267297 4.879077
l2 norm: 454.7046429193076
l1 norm: 23.84470399485418
Rbeta: 18.329659285727722
Rbeta2: 15.619261026908953

Train set: Avg. loss: 0.000004433, Accuracy: 128/128 (100%)

1.526553 4.8788233
l2 norm: 454.24057362747385
l1 norm: 23.831394463612433
Rbeta: 18.32733543761081
Rbeta2: 15.617019120914938

Train set: Avg. loss: 0.000004433, Accuracy: 128/128 (100%)

1.5263762 4.878578
l2 norm: 453.7804882959487
l1 norm: 23.818170894917245
Rbeta: 18.325072559577592
Rbeta2: 15.61482465303825

Train set: Avg. loss: 0.000004433, Accuracy: 128/128 (100%)

1.5261995 4.8783417
l2 norm: 453.3260763058853
l1 norm: 23.80510061720681
Rbeta: 18.32288627479105
Rbeta2: 15.612686399036049

Train set: Avg. loss: 0.000004432, Accuracy: 128/128 (1


Train set: Avg. loss: 0.000004424, Accuracy: 128/128 (100%)

1.5208647 4.8738546
l2 norm: 439.19906469396165
l1 norm: 23.39299672087174
Rbeta: 18.27096175734654
Rbeta2: 15.557920054294048

Train set: Avg. loss: 0.000004423, Accuracy: 128/128 (100%)

1.5207472 4.873802
l2 norm: 438.93601265252477
l1 norm: 23.385157812525698
Rbeta: 18.270089141047446
Rbeta2: 15.55690277592061

Train set: Avg. loss: 0.000004423, Accuracy: 128/128 (100%)

1.5206295 4.873753
l2 norm: 438.67619544142894
l1 norm: 23.37740730336451
Rbeta: 18.26923718055698
Rbeta2: 15.55590454358279

Train set: Avg. loss: 0.000004423, Accuracy: 128/128 (100%)

1.520512 4.873705
l2 norm: 438.4192309263515
l1 norm: 23.369720237789814
Rbeta: 18.268382231304432
Rbeta2: 15.554900303659256

Train set: Avg. loss: 0.000004423, Accuracy: 128/128 (100%)

1.520382 4.8736563
l2 norm: 438.1667992346335
l1 norm: 23.362152486075395
Rbeta: 18.26754072275829
Rbeta2: 15.553876846575353

Train set: Avg. loss: 0.000004423, Accuracy: 128/128 (100%


Train set: Avg. loss: 0.000004419, Accuracy: 128/128 (100%)

1.5165495 4.8728447
l2 norm: 430.6389855233302
l1 norm: 23.136127393148538
Rbeta: 18.245007515630615
Rbeta2: 15.525084386783329

Train set: Avg. loss: 0.000004419, Accuracy: 128/128 (100%)

1.5164943 4.872836
l2 norm: 430.50448943313046
l1 norm: 23.132124769403486
Rbeta: 18.244672456920064
Rbeta2: 15.524655231679972

Train set: Avg. loss: 0.000004419, Accuracy: 128/128 (100%)

1.5164373 4.8728294
l2 norm: 430.3710988515993
l1 norm: 23.12815651294936
Rbeta: 18.244354734864622
Rbeta2: 15.524239039525746

Train set: Avg. loss: 0.000004419, Accuracy: 128/128 (100%)

1.5163785 4.87283
l2 norm: 430.23966654657426
l1 norm: 23.124235603696974
Rbeta: 18.244041761693808
Rbeta2: 15.523814671488854

Train set: Avg. loss: 0.000004419, Accuracy: 128/128 (100%)

1.5163199 4.8728304
l2 norm: 430.11039532592355
l1 norm: 23.120370582302062
Rbeta: 18.24371596072451
Rbeta2: 15.523379909017049

Train set: Avg. loss: 0.000004419, Accuracy: 128/12


Train set: Avg. loss: 0.000004417, Accuracy: 128/128 (100%)

1.5143332 4.8727665
l2 norm: 426.26221841093616
l1 norm: 23.004910184649493
Rbeta: 18.233724044741574
Rbeta2: 15.50958451420994

Train set: Avg. loss: 0.000004417, Accuracy: 128/128 (100%)

1.5142863 4.8727665
l2 norm: 426.1914839481585
l1 norm: 23.002763901329665
Rbeta: 18.233537865636414
Rbeta2: 15.509302065132964

Train set: Avg. loss: 0.000004417, Accuracy: 128/128 (100%)

1.5142385 4.872768
l2 norm: 426.1217433758414
l1 norm: 23.000646550270446
Rbeta: 18.23335086434295
Rbeta2: 15.509012094317587

Train set: Avg. loss: 0.000004417, Accuracy: 128/128 (100%)

1.5141895 4.8727713
l2 norm: 426.05285336939824
l1 norm: 22.998546198554678
Rbeta: 18.23317013237771
Rbeta2: 15.5087224814503

Train set: Avg. loss: 0.000004417, Accuracy: 128/128 (100%)

1.5141411 4.8727746
l2 norm: 425.98561628653965
l1 norm: 22.996504822322294
Rbeta: 18.23299294708863
Rbeta2: 15.508440565768385

Train set: Avg. loss: 0.000004417, Accuracy: 128/128 


Train set: Avg. loss: 0.000004416, Accuracy: 128/128 (100%)

1.512961 4.872663
l2 norm: 423.98097607072964
l1 norm: 22.936307009355133
Rbeta: 18.22752239882672
Rbeta2: 15.500702589644193

Train set: Avg. loss: 0.000004416, Accuracy: 128/128 (100%)

1.5129317 4.87267
l2 norm: 423.94307701426544
l1 norm: 22.935153725186915
Rbeta: 18.22743143376282
Rbeta2: 15.50053829590758

Train set: Avg. loss: 0.000004416, Accuracy: 128/128 (100%)

1.5129024 4.872677
l2 norm: 423.9054664213847
l1 norm: 22.934000992717326
Rbeta: 18.227328886266918
Rbeta2: 15.500368141761449

Train set: Avg. loss: 0.000004416, Accuracy: 128/128 (100%)

1.512873 4.872685
l2 norm: 423.8682112292271
l1 norm: 22.93286096228297
Rbeta: 18.227231783613703
Rbeta2: 15.50019798808295

Train set: Avg. loss: 0.000004416, Accuracy: 128/128 (100%)

1.5128437 4.8726926
l2 norm: 423.8311427344058
l1 norm: 22.93172121073534
Rbeta: 18.22712841500729
Rbeta2: 15.500024820995867

Train set: Avg. loss: 0.000004416, Accuracy: 128/128 (100%)



KeyboardInterrupt: 

In [50]:
# Write data to a CSV
import pandas as pd
    
# Write rbetas 
name =  "experiments-data/" + str(k) + "rbeta" + str(Cout) + ".csv"
pd.DataFrame(rbetas).to_csv(name, header=False, index=False)

# Write betas
for i in range(len(pairs)):
    beta = betas[i]
    losses = losses_all[i]
    rbetavals = rbetavals_all[i]
    ell1s = ell1s_all[i]
    print(str(pairs[i]))
    name = "experiments-data/" + str(pairs[i]) + ".csv"
    pd.DataFrame(beta).to_csv(name, header=False, index=False)
    name = "experiments-data/" + "loss" + str(pairs[i]) + ".csv"
    pd.DataFrame(losses).to_csv(name, header=False, index=False)
    name = "experiments-data/" + "rbetas" + str(pairs[i]) + ".csv"
    pd.DataFrame(rbetavals).to_csv(name, header=False, index=False)
    name = "experiments-data/" + "ell1s" + str(pairs[i]) + ".csv"
    pd.DataFrame(ell1s).to_csv(name, header=False, index=False)
    

IndexError: list index out of range

In [30]:
# Write data to a CSV
import pandas as pd
    
# Write rbetas 
name =  "experiments-data/" + str(k) + "rbeta" + str(Cout) + ".csv"
pd.DataFrame(rbetas).to_csv(name, header=False, index=False)

# Write betas, losses, and ell1s
for i in range(len(pairs)):
    beta = betas[i]
#     losses = losses_all[i]
    rbetavals = rbetavals_all[i]
    ell1s = ell1s_all[i]
    print(str(pairs[i]))
    name = "experiments-data/" + str(pairs[i]) + ".csv"
    pd.DataFrame(beta).to_csv(name, header=False, index=False)
#     name = "experiments-data/" + "loss" + str(pairs[i]) + ".csv"
#     pd.DataFrame(losses).to_csv(name, header=False, index=False)
    name = "experiments-data/" + "rbetavals" + str(pairs[i]) + ".csv"
    pd.DataFrame(rbetavals).to_csv(name, header=False, index=False)
    name = "experiments-data/" + "ell1s" + str(pairs[i]) + ".csv"
    pd.DataFrame(ell1s).to_csv(name, header=False, index=False)
    

(28, 1)
(28, 3)
(28, 5)
(28, 7)


In [21]:
print(num_samples)

128


In [52]:
print(rbetas)

[2.6596820989735943, 2.6352740575188918, 2.591987834198255, 2.584869061040482]
