In [None]:
# load necessary modules
import numpy as np 
from scipy.integrate import odeint
import os, sys 
from pathlib import Path
from os.path import dirname, realpath
# Make the project-local packages importable: the `modules` folder is looked
# up inside the parent of the (symlink-resolved) current working directory.
script_dir = Path(realpath('.')).parent
module_dir = os.fspath(script_dir)
sys.path.insert(0, f'{module_dir}/modules')
import mlp, math
import torch
import torch.nn as nn
from torchsummary import summary
import torchvision 
from itertools import islice
import mnist
from torchvision import transforms as ts

# ---- experiment configuration ----
download_directory = '../data/mnist'
depth, width = 3, 200

# Short distribution ids -> full distribution names handed to `net.re_init`.
dist_dict = {
    'su': 'standard_uniform',
    'sn': 'standard_normal',
    'xu': 'xavier_uniform',
    'xn': 'xavier_normal',
}

# Experiment identifier; results are saved under <architecture>/<start>/<dist>.
architecture = f'mlp-{depth}-{width}'
start = 'zero'
alpha = 10
dist_id = 'xu'
dist = f'all-{dist_id}-{alpha}'
save_folder = f'../data/mnist/{architecture}/{start}/{dist}'

# Prefer the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
optimization_steps = 100000
log_freq = 100

# ---- load dataset ----
# download=False: the MNIST files are expected to already be under `download_directory`.
train = torchvision.datasets.MNIST(
    root=download_directory,
    train=True,
    transform=ts.ToTensor(),
    download=False,
)
test = torchvision.datasets.MNIST(
    root=download_directory,
    train=False,
    transform=ts.ToTensor(),
    download=False,
)

# ---- build, re-initialise and train the network ----
# presumably 784 = 28*28 input pixels and 10 = number of classes — TODO confirm against mlp.MLP
net = mlp.MLP(depth, width, 784, 10)
# [1, 2, 3] is presumably the list of layers to re-initialise — TODO confirm against mlp.MLP.re_init
net.re_init([1, 2, 3], dist_dict[dist_id], alpha)
net.learn(
    mnist,
    train,
    test,
    loss_function='MSE',
    device=device,
    save_folder=save_folder,
    weight_decay=1e-2,
    optimization_steps=optimization_steps,
    batch_size=64,
    log_freq=log_freq,
)

{'loss_function': 'MSE', 'save_folder': '../data/mnist/mlp-3-200/zero/dist', 'batch_size': 64, 'learning_rate': 0.001, 'weight_decay': 0.01, 'optimization_steps': 100000, 'log_freq': 100}


  0%|          | 0/100000 [00:00<?, ?it/s]

In [20]:
# Scratch check: a tensor divided by its own norm should have norm 1.
shape = (208, 100)

# Start from zeros and overwrite the values in place with Xavier (Glorot)
# *normal* initialization; `xavier_normal_` returns the tensor it filled.
# NOTE(review): the experiment cell above selects 'xu' (xavier_uniform) — if
# this check is meant to mirror it, use `nn.init.xavier_uniform_` instead.
tensor = nn.init.xavier_normal_(torch.zeros(shape))

# Norm of the normalized tensor; expected to print a value of ~1.0.
print(torch.linalg.norm(tensor / torch.linalg.norm(tensor)))

tensor(1.0000)


In [19]:
# Display the dtype of the `weight` attribute of the last entry in `net.hidden`.
# NOTE(review): depends on `net` from the training cell, and the execution counts
# in this notebook are out of order — confirm this survives Restart & Run All.
net.hidden[-1].weight.dtype

torch.float32

In [4]:
# Scratch check: draw a 4x5 float32 tensor of standard-normal samples.
torch.randn(size=(4, 5), dtype=torch.float32)

tensor([[ 0.1366,  0.2946,  0.4762,  0.5481, -0.6822],
        [-0.2726,  0.3101,  0.6530,  0.0578,  1.0401],
        [-2.0029, -0.1530,  0.3312,  0.2375, -1.6871],
        [-1.2877,  0.0608, -0.9288,  1.4429,  0.9075]])