<a href="https://colab.research.google.com/github/nadbag98/DL_HW/blob/main/HW3/ultra_wide_experiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#Necessary imports
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from tqdm.notebook import tqdm
import plotly.express as px
import plotly.graph_objects as go
from google.colab import files

In [3]:
#Data preparation functions

from torch.utils.data import DataLoader, random_split
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.datasets import fetch_california_housing

class HousingDataset(torch.utils.data.Dataset):
    """
  Prepare the California Housing dataset for regression
  Code was taken from https://github.com/christianversloot/machine-learning-articles/blob/main/how-to-create-a-neural-network-for-regression-with-pytorch.md
  """

    def __init__(self, X, y, scale_data=True, normalize_data=True):
        if not torch.is_tensor(X) and not torch.is_tensor(y):
            # Apply scaling if necessary
            if scale_data:
                X = StandardScaler().fit_transform(X)
            if normalize_data:
              #data is normalized since this is assumed for the shallow network
                X = Normalizer().fit_transform(X)
            self.X = torch.from_numpy(X)
            self.y = torch.from_numpy(y).view(-1, 1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]


def get_california_dataset():
    X, y = fetch_california_housing(return_X_y=True)
    dataset = HousingDataset(X , y)
    num_samples = X.shape[0]
    train_size = int(num_samples * 0.01)
    test_size = num_samples - train_size
    train_set, test_set = random_split(dataset, [train_size, test_size])
    # setting batch sizes equal to set size in order to run full batch GD
    train_loader = DataLoader(train_set, batch_size=train_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=train_size, shuffle=False)
    in_dim = X.shape[1]
    out_dim = 1 if len(y.shape) == 1 else y.shape[1]
    return train_loader, test_loader, in_dim, out_dim 

In [4]:
#Training and testing epochs for the NN model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def NN_epoch(net: nn.Module,
                optim: torch.optim.Optimizer,
                criterion: nn.Module,
                train_loader: torch.utils.data.DataLoader):

  net.to(device)
  for data in train_loader:
        inputs, targets = data
        inputs, targets = inputs.to(device), targets.to(device)
        optim.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs.view(-1), targets.view(-1))
        loss.backward()
        optim.step()
  return net(inputs) #This suits in our case of full-batch GD


def test_epoch(net: nn.Module,
               criterion: nn.Module,
               test_loader: torch.utils.data.DataLoader):
    epoch_loss = 0.0
    net.zero_grad()
    for data in test_loader:
        inputs, targets = data
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        epoch_loss += loss.item()
    return epoch_loss

In [5]:
# An epoch of GD by the given formula

def NTK_epoch(u : torch.Tensor,
              train_loader: torch.utils.data.DataLoader,
              lr : float):
  for data in train_loader: #single iteration
    X, y = data
  X, y = X.to(device), y.to(device)
  #removed since setting shown in class assumes ||x||==1
  # normalized_X = F.normalize(X)  
  # H = (1/torch.pi)(X@X.t())*(torch.pi-torch.acos(normalized_X@normalized_X.t()))
  H = (0.5/torch.pi)*(X@X.t())*(torch.pi-torch.acos(X@X.t()).nan_to_num())
  u_dt = -H@(u-y)
  u_new = u+lr*u_dt
  return u_new


In [6]:
class ShallowNN(nn.Module):

  def __init__(self,init_weight,GPU=True):
    super(ShallowNN,self).__init__()
    self.in_dim = init_weight.shape[1]
    self.hidden_width = init_weight.shape[0]
    self.out_dim = 1
    self.layers = nn.ModuleList()
    hidden_layer = nn.Linear(self.in_dim,self.hidden_width,
                                  bias=False,dtype=torch.float64)
    hidden_layer.weight.data = init_weight.double()
    self.layers.append(hidden_layer)
    relu = nn.ReLU()
    self.layers.append(relu)
    # output weights of 1 or -1, with equal prob. to each
    output_weights = (2*torch.bernoulli(torch.empty(self.hidden_width).
                                             uniform_(0,1))-1).double()
    output_layer = nn.Linear(self.hidden_width,1,bias=False,dtype=torch.float64)
    output_layer.weight.data = output_weights
    self.layers.append(output_layer)                
                              
    

    
  def forward(self,x):
    for layer in self.layers:
      layer.double()
      x = layer(x)
    return (1/np.sqrt(self.hidden_width))*x



In [7]:
def experiment(epochs = 20000 , lr= 1e-4 ,widths = [10**i for i in range(1,6,2)]):
  dist = np.zeros((len(widths),epochs))
  train_loader, test_loader, in_dim, out_dim  = get_california_dataset()
  for i in tqdm(range(len(widths))):
    hidden_width = widths[i]
    init_weights = torch.normal(mean=0.0,std=1.0,size=(hidden_width,in_dim))
    init_weights.to(device)
    net = ShallowNN(init_weights)
    net = net.to(device)
    criterion = nn.MSELoss()
    optim = torch.optim.SGD(net.parameters(),lr)
    for data in train_loader:
      X, y = data
    X = X.to(device)
    u_ntk = net(X)
    for e in tqdm(range(epochs)):
      u_net = NN_epoch(net, optim, criterion, train_loader)
      u_ntk = NTK_epoch(u_ntk,train_loader,lr)
      dist[i][e] = torch.linalg.norm(u_net-u_ntk).item()
  return dist
    
    

dist = experiment()

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/20000 [00:00<?, ?it/s]

  0%|          | 0/20000 [00:00<?, ?it/s]

  0%|          | 0/20000 [00:00<?, ?it/s]

In [8]:
def plot_result(results: np.ndarray, path: str = 'HW3', widths = [10**i for i in range(1,6,2)]):
    """
    Plots value of results for each network depth as a function of epoch
    :param results: Dictionary with keys that are network depths, and values that
                    are dictionaries of value name (i.e. "Train Loss") to list of values
    :param value_to_plot: Specific result to plot
    :return:
    """
    fig = go.Figure()
    
    for i, width in enumerate(widths):
        value_to_epoch = results[i]
        fig.add_trace(
            go.Scatter(x=np.arange(1, len(value_to_epoch)+1), y=value_to_epoch, mode='lines+markers', name="W="+str(width))
        )
    fig.update_layout(
        xaxis_title="Epoch #",
        yaxis_title="Value of Distance Between Net and NTK"
    )
    fig.show()
    # fig.savefig("u_dist_plot.png")
    # files.download("u_dist_plot.png")


In [9]:
plot_result(dist)

AttributeError: ignored

In [None]:
X, y = fetch_california_housing(return_X_y=True)
dataset = HousingDataset(X, y)
num_samples = X.shape[0]
# print(X)
# print(X/np.linalg.norm(X,ord=2,axis=1,keepdims=True))
train_loader = DataLoader(X,batch_size=len(X))
train_loader
i=0
for data in train_loader:
  print(i)
  i+=1

0


In [None]:
W = torch.rand((4,8)).double()
model = ShallowNN(W)
print("output weights")
print(model.output_weights)
print("-----------")
for data in train_loader:
  output = model.forward(data)
  print(data@W.t())

output weights


AttributeError: ignored