# Follow-up on activation functions in (multi-layer) perceptrons

How do we tell which activation function is being used in a model?

`nn.Linear` itself has no activation function - it's just a linear transformation.

The activation (if any) comes from what you do with its output.


In [27]:
import torch
from torch import nn, optim
#torch.manual_seed(152)
model = nn.Linear(2, 1)
print(model)
for name, param in model.named_parameters():
  print(param.data)

Linear(in_features=2, out_features=1, bias=True)
tensor([[0.1313, 0.4056]])
tensor([-0.6527])


# Ising Model exploration

The actual Ising model Hamiltonian is
$$H_\mathrm{model}[\boldsymbol{S}^i] = - \sum_{j=1}^L \sum_{k=1}^L J_{j,k}S_{j}^iS_{k}^i.$$

Following Mehta et al., we recast this model in the form
$$
H_\mathrm{model}^i \equiv \mathbf{X}^i \cdot \mathbf{J},
$$

where the vectors $\mathbf{X}^i$ represent all two-body interactions $\{S_{j}^iS_{k}^i \}_{j,k=1}^L$, and the index $i$ runs over the samples in the data set.

Our goal will be to learn the interaction strength ${\bf J}$.

First we create the datasets.

In [28]:
import numpy as np
np.random.seed(12)

### Ising model size
L=20

# create 100 random Ising states
states=np.random.choice([-1, 1], size=(100,L))

def ising_energies(states):
    """
    This function calculates the energies of the states in the nn Ising Hamiltonian
    """
    L = states.shape[1]
    J = np.zeros((L, L),)
    for i in range(L):
        J[i,(i+1)%L]=-1.0 # interaction between nearest-neighbors

    # compute energies
    E = np.einsum('...i,ij,...j->...',states,J,states)

    return E
# calculate Ising energies
energies=ising_energies(states)
print(states, energies)

[[ 1  1 -1 ...  1 -1 -1]
 [-1  1 -1 ... -1 -1 -1]
 [-1 -1  1 ... -1  1 -1]
 ...
 [-1  1 -1 ...  1 -1  1]
 [-1  1 -1 ... -1 -1 -1]
 [ 1  1 -1 ...  1 -1 -1]] [  4.   0.  -4.  -8.   4.   0.   0.  -4.   0.   8.   4.   0.   8.   0.
   0.   0.   8.   0.   4.   0.  -4.   0.   4.  -4.  -8.   4.   0.  -4.
   8.  -8.  -4.   0.  -4.   4.   0.  -4.   8.  -4.  -4.  -4.   4.   4.
  -4.  -4.  -4.  -4.   0.  -4. -12.   0.  -4.   8.   4.  -4.  -8.  -4.
 -12.   4.  -4.   8.   4.   0.   4.   0.   0.  -4.  -4. -12.  -4.  -4.
   0.  -8.   4.   4.   4.  -4.   4.  -4.  12.   8.   8.   4.  -4.  -4.
   0.   0.   4.   4.  -4.   8.   0.   0.   0.   0.  -8.   0.   4.   8.
   0.  -8.]


In [29]:
# reshape Ising states into LR samples: S_i S_j --> X_p
states=np.einsum('...i,...j->...ij', states, states)
shape=states.shape
states=states.reshape((shape[0],shape[1]*shape[2]))
# build final data set
Data=[states,energies]
print(Data)

[array([[ 1,  1, -1, ..., -1,  1,  1],
       [ 1, -1,  1, ...,  1,  1,  1],
       [ 1,  1, -1, ...,  1, -1,  1],
       ...,
       [ 1, -1,  1, ...,  1, -1,  1],
       [ 1, -1,  1, ...,  1,  1,  1],
       [ 1,  1, -1, ..., -1,  1,  1]]), array([  4.,   0.,  -4.,  -8.,   4.,   0.,   0.,  -4.,   0.,   8.,   4.,
         0.,   8.,   0.,   0.,   0.,   8.,   0.,   4.,   0.,  -4.,   0.,
         4.,  -4.,  -8.,   4.,   0.,  -4.,   8.,  -8.,  -4.,   0.,  -4.,
         4.,   0.,  -4.,   8.,  -4.,  -4.,  -4.,   4.,   4.,  -4.,  -4.,
        -4.,  -4.,   0.,  -4., -12.,   0.,  -4.,   8.,   4.,  -4.,  -8.,
        -4., -12.,   4.,  -4.,   8.,   4.,   0.,   4.,   0.,   0.,  -4.,
        -4., -12.,  -4.,  -4.,   0.,  -8.,   4.,   4.,   4.,  -4.,   4.,
        -4.,  12.,   8.,   8.,   4.,  -4.,  -4.,   0.,   0.,   4.,   4.,
        -4.,   8.,   0.,   0.,   0.,   0.,  -8.,   0.,   4.,   8.,   0.,
        -8.])]


In [30]:
print (Data[0])

[[ 1  1 -1 ... -1  1  1]
 [ 1 -1  1 ...  1  1  1]
 [ 1  1 -1 ...  1 -1  1]
 ...
 [ 1 -1  1 ...  1 -1  1]
 [ 1 -1  1 ...  1  1  1]
 [ 1  1 -1 ... -1  1  1]]


In [31]:
import torch
print([state_array for state_array in Data[0]])
input_features = torch.tensor([state for state in Data[0]], dtype=torch.float32)
output = torch.tensor([[e] for e in Data[1]], dtype=torch.float32)
print(input_features)


[array([ 1,  1, -1,  1,  1, -1,  1,  1, -1, -1, -1,  1, -1,  1,  1,  1, -1,
        1, -1, -1,  1,  1, -1,  1,  1, -1,  1,  1, -1, -1, -1,  1, -1,  1,
        1,  1, -1,  1, -1, -1, -1, -1,  1, -1, -1,  1, -1, -1,  1,  1,  1,
       -1,  1, -1, -1, -1,  1, -1,  1,  1,  1,  1, -1,  1,  1, -1,  1,  1,
       -1, -1, -1,  1, -1,  1,  1,  1, -1,  1, -1, -1,  1,  1, -1,  1,  1,
       -1,  1,  1, -1, -1, -1,  1, -1,  1,  1,  1, -1,  1, -1, -1, -1, -1,
        1, -1, -1,  1, -1, -1,  1,  1,  1, -1,  1, -1, -1, -1,  1, -1,  1,
        1,  1,  1, -1,  1,  1, -1,  1,  1, -1, -1, -1,  1, -1,  1,  1,  1,
       -1,  1, -1, -1,  1,  1, -1,  1,  1, -1,  1,  1, -1, -1, -1,  1, -1,
        1,  1,  1, -1,  1, -1, -1, -1, -1,  1, -1, -1,  1, -1, -1,  1,  1,
        1, -1,  1, -1, -1, -1,  1, -1,  1,  1, -1, -1,  1, -1, -1,  1, -1,
       -1,  1,  1,  1, -1,  1, -1, -1, -1,  1, -1,  1,  1, -1, -1,  1, -1,
       -1,  1, -1, -1,  1,  1,  1, -1,  1, -1, -1, -1,  1, -1,  1,  1,  1,
        1, -1,  1,  1, -

In [32]:
from torch import nn, optim
model = nn.Sequential(
    nn.Linear(L, 10),
    nn.Sigmoid(),
    nn.Linear(10, 1),
    nn.Sigmoid()
)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=2.0)

In [33]:
# Define the training loop
for epoch in range(50000):

  # Forward pass: Calculate the predicted output class using the model
  output = model(input_features)

  # Calculate the loss between the predicted output class and the actual target class
  loss = criterion(output, classification)

  # Backpropagation: Compute gradients of the model parameters with respect to the loss
  loss.backward()
  # Update the model parameters using the computed gradients
  optimizer.step()
  # Zero out the gradients for the next iteration to avoid accumulation
  optimizer.zero_grad()

  if epoch % 100 == 0:
      print(f"Epoch {epoch} done! Loss = {loss}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (100x400 and 20x10)