In [None]:
import torch
import torch.nn as nn
import torch.optim as optim


In [None]:
class SimpleNet(nn.Module):
    """Two-layer fully connected network: 10 inputs -> 5 hidden units -> 1 output."""

    def __init__(self):
        super().__init__()
        # Layers are created in this order so parameter initialisation draws
        # from the RNG in the same sequence every time.
        self.fc1 = nn.Linear(10, 5)
        self.fc2 = nn.Linear(5, 1)

    def forward(self, x):
        """Apply fc1, a ReLU non-linearity, then fc2, and return the result."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

In [None]:
def generate_data(batch_size, generator=None):
    """Create a random batch of inputs and regression targets.

    Args:
        batch_size: Number of samples (rows) to generate.
        generator: Optional ``torch.Generator`` used for sampling. When left
            as ``None`` the global PyTorch RNG is used, exactly as before;
            passing a seeded generator makes the batch reproducible without
            touching global RNG state.

    Returns:
        A tuple ``(inputs, targets)`` of standard-normal tensors with shapes
        ``(batch_size, 10)`` and ``(batch_size, 1)``.
    """
    # Inputs are drawn before targets so the RNG is consumed in a fixed order.
    inputs = torch.randn(batch_size, 10, generator=generator)
    targets = torch.randn(batch_size, 1, generator=generator)
    return inputs, targets

In [None]:
def train_step_2(model_2, inputs, targets, lr=0.01):
    """Run a single SGD step on ``model_2`` with an optimizer created in this call.

    The optimizer and loss are built locally, so nothing the optimizer holds
    outlives the call; only the in-place updates to ``model_2``'s parameters
    persist. Parameters are printed before the optimizer is built, after it is
    built, and after the update so the three snapshots can be compared.

    Args:
        model_2: The ``nn.Module`` to update in place.
        inputs: Batch of inputs passed to ``model_2``.
        targets: Targets compared against the model output with MSE loss.
        lr: Learning rate for the locally created SGD optimizer
            (defaults to the previously hard-coded 0.01).

    Returns:
        The loss for this batch as a Python float, computed before the update.
    """

    def _print_named_params(header):
        # Print a header line, then each parameter's name, size and values.
        print(header)
        for name, param in model_2.named_parameters():
            print(f"{name}: {param.size()}")
            print(param)

    _print_named_params("Params inside function trainig")

    optimizer_2 = optim.SGD(model_2.parameters(), lr=lr)
    _print_named_params("Params after optimizer inside function trainig")

    # Print the tensors the optimizer holds in its parameter groups.
    print("\nParameters in Optimizer:")
    for group in optimizer_2.param_groups:
        for param in group['params']:
            print(param)

    criterion_2 = nn.MSELoss()
    model_2.train()
    optimizer_2.zero_grad()
    outputs = model_2(inputs)
    loss = criterion_2(outputs, targets)
    loss.backward()
    optimizer_2.step()

    _print_named_params("Params after training inside function trainig")
    return loss.item()

### **Do weights get carried across functions if optimizers are defined within those functions?**

If you define a new optimizer within each function, the optimizer's internal state (such as momentum buffers in SGD or running averages in Adam) will not be carried over between the functions. However, the weights of the model itself will be carried over, as they are stored within the model object.

Here's the key distinction:

Model Weights: The model's weights and biases are attributes of the model object. If you pass the same model instance across different functions and update its weights, the updated weights persist across function calls because they are tied to the model instance.

Optimizer State: The optimizer maintains its own state, such as momentum buffers or running averages, alongside hyperparameters like the learning rate. If you define a new optimizer in each function, each new optimizer starts from its default initial state, unaware of the updates made by a previous optimizer instance.

If you want the optimizer's state to persist across different functions, you should create the optimizer outside these functions and pass it as an argument, similar to how you would handle the model object.

In [None]:
def experiment():
    """Show that model weights persist across functions that each build an optimizer.

    Stage 1 builds a ``SimpleNet``, an SGD optimizer and an MSE loss here and
    performs one update on a random batch. Stage 2 generates a second batch
    once and calls ``train_step_2`` on it three times. Parameters are printed
    at every stage so the snapshots can be compared in the cell output.
    """

    def dump_params(net, banner):
        # Print a banner, then each named parameter's size and current values.
        print(banner)
        for pname, tensor in net.named_parameters():
            print(f"{pname}: {tensor.size()}")
            print(tensor)

    model = SimpleNet()
    dump_params(model, "Params before any training")

    optimizer = optim.SGD(model.parameters(), lr=0.01)
    criterion = nn.MSELoss()
    dump_params(model, "Params after defining optimizer")

    # Print the tensors the optimizer holds in its parameter groups.
    print("\nParameters in Optimizer:")
    for tensor in (p for grp in optimizer.param_groups for p in grp['params']):
        print(tensor)

    # Stage 1: one update step using the optimizer created in this function.
    batch_size = 32
    inputs_1, targets_1 = generate_data(batch_size)

    model.train()
    optimizer.zero_grad()
    loss_1 = criterion(model(inputs_1), targets_1)
    loss_1.backward()
    optimizer.step()
    print(loss_1.item())

    dump_params(model, "Params after first trainig")

    # Stage 2: repeated steps on one fixed batch, each via train_step_2.
    inputs_2, targets_2 = generate_data(batch_size)
    for _ in range(3):
        dump_params(model, "Params inside for loop before trainig")
        loss_2 = train_step_2(model, inputs_2, targets_2)
        print(f"Loss after stage 2: {loss_2}")
        dump_params(model, "Params inside for loop after trainig")





In [None]:
# Run the demonstration defined above; its results are printed to the cell output.
experiment()

Params before any training
fc1.weight: torch.Size([5, 10])
Parameter containing:
tensor([[-0.1150,  0.2627,  0.1874, -0.0236, -0.0262,  0.2522,  0.0796,  0.2644,
         -0.0987,  0.2446],
        [-0.2835, -0.1992,  0.2061, -0.1774, -0.0465, -0.1941,  0.0694,  0.1202,
         -0.1306,  0.2277],
        [-0.1852,  0.2010,  0.2014,  0.1824,  0.3081,  0.2519,  0.0382,  0.2854,
         -0.0019, -0.2112],
        [-0.2094,  0.0764,  0.1692,  0.2396,  0.0766,  0.0171,  0.0904, -0.1763,
         -0.2968, -0.1658],
        [-0.0653, -0.2377, -0.1779, -0.0834,  0.1997,  0.1286,  0.1425, -0.0991,
         -0.0707, -0.2052]], requires_grad=True)
fc1.bias: torch.Size([5])
Parameter containing:
tensor([-0.2835,  0.2960, -0.2915,  0.1949,  0.2395], requires_grad=True)
fc2.weight: torch.Size([1, 5])
Parameter containing:
tensor([[ 0.1337, -0.1685, -0.2931, -0.1561,  0.3518]], requires_grad=True)
fc2.bias: torch.Size([1])
Parameter containing:
tensor([-0.1959], requires_grad=True)
Params after def