In [52]:
import torch.nn as nn
import torch.nn.functional as F

class DiseaseAndTypeClassifier(nn.Module):
    """Two-headed classifier stacked on top of an existing backbone model.

    The backbone's classifier output is projected to a shared hidden
    representation (Linear + ReLU), which feeds two independent linear heads:
    ``l1`` produces the disease-label output and ``l2`` the variety output.

    Example:
        model_name = 'resnet26d'
        tmodel = timm.create_model(model_name, pretrained=True)
        m1 = DiseaseAndTypeClassifier(tmodel)
    """

    def __init__(self, tmodel, hidden_features=512, n_labels=10, n_varieties=10):
        """Wrap ``tmodel`` and build the shared projection plus both heads.

        Args:
            tmodel: backbone model (e.g. a pretrained timm model). It must
                provide ``get_classifier()`` returning a layer that has an
                ``out_features`` attribute, and its ``forward`` must return a
                single tensor, because that output is passed directly into
                the projection layer.
            hidden_features: width of the shared hidden layer (default 512).
            n_labels: number of outputs of the disease-label head ``l1``
                (default 10).
            n_varieties: number of outputs of the variety head ``l2``
                (default 10).
        """
        super().__init__()
        self.m = tmodel

        # The backbone keeps its own head; a new projection is added on top
        # whose input size matches the backbone classifier's output size.
        backbone_out = self.m.get_classifier().out_features
        self.s = nn.Sequential(
            nn.Linear(in_features=backbone_out, out_features=hidden_features, bias=False),
            nn.ReLU())
        self.l1 = nn.Linear(in_features=hidden_features, out_features=n_labels, bias=False)     # disease label head
        self.l2 = nn.Linear(in_features=hidden_features, out_features=n_varieties, bias=False)  # variety head

    def forward(self, x):
        """Run the backbone and the shared projection, then both heads.

        Returns:
            tuple ``(label, variety)``: the output of ``l1`` (disease label)
            and the output of ``l2`` (variety), in that order.
        """
        x = self.m(x)
        x = self.s(x)
        label = self.l1(x)    # disease label
        variety = self.l2(x)  # variety
        return label, variety

    
class smallm(nn.Module):
    """Small fully connected network used as a stand-in backbone.

    The input is flattened per sample and must yield 256 features. It then
    passes through ``l1`` (256 -> 512, ReLU), twice through ``l2``
    (512 -> 512, ReLU) and finally through ``head`` (512 -> 10).
    """

    def __init__(self):
        super().__init__()
        # Linear layers need a flat (batch, features) input.
        self.flatten = nn.Flatten()
        self.l1 = nn.Sequential(nn.Linear(256, 512, bias=False), nn.ReLU())
        self.l2 = nn.Sequential(nn.Linear(512, 512, bias=False), nn.ReLU())
        # The head is a bare Linear layer: no activation is applied after it.
        self.head = nn.Linear(512, 10, bias=False)

    def forward(self, x):
        """Return a pair of outputs, both computed by the same head from the same features."""
        hidden = self.l1(self.flatten(x))
        # l2 is applied twice in a row, re-using the same weights both times.
        # NOTE(review): confirm the repeated l2 pass is intentional.
        for _ in range(2):
            hidden = self.l2(hidden)
        return self.head(hidden), self.head(hidden)

    def get_classifier(self):
        """Return the final layer so callers can inspect its type and its in/out feature sizes."""
        return self.head
        

# Wiring check: use the small MLP as a stand-in backbone for the two-headed
# classifier. The wrapper sizes its projection layer from
# sm.get_classifier().out_features.
# NOTE(review): smallm.forward returns a tuple, while
# DiseaseAndTypeClassifier.forward passes the backbone output straight into an
# nn.Sequential - calling m1(x) looks like it would fail; confirm before use.
sm=smallm()
m1=DiseaseAndTypeClassifier(sm)


In [53]:
# Build a throwaway smallm; as the cell's last expression its module tree is displayed.
smallm()

smallm(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (l1): Sequential(
    (0): Linear(in_features=256, out_features=512, bias=False)
    (1): ReLU()
  )
  (l2): Sequential(
    (0): Linear(in_features=512, out_features=512, bias=False)
    (1): ReLU()
  )
  (head): Linear(in_features=512, out_features=10, bias=False)
)

In [57]:
# Print the module tree of a fresh smallm (same text as the bare-expression repr above).
# Alternative: list(smallm().modules()) gives the modules as a flat list instead.
print(smallm())

smallm(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (l1): Sequential(
    (0): Linear(in_features=256, out_features=512, bias=False)
    (1): ReLU()
  )
  (l2): Sequential(
    (0): Linear(in_features=512, out_features=512, bias=False)
    (1): ReLU()
  )
  (head): Linear(in_features=512, out_features=10, bias=False)
)


In [41]:
# Remove the head by rebuilding the model from all of its children except the last.
# The result can still be called (flatten -> l1 -> l2), but it is a plain
# nn.Sequential: everything specific to smallm (its own forward(), get_classifier())
# is lost.
s=smallm()
s=nn.Sequential(*list(s.children())[:-1])
s

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Sequential(
    (0): Linear(in_features=256, out_features=512, bias=False)
    (1): ReLU()
  )
  (2): Sequential(
    (0): Linear(in_features=512, out_features=512, bias=False)
    (1): ReLU()
  )
)

In [42]:
# Dump every parameter of the headless model: first its trainable flag, then its values.
for layer in s.children():
    for weight in layer.parameters():
        print(weight.requires_grad)
        print(weight)

True
Parameter containing:
tensor([[ 0.0503, -0.0471, -0.0006,  ..., -0.0281, -0.0501, -0.0057],
        [-0.0165,  0.0101, -0.0625,  ...,  0.0080,  0.0299,  0.0152],
        [ 0.0180, -0.0514,  0.0101,  ..., -0.0426, -0.0405,  0.0009],
        ...,
        [ 0.0106, -0.0184,  0.0349,  ..., -0.0269, -0.0406,  0.0212],
        [ 0.0330, -0.0154,  0.0414,  ..., -0.0400, -0.0016,  0.0080],
        [-0.0512,  0.0574,  0.0174,  ..., -0.0257,  0.0127,  0.0517]],
       requires_grad=True)
True
Parameter containing:
tensor([[-0.0264, -0.0250, -0.0241,  ..., -0.0013,  0.0173, -0.0113],
        [-0.0194, -0.0440,  0.0262,  ..., -0.0091, -0.0402,  0.0085],
        [ 0.0290,  0.0323,  0.0216,  ..., -0.0187,  0.0417, -0.0074],
        ...,
        [-0.0296,  0.0103, -0.0433,  ...,  0.0175,  0.0436,  0.0197],
        [-0.0322,  0.0368,  0.0085,  ...,  0.0334,  0.0063,  0.0175],
        [ 0.0284,  0.0135, -0.0128,  ..., -0.0358,  0.0158, -0.0366]],
       requires_grad=True)


In [29]:
# Compact view: qualified parameter name next to its trainable flag.
for pname, weight in s.named_parameters():
    print(pname, weight.requires_grad)

1.0.weight True
2.0.weight True


In [13]:
# Turn gradient tracking on for every parameter of the headless model.
for layer in s.children():
    for weight in layer.parameters():
        weight.requires_grad = True

In [4]:
import timm
# Pretrained resnet26d created with a 512-output classifier and
# global_pool='catavgmax'. The printed value is the number of input features
# of that classifier (4096 in the recorded run).
tmodel=timm.create_model('resnet26d', pretrained=True, num_classes=512,global_pool='catavgmax') 
print(tmodel.get_classifier().in_features)

4096


In [83]:
# Strip the head: rebuild the timm model from all of its children except the
# last one. The result is a plain nn.Sequential.
nm=nn.Sequential(*list(tmodel.children())[:-1])

In [84]:
# Expected to fail: nn.Sequential has no get_classifier(), so this raises the
# AttributeError recorded below - the model's helper methods are gone after the rebuild.
nm.get_classifier().in_features

AttributeError: 'Sequential' object has no attribute 'get_classifier'

In [1]:
# Get all the parameters of the l2 head (a single weight tensor, since the layer has bias=False).
# NOTE(review): the original comment said "l1" while the code reads m1.l2 - confirm which head was meant.
# NOTE(review): the recorded NameError means m1 did not exist in the kernel when this cell ran;
# run the cell that builds m1 first.
p=list(m1.l2.parameters())

len(p[0])
p[0][9]

type(p)
p[0].requires_grad

NameError: name 'm1' is not defined

In [39]:
# Parameters of the backbone's head layer, reached through the wrapper (m1.m is the wrapped backbone).
p2=list(m1.m.head.parameters())
p2

[Parameter containing:
 tensor([[ 0.1117,  0.0419, -0.0875,  ..., -0.0762,  0.0960,  0.0056],
         [ 0.0098,  0.0295, -0.0530,  ..., -0.0715,  0.1092, -0.0794],
         [ 0.0860,  0.1053,  0.0559,  ..., -0.0113,  0.0444, -0.1116],
         ...,
         [-0.0368,  0.0990,  0.0697,  ...,  0.0029,  0.0611, -0.0217],
         [ 0.0424, -0.0100,  0.0332,  ..., -0.0335, -0.0245, -0.1022],
         [-0.0782, -0.0470,  0.0407,  ...,  0.0659, -0.0937,  0.0376]],
        requires_grad=True)]

In [43]:
# Show each direct submodule of the wrapper, preceded by a 'FOUND1' banner.
for submodule in m1.children():
    print('FOUND1')
    print(submodule)

FOUND1
smallm(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (l1): Sequential(
    (0): Linear(in_features=256, out_features=512, bias=False)
    (1): ReLU()
  )
  (l2): Sequential(
    (0): Linear(in_features=512, out_features=512, bias=False)
    (1): ReLU()
  )
  (head): Sequential(
    (0): Linear(in_features=76, out_features=99, bias=False)
    (1): ReLU()
  )
)
FOUND1
Sequential(
  (0): Linear(in_features=99, out_features=512, bias=False)
  (1): ReLU()
)
FOUND1
Linear(in_features=512, out_features=10, bias=False)
FOUND1
Linear(in_features=512, out_features=10, bias=False)


In [6]:
# Attach a new Linear layer to the backbone under the attribute name 'fc'.
# Assigning an nn.Module to an attribute registers it as a submodule, so it is listed as (fc) when the model is printed.
# NOTE(review): this only sets the attribute - whether it is ever used depends on the backbone's forward(); confirm.
m1.m.fc=nn.Linear(in_features=512,out_features=512)

In [7]:
# Print the whole wrapper to check which layers are registered on it and on its backbone.
print(m1)

DiseaseAndTypeClassifier(
  (m): smallm(
    (l1): Sequential(
      (0): Linear(in_features=10, out_features=512, bias=False)
      (1): ReLU()
    )
    (l12): Sequential(
      (0): Linear(in_features=512, out_features=256, bias=False)
      (1): ReLU()
    )
    (fc): Linear(in_features=512, out_features=512, bias=True)
  )
  (rl): ReLU()
  (l1): Linear(in_features=512, out_features=10, bias=False)
  (l2): Linear(in_features=512, out_features=10, bias=False)
)


In [None]:
# Random tensor of shape (3, 28, 28) to use as a dummy input (no batch dimension).
# NOTE(review): the cells shown before this one only import torch.nn / torch.nn.functional,
# not torch itself - make sure `import torch` has run before executing this cell.
input_image = torch.rand(3,28,28)

In [38]:
# Rebuild the small model and print its layers.
# NOTE(review): the recorded output lists an 'fc' layer where the smallm class shown in this
# notebook defines 'head' - the output appears to come from a different version of the class;
# re-run to refresh it.
sm=smallm()
print(sm)

smallm(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (l1): Sequential(
    (0): Linear(in_features=256, out_features=512, bias=False)
    (1): ReLU()
  )
  (l2): Sequential(
    (0): Linear(in_features=512, out_features=512, bias=False)
    (1): ReLU()
  )
  (fc): Linear(in_features=512, out_features=256, bias=False)
)


In [22]:
import torch
# Use .detach() to get a tensor with the same values as x2 but without gradient
# history; x2 itself and the graph built from it (lin0 -> lin1) stay unchanged.
lin0 = nn.Linear(2, 2)
lin1 = nn.Linear(2, 2)
x1 = torch.randn(2, 2)
x2 = lin0(x1)
x3 = lin1(x2)
output = x2.detach()  # printed below without a grad_fn, unlike x2 and x3
print(x2)
print(output)
print(x3)

tensor([[ 0.3884,  1.0749],
        [-0.2458, -0.2331]], grad_fn=<AddmmBackward0>)
tensor([[ 0.3884,  1.0749],
        [-0.2458, -0.2331]])
tensor([[ 0.4868, -0.2339],
        [ 0.1149, -0.1691]], grad_fn=<AddmmBackward0>)


In [23]:
# The detached tensor can be used as the input of a new graph.
x4 = lin1(output)
x4.sum().backward()
print(lin0.weight.grad, lin1.weight.grad)
# Backward through this new graph stops at the detached tensor, so it never reaches
# the original graph: lin0.weight.grad is None while lin1.weight.grad is populated.


None tensor([[0.1426, 0.8418],
        [0.1426, 0.8418]])


In [24]:
# The output of a frozen layer keeps gradient tracking, so you can still
# backpropagate through it to earlier layers. The frozen layer itself receives
# no gradient. This is a good property that we want when freezing part of a model.
lin0 = nn.Linear(2, 2)
lin1 = nn.Linear(2, 2)
lin2 = nn.Linear(2, 2)
x3 = lin0(x2)  # x2 is the tensor created in the earlier .detach() cell
for param in lin1.parameters():
    param.requires_grad = False  # freeze lin1
x4 = lin1(x3)
x5 = lin2(x4)
x5.sum().backward()
# Recorded output: lin0 and lin2 have gradients, lin1.weight.grad is None.
print(lin0.weight.grad, lin1.weight.grad, lin2.weight.grad)

tensor([[ 0.1457,  0.8600],
        [-0.1082, -0.6386]]) None tensor([[ 0.1352, -1.5110],
        [ 0.1352, -1.5110]])


In [6]:
import torch.nn as nn
import torch.nn.functional as F

class NeuralNetwork(nn.Module):
    """Tiny two-headed MLP used to experiment with layers and parameters.

    Each sample is flattened to 2*3 = 6 features, passed through a shared
    Linear/ReLU stack that produces 8 features, and then fed to two
    independent heads: ``h1`` (8 -> 3) and ``h2`` (8 -> 5).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(2*3, 8),
            nn.ReLU(),
            nn.Linear(8, 8),
            nn.ReLU()
        )
        self.h1 = nn.Linear(8, 3)
        self.h2 = nn.Linear(8, 5)

    def forward(self, x):
        """Return ``(o1, o2)``: the outputs of heads ``h1`` and ``h2``.

        ``x`` must flatten to 6 features per sample, e.g. shape (batch, 2, 3).
        """
        x = self.flatten(x)
        # Apply the shared stack exactly once: it maps 6 -> 8 features, so
        # feeding its 8-feature output back into it would fail on the first
        # Linear(6, 8) layer.
        x = self.linear_relu_stack(x)
        o1 = self.h1(x)
        o2 = self.h2(x)
        return o1, o2

In [7]:
# Build the demo network and dump every parameter tensor, child by child.
s1 = NeuralNetwork()
for layer in s1.children():
    for tensor in layer.parameters():
        print(tensor)

Parameter containing:
tensor([[ 0.1545,  0.3736, -0.3435,  0.0399,  0.1408, -0.0583],
        [ 0.3021, -0.3993,  0.2122, -0.1926, -0.2593,  0.0273],
        [ 0.3910,  0.2362, -0.2592, -0.3741,  0.1457, -0.0029],
        [ 0.0457,  0.3631,  0.1568, -0.2177,  0.1282,  0.1381],
        [-0.3662, -0.0367, -0.1060,  0.1888,  0.0237,  0.2178],
        [-0.0620, -0.1117,  0.3076,  0.2571,  0.0704,  0.2905],
        [-0.1825,  0.3836, -0.2615,  0.3017, -0.2123, -0.1978],
        [-0.2592,  0.0386, -0.2730, -0.2817, -0.2072,  0.1349]],
       requires_grad=True)
Parameter containing:
tensor([-0.1503, -0.0760,  0.1067,  0.1963,  0.2854,  0.2776, -0.0633, -0.0066],
       requires_grad=True)
Parameter containing:
tensor([[-2.6617e-01,  1.0101e-01,  7.0075e-02, -1.2686e-01,  1.7857e-01,
          9.1110e-02, -3.1292e-01, -1.8995e-01],
        [ 1.4165e-01,  2.6779e-03, -2.5487e-02,  3.0827e-01,  1.5819e-01,
         -2.4409e-01,  8.3514e-02, -2.6360e-01],
        [ 2.2916e-01,  3.7602e-02, -2.70

In [8]:
# Submodules are plain attributes: display the first head.
s1.h1

Linear(in_features=8, out_features=3, bias=True)

In [9]:
# Replace the h1 head in place by assigning a new module to the attribute.
# NOTE(review): the new head expects 30 input features while linear_relu_stack
# outputs 8, so this only demonstrates replacement - forward() would not work
# with this head.
s1.h1=nn.Linear(30, 20)

In [14]:
# Display the model again to confirm that h1 now shows the replaced layer.
s1

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=6, out_features=8, bias=True)
    (1): ReLU()
    (2): Linear(in_features=8, out_features=8, bias=True)
    (3): ReLU()
  )
  (h1): Linear(in_features=30, out_features=20, bias=True)
  (h2): Linear(in_features=8, out_features=5, bias=True)
)

In [11]:
# One entry per parameter: qualified name, shape, and its first two rows/entries.
for name, param in s1.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Layer: linear_relu_stack.0.weight | Size: torch.Size([8, 6]) | Values : tensor([[ 0.1545,  0.3736, -0.3435,  0.0399,  0.1408, -0.0583],
        [ 0.3021, -0.3993,  0.2122, -0.1926, -0.2593,  0.0273]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([8]) | Values : tensor([-0.1503, -0.0760], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([8, 8]) | Values : tensor([[-0.2662,  0.1010,  0.0701, -0.1269,  0.1786,  0.0911, -0.3129, -0.1900],
        [ 0.1416,  0.0027, -0.0255,  0.3083,  0.1582, -0.2441,  0.0835, -0.2636]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | Size: torch.Size([8]) | Values : tensor([-0.2817,  0.3031], grad_fn=<SliceBackward0>) 

Layer: h1.weight | Size: torch.Size([20, 30]) | Values : tensor([[-0.1648, -0.1640, -0.0968,  0.0409, -0.0515, -0.0127,  0.0749,  0.0308,
          0.0458,  0.0718, -0.1257, -0.0869, -0.1731,  0.1237, -0.1215, -0.1805,
          0.1117, -0.0183,  0

In [13]:
# Same information as a raw list of (name, Parameter) pairs - full tensors, so the output is long.
print(list(s1.named_parameters()))

[('linear_relu_stack.0.weight', Parameter containing:
tensor([[ 0.1545,  0.3736, -0.3435,  0.0399,  0.1408, -0.0583],
        [ 0.3021, -0.3993,  0.2122, -0.1926, -0.2593,  0.0273],
        [ 0.3910,  0.2362, -0.2592, -0.3741,  0.1457, -0.0029],
        [ 0.0457,  0.3631,  0.1568, -0.2177,  0.1282,  0.1381],
        [-0.3662, -0.0367, -0.1060,  0.1888,  0.0237,  0.2178],
        [-0.0620, -0.1117,  0.3076,  0.2571,  0.0704,  0.2905],
        [-0.1825,  0.3836, -0.2615,  0.3017, -0.2123, -0.1978],
        [-0.2592,  0.0386, -0.2730, -0.2817, -0.2072,  0.1349]],
       requires_grad=True)), ('linear_relu_stack.0.bias', Parameter containing:
tensor([-0.1503, -0.0760,  0.1067,  0.1963,  0.2854,  0.2776, -0.0633, -0.0066],
       requires_grad=True)), ('linear_relu_stack.2.weight', Parameter containing:
tensor([[-2.6617e-01,  1.0101e-01,  7.0075e-02, -1.2686e-01,  1.7857e-01,
          9.1110e-02, -3.1292e-01, -1.8995e-01],
        [ 1.4165e-01,  2.6779e-03, -2.5487e-02,  3.0827e-01,  1.581