# GoogLeNet V1

<img src="imgs/inceptionv1.png">

<img src="imgs/googlenetv1.png">

In [30]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [36]:
class InceptionV1(nn.Module):
    def __init__(self, in1, out1, mid2, out2, mid3, out3, out4, auxiliary=False):
        super(InceptionV1, self).__init__()
        # 1x1 conv
        self.conv1 = nn.Conv2d(in_channels=in1, out_channels=out1, kernel_size=1)
        self.activate1 = nn.ReLU(inplace=True)
        self.layer1 = nn.Sequential(self.conv1, self.activate1)
        
        # 1x1 conv -> 3x3 conv
        self.conv2_1 = nn.Conv2d(in_channels=in1, out_channels=mid2, kernel_size=1)
        self.activate2_1 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(in_channels=mid2, out_channels=out2, kernel_size=3, padding=1)
        self.activate2_2 = nn.ReLU(inplace=True)
        self.layer2 = nn.Sequential(self.conv2_1, self.activate2_1, self.conv2_2, self.activate2_2)
        
         # 1x1 conv -> 5x5 conv
        self.conv3_1 = nn.Conv2d(in_channels=in1, out_channels=mid3, kernel_size=1)
        self.activate3_1 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(in_channels=mid3, out_channels=out3, kernel_size=5, padding=2)
        self.activate3_2 = nn.ReLU(inplace=True)
        self.layer3 = nn.Sequential(self.conv3_1, self.activate3_1, self.conv3_2, self.activate3_2)
        
        # 3x3 pool -> 1x1 conv 
        self.pool4 = nn.MaxPool2d(kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=in1, out_channels=out4, kernel_size=1)
        self.activate4 = nn.ReLU(inplace=True)
        self.layer4 = nn.Sequential(self.conv4, self.activate4)
        
        self.auxiliary = auxiliary
        
        if auxiliary:
            self.pool5 = nn.AvgPool2d(kernel_size=5, stride=3)
            self.conv5 = nn.Conv2d(in_channels=in1, out_channels=128, kernel_size=1)
            self.activate5 = nn.ReLU(inplace=True)
            self.auxiliary_layer = nn.Sequential(self.pool5, self.conv5, self.activate5)
        
    def forward(self, x, train=False):
        out1 = self.layer1(x)
        out2 = self.layer2(x)
        out3 = self.layer3(x)
        out4 = self.layer4(x)
        output1 = torch.cat([out1,out2,out3,out4],1) # depth-wise concat
        
        if self.auxiliary:
            if train:
                output2= self.auxiliary_layer(inputs)
            else:
                output2 = None
            return output1, output2
        else:
            return output1
        

In [37]:
class GoogLeNetV1(nn.Module):
    def __init__(self):
        super(GoogLeNetV1, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64,kernel_size=(7,7), stride=2, padding=3)
        self.activate1 = nn.ReLU(inplace=True)
        self.layer1 = nn.Sequential(self.conv1, self.activate1)
        
        self.maxpool2 = nn.MaxPool2d(kernel_size=(3,3), stride=2, padding=1)
        self.activate2 = nn.ReLU(inplace=True)
        self.pool2 = nn.Sequential(self.maxpool2, self.activate2)
        
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=192,kernel_size=(3,3), padding=1)
        self.activate3 = nn.ReLU(inplace=True)
        self.layer3 = nn.Sequential(self.conv3, self.activate3)
        
        self.maxpool4 = nn.MaxPool2d(kernel_size=(3,3), stride=2, padding=1)
        self.activate4 = nn.ReLU(inplace=True)
        self.pool4 = nn.Sequential(self.maxpool4, self.activate4)
        
        self.layer5 = InceptionV1(192,  64,  96, 128, 16, 32, 32)
        
        self.layer6 = InceptionV1(256, 128, 128, 192, 32, 96, 64)

        self.pool7 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer8 = InceptionV1(480, 192,  96, 208, 16,  48,  64)
        self.layer9 = InceptionV1(512, 160, 112, 224, 24,  64,  64)
        self.layer10 = InceptionV1(512, 128, 128, 256, 24,  64,  64)
        self.layer11 = InceptionV1(512, 112, 144, 288, 32,  64,  64)
        self.layer12 = InceptionV1(528, 256, 160, 320, 32, 128, 128)
        
        self.maxpool13 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.pool13 = nn.Sequential(self.maxpool13)

        self.layer14 = InceptionV1(832, 256, 160, 320, 32, 128, 128)
        self.layer15 = InceptionV1(832, 384, 192, 384, 48, 128, 128)

        self.pool16 = nn.AvgPool2d(kernel_size=7, stride=1)
        
        self.dropout17 = nn.Dropout(0.4)
        
        self.linear18 = nn.Linear(1024, 1000)

    def forward(self, x):
        network = self.layer1(x)
        print('layer1 ',network.size())
        network = self.pool2(network)
        print('pool2 ',network.size())
        network = self.layer3(network)
        print('layer3 ',network.size())
        network = self.pool4(network)
        print('pool4 ',network.size())
        network = self.layer5(network)
        print('layer5 ',network.size())
        network = self.layer6(network)
        print('layer6 ',network.size())
        network = self.pool7(network)
        print('pool7 ',network.size())
        network = self.layer8(network)
        print('layer8 ',network.size())
        network = self.layer9(network)
        print('layer9 ',network.size())
        network = self.layer10(network)
        print('layer10 ',network.size())
        network = self.layer11(network)
        print('layer11 ',network.size())
        network = self.layer12(network)
        print('layer12 ',network.size())
        network = self.pool13(network)
        print('pool13 ',network.size())
        network = self.layer14(network)
        print('layer14 ',network.size())
        network = self.layer15(network)
        print('layer15 ',network.size())
        network = self.pool16(network)
        print('pool16 ',network.size())
        network = self.dropout17(network)
        print('dropout17 ',network.size())
        network = network.view(network.size(0), -1)
        out = self.linear18(network)
        print('layer18 ',network.size())
        return out, network

In [38]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
input_data = torch.randn(1,3,224,224)
net = GoogLeNetV1()
net.to(device)
input_data = input_data.to(device)
output, network = net(input_data)
print(output.size())

layer1  torch.Size([1, 64, 112, 112])
pool2  torch.Size([1, 64, 56, 56])
layer3  torch.Size([1, 192, 56, 56])
pool4  torch.Size([1, 192, 28, 28])
layer5  torch.Size([1, 256, 28, 28])
layer6  torch.Size([1, 480, 28, 28])
pool7  torch.Size([1, 480, 14, 14])
layer8  torch.Size([1, 512, 14, 14])
layer9  torch.Size([1, 512, 14, 14])
layer10  torch.Size([1, 512, 14, 14])
layer11  torch.Size([1, 528, 14, 14])
layer12  torch.Size([1, 832, 14, 14])
pool13  torch.Size([1, 832, 7, 7])
layer14  torch.Size([1, 832, 7, 7])
layer15  torch.Size([1, 1024, 7, 7])
pool16  torch.Size([1, 1024, 1, 1])
dropout17  torch.Size([1, 1024, 1, 1])
layer18  torch.Size([1, 1024])
torch.Size([1, 1000])
