![model](./images/model.png)

### Create classes for the modules

In [None]:
#class for the convolution module
class conv_block(nn.Module):
    """Convolution -> batch normalization -> ReLU building block.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the convolution.
        **kwargs: forwarded unchanged to ``nn.Conv2d`` (e.g. ``kernel_size``,
            ``stride``, ``padding``).
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super(conv_block, self).__init__()
        # The activation class is spelled ``nn.ReLU``; ``nn.Relu`` does not
        # exist and raised AttributeError as soon as the block was built.
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)  # convolution
        self.batchnorm = nn.BatchNorm2d(out_channels)  # batch normalization

    def forward(self, x):
        """Apply convolution, then batch norm, then ReLU, and return the result."""
        x = self.conv(x)
        x = self.batchnorm(x)
        x = self.relu(x)
        return x
    
#class for the inception block
class inception_block(nn.Module):
    """Two parallel convolution branches whose outputs are concatenated.

    Args:
        in_channels: number of channels of the input feature map.
        out_ch1: output channels of the 1x1 branch (``self.ch1``).
        out_ch3: output channels of the 3x3 branch (``self.ch3``).

    The output has ``out_ch1 + out_ch3`` channels; both branches use stride 1
    with ``padding='same'`` so their spatial sizes match the input and can be
    concatenated.
    """

    def __init__(self, in_channels, out_ch1, out_ch3):
        super(inception_block, self).__init__()
        # 1x1 branch. The original used a 3x3 kernel here, which made this
        # branch a duplicate of ``ch3`` and contradicted the ch1/out_ch1 naming.
        self.ch1 = conv_block(in_channels=in_channels, out_channels=out_ch1, kernel_size=(1, 1), stride=(1, 1), padding='same')
        # 3x3 branch; 'same' padding keeps the spatial size equal to the 1x1 branch
        self.ch3 = conv_block(in_channels=in_channels, out_channels=out_ch3, kernel_size=(3, 3), stride=(1, 1), padding='same')

    def forward(self, x):
        """Run both branches on ``x`` and concatenate them along dim 1 (channels)."""
        return torch.cat([self.ch1(x), self.ch3(x)], 1)
    
# class for the downsample module
class downsample_block(nn.Module):
    """Downsampling module: a strided conv branch merged with a max-pool branch.

    Args:
        in_channels: number of channels of the input feature map.
        conv_out: output channels of the convolution branch.

    Both branches use a 3x3 window with stride 2 and no padding, and are
    concatenated along dim 1, giving ``conv_out + in_channels`` channels.
    """

    def __init__(self, in_channels, conv_out):
        super().__init__()
        window = (3, 3)
        step = (2, 2)
        # strided convolution branch
        self.convblock = conv_block(in_channels, conv_out, kernel_size=window, stride=step)
        # max-pooling branch with the same window and stride
        self.maxpool = nn.MaxPool2d(kernel_size=window, stride=step)

    def forward(self, x):
        """Return the conv and pool outputs joined on the channel dimension."""
        conv_branch = self.convblock(x)
        pool_branch = self.maxpool(x)
        return torch.cat((conv_branch, pool_branch), dim=1)
    

### Putting the model together

In [None]:
class Inception(nn.Module):
    """Small Inception-style classifier assembled from the modules in this file.

    Args:
        in_channels: number of channels of the input images (default 3).
        num_classes: size of the final linear layer's output (default 10).
        dropout_prob: probability passed to ``nn.Dropout`` before the
            classifier (default 0, i.e. no dropout).

    Channel bookkeeping: an ``inception_block(c, a, b)`` emits ``a + b``
    channels and a ``downsample_block(c, k)`` emits ``k + c`` channels, so the
    first argument of every layer must equal the previous layer's output.
    """

    def __init__(self, in_channels=3, num_classes=10, dropout_prob=0):
        super(Inception, self).__init__()
        # Honor the in_channels argument (it was previously hard-coded to 3).
        self.conv1 = conv_block(in_channels=in_channels, out_channels=96, kernel_size=(3,3), stride=(1,1))

        self.inception1 = inception_block(96, 32, 32)   # in: 96 (conv1)      -> out: 32 + 32 = 64
        self.inception2 = inception_block(64, 32, 48)   # in: 64              -> out: 32 + 48 = 80
        self.downsample1 = downsample_block(80, 80)     # in: 80              -> out: 80 + 80 = 160

        self.inception3 = inception_block(160, 112, 48)  # -> 112 + 48 = 160
        self.inception4 = inception_block(160, 96, 64)   # -> 96 + 64 = 160
        self.inception5 = inception_block(160, 80, 80)   # -> 80 + 80 = 160
        self.inception6 = inception_block(160, 48, 96)   # -> 48 + 96 = 144
        # inception6 emits 144 channels; the previous value of 114 made the
        # convolution in this block fail with a channel-mismatch error.
        self.downsample2 = downsample_block(144, 96)     # -> 96 + 144 = 240

        self.inception7 = inception_block(240, 176, 160)  # -> 176 + 160 = 336
        self.inception8 = inception_block(336, 176, 160)  # -> 176 + 160 = 336
        # NOTE(review): the classifier below expects exactly 336 features, so
        # this pooling must reduce the map to 1x1. That appears to hold for
        # 32x32 inputs (32 -> 30 -> 14 -> 6, padded to 8, one 7x7 window) —
        # confirm against the dataset's image size.
        self.avgpool = nn.AvgPool2d(kernel_size=(7, 7), padding=(1,1))
        self.dropout = nn.Dropout(p = dropout_prob)
        # Honor the num_classes argument (it was previously hard-coded to 10).
        self.fc = nn.Linear(336, num_classes)

    def forward(self, x):
        """Return the class scores produced by ``self.fc`` for the batch ``x``."""
        x = self.conv1(x)
        x = self.inception1(x)
        x = self.inception2(x)
        x = self.downsample1(x)

        x = self.inception3(x)
        x = self.inception4(x)
        x = self.inception5(x)
        x = self.inception6(x)
        x = self.downsample2(x)

        x = self.inception7(x)
        x = self.inception8(x)
        x = self.avgpool(x)

        # flatten everything except the batch dimension before the classifier
        x = x.reshape(x.shape[0], -1)
        x = self.dropout(x)
        x = self.fc(x)
        return x



### Model Training
Create the Model then set the parameters

In [None]:
# creating the model and moving it to the selected device
model = Inception(in_channels=3, num_classes=10, dropout_prob=0).to(device) #change val for dropout prob

loss_function = nn.CrossEntropyLoss() # cross-entropy loss for multi-class classification
optimizer = optim.SGD(model.parameters(), lr=lr) # plain SGD; `lr` must be defined before this cell runs
# Learning-rate scheduler: adjusts the optimizer's learning rate over time.
# It only takes effect when scheduler.step() is called; no such call is
# visible in this section — NOTE(review): confirm the epoch loop steps it.
scheduler = optim.lr_scheduler.LinearLR(optimizer)

Create 2 functions that obtain the metrics on the train set and the test set

### Train

In [None]:
def train(epoch):
    """Run one training pass over ``train_loader`` and record its metrics.

    Uses the module-level ``model``, ``loss_function``, ``optimizer`` and
    ``device``. After the pass, appends one value to ``train_all_loss`` (sum of
    batch losses divided by the number of batches) and one value to
    ``train_acc`` (accuracy in percent). Nothing is recorded if the loader
    yields no samples.

    Args:
        epoch: index of the current epoch; not used inside the function.
    """
    model.train()  # switch the model to training mode
    curr_loss_train = 0  # sum of the per-batch losses
    correct_train = 0  # number of correctly classified samples
    total_train = 0  # number of samples seen

    for data_train, true_labels_train in train_loader:
        data_train = data_train.to(device=device)
        true_labels_train = true_labels_train.to(device=device)

        out_train = model(data_train)  # forward pass on the training batch
        loss_train = loss_function(out_train, true_labels_train)  # batch loss

        # Clear gradients from the previous step before backpropagating
        # (the method is ``zero_grad``; ``zer_grad`` raised AttributeError).
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        curr_loss_train += loss_train.item()
        _, predicted_train = out_train.max(1)  # index of the highest score per sample
        correct_train += predicted_train.eq(true_labels_train).sum().item()
        total_train += true_labels_train.size(0)

    # An empty loader produced no metrics before either; also avoids dividing by zero.
    if total_train == 0:
        return

    # Metrics are computed once, after the whole pass. Doing this inside the
    # batch loop divided a partial loss sum by the full batch count and
    # appended one entry per batch instead of one per epoch.
    train_loss = curr_loss_train / len(train_loader)  # mean batch loss
    acc_train_val = (correct_train / total_train) * 100  # accuracy in percent

    train_acc.append(acc_train_val)
    train_all_loss.append(train_loss)

### Test

In [None]:
def test(epoch):
    """Evaluate the model on ``test_loader`` and record the metrics.

    Uses the module-level ``model``, ``loss_function`` and ``device``. After
    the pass, appends one value to ``test_all_loss`` (sum of batch losses
    divided by the number of batches), one value to ``test_acc`` (accuracy in
    percent) and one ``num_class x num_class`` confusion matrix to
    ``con_mats`` (rows: true label, columns: predicted label). Nothing is
    recorded if the loader yields no samples.

    Args:
        epoch: index of the current epoch; not used inside the function.
    """
    model.eval()  # switch the model to evaluation mode
    curr_loss_test = 0  # sum of the per-batch losses
    correct_test = 0  # number of correctly classified samples
    total_test = 0  # number of samples seen

    num_class = 10
    # confusion matrix of the test set, accumulated over all batches
    confusion_matrix = torch.zeros(num_class, num_class)
    with torch.no_grad():  # no gradients are needed for evaluation
        for data_test, true_labels_test in test_loader:

            data_test = data_test.to(device=device)
            true_labels_test = true_labels_test.to(device=device)

            out_test = model(data_test)  # forward pass on the test batch
            loss_test = loss_function(out_test, true_labels_test)  # batch loss

            curr_loss_test += loss_test.item()
            _, predicted_test = out_test.max(1)  # index of the highest score per sample
            correct_test += predicted_test.eq(true_labels_test).sum().item()
            total_test += true_labels_test.size(0)

            # Convert to Python ints once per batch instead of indexing with
            # one tensor element at a time.
            true_idx = true_labels_test.view(-1).long().tolist()
            pred_idx = predicted_test.view(-1).long().tolist()
            for tr, pr in zip(true_idx, pred_idx):
                confusion_matrix[tr, pr] += 1

    # An empty loader produced no metrics before either; also avoids dividing by zero.
    if total_test == 0:
        return

    # Metrics are recorded once, after the whole pass. Doing this inside the
    # batch loop divided a partial loss sum by the full batch count and
    # appended the same confusion-matrix tensor once per batch.
    test_loss = curr_loss_test / len(test_loader)  # mean batch loss
    acc_test_val = (correct_test / total_test) * 100  # accuracy in percent

    test_acc.append(acc_test_val)
    test_all_loss.append(test_loss)
    con_mats.append(confusion_matrix)