In [2]:
import torch
import torchvision
from torchvision import transforms, models
from torch import optim, cuda
from torch.utils.data import DataLoader
import torch.nn as nn

In [3]:
image_transforms = {
    # Train uses data augmentation
    'train':
    transforms.Compose([
        transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=224),  # Image net standards
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])  # Imagenet standards
    ]),
    # Validation does not use augmentation
    'val':
    transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    # Test does not use augmentation
    'test':
    transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [4]:
image_transforms['val']

Compose(
    Resize(size=256, interpolation=PIL.Image.BILINEAR)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)

In [5]:
image_transforms['train']

Compose(
    RandomResizedCrop(size=(256, 256), scale=(0.8, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
    RandomRotation(degrees=(-15, 15), resample=False, expand=False)
    ColorJitter(brightness=None, contrast=None, saturation=None, hue=None)
    RandomHorizontalFlip(p=0.5)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)

In [12]:
# model = models.resnet50(pretrained=False)
class ResnetLesion101(nn.Module):
    def __init__(self, pretrained = True):
        super(ResnetLesion101,self).__init__()
        resnet101 = models.resnet101(pretrained=pretrained)
        self.resnet101_b4fc = nn.Sequential(*list(resnet101.children())[:-2])
        self.convDM = nn.Conv2d(in_channels=2048, out_channels=1024, kernel_size=1)
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1,1))
        self.fc1 = nn.Linear(in_features= 1024, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=3)
        self.softmax = nn.Softmax(dim=1)
    def forward(self,x):
        x = self.resnet101_b4fc(x)
        print(x.shape) #torch.Size([10, 2048, 1, 1])
        x = self.convDM(x)
        x = self.avgpool(x)
        x=x.view(x.shape[0], -1)
        print(x.shape) #torch.Size([10, 2048])
        x = self.fc1(x)
        print("After fc1:", x.shape) #torch.Size([10, 500])
        x = self.fc2(x)
        x = self.softmax(x)
        return x

In [13]:
model = ResnetLesion101()

In [14]:
model

ResnetLesion101(
  (resnet101_b4fc): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (

In [20]:
inp = torch.rand([10,1,256,256])

In [17]:
model = models.resnet50(pretrained=False)

The model has an input channel of 3(pretrained) but our images are single channeled mostly.  

In [24]:
model.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(7,7))


In [25]:
model

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(1, 1))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1):

In [26]:
out = model(inp)

In [27]:
out.shape

torch.Size([10, 1000])