# Transfer Learning

In [2]:
# Solve real ML problems with limited resources by applying an existing, already-trained ML model to a new but related problem.

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import models, transforms

In [4]:
# Number of images per mini-batch; used by both the training and the validation DataLoader.
batch_size = 50

In [5]:
# Preprocessing pipeline for the training images.
# CIFAR-10 images are 32x32, so they are resized to 224 to match the input size
# the ImageNet-pretrained network was built for. Random horizontal and vertical
# flips are applied as augmentation before conversion to a tensor.
# The images are normalized with the per-channel mean and standard deviation of
# the original (ImageNet) data set, because that is what the network expects.
train_data_transform = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],
            [0.229, 0.224, 0.225],
        ),
    ]
)

In [6]:
# CIFAR-10 training split (train=True). It is downloaded into ./data when not
# already present, and every image goes through train_data_transform on access.
train_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    transform=train_data_transform,
    download=True,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100.0%

Extracting ./data/cifar-10-python.tar.gz to ./data


In [8]:
# Mini-batch iterator over the training set: shuffling is enabled and two
# worker processes are used for loading.
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=batch_size,
    num_workers=2,
    shuffle=True,
)

In [9]:
# Same steps for the validation data, minus the random flips: resize, convert
# to a tensor and normalize with the same per-channel statistics as in training.
val_data_transform = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],
            [0.229, 0.224, 0.225],
        ),
    ]
)

# CIFAR-10 test split (train=False), downloaded if needed and transformed on access.
val_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=val_data_transform,
    download=True,
)

# Validation batches are served in a fixed order (no shuffling).
val_order = torch.utils.data.DataLoader(
    val_set,
    batch_size=batch_size,
    num_workers=2,
    shuffle=False,
)

Files already downloaded and verified


In [11]:
# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [12]:
# In PyTorch we have to iterate over the data manually and, for every batch, run the forward pass, the backward pass and the weight update ourselves

device(type='cuda', index=0)

In [16]:
def train_model(model, loss_function, optimizer, data_loader):
    """Train ``model`` for one pass over ``data_loader`` and report the results.

    Args:
        model: network to train; it is put into training mode.
        loss_function: callable invoked as ``loss_function(outputs, labels)``
            that returns a scalar loss tensor. The value is weighted by the
            batch size, i.e. it is treated as a per-batch mean.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` are called once
            per batch.
        data_loader: iterable yielding ``(inputs, labels)`` batches.

    Returns:
        ``(total_loss, total_acc)`` as Python floats, averaged over the samples
        actually processed. The same values are printed.

    Raises:
        ValueError: if ``data_loader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    running_correct = 0
    samples_seen = 0
    # iterate over the training data
    for inputs, labels in data_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        # reset the gradients accumulated by the previous batch
        optimizer.zero_grad()
        with torch.set_grad_enabled(True):
            # forward pass (predicted scores)
            outputs = model(inputs)
            # index of the highest score along dim 1 is the predicted class
            _, predictions = torch.max(outputs, 1)
            # loss between the predictions and the labels of this batch
            loss = loss_function(outputs, labels)
            # backward pass: compute the gradients
            loss.backward()
            # update the weights
            optimizer.step()
        # statistics, kept as plain Python numbers so no device tensor is
        # carried across iterations and an empty loader cannot break the
        # final division
        batch_count = inputs.size(0)
        running_loss += loss.item() * batch_count
        running_correct += (predictions == labels).sum().item()
        samples_seen += batch_count
    if samples_seen == 0:
        raise ValueError('data_loader yielded no samples')
    # Normalize by the samples actually processed rather than by the dataset
    # length, so the metrics stay correct with drop_last=True or a sampler
    # that visits only part of the dataset.
    total_loss = running_loss / samples_seen
    total_acc = running_correct / samples_seen
    print('Train Loss: {:.4f}; Accuracy: {:.4f}'.format(total_loss, total_acc))
    return total_loss, total_acc
    

In [17]:
def test_model(model, loss_function, data_loader):
    # set model in evaluation mode
    model.eval()
    current_loss = 0.0
    current_acc = 0
    # iterate over the validation data
    for i, (inputs, labels) in enumerate(data_loader):
    # send the input/labels to the GPU
        inputs = inputs.to(device)
        labels = labels.to(device)
    # forward
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)
            loss = loss_function(outputs, labels)
        # statistics
        current_loss += loss.item() * inputs.size(0)
        current_acc += torch.sum(predictions == labels.data)
    total_loss = current_loss / len(data_loader.dataset)
    total_acc = current_acc.double() / len(data_loader.dataset)
    print('Test Loss: {:.4f}; Accuracy: {:.4f}'.format(total_loss,total_acc))

In [18]:
def tl_feature_extractor(epochs=3):
    """Transfer learning with the pretrained network used as a fixed feature extractor.

    Loads a pretrained ResNet-18, freezes every existing parameter, swaps the
    final fully connected layer for a fresh 10-output linear layer and trains
    only that layer with Adam. After each epoch the model is evaluated on the
    validation loader.

    Args:
        epochs: number of train/evaluate rounds to run.
    """
    # pre-trained network
    network = torchvision.models.resnet18(pretrained=True)

    # freeze the existing weights so the backward pass skips them
    for weight in network.parameters():
        weight.requires_grad = False

    # the replacement head is created after freezing, so it keeps the
    # default requires_grad=True
    head_inputs = network.fc.in_features
    network.fc = nn.Linear(head_inputs, 10)

    # move to the selected device (GPU if available)
    network = network.to(device)

    criterion = nn.CrossEntropyLoss()
    # the optimizer only sees the parameters of the new final layer
    head_optimizer = optim.Adam(network.fc.parameters())

    # train
    for epoch_number in range(1, epochs + 1):
        print('Epoch {}/{}'.format(epoch_number, epochs))
        train_model(network, criterion, head_optimizer, train_loader)
        test_model(network, criterion, val_order)

In [19]:
def tl_fine_tuning(epochs=3):
    """Transfer learning by fine-tuning the whole pretrained network.

    Loads a pretrained ResNet-18, swaps the final fully connected layer for a
    fresh 10-output linear layer and trains all parameters with Adam. After
    each epoch the model is evaluated on the validation loader.

    Args:
        epochs: number of train/evaluate rounds to run.
    """
    # pre-trained network
    network = models.resnet18(pretrained=True)

    # swap the last layer for a 10-output one
    head_inputs = network.fc.in_features
    network.fc = nn.Linear(head_inputs, 10)

    # move to the selected device (GPU if available)
    network = network.to(device)

    criterion = nn.CrossEntropyLoss()
    # every parameter of the network is optimized
    full_optimizer = optim.Adam(network.parameters())

    # train
    for epoch_number in range(1, epochs + 1):
        print('Epoch {}/{}'.format(epoch_number, epochs))
        train_model(network, criterion, full_optimizer, train_loader)
        test_model(network, criterion, val_order)

In [20]:
# Run the fine-tuning experiment for five epochs; train/test loss and accuracy are printed per epoch.
tl_fine_tuning(epochs=5)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /home/rviccinig/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth
52.5%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100.0%


Epoch 1/5
Train Loss: 0.8051; Accuracy: 0.7165
Test Loss: 0.8161; Accuracy: 0.7127
Epoch 2/5
Train Loss: 0.5263; Accuracy: 0.8196
Test Loss: 0.4817; Accuracy: 0.8355
Epoch 3/5
Train Loss: 0.4263; Accuracy: 0.8526
Test Loss: 0.4257; Accuracy: 0.8535
Epoch 4/5
Train Loss: 0.3636; Accuracy: 0.8764
Test Loss: 0.4307; Accuracy: 0.8550
Epoch 5/5
Train Loss: 0.3207; Accuracy: 0.8891
Test Loss: 0.3833; Accuracy: 0.8725
