In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import torchvision
from torchvision.datasets import ImageFolder
from torchvision import transforms, models

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import time
import copy
import os
import zipfile
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

import cv2

In [0]:
# For running in a Google Colaboratory notebook instance: mounts the user's Google Drive
# at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Train ``model`` and restore the weights of its best validation epoch.

    Every epoch runs a 'train' pass (gradients enabled, optimizer stepped) followed by a
    'val' pass (gradients disabled). The state dict that achieved the highest validation
    accuracy is loaded back into ``model`` before returning.

    Args:
        model: Network to optimise. Batches are moved to the device of its first parameter.
        dataloaders: Mapping with 'train' and 'val' entries. Each entry yields
            ``(inputs, labels)`` batches and exposes a sized ``.dataset`` attribute.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer whose ``zero_grad()`` is called for every batch and whose
            ``step()`` is called for every training batch.
        num_epochs: Number of train + val passes to run.
        is_inception: When True, the model is expected to return
            ``(outputs, aux_outputs)`` during the 'train' phase; the auxiliary loss is
            added to the main loss with weight 0.4.

    Returns:
        Tuple ``(model, val_acc_history)`` where ``val_acc_history`` holds one validation
        accuracy (a 0-dim tensor) per epoch.
    """
    since = time.time()

    # Use the device the model already lives on instead of a notebook-level ``device``
    # global, so the function does not depend on another cell having been executed.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history only in the train phase
                with torch.set_grad_enabled(phase == 'train'):
                    # Special case for inception: in training it has an auxiliary output,
                    # so the loss is the final-output loss plus 0.4 * the auxiliary loss.
                    # In validation only the final output is considered.
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    # Predicted class = index of the largest score along dim 1
                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: weight the batch loss by the batch size so the epoch
                # figure below is a per-sample average even when the last batch is short
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history


def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    Does nothing otherwise. The model is modified in place and nothing is returned.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False
              
            
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision classifier whose final layer outputs ``num_classes`` scores.

    Args:
        model_name: One of "resnet", "alexnet", "vgg", "squeezenet", "densenet" or
            "inception".
        num_classes: Output size of the replacement classification layer.
        feature_extract: Passed to ``set_parameter_requires_grad`` before the head is
            replaced, so when truthy only the newly created layer(s) stay trainable.
        use_pretrained: Forwarded as ``pretrained=`` to the torchvision constructor.

    Returns:
        Tuple ``(model_ft, input_size)``: the model and the square input resolution
        recorded for it (299 for inception, 224 for every other architecture).

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Model-specific values filled in by the branch that matches ``model_name``.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        # Resnet18
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        # Alexnet
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "vgg":
        # VGG11_bn
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        # Squeezenet: the classification head is a 1x1 convolution, not a linear layer
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        # Densenet
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        # Inception v3
        # Be careful, expects (299,299) sized images and has auxiliary output
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxiliary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 299

    else:
        # Raise instead of calling exit(): terminating the interpreter from a helper
        # would take the whole notebook kernel down with it.
        raise ValueError(
            "Invalid model name {!r}; expected one of: resnet, alexnet, vgg, "
            "squeezenet, densenet, inception".format(model_name)
        )

    return model_ft, input_size

In [0]:
# Unpack the dataset archive into the working directory.
zip_path = '/content/Data-20190928T230731Z-001.zip'
extract_directory = '/content/'

# The context manager closes the archive handle even if extraction fails part-way.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_directory)

In [4]:
# Architecture to build, size of its output layer, and whether the backbone is frozen
# (feature extraction) or trained as well.
MODEL_NAME = "resnet"
NUM_CLASSES = 2
FEATURE_EXTRACT = True

model_ft, input_size = initialize_model(
    model_name=MODEL_NAME,
    num_classes=NUM_CLASSES,
    feature_extract=FEATURE_EXTRACT,
    use_pretrained=True,
)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/checkpoints/resnet18-5c106cde.pth
100%|██████████| 46827520/46827520 [00:00<00:00, 95736717.59it/s]


In [0]:
X = []  # decoded images, converted from OpenCV's BGR channel order to RGB
y = []  # one label per entry of X: 0 when the directory path contains 'NSFW', else 1

for dirpath, dirnames, filenames in os.walk('/content/Data'):
    # The class is encoded in the directory path.
    label = 0 if 'NSFW' in dirpath else 1

    for filename in filenames:
        image_path = os.path.join(dirpath, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable / non-image file by returning None
            # rather than raising; skip it instead of crashing in cvtColor.
            print('Skipping unreadable image: {}'.format(image_path))
            continue

        X.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # Append the label per image so X and y stay aligned when files are skipped.
        y.append(label)

In [0]:
class LewdDataset(Dataset):
    """Dataset over parallel sequences of samples and labels.

    ``transform`` is applied to a sample each time it is fetched; labels are returned
    unchanged.
    """

    def __init__(self, X, y, transform):
        """Store the samples ``X``, their labels ``y`` and the per-sample ``transform``."""
        self.X, self.y, self.transform = X, y, transform

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.X)

    def __getitem__(self, index):
        """Return ``(transform(sample), label)`` for position ``index``."""
        sample, target = self.X[index], self.y[index]
        return self.transform(sample), target

In [0]:
# Per-channel normalisation constants — presumably the statistics the pretrained
# torchvision backbone was trained with (TODO confirm).
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]
target_size = (input_size, input_size)

# One preprocessing pipeline per phase, keyed the same way as the dataloaders.
data_transforms = {
    'train': transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(target_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ]),
    'val': transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(target_size),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ]),
}

In [0]:
# Fixed seed so the train/validation split is identical on every run.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED
)

dataset_train = LewdDataset(X_train, y_train, data_transforms['train'])
dataset_test = LewdDataset(X_test, y_test, data_transforms['val'])

BATCH_SIZE = 8
# Reshuffle the training samples every epoch so batches differ between epochs;
# the validation loader keeps a fixed order.
loader_train = torch.utils.data.DataLoader(dataset_train, BATCH_SIZE, shuffle=True)
loader_test = torch.utils.data.DataLoader(dataset_test, BATCH_SIZE)

# Keys match the phase names iterated by train_model.
dataloaders = {
    'train': loader_train,
    'val': loader_test
}

In [10]:
# Prefer the first CUDA device when one is available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_ft = model_ft.to(device)

# Parameters that still require gradients, in the order the model reports them.
trainable = [(name, param) for name, param in model_ft.named_parameters()
             if param.requires_grad]

print("Params to learn:")
for name, param in trainable:
    print("\t", name)

# In feature-extraction mode only the unfrozen parameters are handed to the optimizer;
# otherwise every model parameter is passed.
if FEATURE_EXTRACT:
    params_to_update = [param for name, param in trainable]
else:
    params_to_update = model_ft.parameters()

optimizer_ft = optim.Adam(params_to_update)
criterion = nn.CrossEntropyLoss()

Params to learn:
	 fc.weight
	 fc.bias


In [39]:
# Number of train + validation passes over the data.
NUM_EPOCHS = 3

model_ft, hist = train_model(
    model=model_ft,
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer_ft,
    num_epochs=NUM_EPOCHS,
    is_inception=(MODEL_NAME == "inception"),
)

Epoch 0/2
----------
train Loss: 0.3217 Acc: 0.8653
val Loss: 0.2754 Acc: 0.8784

Epoch 1/2
----------
train Loss: 0.2982 Acc: 0.8737
val Loss: 0.2644 Acc: 0.8986

Epoch 2/2
----------
train Loss: 0.2864 Acc: 0.8814
val Loss: 0.2590 Acc: 0.9054

Training complete in 1m 38s
Best val Acc: 0.905405


In [0]:
# Serialises the whole model object (not just its state_dict) to /content/resnet.pt.
# NOTE(review): loading a fully pickled module presumably requires the same model classes
# to be importable at load time — confirm before sharing the file.
torch.save(model_ft, '/content/resnet.pt')

In [0]:
# dataset_test applies data_transforms['val'] inside __getitem__, so the sample is
# already a resized, normalised tensor. Running it through a transform pipeline a second
# time would normalise it twice; only the batch dimension has to be added.
image, label = dataset_test[0]
test_torch = torch.unsqueeze(image, 0).to(device)

# Inference only: put the model in evaluation mode and skip gradient tracking.
model_ft.eval()
with torch.no_grad():
    results = model_ft(test_torch)[0].cpu().numpy()

In [31]:
# Index of the highest score = predicted class for the sample above.
np.argmax(results)

1