In [234]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [235]:
import os

In [236]:
import torch
from torchvision import datasets, models, transforms
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torchvision
from torch.utils.data import random_split, SubsetRandomSampler

In [237]:
torch.__version__

'1.10.1'

In [238]:
#Retrieve data and create data generators

#Make these the root directory of your model and where the data will be stored in subdirectories, respectively.
root = ''
datadir = root + '/data'

MIN_VALID_IMG_DIM = 100
IMG_CROP_SIZE = 224

def is_valid_file(path):
    try:
        img = Image.open(path)
        img.verify()
    except:
        return False
    
    if not(img.height >= MIN_VALID_IMG_DIM and img.width >= MIN_VALID_IMG_DIM):
        return False

    return True

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

data_transform = {
    'all':
    transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        normalize
    ])
}

dataset = torchvision.datasets.ImageFolder(datadir, data_transform['all'])

new_dataset = []
for idx in range(len(dataset)):
    try:
        new_dataset.append(dataset[idx])
    except Exception as e:
        print(idx)
        
dataset = new_dataset

def get_subset(indices, start, end):
    return indices[start : start + end]

TRAIN_PCT, TEST_PCT = 0.8, 0.2
count = {}
count['train'] = int(len(dataset) * TRAIN_PCT)
count['test'] = int(len(dataset) * TEST_PCT)

indices = torch.randperm(len(dataset))

train_indices = get_subset(indices, 0, count['train'])
test_indices = get_subset(indices, count['train'], count['test'])

dataloaders = {
    "train": torch.utils.data.DataLoader(
        dataset, sampler=SubsetRandomSampler(train_indices), batch_size=32, num_workers=0
    ),
    "test": torch.utils.data.DataLoader(
        dataset, sampler=SubsetRandomSampler(test_indices), batch_size=32, num_workers=0
    ),
}

65
261
355
450
588
637
721
724
858
859
965
972
1037
1058
1133
1155
1203
1292
1414
1794
1922
2098
2163
2332
2353
2376
2827
2831
2849
2868
2879
3029
3046
3508
3824
4020
4069
4274
4284
4290
4378
4979
5568
5609
5665
5801
5829
6191
6229
6240
6315
6330
6483
6621
6758
6788
6903
6929
6941
7066
7161
7466
7603
7641
7715
7872
8102
8203
8380
8611
8686
8832
8837
9463
10074
10257
10287
10563
11959
12046
12072
14978
22729
25514
26822
27955
27979
29023


In [239]:
#Creating a neural network

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

model = models.resnet50(pretrained=True).to(device)

for param in model.parameters():
    param.requires_grad = False
    
model.fc = nn.Sequential(
    nn.Linear(2048, 128),
    nn.ReLU(inplace=True),
    nn.Linear(128,2)).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters())

In [240]:
def train_model(model, criterion, optimizer, num_epochs=5):
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)
        
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0.0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                _, preds = torch.max(outputs, 1)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / count[phase]
            epoch_acc = running_corrects.double() / count[phase]

            print('{} loss: {:.4f}, acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
        
    return model

In [241]:
model_trained = train_model(model, criterion, optimizer, num_epochs=3)

Epoch 1/3
----------
train loss: 0.6119, acc: 0.6621
test loss: 0.5923, acc: 0.6794
Epoch 2/3
----------
train loss: 0.5902, acc: 0.6808
test loss: 0.5837, acc: 0.6844
Epoch 3/3
----------
train loss: 0.5841, acc: 0.6858
test loss: 0.5936, acc: 0.6779


In [263]:
#Confusion matrix

nb_classes = 2

confusion_matrix = torch.zeros(nb_classes, nb_classes)
with torch.no_grad():
    for i, (inputs, classes) in enumerate(dataloaders['test']):
        inputs = inputs.to(device)
        classes = classes.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        for t, p in zip(classes.view(-1), preds.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1

print(confusion_matrix)

KeyboardInterrupt: 

In [243]:
#Create directory for model, then save and load.

py_dir = root + 'models/pytorch'


if not os.path.exists(py_dir):
    os.makedirs(py_dir)
    

In [244]:
torch.save(model_trained.state_dict(), py_dir + '/photoshop_detection_weights.h5')

In [245]:
torch.__version__

'1.10.1'

In [258]:
model = models.resnet50(pretrained=False).to(device)
model.fc = nn.Sequential(
    nn.Linear(2048, 128),
    nn.ReLU(inplace=True),
    nn.Linear(128,2)).to(device)
model.load_state_dict(torch.load(py_dir + '/weights.h5'))

<All keys matched successfully>

In [259]:
#Test out the model with some validation examples.
val_dir = root + '/validation'
validation_img_paths = [val_dir + '/test_1.jpeg',
                        val_dir + '/test_2.jpeg']

img_list = [Image.open(img_path) for img_path in validation_img_paths]

In [260]:
validation_batch = torch.stack([data_transform['all'](img).to(device) for img in img_list])

In [261]:
pred_logits_tensor = model(validation_batch)
pred_logits_tensor

tensor([[ 0.3211,  0.0995],
        [ 0.4019,  0.0086],
        [ 0.5970, -0.1118],
        [ 0.2953,  0.1798],
        [-0.0910,  0.4441]], grad_fn=<AddmmBackward0>)

In [262]:
pred_probs = F.softmax(pred_logits_tensor, dim=1).cpu().data.numpy()
pred_probs

array([[0.55518055, 0.44481948],
       [0.5970713 , 0.40292868],
       [0.6701425 , 0.3298576 ],
       [0.5288291 , 0.4711708 ],
       [0.36931357, 0.63068646]], dtype=float32)