In [1]:
import os
import torch
import torchvision
import tarfile
from torchvision.datasets.utils import download_url
from torch.utils.data import random_split

In [2]:
# Name of this project; not referenced by any other cell visible in this notebook.
project_name='Fake_Face_Classifier'

In [3]:
# Root of the training images; ImageFolder expects one sub-directory per class here.
data_dir = '../input/fake-image-classification-challenge/data/train'
# ImageFolder assigns label indices to the class sub-directories in sorted order,
# whereas os.listdir returns entries in arbitrary order. Sort (and keep directories
# only) so that classes[i] names the same class as label i.
classes = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
print(classes)

In [4]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
import torchvision.transforms as tt

In [5]:
# Three-component mean vector and standard-deviation vector of the dataset, as (mean, std).
# They were computed separately: running that computation here is not feasible because
# loading the whole dataset takes too much RAM and time alongside this model.
stats = ((0.5305, 0.4213, 0.3636), (0.2870, 0.2547, 0.2487))

In [8]:
# Training pipeline: random 256x256 crop after 4px reflect padding, random horizontal
# flip, then conversion to a tensor. No normalization is applied.
train_tfms = tt.Compose([tt.RandomCrop(256, padding=4, padding_mode='reflect'), 
                         tt.RandomHorizontalFlip(), 
                         tt.ToTensor()])
# Evaluation pipeline: tensor conversion followed by normalization with `stats`.
# NOTE(review): this normalizes while train_tfms does not, so data loaded with
# valid_tfms is distributed differently from what the model is trained on — confirm intent.
valid_tfms = tt.Compose([tt.ToTensor(), tt.Normalize(*stats)])

In [9]:
# Full labelled training set; every access applies the random training transforms.
dataset = ImageFolder(data_dir, train_tfms)

In [10]:
# tensor([0.5305, 0.4213, 0.3636]) tensor([0.2870, 0.2547, 0.2487])

In [11]:
# Sanity check: fetch the first sample and print its tensor shape and integer label.
img, label = dataset[0]
print(img.shape, label)

In [12]:
# Class names as discovered by ImageFolder; a sample's label is an index into this list.
print(dataset.classes)

In [13]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# Use a white figure background for every plot in this notebook.
matplotlib.rcParams['figure.facecolor'] = '#ffffff'

In [14]:
def show_example(img, label):
    """Print the class name and index of `label`, then display `img`.

    `img` is indexed as (channels, height, width) and is permuted to
    (height, width, channels) for matplotlib. The class name is looked up in
    the notebook-level `dataset.classes`.
    """
    class_name = dataset.classes[label]
    print('Label: ', class_name, f"({label!s})")
    channels_last = img.permute(1, 2, 0)
    plt.imshow(channels_last)

In [15]:
# Display the first training sample (random crop/flip applied on access).
show_example(*dataset[0])

In [16]:
# Display the sample at index 1099 for comparison with the first one.
show_example(*dataset[1099])

In [17]:
# Seed PyTorch's global RNG before the train/validation split below.
# The trailing ';' suppresses the notebook's display of the returned value.
random_seed = 42
torch.manual_seed(random_seed);

In [18]:
# Hold out 3600 samples for validation; the rest are used for training.
val_size = 3600
train_size = len(dataset) - val_size

# Both subsets index into the same `dataset` object, so validation samples also go
# through the random training transforms.
# NOTE(review): validation metrics are therefore computed on augmented images — confirm intent.
train_ds, val_ds = random_split(dataset, [train_size, val_size])
len(train_ds), len(val_ds)

In [19]:
from torch.utils.data.dataloader import DataLoader

# Training batch size; the validation loader uses twice this value.
batch_size=128

In [20]:
# train_ds = dataset

In [21]:
# Training batches are reshuffled on every pass; validation batches are not shuffled
# and use a doubled batch size. Both loaders use 2 worker processes and pinned memory.
train_dl = DataLoader(train_ds, batch_size, shuffle=True, num_workers=2, pin_memory=True)
val_dl = DataLoader(val_ds, batch_size*2, num_workers=2, pin_memory=True)

In [22]:
from torchvision.utils import make_grid

def show_batch(dl):
    """Plot the first batch yielded by `dl` as a single grid, 16 images per row.

    Only the first batch is consumed; nothing is drawn if `dl` yields no batches.
    """
    for batch_images, _ in dl:
        _, axes = plt.subplots(figsize=(12, 6))
        # Hide axis ticks: the grid is a picture, not a chart.
        axes.set_xticks([])
        axes.set_yticks([])
        grid = make_grid(batch_images, nrow=16)
        # make_grid output is channels-first; matplotlib wants channels-last.
        axes.imshow(grid.permute(1, 2, 0))
        return

In [23]:
# Visual check of one augmented training batch.
show_batch(train_dl)

In [24]:
import torch.nn as nn
import torch.nn.functional as F

In [25]:
# Minimal conv + pool stack, used in the next cell only to inspect output shapes:
# a 3x3 convolution (3 -> 8 channels, padding 1) followed by 2x2 max pooling.
simple_model = nn.Sequential(
    nn.Conv2d(3, 8, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(2, 2)
)

In [26]:
# Push the first training batch through simple_model and print the input and
# output shapes; `break` stops after that single batch.
for images, labels in train_dl:
    print('images.shape:', images.shape)
    out = simple_model(images)
    print('out.shape:', out.shape)
    break

In [27]:
class ImageClassificationBase(nn.Module):
    """Base class adding training/validation helpers to an image classifier.

    Subclasses implement `forward`; the helpers here compute the cross-entropy
    loss for a batch, per-batch validation metrics, and their per-epoch summary.
    """

    def training_step(self, batch):
        """Return the cross-entropy loss (with autograd history) for one batch.

        `batch` is an `(images, labels)` pair.
        """
        images, labels = batch
        out = self(images)                  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        return loss

    def validation_step(self, batch):
        """Return detached loss, accuracy and sample count for one batch.

        The sample count is reported as `'batch_size'` so that
        `validation_epoch_end` can weight batches of different sizes correctly.
        """
        images, labels = batch
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        return {'val_loss': loss.detach(), 'val_acc': acc, 'batch_size': len(labels)}

    @staticmethod
    def _weighted_mean(values, weights):
        """Mean of scalar tensors `values`, weighted by the numbers in `weights`."""
        stacked = torch.stack(values)
        w = torch.as_tensor(weights, dtype=stacked.dtype, device=stacked.device)
        return (stacked * w).sum() / w.sum()

    def validation_epoch_end(self, outputs):
        """Combine per-batch results into epoch-level `val_loss` and `val_acc` floats.

        Each per-batch value is already a mean over that batch, so batches are
        weighted by their `'batch_size'`; otherwise a short final batch would
        count as much as a full one. Entries without `'batch_size'` get weight 1,
        which reproduces a plain mean over batches.
        """
        weights = [x.get('batch_size', 1) for x in outputs]
        epoch_loss = self._weighted_mean([x['val_loss'] for x in outputs], weights)
        epoch_acc = self._weighted_mean([x['val_acc'] for x in outputs], weights)
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print the epoch number with its training loss, validation loss and accuracy."""
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc']))
        
def accuracy(outputs, labels):
    """Return the fraction of rows in `outputs` whose arg-max equals `labels`.

    The result is a 0-dim tensor built from a Python float.
    """
    preds = outputs.argmax(dim=1)
    n_correct = (preds == labels).sum().item()
    return torch.tensor(n_correct / len(preds))

In [28]:
class Cifar10CnnModel(ImageClassificationBase):
    """Three conv/pool stages followed by a three-layer classifier with 2 outputs.

    Each stage is Conv2d(3x3, padding 1) -> ReLU -> MaxPool2d(2, 2), so every stage
    halves the spatial size. The first Linear layer expects 128*32*32 features,
    i.e. a 3 x 256 x 256 input:
        stage 1:  32 x 128 x 128
        stage 2:  64 x  64 x  64
        stage 3: 128 x  32 x  32
    """

    def __init__(self):
        super().__init__()
        conv_channels = (3, 32, 64, 128)
        layers = []
        # Build the conv stages in order so the layer indices inside
        # `self.network` (and hence state_dict keys) stay 0, 1, 2, ...
        for in_ch, out_ch in zip(conv_channels[:-1], conv_channels[1:]):
            layers.extend([
                nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        # Classifier head: 128*32*32 -> 1024 -> 512 -> 2, dropout before each ReLU.
        layers.extend([
            nn.Flatten(),
            nn.Linear(128*32*32, 1024),
            nn.Dropout(0.25),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.Dropout(0.25),
            nn.ReLU(),
            nn.Linear(512, 2),
        ])
        self.network = nn.Sequential(*layers)

    def forward(self, xb):
        """Return the two raw class scores for each image in batch `xb`."""
        return self.network(xb)

In [29]:
# Instantiate the network and display its layer structure.
# This instance is replaced by a fresh one in a later cell before training.
model = Cifar10CnnModel()
model

In [30]:
# Sanity check: run the first training batch through the untrained model and print
# the input shape, the output shape and the raw scores for the first image.
for images, labels in train_dl:
    print('images.shape:', images.shape)
    out = model(images)
    print('out.shape:', out.shape)
    print('out[0]:', out[0])
    break

In [31]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)
    
def to_device(data, device):
    """Move `data` to `device`, recursing into lists and tuples.

    A list or tuple (at any nesting depth) comes back as a list; anything else is
    moved with its own `.to(device, non_blocking=True)`.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

class DeviceDataLoader():
    """Iterable wrapper that moves every batch of a data loader to a device."""

    def __init__(self, dl, device):
        # Keep the wrapped loader and the target device for later iteration.
        self.dl, self.device = dl, device

    def __iter__(self):
        """Yield the wrapped loader's batches, each moved to `self.device`."""
        yield from (to_device(batch, self.device) for batch in self.dl)

    def __len__(self):
        """Return the wrapped loader's length (its number of batches)."""
        return len(self.dl)

In [32]:
# Device used for the model and all batches from here on; displayed for confirmation.
device = get_default_device()
device

In [33]:
# Rebind the loaders to wrappers that move each batch to `device`.
# NOTE(review): re-running this cell wraps the already-wrapped loaders again.
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(val_dl, device)
# Move the current model to the same device; ';' suppresses the displayed output.
to_device(model, device);

In [34]:
def evaluate(model, val_loader):
    """Switch `model` to eval mode and summarize it over `val_loader`.

    Runs entirely with gradient tracking disabled. Each batch goes through
    `model.validation_step`; the collected results are reduced by
    `model.validation_epoch_end`, whose return value is returned.
    """
    with torch.no_grad():
        model.eval()
        step_results = []
        for batch in val_loader:
            step_results.append(model.validation_step(batch))
        return model.validation_epoch_end(step_results)

def fit(epochs, lr, model, train_loader,val_loader, opt_func=torch.optim.SGD):
    """Train `model` for `epochs` epochs and return the per-epoch results.

    Args:
        epochs: number of passes over `train_loader`.
        lr: learning rate, passed to `opt_func` as its second positional argument.
        model: module providing `training_step`, `validation_step`,
            `validation_epoch_end` and `epoch_end`.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches, evaluated after every epoch.
        opt_func: optimizer constructor, called as `opt_func(model.parameters(), lr)`.

    Returns:
        A list with one dict per epoch: the result of `evaluate` plus a
        `'train_loss'` entry holding the mean training loss of that epoch
        (NaN if `train_loader` yielded no batches).
    """
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_losses = []
        for batch_idx, batch in enumerate(train_loader, start=1):
            loss = model.training_step(batch)
            # Keep only a detached copy for reporting: storing `loss` itself keeps
            # every batch's autograd history reachable until the epoch ends.
            train_losses.append(loss.detach())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            # Lightweight progress indicator: print every 10th batch number.
            if batch_idx % 10 == 0:
                print(batch_idx)

        # Validation phase
        result = evaluate(model, val_loader)
        if train_losses:
            result['train_loss'] = torch.stack(train_losses).mean().item()
        else:
            # torch.stack rejects an empty list; report NaN for an empty loader.
            result['train_loss'] = float('nan')
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [35]:
# Start from a freshly initialized model on `device`, replacing the earlier instance.
model = to_device(Cifar10CnnModel(), device)

In [36]:
# Baseline: validation loss and accuracy of the untrained model.
evaluate(model, val_dl)

In [37]:
# Training hyperparameters passed to fit(): epoch count, optimizer constructor
# (Adam instead of fit's SGD default) and learning rate.
num_epochs = 50
opt_func = torch.optim.Adam
lr = 0.001

In [38]:
# Train the model; `history` holds one result dict per epoch.
history = fit(num_epochs, lr, model, train_dl,val_dl, opt_func)

In [59]:
# Validation loss and accuracy of the trained model.
# NOTE(review): this cell's execution count (59) is out of sequence with its
# neighbours — confirm the notebook reproduces with Restart & Run All.
evaluate(model, val_dl)

In [40]:
def plot_accuracies(history):
    """Plot the `'val_acc'` entry of every result in `history` against its index."""
    val_accs = []
    for epoch_result in history:
        val_accs.append(epoch_result['val_acc'])
    plt.plot(val_accs, '-x')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.title('Accuracy vs. No. of epochs');

In [41]:
# Validation accuracy per epoch for the training run above.
plot_accuracies(history)

In [42]:
# Load the test images with the normalizing evaluation transforms.
# NOTE(review): `test_dataset` is rebuilt two cells later, so this instance is never
# used for prediction; valid_tfms also normalizes while the training data is not
# normalized — confirm which preprocessing is intended.
test_dataset = ImageFolder('../input/test-out/test_out', valid_tfms)

In [43]:
import numpy as np
import cv2 as cv

In [44]:
# Keep the test path in its own variable: overwriting `data_dir` would make the
# training-set cells load the test images if they were re-run.
test_dir = '../input/test-out/test_out'
# Inference must be deterministic: the random crop/flip in `train_tfms` would make
# predictions vary from run to run. CenterCrop(256) yields the same 256x256 input
# size that RandomCrop(256, ...) produces for training and, like `train_tfms`,
# applies no normalization, so test inputs match what the model was trained on.
test_tfms = tt.Compose([tt.CenterCrop(256), tt.ToTensor()])
test_dataset = ImageFolder(test_dir, test_tfms)
# NOTE(review): this displays a *training* sample; `test_dataset[0]` may have been intended.
show_example(*dataset[0])

In [45]:
# test = []
# dir = r'../input/fake-image-classification-challenge/data/test'
# for img in os.listdir(dir):
#     path = os.path.join(dir,img)
#     img = cv.imread(path)
#     test.append(np.array(img))

In [46]:
# test = np.asarray(test)

In [47]:
# test = torch.as_tensor(np.array(test))

In [48]:
def predict_image(img, model):
    """Return the predicted class name for a single image tensor.

    Args:
        img: one image tensor without a batch dimension.
        model: classifier called on a batch of one image; returns one score per class.

    Returns:
        The entry of the notebook-level `dataset.classes` with the highest score.

    The forward pass runs in eval mode (so Dropout is inactive) and without
    gradient tracking; the model's previous train/eval mode is restored afterwards.
    """
    # Convert to a batch of 1 on the notebook's compute device
    xb = to_device(img.unsqueeze(0), device)
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # Get predictions from model
            yb = model(xb)
    finally:
        model.train(was_training)
    # Pick index with the highest score
    _, preds = torch.max(yb, dim=1)
    # Retrieve the class label
    return dataset.classes[preds[0].item()]

In [49]:
# img = test[0]
# plt.imshow(img.permute(1, 2, 0))
# print('Label:', dataset.classes[label], ', Predicted:', predict_image(img, model))

In [50]:
# Show the first test image together with the model's prediction for it.
img, label = test_dataset[0]
# This bare expression is not the last line of the cell, so it displays nothing.
type(img)
plt.imshow(img.permute(1, 2, 0))
# NOTE(review): `label` comes from test_dataset but is looked up in the training
# set's `dataset.classes` — confirm the test folder layout makes this meaningful.
print('Label:', dataset.classes[label], ', Predicted:', predict_image(img, model))

In [51]:
import pandas as pd

In [52]:
# Empty results table with a `path` and a `label` column, filled in by the
# prediction loop below.
cols = ['path','label']
df = pd.DataFrame(columns = cols )

In [53]:
def pg(n):
    """Return `n` as a decimal string, left-padded with zeros to at least 5 characters.

    Examples: pg(7) -> '00007', pg(1234) -> '01234', pg(123456) -> '123456'.
    Uses the standard zero-fill instead of a hand-written chain of range checks.
    """
    return str(n).zfill(5)

In [54]:
# Pieces used to build each output path as str1 + zero-padded index + str2,
# plus the running image counter.
cnt=0
str1 = 'data/test/'
str2='.jpg'
# for img,label in test_dataset:
#     df.loc[0]

In [55]:
# Collect one record per test image and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0, and appending row by row copies the
# whole frame on every iteration. Enumerating from 0 and rebuilding `df` also makes
# this cell give the same result when it is re-run.
records = []
for idx, (img, _) in enumerate(test_dataset):
    records.append({"path": str1 + pg(idx) + str2, "label": predict_image(img, model)})
df = pd.DataFrame(records, columns=cols)

In [56]:
# Display the table of predictions.
df

In [57]:
# Write the predictions to a CSV file in the working directory.
# NOTE(review): with the default arguments the row index is written as an extra
# first column — confirm the expected submission format (index=False may be needed).
df.to_csv('CNNE10_NOVAL.csv')