In [None]:
import numpy as np
import pandas as pd
import os
from skimage.io import imread
import torch
import torch.nn as nn

In [2]:
def build_dataset(file_path):
    X, Y = [], []
    
    for dirname, _, filenames in os.walk(file_path):
        for filename in filenames:
            X.append(imread(os.path.join(dirname, filename)))
            if dirname == file_path + "/FAKE":
                Y.append(1)
            else:
                Y.append(0)
                
    X = np.array(X, dtype = np.float32)
    Y = np.array(Y)
    
    X = torch.tensor(X, dtype = torch.float32).reshape(-1, 3, 32, 32).to(device)
    Y = torch.tensor(Y).to(device)
    
    return (X, Y)

In [24]:
def train(network, total_steps = 10000, learning_rate = 1e-3, batch_size = 64):
    criterion_train = nn.CrossEntropyLoss()
    optimizer_train = torch.optim.Adam(network.parameters(), lr = learning_rate)
    for i in range(total_steps):
        ix = torch.randint(0, Xtr.shape[0], (batch_size,))

        Xb = Xtr[ix]
        Yb = Ytr[ix]

        logits = network(Xb)
        loss_train = criterion_train(logits, Yb)

        optimizer_train.zero_grad(set_to_none = True)
        loss_train.backward()
        optimizer_train.step()

        if (i + 1) % 100 == 0:
            print("Step [{}/{}], Loss: {:.4f}".format(i + 1, total_steps, loss_train.item()))

In [4]:
with torch.no_grad():
    def split_loss(network, split):
        
        x, y = {
            'train': (Xtr, Ytr),
            'test': (Xte, Yte)
        }[split]
        
        logits = network(x)
        loss = criterion(logits, y)
        
        print(split, loss.item())

In [5]:
with torch.no_grad():
    def get_test_accuracy(network):
        network.eval()
        correct = 0
        total = 0
        for image, label in zip(Xte, Yte):
            logits = network(image.reshape(1, -1))
            predicted = torch.argmax(logits)
            total += 1
            correct += (predicted == label).sum().item()
        print((correct / total) * 100, correct, total)
        network.train()

In [17]:
class CNN3D(nn.Module):
    
    def __init__(self):
        super(CNN3D, self).__init__()
        
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        # (1, 3, 32, 32)
        self.conv1 = nn.Conv3d(1, 32, kernel_size = 3, stride = 1, padding = 0)
        # (32, 1, 30, 30)
        self.bn1 = nn.BatchNorm3d(32)
        
        self.conv2 = nn.Conv2d(32, 32, kernel_size = 3, stride = 1, padding = 0)
        # (32, 28, 28)
        self.bn2 = nn.BatchNorm2d(32)
        
        self.conv3 = nn.Conv2d(32, 32, kernel_size = 3, stride = 1, padding = 0)
        # (32, 26, 26)
        self.bn3 = nn.BatchNorm2d(32)
        
        self.fc1 = nn.Linear(32 * 26 * 26, 4 * n_hidden)
        self.ln_fc1 = nn.LayerNorm(4 * n_hidden)
        
        self.fc2 = nn.Linear(4 * n_hidden, n_hidden)
        self.ln_fc2 = nn.LayerNorm(n_hidden)
        
        self.fc3 = nn.Linear(n_hidden, num_classes)

    def forward(self, x):
        x = x.reshape(-1, 1, 3, 32, 32)
        x = self.relu(self.bn1(self.conv1(x)))
        x = x.reshape(-1, 32, 30, 30)
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.relu(self.bn3(self.conv3(x)))
        
        x = self.flatten(x)
        
        x = self.relu(self.ln_fc1(self.fc1(x)))
        x = self.relu(self.ln_fc2(self.fc2(x)))
        x = self.fc3(x)
        
        return x

In [18]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 64
total_steps = 2000
learning_rate = 3e-4
n_hidden = 512
num_classes = 2

device # cuda

device(type='cuda')

In [19]:
CNN3D_model = CNN3D().to(device)

In [20]:
print(sum(p.numel() for p in CNN3D_model.parameters()) / 1e6, "million parameters")

45.379202 million parameters


In [11]:
train_path = "/kaggle/input/cifake-real-and-ai-generated-synthetic-images/train"
test_path = "/kaggle/input/cifake-real-and-ai-generated-synthetic-images/test"

Xtr, Ytr = build_dataset(train_path)
Xte, Yte = build_dataset(test_path)

In [42]:
train(CNN3D_model, total_steps, learning_rate / 300, batch_size)

Step [100/2000], Loss: 0.0034
Step [200/2000], Loss: 0.1529
Step [300/2000], Loss: 0.0020
Step [400/2000], Loss: 0.0006
Step [500/2000], Loss: 0.0571
Step [600/2000], Loss: 0.0005
Step [700/2000], Loss: 0.0011
Step [800/2000], Loss: 0.0017
Step [900/2000], Loss: 0.0006
Step [1000/2000], Loss: 0.0004
Step [1100/2000], Loss: 0.0006
Step [1200/2000], Loss: 0.0020
Step [1300/2000], Loss: 0.0077
Step [1400/2000], Loss: 0.0041
Step [1500/2000], Loss: 0.0044
Step [1600/2000], Loss: 0.0019
Step [1700/2000], Loss: 0.0004
Step [1800/2000], Loss: 0.0012
Step [1900/2000], Loss: 0.0015
Step [2000/2000], Loss: 0.0017


In [None]:
split_loss(CNN3D_model, 'train')
split_loss(CNN3D_model, 'test')

In [43]:
get_test_accuracy(CNN3D_model)

91.0 18200 20000


<h1>Training Log</h1>
<br/>
<h2>Without Dropout</h2>
<ol>
    <li>2k training steps, lr 3e-4, 86.305% accuracy (17261 / 20000)
    </li>
    <li>4k training steps, lr 3e-4, 87.90% accuracy (17580 / 20000)
    </li>
    <li>6k training steps, lr 3e-4, 87.97% accuracy (17594 / 20000)
    </li>
    <li>8k training steps, lr 1e-4, 90.745% accuracy (18149 / 20000)
    </li>
    <li>10k training steps, lr 1e-4, 90.525% accuracy (18105 / 20000), seems to have begun to overtrain
    </li>
    <li>12k training steps, lr 5e-5, 90.75% accuracy (18150 / 20000), nvm just needed less learning rate
    </li>
    <li>14k training steps, lr 1e-5, 90.855% accuracy (18171 / 20000)
    </li>
    <li>16k training steps, lr 5e-6, 90.995% accuracy (18199 / 20000)
    </li>
    <li>18k training steps, lr 1e-6, 91.0% accuracy (18200 / 20000), I feel that this is the limit
    </li>
</ol>

In [45]:
torch.save(CNN3D_model.state_dict(), "CNN3D_model_91percent.ckpt")

In [54]:
dropout = 0.1

In [55]:
class CNN3D_DROPOUT(nn.Module):
    
    def __init__(self):
        super(CNN3D_DROPOUT, self).__init__()
        
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        # (1, 3, 32, 32)
        self.conv1 = nn.Conv3d(1, 32, kernel_size = 3, stride = 1, padding = 0)
        # (32, 1, 30, 30)
        self.bn1 = nn.BatchNorm3d(32)
        
        self.conv2 = nn.Conv2d(32, 32, kernel_size = 3, stride = 1, padding = 0)
        # (32, 28, 28)
        self.bn2 = nn.BatchNorm2d(32)
        
        self.conv3 = nn.Conv2d(32, 32, kernel_size = 3, stride = 1, padding = 0)
        # (32, 26, 26)
        self.bn3 = nn.BatchNorm2d(32)
        
        self.fc1 = nn.Linear(32 * 26 * 26, 4 * n_hidden)
        self.ln_fc1 = nn.LayerNorm(4 * n_hidden)
        
        self.fc2 = nn.Linear(4 * n_hidden, n_hidden)
        self.ln_fc2 = nn.LayerNorm(n_hidden)
        
        self.fc3 = nn.Linear(n_hidden, num_classes)

    def forward(self, x):
        x = x.reshape(-1, 1, 3, 32, 32)
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.dropout(x)
        x = x.reshape(-1, 32, 30, 30)
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.dropout(x)
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.dropout(x)
        
        x = self.flatten(x)
        
        x = self.relu(self.ln_fc1(self.fc1(x)))
        x = self.dropout(x)
        x = self.relu(self.ln_fc2(self.fc2(x)))
        x = self.dropout(x)
        x = self.fc3(x)
        
        return x

In [56]:
CNN3D_DROPOUT_model = CNN3D_DROPOUT().to(device)

In [94]:
train(CNN3D_DROPOUT_model, total_steps * 5, learning_rate / 600, batch_size)

Step [100/10000], Loss: 0.0790
Step [200/10000], Loss: 0.0065
Step [300/10000], Loss: 0.0066
Step [400/10000], Loss: 0.0509
Step [500/10000], Loss: 0.0102
Step [600/10000], Loss: 0.0032
Step [700/10000], Loss: 0.0196
Step [800/10000], Loss: 0.0014
Step [900/10000], Loss: 0.0069
Step [1000/10000], Loss: 0.0151
Step [1100/10000], Loss: 0.0051
Step [1200/10000], Loss: 0.0326
Step [1300/10000], Loss: 0.0091
Step [1400/10000], Loss: 0.0024
Step [1500/10000], Loss: 0.0608
Step [1600/10000], Loss: 0.0020
Step [1700/10000], Loss: 0.0014
Step [1800/10000], Loss: 0.0048
Step [1900/10000], Loss: 0.0011
Step [2000/10000], Loss: 0.0090
Step [2100/10000], Loss: 0.0008
Step [2200/10000], Loss: 0.0682
Step [2300/10000], Loss: 0.0009
Step [2400/10000], Loss: 0.0055
Step [2500/10000], Loss: 0.0008
Step [2600/10000], Loss: 0.0018
Step [2700/10000], Loss: 0.0294
Step [2800/10000], Loss: 0.0118
Step [2900/10000], Loss: 0.0089
Step [3000/10000], Loss: 0.0007
Step [3100/10000], Loss: 0.0003
Step [3200/10000]

In [95]:
get_test_accuracy(CNN3D_DROPOUT_model)

91.17 18234 20000


<h1>Training Log</h1>
<br/>
<h2>With Dropout</h2>
<ol>
    <li>2k steps, lr 3e-4, 86.405% accuracy (17281 / 20000)
    </li>
    <li>4k steps, lr 3e-4, 87.465% accuracy (17493 / 20000)
    </li>
    <li>6k steps, lr 3e-4, 88.725% accuracy (17745 / 20000)
    </li>
    <li>8k steps, lr 3e-4, 89.985% accuracy (17997 / 20000)
    </li>
    <li>10k steps, lr 3e-4, 89.35% accuracy (17870 / 20000)
    </li>
    <li>12k steps, lr 1e-4, 90.43% accuracy (18086 / 20000)
    </li>
    <li>14k steps, lr 1e-4, 90.485% accuracy (18097 / 20000)
    </li>
    <li>16k steps, lr 5e-5, 90.885% accuracy (18177 / 20000)
    </li>
    <li>18k steps, lr 5e-5, 90.225% accuracy (18045 / 20000)
    </li>
    <li>20k steps, lr 1e-5, 90.915% accuracy (18183 / 20000)
    </li>
    <li>22k steps, lr 1e-5, 91.135% accuracy (18227 / 20000)
    </li>
    <li>24k steps, lr 1e-5, 90.88% accuracy (18176 / 20000)
    </li>
    <li>26k steps, lr 5e-6, 90.98% accuracy (18196 / 20000)
    </li>
    <li>28k steps, lr 5e-6, 91.145% accuracy (18229 / 20000)
    </li>
    <li>30k steps, lr 1e-6, 91.18% accuracy (18236 / 20000)
    </li>
    <li>32k steps, lr 1e-6, 91.165% accuracy (18233 / 20000)
    </li>
    <li>34k steps, lr 5e-7, 90.87% accuracy (18174 / 20000)
    </li>
    <li>44k steps, lr 5e-7, 91.17% accuracy (18234 / 20000)
    </li>
</ol>
<h2>My Takeaways</h2>
<ul>
    <li>it doesnt seem that overfitting or dropout was the bottleneck
    </li>
    <li>dropout is definitely a lot more robust and more immune to tinkering and messing around with the network
    </li>
    <li>did not produce a significant improvement, but as stated, it was probably not the bottleneck
    </li>
</ul>

In [96]:
torch.save(CNN3D_DROPOUT_model.state_dict(), "CNN3D_DROPOUT_model_91percent.ckpt")