<a href="https://colab.research.google.com/github/rreyes2155/Synthetic_Real_Faces/blob/main/DATA_690_Deep_Learning_Project_Models_GCP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
# NOTE(review): presumably switches off Colab's ephemeral-credential mode so the
# login below leaves credentials that other tools can use — confirm against Colab docs.
os.environ['USE_AUTH_EPHEM'] = '0'

# Interactive Google sign-in for this Colab session.
from google.colab import auth
auth.authenticate_user()



In [None]:
# Install gcsfuse so we can mount our bucket as a local directory.
# 1) register the gcsfuse apt repository (the "bionic" distribution name is hard-coded)
!echo "deb http://packages.cloud.google.com/apt gcsfuse-bionic main" > /etc/apt/sources.list.d/gcsfuse.list
# 2) download the repository key and hand it to apt-key
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
# 3) refresh the package index, then install gcsfuse
!apt -qq update
!apt -qq install gcsfuse

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  1210  100  1210    0     0  43214      0 --:--:-- --:--:-- --:--:-- 43214
OK
72 packages can be upgraded. Run 'apt list --upgradable' to see them.
gcsfuse is already the newest version (0.42.4).
0 upgraded, 0 newly installed, 0 to remove and 72 not upgraded.


In [None]:
# make directory and load our bucket to directory
!mkdir faces
!gcsfuse --implicit-dirs data690deeplearning faces

mkdir: cannot create directory ‘faces’: File exists
I0514 05:36:53.414283 2023/05/14 05:36:53.414256 Start gcsfuse/0.42.4 (Go version go1.19.7) for app "" using mount point: /content/faces


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
from skimage import io
import random
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets, models
from torchvision.transforms import ToTensor
from torchvision.io import read_image
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import torchvision
import time
import copy
from transformers import ViTImageProcessor, ViTForImageClassification, TrainingArguments, Trainer, ViTFeatureExtractor, ViTImageProcessor
from sklearn.metrics import accuracy_score
from datasets import Dataset, load_metric

In [None]:
# pick the GPU when one is visible to torch, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

# Load Cleaned DataFrames and create train/test splits

In [None]:
# load the cleaned metadata CSVs for each dataset from the mounted bucket
real, lama_all, mat = (
    pd.read_csv(csv_path)
    for csv_path in (
        'faces/celebahq_metadata_cleaned.csv',
        'faces/lama_all_metadata_cleaned.csv',
        'faces/mat_metadata_cleaned.csv',
    )
)

# We may not use them, but let's load the separated lama datasets in case we want to try.
lama_deep, lama_dialated, lama_fourier = (
    pd.read_csv(csv_path)
    for csv_path in (
        'faces/lama_deep_metadata_cleaned.csv',
        'faces/lama_dialated_metadata_cleaned.csv',
        'faces/lama_fourier_metadata_cleaned.csv',
    )
)

## Combine data and create train/test split

In [None]:
# function to combine datasets and create train/test

# accepts a list of the number of samples you want from each dataset
# in this order [real, lama_all, 'mat', 'deep', 'dialated', 'fourier']

def make_test_split(nums, test_portion = 0.2, rs = 3):
    """Sample the metadata frames and build shuffled train/test DataFrames.

    Reads the module-level frames ``real``, ``lama_all``, ``mat``,
    ``lama_deep``, ``lama_dialated`` and ``lama_fourier``. Every requested
    dataset is sampled and split on its own with the same ``test_portion``,
    then the pieces are concatenated and shuffled.

    Args:
        nums: six sample counts, in the order
            [real, lama_all, mat, lama_deep, lama_dialated, lama_fourier].
            A count of 0 leaves that dataset out.
        test_portion: forwarded to ``train_test_split`` as ``test_size``.
        rs: random state used for sampling, splitting and the final shuffle,
            so the same arguments always give the same frames.

    Returns:
        ``(train, test)``: two shuffled DataFrames with a fresh 0..n-1 index.

    Raises:
        ValueError: if ``nums`` does not hold six counts, if ``lama_all`` is
            requested together with any of the per-type lama datasets, or if
            no samples are requested at all.
    """
    if len(nums) != 6:
        raise ValueError(
            'nums must hold 6 counts: '
            '[real, lama_all, mat, lama_deep, lama_dialated, lama_fourier]'
        )
    n_real, n_lama_all, n_mat, n_deep, n_dialated, n_fourier = nums

    # lama_all and the per-type lama frames must not be mixed
    # (the original `return null` raised a NameError here)
    if n_lama_all > 0 and (n_deep > 0 or n_dialated > 0 or n_fourier > 0):
        raise ValueError(
            'cannot sample from lama_all and from the separate lama datasets '
            'in the same split'
        )

    # (frame, count) pairs, kept in the original concatenation order
    if n_lama_all != 0:
        requested = [(real, n_real), (lama_all, n_lama_all), (mat, n_mat)]
    else:
        requested = [
            (real, n_real),
            (mat, n_mat),
            (lama_deep, n_deep),
            (lama_dialated, n_dialated),
            (lama_fourier, n_fourier),
        ]

    train_frames, test_frames = [], []
    for frame, n in requested:
        # train_test_split refuses empty input, so skip datasets with count 0
        if n == 0:
            continue
        sampled = frame.sample(n = n, random_state = rs, ignore_index = True)
        train_part, test_part = train_test_split(sampled, test_size = test_portion, random_state = rs)
        train_frames.append(train_part)
        test_frames.append(test_part)

    if not train_frames:
        raise ValueError('nums requests no samples from any dataset')

    # concat train/test dfs
    train = pd.concat(train_frames, ignore_index = True)
    test = pd.concat(test_frames, ignore_index = True)

    # shuffle data (seeded, so the split is reproducible end to end)
    train_shuff = train.sample(frac=1, random_state = rs).reset_index(drop=True)
    test_shuff = test.sample(frac=1, random_state = rs).reset_index(drop=True)

    return train_shuff, test_shuff

In [None]:
# let's get counts to see how big we can make our datasets
data_list = [real, lama_all, mat, lama_deep, lama_dialated, lama_fourier]
data_names = ['real', 'lama_all', 'mat', 'lama_deep', 'lama_dialated', 'lama_fourier']
counts = [len(frame) for frame in data_list]

# build the summary table once, after all counts are known
counts_df = pd.DataFrame({'DataFrame': data_names, 'Number of Samples': counts})
display(counts_df)

Unnamed: 0,DataFrame,Number of Samples
0,real,9276
1,lama_all,8784
2,mat,7637
3,lama_deep,3483
4,lama_dialated,3483
5,lama_fourier,1818


In [None]:
# create train/test data

# sample counts in the order make_test_split expects:
# [real, lama_all, mat, deep, dialated, fourier]
# let's first start with a small sample
small_sample_sizes = [500, 250, 250, 0, 0, 0]
all_samples = [len(real), len(lama_all), len(mat), 0, 0, 0]

train_1000, test_1000 = make_test_split(small_sample_sizes)
# the "all" split must use the full counts, not the small sample a second time
train_all, test_all = make_test_split(all_samples)

# Models

## Create Custom Dataset

We first need to create a custom dataset for our data. Let's start by looking at the shape of the images.

In [None]:
# tally the tensor shapes of 100 randomly chosen training images
img_shapes = {}
for _ in range(100):
    # randrange gives a uniform index in [0, len); `randint(0, len) - 1` could
    # yield -1, which aliases the last row and samples it twice as often
    row = random.randrange(len(train_1000))
    shape = read_image(train_1000['image_path'].iloc[row]).size()
    img_shapes[shape] = img_shapes.get(shape, 0) + 1

In [None]:
# one line per distinct shape, followed by how many sampled images had it
for key, seen in img_shapes.items():
    print(f'{key} : {seen}')

torch.Size([3, 200, 200]) : 100


It looks like all of our images have the same shape; we will resize them later when using the pretrained models.

Now let's make a custom dataset.

https://pytorch.org/tutorials/beginner/basics/data_tutorial.html

In [None]:
# create custom dataset class for our faces  dataset

class Faces(torch.utils.data.Dataset):
    """Map-style PyTorch dataset over a metadata DataFrame of face images.

    The base class is spelled out as ``torch.utils.data.Dataset`` because the
    bare name ``Dataset`` is re-bound by the later
    ``from datasets import Dataset`` import in this notebook.

    Args:
        df: DataFrame with an ``image_path`` column (path of the image file)
            and a ``target`` column holding ``'real'`` or ``'fake'``.
        transform: callable applied to the loaded PIL image in ``__getitem__``.
    """

    def __init__(self, df, transform):
        self.df = df
        self.transform = transform
        self.classes = ['real', 'fake']
        self.class_to_idx = {'real' : 0, 'fake': 1}

    def __len__(self):
        """Number of rows (images) in the DataFrame."""
        return len(self.df)

    # retrieves image from path in df file and target from df
    def get_image_class(self, index):
        """Return ``(PIL image, class name)`` for the row at position ``index``."""
        row = self.df.iloc[index]
        # force 3 channels so a grayscale/RGBA file cannot break the
        # 3-channel Normalize used by the transforms in this notebook
        image = Image.open(row['image_path']).convert('RGB')
        return image, row['target']

    def __getitem__(self, idx):
        """Return ``(transformed image, integer label)`` for position ``idx``."""
        image, class_name = self.get_image_class(idx)
        label = self.class_to_idx[class_name]

        return self.transform(image), label

## Training and Helper Functions

In [None]:
# from https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html
# code to train model

def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' pass followed by a 'val' pass. The weights from
    the epoch with the highest validation accuracy are kept and loaded back
    into the model before returning. Batches are moved to the module-level
    ``device``.

    Args:
        model: network to train; updated in place.
        dataloaders: mapping with 'train' and 'val' DataLoaders yielding
            ``(inputs, labels)`` batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped after every training batch.
        num_epochs: number of train+val epochs to run.
        is_inception: when True, the model is expected to return
            ``(outputs, aux_outputs)`` in training and the loss becomes
            ``loss(outputs) + 0.4 * loss(aux_outputs)``.

    Returns:
        ``(model, val_acc_history)`` where the history holds one validation
        accuracy per epoch.
    """
    since = time.time()

    val_acc_history = []
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # a training pass, then a validation pass
        for phase in ['train', 'val']:
            training = phase == 'train'
            if training:
                model.train()
            else:
                model.eval()

            loader = dataloaders[phase]
            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # gradients accumulate by default, so clear them per batch
                optimizer.zero_grad()

                # only build the autograd graph while training
                with torch.set_grad_enabled(training):
                    if is_inception and training:
                        # Inception also returns an auxiliary output in train
                        # mode; it contributes to the loss with weight 0.4
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    # predicted class = index of the largest output per row
                    preds = torch.max(outputs, 1)[1]

                    if training:
                        loss.backward()
                        optimizer.step()

                # weight the batch loss by the batch size for a per-sample average
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels.data).sum()

            n_samples = len(loader.dataset)
            epoch_loss = running_loss / n_samples
            epoch_acc = running_corrects.double() / n_samples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val':
                # snapshot the weights whenever validation accuracy improves
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # hand back the model with its best-scoring weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

## DenseNet Model

https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html


https://pytorch.org/vision/main/models.html




All pre-trained models expect input images normalized in the same way, i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), where H and W are expected to be at least 224. The images have to be loaded in to a range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225].

In [None]:
# transformations for densenet model, following the pretrained-model input
# requirements quoted above (224-pixel inputs, ImageNet mean/std)
densenet_transforms = transforms.Compose([
    # NOTE(review): an int size presumably rescales only the shorter side; this
    # gives 224x224 only for square images such as the 200x200 ones sampled above
    transforms.Resize(224),
    # PIL image -> float tensor
    transforms.ToTensor(),
    # per-channel mean and std
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [None]:
# set batch size (shared by every DataLoader created below)
batch_size = 32

In [None]:
# subset split: wrap the DataFrames in Faces datasets, then in shuffled, batched DataLoaders
train_1000_densenet_dataset = Faces(train_1000, transform=densenet_transforms)
test_1000_densenet_dataset = Faces(test_1000, transform=densenet_transforms)
train_1000_densenet_dataloader = DataLoader(
    train_1000_densenet_dataset, batch_size=batch_size, shuffle=True)
test_1000_densenet_dataloader = DataLoader(
    test_1000_densenet_dataset, batch_size=batch_size, shuffle=True)

# "all" split: the same wrapping for train_all / test_all
train_all_densenet_dataset = Faces(train_all, transform=densenet_transforms)
test_all_densenet_dataset = Faces(test_all, transform=densenet_transforms)
train_all_densenet_dataloader = DataLoader(
    train_all_densenet_dataset, batch_size=batch_size, shuffle=True)
test_all_densenet_dataloader = DataLoader(
    test_all_densenet_dataset, batch_size=batch_size, shuffle=True)

In [None]:
# phase -> DataLoader lookups in the shape train_model expects ('train' / 'val')
densenet_1000_dict = {
    'train' : train_1000_densenet_dataloader,
    'val' : test_1000_densenet_dataloader,
}
densenet_all_dict = {
    'train' : train_all_densenet_dataloader,
    'val' : test_all_densenet_dataloader,
}

### Subset Run

In [None]:
# create model: pretrained DenseNet-121 with a new 2-class head
densenet_model = models.densenet121(pretrained=True)
in_features = densenet_model.classifier.in_features
# swap the classifier for a 2-output linear layer (real vs. fake)
densenet_model.classifier = nn.Linear(in_features, 2)
input_size = 224
# move the model to the selected device; as the last expression this also displays it
densenet_model.to(device)

In [None]:
# cross-entropy over the two classes, optimized with SGD plus momentum
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(densenet_model.parameters(), lr=0.001, momentum=0.9)

In [None]:
# number of epochs for the subset run
epochs = 5

In [None]:
# fine-tune with the subset loaders; train_model returns the model loaded with its
# best-validation weights plus the per-epoch validation accuracies
densenet_model_tuned_1000, densetnet_1000_results = train_model(densenet_model, densenet_1000_dict,
                                                                loss_fn, optimizer, num_epochs = epochs)

Epoch 0/4
----------
train Loss: 0.6920 Acc: 0.5587
val Loss: 0.6250 Acc: 0.6450

Epoch 1/4
----------
train Loss: 0.4556 Acc: 0.8425
val Loss: 0.4769 Acc: 0.8100

Epoch 2/4
----------
train Loss: 0.2777 Acc: 0.9325
val Loss: 0.4112 Acc: 0.8050

Epoch 3/4
----------
train Loss: 0.1646 Acc: 0.9637
val Loss: 0.3869 Acc: 0.8300

Epoch 4/4
----------
train Loss: 0.0964 Acc: 0.9888
val Loss: 0.3867 Acc: 0.8250

Training complete in 1m 23s
Best val Acc: 0.830000


### All Data Run with SGD

In [None]:
# re-instantiate the model so this run starts again from the pretrained weights
densenet_model = models.densenet121(pretrained=True)
in_features = densenet_model.classifier.in_features
densenet_model.classifier = nn.Linear(in_features=in_features, out_features=2)
densenet_model = densenet_model.to(device)

# fresh loss and SGD optimizer bound to the new model's parameters
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(densenet_model.parameters(), lr=0.001, momentum=0.9)

In [None]:
# number of epochs for the longer run
epochs = 25

In [None]:
# fine-tune with the `densenet_all_dict` loaders and the SGD optimizer defined above
densenet_model_tuned_all, densetnet_all_results = train_model(densenet_model, densenet_all_dict,
                                                                loss_fn, optimizer, num_epochs = epochs)

Epoch 0/24
----------
train Loss: 0.7487 Acc: 0.5038
val Loss: 0.6110 Acc: 0.7000

Epoch 1/24
----------
train Loss: 0.4766 Acc: 0.8363
val Loss: 0.4999 Acc: 0.7400

Epoch 2/24
----------
train Loss: 0.3093 Acc: 0.9075
val Loss: 0.4467 Acc: 0.7900

Epoch 3/24
----------
train Loss: 0.2220 Acc: 0.9350
val Loss: 0.4210 Acc: 0.7900

Epoch 4/24
----------
train Loss: 0.1260 Acc: 0.9813
val Loss: 0.4085 Acc: 0.8000

Epoch 5/24
----------
train Loss: 0.0654 Acc: 0.9938
val Loss: 0.4372 Acc: 0.8100

Epoch 6/24
----------
train Loss: 0.0534 Acc: 0.9962
val Loss: 0.4355 Acc: 0.8250

Epoch 7/24
----------
train Loss: 0.0267 Acc: 1.0000
val Loss: 0.4554 Acc: 0.8100

Epoch 8/24
----------
train Loss: 0.0251 Acc: 0.9975
val Loss: 0.4497 Acc: 0.8250

Epoch 9/24
----------
train Loss: 0.0136 Acc: 1.0000
val Loss: 0.4630 Acc: 0.8200

Epoch 10/24
----------
train Loss: 0.0139 Acc: 1.0000
val Loss: 0.4782 Acc: 0.8100

Epoch 11/24
----------
train Loss: 0.0155 Acc: 0.9975
val Loss: 0.4882 Acc: 0.8100

Ep

### All Data Run with AdamW

In [None]:
# re-instantiate the model so this run starts again from the pretrained weights
densenet_model = models.densenet121(pretrained=True)
in_features = densenet_model.classifier.in_features
densenet_model.classifier = nn.Linear(in_features=in_features, out_features=2)
densenet_model = densenet_model.to(device)

# same loss as before, but optimized with AdamW instead of SGD
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.AdamW(densenet_model.parameters(), lr=0.001)

epochs = 25



In [None]:
# fine-tune with the `densenet_all_dict` loaders and the AdamW optimizer defined above
densenet_model_adam, densetnet_adam_results = train_model(densenet_model, densenet_all_dict,
                                                                loss_fn, optimizer, num_epochs = epochs)

Epoch 0/24
----------
train Loss: 0.6151 Acc: 0.6900
val Loss: 2.4410 Acc: 0.5650

Epoch 1/24
----------
train Loss: 0.3802 Acc: 0.8300
val Loss: 0.5430 Acc: 0.7750

Epoch 2/24
----------
train Loss: 0.2797 Acc: 0.8950
val Loss: 0.5493 Acc: 0.8300

Epoch 3/24
----------
train Loss: 0.2686 Acc: 0.8788
val Loss: 0.4644 Acc: 0.8150

Epoch 4/24
----------
train Loss: 0.2625 Acc: 0.8888
val Loss: 0.7016 Acc: 0.7650

Epoch 5/24
----------
train Loss: 0.2430 Acc: 0.8962
val Loss: 0.9307 Acc: 0.7350

Epoch 6/24
----------
train Loss: 0.1810 Acc: 0.9287
val Loss: 4.5883 Acc: 0.5700

Epoch 7/24
----------
train Loss: 0.1733 Acc: 0.9325
val Loss: 0.6587 Acc: 0.7650

Epoch 8/24
----------
train Loss: 0.0876 Acc: 0.9738
val Loss: 1.3248 Acc: 0.7450

Epoch 9/24
----------
train Loss: 0.0564 Acc: 0.9800
val Loss: 1.4449 Acc: 0.7300

Epoch 10/24
----------
train Loss: 0.0739 Acc: 0.9750
val Loss: 1.9254 Acc: 0.6450

Epoch 11/24
----------
train Loss: 0.1922 Acc: 0.9225
val Loss: 0.8826 Acc: 0.8100

Ep

## ResNet Model

The images are resized to resize_size=[256] using interpolation=InterpolationMode.BILINEAR, followed by a central crop of crop_size=[224]. Finally the values are first rescaled to [0.0, 1.0] and then normalized using mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225].

In [None]:
# transformations for resnet model: resize, central crop, tensor conversion, normalization
resnet_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# subset split: Faces datasets wrapped in shuffled, batched DataLoaders
train_1000_resnet_dataset = Faces(train_1000, transform=resnet_transforms)
test_1000_resnet_dataset = Faces(test_1000, transform=resnet_transforms)
train_1000_resnet_dataloader = DataLoader(
    train_1000_resnet_dataset, batch_size=batch_size, shuffle=True)
test_1000_resnet_dataloader = DataLoader(
    test_1000_resnet_dataset, batch_size=batch_size, shuffle=True)

# "all" split: the same wrapping for train_all / test_all
train_all_resnet_dataset = Faces(train_all, transform=resnet_transforms)
test_all_resnet_dataset = Faces(test_all, transform=resnet_transforms)
train_all_resnet_dataloader = DataLoader(
    train_all_resnet_dataset, batch_size=batch_size, shuffle=True)
test_all_resnet_dataloader = DataLoader(
    test_all_resnet_dataset, batch_size=batch_size, shuffle=True)

# phase -> DataLoader lookups in the shape train_model expects ('train' / 'val')
resnet_1000_dict = {
    'train' : train_1000_resnet_dataloader,
    'val' : test_1000_resnet_dataloader,
}
resnet_all_dict = {
    'train' : train_all_resnet_dataloader,
    'val' : test_all_resnet_dataloader,
}

### Subset Run


In [None]:
# pretrained ResNet-18 with a new 2-class head
resnet_model = models.resnet18(pretrained=True)
in_features = resnet_model.fc.in_features
# ResNet's classification head is `fc`; assigning to `.classifier` only adds an
# unused layer and leaves the original 1000-way output in place
resnet_model.fc = nn.Linear(in_features, 2)
resnet_model.to(device)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet_model.parameters(), lr=0.001, momentum=0.9)

epochs = 5



In [None]:
# subset run: train on the subset loaders (resnet_1000_dict), matching the result names
resnet_model_1000, ressetnet_1000_results = train_model(resnet_model, resnet_1000_dict,
                                                                loss_fn, optimizer, num_epochs = epochs)

Epoch 0/4
----------
train Loss: 3.1331 Acc: 0.4188
val Loss: 0.7500 Acc: 0.6250

Epoch 1/4
----------
train Loss: 0.2457 Acc: 0.9000
val Loss: 0.5471 Acc: 0.7300

Epoch 2/4
----------
train Loss: 0.0834 Acc: 0.9825
val Loss: 0.5127 Acc: 0.7900

Epoch 3/4
----------
train Loss: 0.0447 Acc: 0.9988
val Loss: 0.4855 Acc: 0.8000

Epoch 4/4
----------
train Loss: 0.0199 Acc: 1.0000
val Loss: 0.4875 Acc: 0.8100

Training complete in 0m 52s
Best val Acc: 0.810000


### All Data Run with SGD

In [None]:
# re-instantiate a pretrained ResNet-18 with a new 2-class head for the SGD run
resnet_model = models.resnet18(pretrained=True)
in_features = resnet_model.fc.in_features
# ResNet's classification head is `fc`; assigning to `.classifier` only adds an
# unused layer and leaves the original 1000-way output in place
resnet_model.fc = nn.Linear(in_features, 2)
resnet_model.to(device)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet_model.parameters(), lr=0.001, momentum=0.9)

epochs = 25



In [None]:
# fine-tune with the `resnet_all_dict` loaders and the SGD optimizer defined above
resnet_model_all_sgd, ressetnet_all_sgd_results = train_model(resnet_model, resnet_all_dict,
                                                                loss_fn, optimizer, num_epochs = epochs)

Epoch 0/24
----------
train Loss: 3.1167 Acc: 0.4163
val Loss: 0.8946 Acc: 0.6150

Epoch 1/24
----------
train Loss: 0.2703 Acc: 0.8938
val Loss: 0.5745 Acc: 0.7400

Epoch 2/24
----------
train Loss: 0.0943 Acc: 0.9800
val Loss: 0.5301 Acc: 0.7600

Epoch 3/24
----------
train Loss: 0.0387 Acc: 0.9988
val Loss: 0.5340 Acc: 0.7900

Epoch 4/24
----------
train Loss: 0.0211 Acc: 1.0000
val Loss: 0.5054 Acc: 0.8150

Epoch 5/24
----------
train Loss: 0.0168 Acc: 1.0000
val Loss: 0.5083 Acc: 0.8250

Epoch 6/24
----------
train Loss: 0.0109 Acc: 1.0000
val Loss: 0.5183 Acc: 0.8000

Epoch 7/24
----------
train Loss: 0.0078 Acc: 1.0000
val Loss: 0.5205 Acc: 0.8100

Epoch 8/24
----------
train Loss: 0.0112 Acc: 1.0000
val Loss: 0.5193 Acc: 0.8200

Epoch 9/24
----------
train Loss: 0.0078 Acc: 1.0000
val Loss: 0.5184 Acc: 0.8150

Epoch 10/24
----------
train Loss: 0.0064 Acc: 1.0000
val Loss: 0.5132 Acc: 0.8350

Epoch 11/24
----------
train Loss: 0.0153 Acc: 1.0000
val Loss: 0.5530 Acc: 0.8050

Ep

### All Data Run with AdamW

In [None]:
# re-instantiate a pretrained ResNet-18 with a new 2-class head for the AdamW run
resnet_model = models.resnet18(pretrained=True)
in_features = resnet_model.fc.in_features
# ResNet's classification head is `fc`; assigning to `.classifier` only adds an
# unused layer and leaves the original 1000-way output in place
resnet_model.fc = nn.Linear(in_features, 2)
resnet_model.to(device)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(resnet_model.parameters(), lr=0.001)

epochs = 25



In [None]:
# keep the AdamW history under its own name so the SGD history from the
# previous run (ressetnet_all_sgd_results) is not overwritten
resnet_model_all_adam, ressetnet_all_adam_results = train_model(resnet_model, resnet_all_dict,
                                                                loss_fn, optimizer, num_epochs = epochs)

Epoch 0/24
----------
train Loss: 2.4778 Acc: 0.6050
val Loss: 4.5733 Acc: 0.4950

Epoch 1/24
----------
train Loss: 0.4589 Acc: 0.8237
val Loss: 0.4395 Acc: 0.8100

Epoch 2/24
----------
train Loss: 0.2774 Acc: 0.8938
val Loss: 0.4589 Acc: 0.8250

Epoch 3/24
----------
train Loss: 0.1932 Acc: 0.9200
val Loss: 0.4237 Acc: 0.7900

Epoch 4/24
----------
train Loss: 0.1359 Acc: 0.9513
val Loss: 0.5484 Acc: 0.8200

Epoch 5/24
----------
train Loss: 0.1035 Acc: 0.9537
val Loss: 1.6148 Acc: 0.6450

Epoch 6/24
----------
train Loss: 0.1029 Acc: 0.9675
val Loss: 1.3119 Acc: 0.7000

Epoch 7/24
----------
train Loss: 0.0490 Acc: 0.9850
val Loss: 1.0390 Acc: 0.7800

Epoch 8/24
----------
train Loss: 0.0711 Acc: 0.9750
val Loss: 0.7780 Acc: 0.7800

Epoch 9/24
----------
train Loss: 0.1032 Acc: 0.9650
val Loss: 0.8964 Acc: 0.7500

Epoch 10/24
----------
train Loss: 0.0877 Acc: 0.9700
val Loss: 0.6413 Acc: 0.8600

Epoch 11/24
----------
train Loss: 0.0590 Acc: 0.9750
val Loss: 0.5099 Acc: 0.8200

Ep

## Vision Transformer Model

https://github.com/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_the_🤗_Trainer.ipynb

In [None]:
# lookup tables between class names ('real'/'fake') and integer labels
target_to_label = {'real' : 0, 'fake': 1}
# inverse mapping, derived so the two tables cannot drift apart
label_to_target = {label: target for target, label in target_to_label.items()}

In [None]:
# transform our dataset to match the pre-trained model
def transform(example_batch):
    """Turn a batch of metadata rows into model inputs.

    Reads every file listed under ``example_batch['image_path']``, runs the
    images through the module-level ``feature_extractor`` (PyTorch tensors
    requested via ``return_tensors='pt'``) and attaches integer labels looked
    up from ``example_batch['target']`` via ``target_to_label``.

    Returns:
        The feature extractor's output with an added ``'labels'`` list.
    """
    # load each image from disk as an array
    images = [io.imread(img_path) for img_path in example_batch['image_path']]
    inputs = feature_extractor(images, return_tensors='pt')

    # don't forget to include the labels!
    inputs['labels'] = [target_to_label[targ] for targ in example_batch['target']]
    return inputs

In [None]:
# create hugging face datasets; `transform` is attached to each one via with_transform
ViT_train_1000 = Dataset.from_pandas(train_1000).with_transform(transform)
ViT_train_all = Dataset.from_pandas(train_all).with_transform(transform)
ViT_test_1000 = Dataset.from_pandas(test_1000).with_transform(transform)
# the "all" evaluation set must come from test_all, not from the subset test frame
ViT_test_all = Dataset.from_pandas(test_all).with_transform(transform)

https://huggingface.co/google/vit-base-patch16-224-in21k

In [None]:
# our pre-trained model, trained on ImageNet-21k
model_name_or_path = 'google/vit-base-patch16-224-in21k'
# ViTImageProcessor (already imported above) replaces the deprecated
# ViTFeatureExtractor; the variable name is kept because later cells use it
feature_extractor = ViTImageProcessor.from_pretrained(model_name_or_path)



In [None]:
# translate labels dicts (the same two mappings that are defined before `transform`)
target_to_label = {'real' : 0, 'fake': 1}
label_to_target = dict(zip(target_to_label.values(), target_to_label.keys()))

In [None]:
# define collate function to combine our examples into batches
def collate_fn(batch):
    """Merge a list of example dicts into a single batch dict.

    Args:
        batch: list of dicts, each with a ``'pixel_values'`` tensor and a
            ``'labels'`` value.

    Returns:
        Dict with ``'pixel_values'`` stacked along a new first dimension and
        ``'labels'`` gathered into one tensor.
    """
    pixel_values = []
    labels = []
    for example in batch:
        pixel_values.append(example['pixel_values'])
        labels.append(example['labels'])
    return {
        'pixel_values': torch.stack(pixel_values),
        'labels': torch.tensor(labels),
    }

In [None]:
# accuracy metric used to evaluate our model
# NOTE(review): `load_metric` is reportedly deprecated in newer `datasets` releases — confirm before upgrading
metric = load_metric("accuracy")

def compute_metrics(p):
    """Score predictions with the accuracy metric.

    ``p.predictions`` holds one row of scores per example; the arg-max along
    axis 1 is taken as the predicted label and compared with ``p.label_ids``.
    """
    predicted_labels = np.argmax(p.predictions, axis=1)
    return metric.compute(predictions=predicted_labels, references=p.label_ids)

### ViT Subset Run

In [None]:
# set our training args for the subset run (5 epochs)
training_args = TrainingArguments(
  output_dir="face/vit",            # directory passed to the Trainer for its output files
  per_device_train_batch_size=32,
  evaluation_strategy="steps",      # evaluate on a step schedule (see eval_steps)
  num_train_epochs=5,
  fp16=True,                        # half-precision training
  save_steps=100,                   # checkpoint every 100 steps
  eval_steps=100,                   # evaluate every 100 steps
  logging_steps=10,                 # log the training loss every 10 steps
  learning_rate=2e-4,
  save_total_limit=2,               # cap on how many checkpoints are kept
  remove_unused_columns=False,      # keep `image_path`/`target`, which `transform` reads
  push_to_hub=False,
  report_to='tensorboard',
  load_best_model_at_end=True,      # reload the best checkpoint once training ends
)

In [None]:
# create our model for the subset run: pretrained ViT backbone with a
# 2-label classification head and our label <-> name mappings
ViT_1000_model = ViTForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels = 2,
    id2label = label_to_target,
    label2id = target_to_label
)

Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# create our trainer to train and evaluate the subset model
trainer_1000 = Trainer(
    model = ViT_1000_model,
    args = training_args,
    data_collator = collate_fn,           # stacks pixel_values / labels into batch tensors
    compute_metrics = compute_metrics,    # accuracy on the eval set
    train_dataset = ViT_train_1000,
    eval_dataset = ViT_test_1000,
    tokenizer = feature_extractor,        # NOTE(review): presumably passed so it is saved with the model — confirm
)

In [None]:
# train our model, then save the model, the training metrics and the trainer state
train_1000_results = trainer_1000.train()
trainer_1000.save_model()
trainer_1000.log_metrics("train", train_1000_results.metrics)
trainer_1000.save_metrics("train", train_1000_results.metrics)
trainer_1000.save_state()

Step,Training Loss,Validation Loss,Accuracy
100,0.0795,0.470029,0.855


***** train metrics *****
  epoch                    =         5.0
  total_flos               = 288680157GF
  train_loss               =      0.0502
  train_runtime            =  0:01:58.93
  train_samples_per_second =      33.632
  train_steps_per_second   =       1.051


In [None]:
# show the raw TrainOutput (global step, training loss, metrics dict)
print(train_1000_results)

TrainOutput(global_step=125, training_loss=0.050203990936279294, metrics={'train_runtime': 118.9353, 'train_samples_per_second': 33.632, 'train_steps_per_second': 1.051, 'total_flos': 3.09967958458368e+17, 'train_loss': 0.050203990936279294, 'epoch': 5.0})


In [None]:
# evaluate the subset model on the subset test set, then log and save the metrics
metrics_1000 = trainer_1000.evaluate(ViT_test_1000)
trainer_1000.log_metrics("eval", metrics_1000)
trainer_1000.save_metrics("eval", metrics_1000)

***** eval metrics *****
  epoch                   =        5.0
  eval_accuracy           =      0.855
  eval_loss               =       0.47
  eval_runtime            = 0:00:02.43
  eval_samples_per_second =     82.149
  eval_steps_per_second   =     10.269


### ViT All Data Run

In [None]:
# set our training args for the longer run (25 epochs); otherwise the same as the subset run
# NOTE(review): output_dir is the same as in the subset run above, so that run's
# checkpoints/metrics may be overwritten — confirm this is intended
training_args = TrainingArguments(
  output_dir="face/vit",            # directory passed to the Trainer for its output files
  per_device_train_batch_size=32,
  evaluation_strategy="steps",      # evaluate on a step schedule (see eval_steps)
  num_train_epochs=25,
  fp16=True,                        # half-precision training
  save_steps=100,                   # checkpoint every 100 steps
  eval_steps=100,                   # evaluate every 100 steps
  logging_steps=10,                 # log the training loss every 10 steps
  learning_rate=2e-4,
  save_total_limit=2,               # cap on how many checkpoints are kept
  remove_unused_columns=False,      # keep `image_path`/`target`, which `transform` reads
  push_to_hub=False,
  report_to='tensorboard',
  load_best_model_at_end=True,      # reload the best checkpoint once training ends
)

In [None]:
# create a fresh model for the "all" run: pretrained ViT backbone with a
# 2-label classification head and our label <-> name mappings
ViT_all_model = ViTForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels = 2,
    id2label = label_to_target,
    label2id = target_to_label
)

Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# create trainer to train and evaluate the "all" model
trainer_all = Trainer(
    model = ViT_all_model,
    args = training_args,
    data_collator = collate_fn,           # stacks pixel_values / labels into batch tensors
    compute_metrics = compute_metrics,    # accuracy on the eval set
    train_dataset = ViT_train_all,
    eval_dataset = ViT_test_all,
    tokenizer = feature_extractor,        # NOTE(review): presumably passed so it is saved with the model — confirm
)

In [None]:
# train our model, then save the model, the training metrics and the trainer state
train_all_results = trainer_all.train()
trainer_all.save_model()
trainer_all.log_metrics("train", train_all_results.metrics)
trainer_all.save_metrics("train", train_all_results.metrics)
trainer_all.save_state()



Step,Training Loss,Validation Loss,Accuracy
100,0.2578,0.466254,0.83
200,0.066,1.05237,0.75
300,0.0034,0.939797,0.79
400,0.0022,1.101762,0.78
500,0.0017,1.184804,0.775
600,0.0015,1.213172,0.775


***** train metrics *****
  epoch                    =         25.0
  total_flos               = 1443400785GF
  train_loss               =       0.0944
  train_runtime            =   0:09:10.56
  train_samples_per_second =       36.327
  train_steps_per_second   =        1.135


In [None]:
# evaluate the "all" model on ViT_test_all, then log and save the metrics
metrics_all = trainer_all.evaluate(ViT_test_all)
trainer_all.log_metrics("eval", metrics_all)
trainer_all.save_metrics("eval", metrics_all)

***** eval metrics *****
  epoch                   =       25.0
  eval_accuracy           =       0.83
  eval_loss               =     0.4663
  eval_runtime            = 0:00:02.11
  eval_samples_per_second =     94.521
  eval_steps_per_second   =     11.815
