<a href="https://colab.research.google.com/github/likithpala7/real-fake-detector/blob/main/train_mc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git
!pip install peft

Collecting ftfy
  Downloading ftfy-6.2.0-py3-none-any.whl (54 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/54.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.4/54.4 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: ftfy
Successfully installed ftfy-6.2.0
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-ngw9xw42
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-ngw9xw42
  Resolved https://github.com/openai/CLIP.git to commit a1d071733d7111c9c014f024669f959182114e33
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->clip==1.0)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->clip==1.0)
  Using ca

In [2]:
from google.colab import drive
from google.colab import files
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
from torch.utils.data import Dataset
import json
import torch
import clip
import peft
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
import json
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd
import time
from torch.utils.data import DataLoader
from peft import LoraConfig, get_peft_model
from torchvision.ops import sigmoid_focal_loss
import torch
import matplotlib.pyplot as plt
from collections import namedtuple
import pandas as pd
from copy import deepcopy
from tqdm import tqdm
from transformers import AutoProcessor, CLIPVisionModel, CLIPModel, CLIPVisionModelWithProjection
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
!unzip -q drive/MyDrive/GenImage/imagenet_ai_holdout.zip

In [5]:
!rm imagenet_ai_holdout/ADM/train/115_adm_156.PNG
!rm imagenet_ai_holdout/BigGAN/train/116_biggan_00098.png
!rm imagenet_ai_holdout/BigGAN/train/116_biggan_00107.png

In [2]:
label_dict = {model: i for i, model in
               enumerate(sorted(os.listdir('imagenet_ai_holdout')))}
print(label_dict)

class ImageDataset(Dataset):

    def get_dataset(self, d_type):

        data = []

        for dataset in os.listdir('imagenet_ai_holdout'):

            imgs = os.listdir(os.path.join('imagenet_ai_holdout', dataset, d_type))

            data.extend([os.path.join('imagenet_ai_holdout', dataset, d_type, img)
                         for img in imgs])


        return data


    def __init__(self, config, d_type='train'):


       self.data = self.get_dataset(d_type)
       self.preprocess = AutoProcessor.from_pretrained('laion/CLIP-ViT-H-14-laion2B-s32B-b79K')

       self.model = config.model

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):

        # Get the question and answer at the idx
        img_path = self.data[idx]

        model_type = img_path.split(os.path.sep)[1]
        label = torch.tensor(label_dict[model_type]).to(device)

        img = self.preprocess(images=Image.open(img_path),
                              return_tensors='pt').pixel_values.squeeze(0).to(device)

        return img, label


{'ADM': 0, 'BigGAN': 1, 'Glide': 2, 'SDV4': 3, 'SDV5': 4, 'VQDM': 5, 'nature': 6, 'wukong': 7}


In [3]:
CLIP_HIDDEN_STATE = 512

def print_trainable_parameters(model):

    """
    Prints the number of trainable parameters in the model
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()

    print(
        f"Trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

class CLIPTeacher(nn.Module):

    def __init__(self, config):
        super().__init__()

        self.clip_model = CLIPVisionModel.from_pretrained("openai/clip-vit-large-patch14")

        for param in self.clip_model.parameters():
            param.requires_grad = False

        hidden_state = self.clip_model.config.hidden_size

        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_state, nhead=config.nhead, batch_first=True)
        self.teacher_model = nn.TransformerEncoder(encoder_layer, num_layers=config.nlayers)
        self.classifier = nn.Sequential(nn.Linear(hidden_state, hidden_state // 2),
                                        nn.Dropout(0.2),
                                        nn.ReLU(),
                                        nn.Linear(hidden_state // 2, len(label_dict))
                                        )

    def forward(self, x):

        # Get hidden states from CLIP Model (N, S, H)
        last_hidden_state = self.clip_model(x).last_hidden_state
        out = self.teacher_model(last_hidden_state)

        # Get the first token's hidden state
        out = torch.mean(out, dim=1)

        return self.classifier(out)

    def get_features(self, x):
        # Get hidden states from CLIP Model (N, S, H)
        last_hidden_state = self.clip_model(x).last_hidden_state
        out = self.teacher_model(last_hidden_state)

        # Get the first token's hidden state
        out = torch.mean(out, dim=1)

        return self.classifier[:-1](out)


class CLIPLarge(nn.Module):

    def __init__(self, model_name):

        super().__init__()

        self.clip_model = CLIPVisionModelWithProjection.from_pretrained(model_name)
        in_features = self.clip_model.config.projection_dim

        lora_config = LoraConfig(
            task_type=peft.TaskType.FEATURE_EXTRACTION,
            # bias='lora_only',
            # use_rslora=True,
            target_modules=['q_proj', 'v_proj']
        )

        self.clip_model = get_peft_model(self.clip_model, lora_config)

        self.fc1 = nn.Sequential(
            nn.Linear(in_features, in_features // 2),
            nn.Dropout(0.2),
            nn.ReLU(),
        )
        self.fc2 = nn.Linear(in_features // 2, len(label_dict))

    def forward(self, x):

        return self.fc2(self.fc1(self.clip_model(pixel_values=x).image_embeds.float().to(device)))

    def get_features(self, x):

        return self.fc1(self.clip_model(pixel_values=x).image_embeds.float().to(device))

class CLIP_Model(nn.Module):

    def __init__(self):

        super().__init__()

        self.clip_model, _ = clip.load('ViT-B/32', device=device)

        # Freeze the CLIP model
        for param in self.clip_model.parameters():
            param.requires_grad = False

        self.fc1 = nn.Sequential(
            nn.Linear(CLIP_HIDDEN_STATE, CLIP_HIDDEN_STATE // 2),
            nn.Dropout(0.2),
            nn.ReLU(),
        )
        self.fc2 = nn.Linear(CLIP_HIDDEN_STATE // 2, len(label_dict))



    def forward(self, x):
        return self.fc2(self.fc1(self.clip_model.encode_image(x).float()))

    def get_features(self, x):
        return self.fc1(self.clip_model.encode_image(x).float())

In [4]:
Config = namedtuple('Instance', ['batch_size', 'learning_rate',
                                 'weight_decay', 'num_workers',
                                 'epochs', 'load_checkpoint',
                                 'file_checkpoint', 'loss', 'model', 'nlayers',
                                 'nhead', 'temperature', 'alpha'])

config = Config(
    batch_size = 16,
    learning_rate = 1e-3,
    weight_decay = 0.05,
    num_workers = 0,
    nlayers=2,
    nhead=4,
    epochs = 1,
    loss = 'crossentropy',
    load_checkpoint = False,
    model = 'clip-large',
    file_checkpoint = '',
    temperature = 15,
    alpha = 0.7
)

In [None]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()

    print(
        f"Trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

def save_model(model, model_name):
    # Save the model into the designated folder
    path = os.path.join('drive', 'MyDrive', 'GenImage', 'results', timestr, model_name + '.pth')
    torch.save(model, path)

def loss_fn_kd(outputs, labels, teacher_outputs):
    """
    Compute the knowledge-distillation (KD) loss given outputs, labels.
    "Hyperparameters": temperature and alpha

    NOTE: the KL Divergence for PyTorch comparing the softmaxs of teacher
    and student expects the input tensor to be log probabilities! See Issue #2
    """
    alpha = config.alpha
    T = config.temperature
    KD_loss = torch.nn.KLDivLoss()(F.log_softmax(outputs/T, dim=1),
                             F.softmax(teacher_outputs/T, dim=1)) * (alpha * T * T) + \
              F.cross_entropy(outputs, labels) * (1. - alpha)

    return KD_loss

def val_model(dloader, val_model):
    val_model.eval()
    val_loss = 0
    val_accuracy = 0
    predictions, label_list = [], []

    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():

      for idx, (inputs, labels) in tqdm(enumerate(dloader), total=len(dloader)):

          logits = val_model(inputs)
          loss = criterion(logits, labels)

          val_loss += loss.item()

          val_acc, preds = compute_accuracy(logits, labels)
          val_accuracy += val_acc
          predictions.extend(preds.tolist())
          label_list.extend(labels.tolist())

    plot_confusion_matrix(predictions, label_list)
    return val_loss / len(val_dataloader), val_accuracy / len(val_dataloader)

def save_stats(train_loss, val_loss, train_acc, val_acc, epochs,
               lr, train_accs, val_accs):
    stats_dict = {
        'losses': losses,
        'val losses': val_losses,
        'training accuracies': train_accs,
        'val accuracies': val_accs,
        'min train loss': train_loss,
        'min val loss': val_loss,
        'max train acc': train_acc,
        'max val acc': val_acc,
        'epochs': epochs,
        'learning rate': lr,
        'loss': config.loss
    }

    fname = f'stats.json'

    # Save stats into checkpoint
    with open(os.path.join('drive', 'MyDrive', 'GenImage', 'results', timestr, fname), 'w') as f:
        json.dump(stats_dict, f)

def compute_accuracy(logits, labels):

    preds = torch.argmax(F.softmax(logits, dim=-1), dim=-1)
    correct = (preds == labels).sum().item()
    total = labels.size(0)
    return correct / total, preds

def plot_confusion_matrix(predictions, labels):

    # Define the class names and order
    classes = sorted(os.listdir('imagenet_ai_holdout'))

    # Create the confusion matrix using sklearn
    cm = confusion_matrix(labels, predictions)

    # Calculate row-wise sums to normalize the confusion matrix
    row_sums = cm.sum(axis=1, keepdims=True)
    normalized_cm = cm / row_sums.astype(float)

    # Convert normalized confusion matrix to DataFrame with named rows and columns
    normalized_cm_df = pd.DataFrame(normalized_cm, index=classes, columns=classes)

    plt.figure(figsize=(8, 6))
    sns.set(font_scale=1.4)  # Adjust to fit labels properly

    # Create a heatmap plot
    sns.heatmap(normalized_cm_df, annot=True, fmt='.2f', cmap='Blues', cbar=False,
                annot_kws={"size": 16}, linewidths=1, linecolor='black')

    plt.title('Normalized Confusion Matrix')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.yticks(rotation=0)  # Ensure correct orientation of row labels

    # Adjust layout to ensure all borders are visible
    plt.tight_layout(pad=1.0)

    img_num = len(os.listdir(os.path.join('drive', 'MyDrive', 'GenImage',
                                          'results', timestr, 'Confusion Matrices')))
    fname = f'cf_{img_num}.png'
    plt.savefig(os.path.join('drive', 'MyDrive', 'GenImage', 'results', timestr, 'Confusion Matrices', fname))


def plot_loss(training_loss, val_loss):
    num_epochs = len(training_loss)

    plt.clf()
    plt.plot(range(1, num_epochs + 1), training_loss, label='Training Loss')
    plt.plot(range(1, num_epochs + 1), val_loss, label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Num epochs')
    plt.ylabel('Loss')
    plt.legend()
    fname = f'loss.png'
    plt.savefig(os.path.join('drive', 'MyDrive', 'GenImage', 'results', timestr, fname))


def train(train_loss, val_loss, train_acc, val_acc, best_model, epochs, learning_rate, train_accs, val_accs):

    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9, last_epoch=-1, verbose=False)

    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs, config.epochs):
        print('-------------------- EPOCH ' + str(epoch) + ' ---------------------')
        model.train()
        epoch_loss = 0
        epoch_accuracy = 0

        for step, (inputs, labels) in tqdm(enumerate(train_dataloader), total=len(train_dataloader)):

            logits = model(inputs)

            if step % 500 == 0:
              print(logits)
              print(labels)

            loss = criterion(logits, labels)

            epoch_loss += loss.item()
            acc, _ = compute_accuracy(logits, labels)
            epoch_accuracy += acc

            # Back-propogate
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        # Get train and val loss per batch
        epoch_train_loss = epoch_loss / len(train_dataloader)
        epoch_train_accuracy = epoch_accuracy / len(train_dataloader)
        losses.append(epoch_train_loss)

        epoch_val_loss, epoch_val_accuracy = val_model(val_dataloader, model)
        val_losses.append(epoch_val_loss)

        train_accs.append(epoch_train_accuracy)
        val_accs.append(epoch_val_accuracy)

        if not val_loss or min(epoch_val_loss, val_loss) == epoch_val_loss:
            val_loss = epoch_val_loss
            best_model = deepcopy(model.state_dict())
        if not train_loss or min(train_loss, epoch_train_loss) == epoch_train_loss:
            train_loss = epoch_train_loss
        if not train_acc or max(train_acc, epoch_train_accuracy) == epoch_train_accuracy:
            train_acc = epoch_train_accuracy
        if not val_acc or max(val_acc, epoch_val_accuracy) == epoch_val_accuracy:
            val_acc = epoch_val_accuracy

        # Adjust learning rate scheduler
        scheduler.step()

        print('Training Loss: ' + str(epoch_train_loss))
        print('Validation Loss: ' + str(epoch_val_loss))
        print('Training Accuracy: ' + str(epoch_train_accuracy))
        print('Validation Accuracy: ' + str(epoch_val_accuracy))
        print('---------------------------------------------')

        # Save model and stats for checkpoints
        save_model(best_model, 'latest_model')
        epochs += 1
        save_stats(train_loss, val_loss,train_acc, val_acc, epochs,
                   scheduler.get_last_lr()[0], train_accs, val_accs)

    # Save the model and plot the loss
    plot_loss(losses, val_losses)
    return train_loss, val_loss, train_acc, val_acc

def save_experiment(statistics):
    """
    Saves the experiment results to a csv
    :param config: The hyperparameters used
    :param statistics: The accuracies for the training, validation, and test sets
    """
    trial_dict = {
        'Model name': [timestr],
        'Learning rate': [config.learning_rate],
        'Weight decay': [config.weight_decay],
        'Batch size': [config.batch_size],
        'Alpha': [config.alpha],
        'Temperature': [config.temperature],
        'Epochs': [config.epochs],
        'Loss': [config.loss],
        'Min Training Loss': [statistics[0]],
        'Min Validation Loss': [statistics[1]],
        'Maximum Training Accuracy': [statistics[2]],
        'Maximum Validation Accuracy': [statistics[3]],
    }

    trial_dict = pd.DataFrame(trial_dict)
    trial_dict.to_csv(os.path.join('drive', 'MyDrive', 'GenImage', 'results',
                                   timestr, 'results.csv'), index=False, header=True)


if __name__ == '__main__':

    timestr = time.strftime("%Y%m%d-%H%M%S")

    checkpoint_path = os.path.join('drive', 'MyDrive', 'GenImage', 'results', timestr)
    print(f'All model checkpoints and training stats will be saved in {checkpoint_path}')


    losses = []
    val_losses = []
    train_accs, val_accs = [], []
    min_train_loss = None
    min_val_loss = None
    max_val_acc = None
    max_train_acc = None
    best_model = None
    epochs_ran = 0

    model = CLIPLarge("laion/CLIP-ViT-H-14-laion2B-s32B-b79K")
    model.to(device)

    print_trainable_parameters(model)

    # Load datasets
    train_dset = ImageDataset(config)
    val_dset = ImageDataset(
        config,
        d_type = 'val'
    )

    # Create Dataloaders
    train_dataloader = DataLoader(train_dset, shuffle=True, batch_size=config.batch_size)
    val_dataloader = DataLoader(val_dset, shuffle=True, batch_size=config.batch_size,
                                num_workers=config.num_workers)

    # Load checkpoint if neccesary:
    if config.load_checkpoint:

        print('Loading model from ' + config.checkpoint_file)

        # Load the model and stats from the checkpoint
        model.load_state_dict(torch.load(os.path.join('drive', 'MyDrive', 'GenImage', 'results', config.checkpoint_file,
                                                      'latest_model.pth')))
        best_model = deepcopy(model)
        best_model.load_state_dict(torch.load(os.path.join('drive', 'MyDrive', 'GenImage', 'results', config.checkpoint_file,
                                                          'latest_model.pth')))
        best_model = best_model.state_dict()

        with open(os.path.join('drive', 'MyDrive', 'GenImage', 'results', config.checkpoint_file, 'stats.json'), 'r') as f:
            stats = json.load(f)

        min_train_loss, min_val_loss, losses, val_losses, epochs_ran = stats['min train loss'], stats[
            'min val loss'], stats['losses'], stats['val losses'], stats['epochs']
        max_train_acc, max_val_acc = stats['max train acc'], stats['max val acc']
        train_accs, val_accs = stats['training accuracies'], stats['val accuracies']

        print(f'Minimum Training Loss: {min_train_loss}')
        print(f'Training Losses: {losses}')
        print(f'Minimum Validation Loss: {min_val_loss}')
        print(f'Validation Losses: {val_losses}')
        print(f'Training Accuracies: {train_accs}')
        print(f'Validation Accuracies: {val_accs}')
        print(f'Maximum Training Accuracy: {max_train_acc}')
        print(f'Maximum Validation Accuracy: {max_val_acc}')
        print(f'Epochs ran: {epochs_ran}')
        timestr = config.checkpoint_file
    else:
        os.mkdir(os.path.join('drive', 'MyDrive', 'GenImage', 'results', timestr))
        os.mkdir(os.path.join('drive', 'MyDrive', 'GenImage', 'results', timestr, 'Confusion Matrices'))

    # If loading a checkpoint, use the learning rate from the last epoch
    if config.load_checkpoint:
        lr = stats['learning rate']
    else:
        lr = config.learning_rate

    min_train_loss, min_val_loss, max_train_acc, max_val_acc = (
        train(min_train_loss, min_val_loss, max_train_acc,
              max_val_acc, best_model, epochs_ran, lr, train_accs, val_accs))
    statistics = [min_train_loss, min_val_loss, max_train_acc, max_val_acc]
    save_experiment(statistics)


All model checkpoints and training stats will be saved in drive/MyDrive/GenImage/results/20240526-234137


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Trainable params: 1839624 || all params: 633916424 || trainable%: 0.29019976929955676




-------------------- EPOCH 0 ---------------------


  0%|          | 0/16835 [00:00<?, ?it/s]

tensor([[ 0.0562,  0.1635, -0.1006,  0.0313, -0.0242,  0.0019,  0.0678, -0.2437],
        [ 0.0103,  0.0011, -0.0501,  0.0625,  0.3765,  0.1374, -0.0588,  0.0004],
        [ 0.1399,  0.0782, -0.2779,  0.0552, -0.1852, -0.0246,  0.0737,  0.0410],
        [-0.1244,  0.0162, -0.1230, -0.2458,  0.0628,  0.3535,  0.0146, -0.2200],
        [ 0.0867,  0.0827, -0.0317, -0.0377, -0.0407,  0.0491,  0.0343,  0.2602],
        [ 0.1890,  0.2289, -0.1104, -0.2284,  0.1069, -0.1153, -0.2524, -0.0986],
        [ 0.1512, -0.0608,  0.0149,  0.1743,  0.0256,  0.1058, -0.1376, -0.2482],
        [ 0.0026,  0.1878,  0.1849, -0.0472,  0.2179, -0.1278, -0.0229, -0.2066],
        [ 0.0474, -0.0644, -0.1596,  0.1355, -0.0967,  0.0032,  0.1255, -0.0084],
        [ 0.1813,  0.3699, -0.0209,  0.0117,  0.1431, -0.0824, -0.0089,  0.0265],
        [ 0.1673,  0.0627,  0.0998, -0.1201, -0.1954,  0.3441,  0.0732, -0.1796],
        [ 0.1728,  0.0697, -0.0678, -0.0490,  0.2044,  0.1700, -0.1279, -0.2943],
        [ 0.1264

  0%|          | 38/16835 [02:29<18:33:59,  3.98s/it]

In [None]:
from google.colab import runtime
runtime.unassign()

In [None]:
model

Make sure to comment out params lines 2318-2323 on /usr/local/lib/python3.10/dist-packages/peft/peft_model.py