In [None]:
# %% Loading libraries
import os
import sys
import argparse
import time
import datetime
import random

from PIL import Image
from pathlib import Path
from collections import Counter

import numpy as np
import pandas as pd

import torch

from sklearn.model_selection import train_test_split

import lightning.pytorch as pl
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import WandbLogger

import timm

# Custom library
sys.path.append('../process/')
from imageUtilities import load_images_and_labels
from loadData import ImageDataModule

sys.path.append('../architectures/')
from visionClassifierLayer import PreTrainedVisionModel

import warnings
warnings.filterwarnings('ignore')

In [68]:
# Experiment configuration. Kept as a plain dict so individual values are easy
# to edit in the notebook; key order matches the original definition.
args_dict = dict(
    model_name_or_path="vgg16",
    logged_entry_name="vgg16-seed:1111",
    data_dir="/workspace/persistent/HTClipper/data/processed",
    data_type="all",
    city="south",
    save_dir="/workspace/persistent/HTClipper/models/grouped-and-masked/image-baselines",
    model_dir_name=None,
    batch_size=32,
    nb_epochs=40,
    patience=3,
    seed=1111,
    warmup_steps=0,
    grad_steps=1,
    learning_rate=6e-4,
    train_data_percentage=1.0,
    adam_epsilon=1e-6,
    min_delta_change=0.01,
    weight_decay=0.01,
    augment_data=False,
    nb_augmented_samples=1,
)

# Expose the settings through attribute access (args.seed, args.city, ...),
# the same shape an argparse-driven script would hand to the rest of the code.
args = argparse.Namespace(**args_dict)

In [69]:
# Setting seed value for reproducibility
torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(args.seed)
random.seed(args.seed)
os.environ['PYTHONHASHSEED'] = str(args.seed)
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ['CUDA_VISIBLE_DEVICES'] = "0"
seed_everything(args.seed)

# Making sure that the input variables are right
assert args.data_type in ["all"]
assert args.city in ["midwest", "northeast", "south", "west"]
assert args.model_name_or_path in ['vgg16', 'vgg19', "resnet50", "resnet101", "resnet152", "mobilenet", "mobilenetv2", "densenet121", "densenet169", 
                                "efficientnet-b0", "efficientnet-b1", "efficientnet-b2", "efficientnet-b3", "efficientnet-b4", "efficientnet-b5", "efficientnet-b6",
                                "efficientnet-b7", "efficientnetv2_rw_m", "efficientnetv2_rw_s", "efficientnetv2_rw_t", "convnext_tiny", "convnext_small", 
                                "convnext_base", "convnext_large", "convnext_xlarge", "vit_base_patch16_224", "vit_large_patch16_224", "vit_base_patch32_224", 
                                "vit_large_patch32_224", "inception_v3", "inception_resnet_v2" ]

# Creating directories
# Layout: <save_dir>/<model>/<city>/<data_type>/seed:<seed>/lr-<lr>[-<model_dir_name>]
lr_dir_name = "lr-" + str(args.learning_rate)
if args.model_dir_name is not None:
    lr_dir_name += "-" + args.model_dir_name
directory = os.path.join(args.save_dir, args.model_name_or_path.split("/")[-1], args.city, args.data_type,
                         "seed:" + str(args.seed), lr_dir_name)
Path(directory).mkdir(parents=True, exist_ok=True)
Path(args.save_dir).mkdir(parents=True, exist_ok=True)

# %% Loading dataset
# Map city and data_type combinations to file paths
file_paths = {
    "chicago": {
        "faces": "chicago_faces.csv",
        "nofaces": "chicago_nofaces.csv",
        "all": "chicago_images.csv"
    },
    "all": {
        "faces": "all_faces.csv",
        "nofaces": "all_nofaces.csv",
        "all": "all_images.csv"
    },
    "south": {
        "all": "south_images.csv"
    }
}

# Construct the file path and read the CSV file.
# The table above has no entry for midwest/northeast/west even though the
# assert accepts them; those regions fall back to "<city>_images.csv", the
# naming that save_embeddings() uses for every region.
default_file_name = f"{args.city}_images.csv"
file_name = file_paths.get(args.city, {}).get(args.data_type, default_file_name)
file_path = os.path.join(args.data_dir, file_name)
df = pd.read_csv(file_path)

# Removing vendors that have less than 2 ads
vendors_of_interest = {k: v for k, v in Counter(df.VENDOR).items() if v > 1}
# .copy() so the VENDOR remapping below writes to an independent frame rather
# than to a filtered view of the raw CSV data.
df = df[df['VENDOR'].isin(list(vendors_of_interest.keys()))].copy()

# Remapping new vendor ids to a contiguous 0..num_classes-1 range
all_vendors = df.VENDOR.unique()
vendor_to_idx_dict = {vendor: idx for idx, vendor in enumerate(all_vendors)}
df["VENDOR"] = df["VENDOR"].replace(vendor_to_idx_dict)

num_classes = df.VENDOR.nunique()
assert df['VENDOR'].min() >= 0 and df['VENDOR'].max() < num_classes

Seed set to 1111


In [70]:
directory

'/workspace/persistent/HTClipper/models/grouped-and-masked/image-baselines/vgg16/south/all/seed:1111/lr-0.0006'

In [72]:
# %% Load and preprocess images
# Every image is resized to 224x224 so all backbones see the same input as the ViT models.
# augment=True here would augment the *entire* dataset; augmentation restricted to the
# training data is handled by ImageDataModule instead.
images, labels = load_images_and_labels(
    df,
    target_size=(224, 224),
    augment=False,
    num_augmented_samples=args.nb_augmented_samples,
)
assert images.shape[0] == labels.shape[0]

# %% Split data
# 20% of the data is held out for testing; 5% of the remaining training data becomes validation.
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.20, random_state=1111, stratify=labels
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.05, random_state=1111, stratify=y_train
)

# %% Instantiate DataModule and Model
# num_augmented_samples is only forwarded when augmentation is requested;
# otherwise ImageDataModule's own default applies.
datamodule_kwargs = {"batch_size": args.batch_size, "augment_data": args.augment_data}
if args.augment_data == True:
    datamodule_kwargs["num_augmented_samples"] = args.nb_augmented_samples
data_module = ImageDataModule(X_train, y_train, X_val, y_val, X_test, y_test, **datamodule_kwargs)

data_module.setup()

In [73]:
# %% Loading 
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, balanced_accuracy_score

import lightning.pytorch as pl

import timm

class PreTrainedVisionModel(pl.LightningModule):
    """Lightning wrapper around a timm backbone with a task-specific classifier head.

    This in-notebook definition shadows the ``PreTrainedVisionModel`` imported
    from ``visionClassifierLayer`` at the top of the notebook.

    Args:
        model_name: timm model identifier (e.g. ``"vgg16"``).
        num_classes: Number of output classes for the replaced classifier head.
        learning_rate: Learning rate passed to the Adam optimizer.
    """

    def __init__(self, model_name, num_classes, learning_rate=1e-4):
        super().__init__()
        self.model = self.load_model(model_name, num_classes)
        self.learning_rate = learning_rate
        self.criterion = nn.CrossEntropyLoss()
        self.model_name = model_name

    def load_model(self, model_name, num_classes):
        """Create the pretrained timm model and swap in a ``num_classes``-way head."""
        model = timm.create_model(model_name, pretrained=True)

        if 'efficientnet' in model_name:
            # Also covers the 'efficientnetv2*' names (same substring).
            num_ftrs = model.classifier.in_features
            model.classifier = nn.Linear(num_ftrs, num_classes)
        elif 'convnext' in model_name:
            num_ftrs = model.head.fc.in_features
            model.head.fc = nn.Linear(num_ftrs, num_classes)
        elif 'vit' in model_name:
            num_ftrs = model.head.in_features
            model.head = nn.Linear(num_ftrs, num_classes)
        else:
            # timm's reset_classifier() rebuilds the head under whatever attribute
            # the architecture actually uses. The previous fallback always assigned
            # `model.fc`, which for backbones whose head is not called `fc`
            # only added an unused layer and left the 1000-class head in place.
            model.reset_classifier(num_classes=num_classes)

        return model

    def forward(self, x):
        """Return the classifier logits for a batch of images."""
        return self.model(x)

    def _shared_step(self, batch, stage):
        """Compute loss and metrics for one batch and log them under ``{stage}_*`` keys."""
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        preds = torch.argmax(y_hat, dim=1)
        metrics = self.compute_metrics(preds, y, stage)
        metrics[f'{stage}_loss'] = loss
        self.log_dict(metrics)
        return loss

    def training_step(self, batch, batch_idx):
        return self._shared_step(batch, 'train')

    def validation_step(self, batch, batch_idx):
        return self._shared_step(batch, 'val')

    def test_step(self, batch, batch_idx):
        return self._shared_step(batch, 'test')

    def configure_optimizers(self):
        """Adam over all parameters with the configured learning rate."""
        optimizer = optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

    def compute_metrics(self, preds, labels, stage):
        """Return a dict of sklearn classification metrics for a single batch.

        Keys are prefixed with ``stage`` (``train``/``val``/``test``).
        NOTE(review): the scores are computed per batch and then logged, so the
        reported macro/balanced values are presumably averages of per-batch
        scores rather than scores over the whole split — confirm before citing them.
        """
        # Move to host memory once instead of once per metric.
        y_true = labels.detach().cpu().numpy()
        y_pred = preds.detach().cpu().numpy()

        # zero_division=0 keeps sklearn's default value for undefined scores
        # but without emitting a warning for every batch.
        acc = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, average='micro', zero_division=0)
        recall = recall_score(y_true, y_pred, average='micro', zero_division=0)
        f1 = f1_score(y_true, y_pred, average='micro', zero_division=0)
        balanced_acc = balanced_accuracy_score(y_true, y_pred)
        macro_f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
        weighted_f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

        return {
            f'{stage}_accuracy': acc,
            f'{stage}_precision': precision,
            f'{stage}_recall': recall,
            f'{stage}_f1': f1,
            f'{stage}_balanced_accuracy': balanced_acc,
            f'{stage}_macro_f1': macro_f1,
            f'{stage}_weighted_f1': weighted_f1,
        }

    def _pooled_forward_features(self, x):
        """Backbone features -> the model's own global pooling -> flattened vector."""
        x = self.model.forward_features(x)
        x = self.model.global_pool(x)
        return torch.flatten(x, 1)

    def extract_features(self, x):
        """Return one embedding vector per image, taken before the classifier head.

        The branch is chosen by substring match on the (lower-cased) model name,
        in the order written below.
        """
        model_name = self.model_name.lower()

        if 'vgg' in model_name:
            # Convolutional feature map, globally average-pooled to a vector
            x = self.model.features(x)
            x = F.adaptive_avg_pool2d(x, (1, 1))
            features = torch.flatten(x, 1)

        elif 'resnet' in model_name:
            features = self._pooled_forward_features(x)

        elif 'densenet' in model_name:
            x = self.model.features(x)
            x = F.relu(x, inplace=True)
            x = F.adaptive_avg_pool2d(x, (1, 1))
            features = torch.flatten(x, 1)

        elif 'efficientnet' in model_name:
            features = self._pooled_forward_features(x)

        elif 'convnext' in model_name:
            x = self.model.forward_features(x)
            x = self.model.norm_pre(x)
            features = x.mean(dim=(2, 3))  # Global average pooling over the spatial axes

        elif 'vit' in model_name:
            x = self.model.forward_features(x)
            features = x[:, 0]  # Use the [CLS] token representation

        else:
            # inception_* and any other backbone: default pooled features
            features = self._pooled_forward_features(x)

        return features

In [74]:
def load_pretrained_checkpoint(model_name):
    """Return the checkpoint path registered for ``model_name``.

    Every known checkpoint lives under the same directory template
    (south / all / seed:1111 / lr-0.0001-CE), differing only in the model folder.

    Raises:
        Exception: if ``model_name`` is not one of the models listed below.
    """
    trained_models = (
        "vgg16",
        "vit_base_patch16_224",
        "resnet50",
        "densenet121",
        "efficientnetv2_rw_s",
        "convnext_small",
        "inception_v3",
    )
    if model_name not in trained_models:
        raise Exception("Model not trained....")

    return (
        "/workspace/persistent/HTClipper/models/grouped-and-masked/image-baselines/"
        f"{model_name}/south/all/seed:1111/lr-0.0001-CE/final_model.ckpt"
    )

In [75]:
def load_pretrained_model(model_name):
    """Build a ``PreTrainedVisionModel`` and load the weights stored for ``model_name``.

    Uses the notebook-level ``num_classes`` for the classifier head and the path
    returned by ``load_pretrained_checkpoint(model_name)``.

    Raises:
        RuntimeError: if not a single parameter in the checkpoint matches the
            model, since the result would be a silently un-fine-tuned network.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Instantiate the model architecture
    model = PreTrainedVisionModel(
        model_name=model_name,      # e.g., 'vgg16'
        num_classes=num_classes,
    )

    checkpoint_path = load_pretrained_checkpoint(model_name)
    checkpoint = torch.load(checkpoint_path, map_location=device)

    # A Lightning checkpoint nests the weights under "state_dict"; a file written
    # with torch.save(model.state_dict()) is the weight mapping itself. Handing the
    # outer Lightning dict to load_state_dict(strict=False) would match no key
    # and fail silently, so unwrap it first.
    if isinstance(checkpoint, dict) and "state_dict" in checkpoint:
        state_dict = checkpoint["state_dict"]
    else:
        state_dict = checkpoint

    # strict=False is kept for backward compatibility, but the outcome is checked.
    load_result = model.load_state_dict(state_dict, strict=False)
    loaded_keys = set(model.state_dict()) - set(load_result.missing_keys)
    if not loaded_keys:
        raise RuntimeError(
            f"No parameters from {checkpoint_path} matched the {model_name} model"
        )
    if load_result.missing_keys or load_result.unexpected_keys:
        # print() rather than warnings.warn(): the notebook silences all warnings.
        print(
            f"[load_pretrained_model] {model_name}: "
            f"{len(load_result.missing_keys)} missing and "
            f"{len(load_result.unexpected_keys)} unexpected keys in checkpoint"
        )
    return model

In [76]:
# Restore the ViT-B/16 model from its checkpoint and put it in inference mode.
vit_checkpoint_path = "/workspace/persistent/HTClipper/models/grouped-and-masked/image-baselines/vit_base_patch16_224/south/all/seed:1111/lr-0.0001-CE/final_model.ckpt"
model = PreTrainedVisionModel.load_from_checkpoint(
    vit_checkpoint_path,
    model_name="vit_base_patch16_224",
    num_classes=num_classes,
)
model = model.eval()

In [77]:
from tqdm import tqdm

def _collect_embeddings(model, loader, device, desc):
    """Run ``model.extract_features`` over ``loader``; return (embeddings, labels) as CPU tensors."""
    batch_embeddings, batch_labels = [], []
    with torch.no_grad():
        for images, labels in tqdm(loader, desc=desc):
            embeddings = model.extract_features(images.to(device))
            batch_embeddings.append(embeddings.cpu())
            batch_labels.append(labels.cpu())
    return torch.cat(batch_embeddings), torch.cat(batch_labels)


def save_embeddings(city, model, model_name, args=args):
    """Extract and save train/test image embeddings of ``model`` for one region.

    Reads ``<args.data_dir>/<city>_images.csv``, drops vendors with a single ad,
    re-indexes the vendor ids, splits 80/20 and writes four ``.pt`` files
    (embeddings and labels for train and test) under the ``trained_<model_name>``
    embeddings directory.

    Args:
        city: Region name used to locate the CSV and to prefix the output files.
        model: Object exposing ``extract_features(images)``; moved to GPU when available.
        model_name: Used only to name the output directory.
        args: Namespace providing ``data_dir``, ``nb_augmented_samples`` and ``batch_size``.
    """
    # Construct the file path and read the CSV file
    file_path = os.path.join(args.data_dir, f"{city}_images.csv")
    df = pd.read_csv(file_path)

    # Removing vendors that have less than 2 ads
    vendors_of_interest = {k: v for k, v in Counter(df.VENDOR).items() if v > 1}
    # .copy() so the remapping below does not write into a slice of the raw frame
    df = df[df['VENDOR'].isin(list(vendors_of_interest.keys()))].copy()

    # Remapping new vendor ids
    all_vendors = df.VENDOR.unique()
    vendor_to_idx_dict = {vendor: idx for idx, vendor in enumerate(all_vendors)}
    df["VENDOR"] = df["VENDOR"].replace(vendor_to_idx_dict)

    # %% Load and preprocess images
    # The target size is fixed to 224x224 for a fair comparison with the ViT models.
    images, labels = load_images_and_labels(df, target_size=(224, 224), augment=False,
                                             num_augmented_samples=args.nb_augmented_samples)
    assert images.shape[0] == labels.shape[0]

    # NOTE(review): this split is not stratified and has no validation carve-out,
    # unlike the notebook-level split — confirm that is intended for the saved sets.
    X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.20, random_state=1111)

    # ImageDataModule needs a validation split, but none is consumed here. The
    # previous code read X_val / y_val from notebook globals (the split of whatever
    # dataset was loaded last); the test arrays are passed instead so the function
    # depends only on its own arguments.
    data_module = ImageDataModule(X_train, y_train, X_test, y_test, X_test, y_test,
                                  batch_size=args.batch_size, augment_data=False)
    data_module.setup()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()  # embeddings must not depend on dropout / batch-norm training behaviour

    file_dir = f"/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/vision_baselines/trained_{model_name}"
    Path(file_dir).mkdir(parents=True, exist_ok=True)

    train_embeddings, train_labels = _collect_embeddings(
        model, data_module.train_dataloader(), device, "Extracting Train embeddings")
    torch.save(train_embeddings, os.path.join(file_dir, f"{city}_visiondata_train.pt"))
    torch.save(train_labels, os.path.join(file_dir, f"{city}_visionlabels_train.pt"))

    test_embeddings, test_labels = _collect_embeddings(
        model, data_module.test_dataloader(), device, "Extracting test embeddings")
    torch.save(test_embeddings, os.path.join(file_dir, f"{city}_visiondata_test.pt"))
    torch.save(test_labels, os.path.join(file_dir, f"{city}_visionlabels_test.pt"))

    # return train_embeddings, train_labels, test_embeddings, test_labels

In [None]:
# Export embeddings for every trained backbone on every region.
embedding_model_names = [
    "vgg16",
    "resnet50",
    "densenet121",
    "efficientnetv2_rw_s",
    "convnext_small",
    "vit_base_patch16_224",
    "inception_v3",
]
embedding_regions = ["south", "northeast", "west", "midwest"]

for model_name in embedding_model_names:
    print(f"---------------------------------------{model_name}---------------------------------------------------")
    # One load per backbone; the same weights are reused for all regions.
    model = load_pretrained_model(model_name)
    model.eval()

    for region in embedding_regions:
        print(f"Region:{region}")
        save_embeddings(region, model=model, model_name=model_name, args=args)

In [93]:
import timm
import torch

# Backbones whose parameter counts are reported below
model_names = [
    "vgg16", "resnet50", "densenet121", "efficientnetv2_rw_s",
    "convnext_small", "vit_base_patch16_224", "inception_v3",
]

# Total number of scalar weights that would receive gradients
def count_trainable_parameters(model):
    """Sum ``numel()`` over the parameters of ``model`` that have ``requires_grad`` set."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Dictionary to store model parameters: {timm model name: trainable parameter count}
# Only the architecture is needed for counting, so no pretrained weights are
# downloaded, and the temporary networks are never bound to the notebook-level
# `model`, which the inference cells rely on.
model_parameters = {
    name: count_trainable_parameters(
        timm.create_model(name, pretrained=False, num_classes=1000)  # Adjust num_classes as needed
    )
    for name in model_names
}

# Print model parameters
for name, params in model_parameters.items():
    print(f"{name}: {params:,} trainable parameters")

vgg16: 138,357,544 trainable parameters
resnet50: 25,557,032 trainable parameters
densenet121: 7,978,856 trainable parameters
efficientnetv2_rw_s: 23,941,296 trainable parameters
convnext_small: 50,223,688 trainable parameters
vit_base_patch16_224: 86,567,656 trainable parameters
inception_v3: 23,834,568 trainable parameters


# Generating true positive and false positive data

In [14]:
# Reload the ViT-B/16 classifier and switch it to inference mode.
model = load_pretrained_model("vit_base_patch16_224")
model = model.eval()

In [9]:
import lightning as L
import lightning.pytorch as pl
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping

# Trainer used in this notebook only to run the test loop on a loaded model.
trainer = L.Trainer(max_epochs=32, accelerator="gpu", fast_dev_run=False, 
                    accumulate_grad_batches = 4, # Run the optimizer step after every 4 batches (larger effective batch size)
                    benchmark = False, # cuDNN autotuning can pick different kernels per run; kept off to agree with deterministic=True and the seed cell
                    deterministic=True, # Ensures reproducibility 
                    limit_train_batches=1.0, # Use 100% of the training batches per epoch
                    check_val_every_n_epoch = 1, # run val loop every 1 training epochs
                    precision='16-mixed' # Mixed Precision system
                    )

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


In [24]:
# Evaluate the loaded model on the held-out test split; the metrics dict is the cell output.
test_loader = data_module.test_dataloader()
trainer.test(model=model, dataloaders=test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 410/410 [00:10<00:00, 38.75it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_accuracy         0.7506102919578552
 test_balanced_accuracy     0.7459064722061157
         test_f1            0.7506102919578552
        test_loss           1.7013839483261108
      test_macro_f1         0.6042097806930542
     test_precision         0.7506102919578552
       test_recall          0.7506102919578552
    test_weighted_f1        0.7493311166763306
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_accuracy': 0.7506102919578552,
  'test_precision': 0.7506102919578552,
  'test_recall': 0.7506102919578552,
  'test_f1': 0.7506102919578552,
  'test_balanced_accuracy': 0.7459064722061157,
  'test_macro_f1': 0.6042097806930542,
  'test_weighted_f1': 0.7493311166763306,
  'test_loss': 1.7013839483261108}]

# Saving TP and FP results from model predictions

In [None]:
from tqdm import tqdm

# Per-batch arrays of predicted and ground-truth labels for the training split
pred, actual = [], []

train_dm = data_module.train_dataloader()
# test_dm = data_module.test_dataloader()

# Inference only: eval mode and no autograd graph, with inputs moved to wherever
# the model's weights currently live (CPU or GPU).
model.eval()
inference_device = next(model.parameters()).device

with torch.no_grad():
    # Iterate over the train dataloader with a tqdm progress bar
    for images, labels in tqdm(train_dm, desc="Extracting Train predictions"):
        outputs = model(images.to(inference_device))
        preds = torch.argmax(outputs, dim=1)

        # Append predictions and labels to their respective lists
        pred.append(preds.cpu().numpy())
        actual.append(labels.cpu().numpy())

In [39]:
# Flatten the per-batch arrays into plain lists of Python ints.
train_pred_labels = []
for batch_preds in pred:
    train_pred_labels.extend(int(item) for item in batch_preds)

train_actual_labels = []
for batch_labels in actual:
    train_actual_labels.extend(int(item) for item in batch_labels)

In [28]:
import numpy as np
from sklearn.metrics import f1_score

f1_score(train_actual_labels, train_pred_labels, average='macro')

np.float64(0.6070953204108801)

In [30]:
# Run inference on the test split here. When the notebook is executed top to
# bottom, `pred` / `actual` still hold the training-set results at this point,
# so flattening them again would store train predictions under the test names.
test_pred_batches, test_actual_batches = [], []

model.eval()
inference_device = next(model.parameters()).device

with torch.no_grad():
    for images, labels in tqdm(data_module.test_dataloader(), desc="Extracting test predictions"):
        outputs = model(images.to(inference_device))
        test_pred_batches.append(torch.argmax(outputs, dim=1).cpu().numpy())
        test_actual_batches.append(labels.cpu().numpy())

test_pred_labels = [int(item) for array in test_pred_batches for item in array]
test_actual_labels = [int(item) for array in test_actual_batches for item in array]

In [40]:
len(train_actual_labels), len(test_actual_labels)

(49809, 13108)

In [41]:
import pickle

# (output path, object) pairs, written in this order
pickle_targets = [
    ('../error_analysis/vit_text_train_class_freq.pkl', train_actual_labels),
    ('../error_analysis/vit_classification_text_test_pred_labels.pkl', test_pred_labels),
    ('../error_analysis/vit_classification_text_test_act_labels.pkl', test_actual_labels),
]

for target_path, payload in pickle_targets:
    with open(target_path, 'wb') as f:
        pickle.dump(payload, f)

In [27]:
len(train_pred_labels) == len(train_actual_labels)

True

# Saving TP and FP results for images with and without faces

In [21]:
# South-region image metadata
df = pd.read_csv("../data/processed/south_images.csv")

# Keep only vendors that appear in at least two ads
vendor_counts = Counter(df.VENDOR)
vendors_of_interest = {vendor: count for vendor, count in vendor_counts.items() if count > 1}
df = df[df['VENDOR'].isin(list(vendors_of_interest))]

# Re-index the remaining vendors as 0..num_classes-1, in order of first appearance
all_vendors = df.VENDOR.unique()
vendor_to_idx_dict = dict(zip(all_vendors, range(len(all_vendors))))
df["VENDOR"] = df["VENDOR"].replace(vendor_to_idx_dict)

num_classes = df.VENDOR.nunique()
assert df['VENDOR'].min() >= 0 and df['VENDOR'].max() < num_classes

In [30]:
# 80/20 train/test split, then 5% of the training portion is held out for
# validation; both splits are stratified by vendor and use the same seed.
train_val_df, test_df = train_test_split(
    df, test_size=0.20, random_state=1111, stratify=df['VENDOR']
)
train_df, val_df = train_test_split(
    train_val_df, test_size=0.05, random_state=1111, stratify=train_val_df['VENDOR']
)

In [44]:
# Per-vendor number of training images with and without a face
train_face_mask = train_df['IF_FACE'] == "yes"
train_noface_mask = train_df['IF_FACE'] == "no"

train_face_vendor_dict = dict(Counter(train_df.loc[train_face_mask, 'VENDOR']))
train_noface_vendor_dict = dict(Counter(train_df.loc[train_noface_mask, 'VENDOR']))

In [45]:
import pickle

# Persist the per-vendor training frequencies for the error analysis
class_freq_targets = [
    ('../error_analysis/vit_face_train_class_freq.pkl', train_face_vendor_dict),
    ('../error_analysis/vit_noface_train_class_freq.pkl', train_noface_vendor_dict),
]

for target_path, payload in class_freq_targets:
    with open(target_path, 'wb') as f:
        pickle.dump(payload, f)

In [46]:
# Faces Dataset
# Train and validation images are loaded in full; only the test set is
# restricted to images that contain a face.
image_load_kwargs = dict(
    target_size=(224, 224),
    augment=False,
    num_augmented_samples=args.nb_augmented_samples,
)

train_images, train_labels = load_images_and_labels(train_df, **image_load_kwargs)

val_images, val_labels = load_images_and_labels(val_df, **image_load_kwargs)

test_face_df = test_df[test_df['IF_FACE'] == "yes"]
test_images, test_labels = load_images_and_labels(test_face_df, **image_load_kwargs)

In [47]:
# Data module over the full train/val sets and the face-only test set
data_module = ImageDataModule(
    train_images, train_labels,
    val_images, val_labels,
    test_images, test_labels,
    batch_size=args.batch_size,
    augment_data=args.augment_data,
)

In [49]:
data_module.setup()

In [54]:
from tqdm import tqdm

# Per-batch arrays of predicted and ground-truth labels for the face-only test split
pred, actual = [], []

# train_dm = data_module.train_dataloader()
test_dm = data_module.test_dataloader()

# Inference only: eval mode and no autograd graph, with inputs moved to wherever
# the model's weights currently live (CPU or GPU).
model.eval()
inference_device = next(model.parameters()).device

with torch.no_grad():
    # Iterate over the test dataloader with a tqdm progress bar
    # (the bar was previously labelled "Train" although it iterates the test split)
    for images, labels in tqdm(test_dm, desc="Extracting test predictions"):
        outputs = model(images.to(inference_device))
        preds = torch.argmax(outputs, dim=1)

        # Append predictions and labels to their respective lists
        pred.append(preds.cpu().numpy())
        actual.append(labels.cpu().numpy())

Extracting Train predictions: 100%|██████████| 202/202 [01:42<00:00,  1.98it/s]


In [55]:
# Flatten the per-batch arrays into plain lists of Python ints.
test_pred_labels = []
for batch_preds in pred:
    test_pred_labels.extend(int(item) for item in batch_preds)

test_actual_labels = []
for batch_labels in actual:
    test_actual_labels.extend(int(item) for item in batch_labels)

In [56]:
# Persist predictions and ground truth for the face-only test images
face_result_targets = [
    ('../error_analysis/vit_faceclassification_text_test_pred_labels.pkl', test_pred_labels),
    ('../error_analysis/vit_faceclassification_text_test_act_labels.pkl', test_actual_labels),
]

for target_path, payload in face_result_targets:
    with open(target_path, 'wb') as f:
        pickle.dump(payload, f)

In [60]:
test_pred_labels[:10], test_actual_labels[:10]

([1033, 309, 1228, 249, 876, 905, 0, 404, 1, 214],
 [1033, 309, 1228, 249, 876, 31, 0, 404, 1, 214])

In [61]:
# No-faces Dataset
# The results of this section are written to the *nofaceclassification* files,
# so the test set must contain the images WITHOUT a face. The previous filter
# (IF_FACE == "yes") repeated the face-only evaluation instead.
train_images, train_labels = load_images_and_labels(train_df, target_size=(224, 224), augment=False,
                                         num_augmented_samples=args.nb_augmented_samples)

val_images, val_labels = load_images_and_labels(val_df, target_size=(224, 224), augment=False,
                                         num_augmented_samples=args.nb_augmented_samples)

test_images, test_labels = load_images_and_labels(test_df[test_df['IF_FACE'] == "no"], target_size=(224, 224), augment=False,
                                         num_augmented_samples=args.nb_augmented_samples)

In [62]:
# Rebuild the data module around the freshly loaded test images
data_module = ImageDataModule(
    train_images, train_labels,
    val_images, val_labels,
    test_images, test_labels,
    batch_size=args.batch_size,
    augment_data=args.augment_data,
)
data_module.setup()

In [63]:
from tqdm import tqdm

# Per-batch arrays of predicted and ground-truth labels for the current test split
pred, actual = [], []

# train_dm = data_module.train_dataloader()
test_dm = data_module.test_dataloader()

# Inference only: eval mode and no autograd graph, with inputs moved to wherever
# the model's weights currently live (CPU or GPU).
model.eval()
inference_device = next(model.parameters()).device

with torch.no_grad():
    # Iterate over the test dataloader with a tqdm progress bar
    # (the bar was previously labelled "Train" although it iterates the test split)
    for images, labels in tqdm(test_dm, desc="Extracting test predictions"):
        outputs = model(images.to(inference_device))
        preds = torch.argmax(outputs, dim=1)

        # Append predictions and labels to their respective lists
        pred.append(preds.cpu().numpy())
        actual.append(labels.cpu().numpy())

Extracting Train predictions: 100%|██████████| 208/208 [01:30<00:00,  2.30it/s]


In [64]:
# Flatten the per-batch arrays into plain lists of Python ints.
test_pred_labels = []
for batch_preds in pred:
    test_pred_labels.extend(int(item) for item in batch_preds)

test_actual_labels = []
for batch_labels in actual:
    test_actual_labels.extend(int(item) for item in batch_labels)

In [65]:
# Persist predictions and ground truth under the no-face result file names
noface_result_targets = [
    ('../error_analysis/vit_nofaceclassification_text_test_pred_labels.pkl', test_pred_labels),
    ('../error_analysis/vit_nofaceclassification_text_test_act_labels.pkl', test_actual_labels),
]

for target_path, payload in noface_result_targets:
    with open(target_path, 'wb') as f:
        pickle.dump(payload, f)

# Getting the TP and FP embeddings

In [None]:
# The original path stopped at "../data/processed/south", which names no CSV file.
# NOTE(review): assumed to mean the south image table read earlier in this notebook — confirm.
df_temp = pd.read_csv("../data/processed/south_images.csv")

In [None]:
from tqdm import tqdm

def _collect_embeddings(model, loader, device, desc):
    """Run ``model.extract_features`` over ``loader``; return (embeddings, labels) as CPU tensors."""
    batch_embeddings, batch_labels = [], []
    with torch.no_grad():
        for images, labels in tqdm(loader, desc=desc):
            embeddings = model.extract_features(images.to(device))
            batch_embeddings.append(embeddings.cpu())
            batch_labels.append(labels.cpu())
    return torch.cat(batch_embeddings), torch.cat(batch_labels)


def save_embeddings(city, model, model_name, args=args):
    """Extract and save train/test image embeddings of ``model`` for one region.

    NOTE(review): a function with this name is also defined earlier in the
    notebook; whichever definition runs last is the one in effect.

    Reads ``<args.data_dir>/<city>_images.csv``, drops vendors with a single ad,
    re-indexes the vendor ids, splits 80/20 and writes four ``.pt`` files
    (embeddings and labels for train and test) under the ``trained_<model_name>``
    embeddings directory.

    Args:
        city: Region name used to locate the CSV and to prefix the output files.
        model: Object exposing ``extract_features(images)``; moved to GPU when available.
        model_name: Used only to name the output directory.
        args: Namespace providing ``data_dir``, ``nb_augmented_samples`` and ``batch_size``.
    """
    # Construct the file path and read the CSV file
    file_path = os.path.join(args.data_dir, f"{city}_images.csv")
    df = pd.read_csv(file_path)

    # Removing vendors that have less than 2 ads
    vendors_of_interest = {k: v for k, v in Counter(df.VENDOR).items() if v > 1}
    # .copy() so the remapping below does not write into a slice of the raw frame
    df = df[df['VENDOR'].isin(list(vendors_of_interest.keys()))].copy()

    # Remapping new vendor ids
    all_vendors = df.VENDOR.unique()
    vendor_to_idx_dict = {vendor: idx for idx, vendor in enumerate(all_vendors)}
    df["VENDOR"] = df["VENDOR"].replace(vendor_to_idx_dict)

    # %% Load and preprocess images
    # The target size is fixed to 224x224 for a fair comparison with the ViT models.
    images, labels = load_images_and_labels(df, target_size=(224, 224), augment=False,
                                             num_augmented_samples=args.nb_augmented_samples)
    assert images.shape[0] == labels.shape[0]

    # NOTE(review): this split is not stratified and has no validation carve-out,
    # unlike the notebook-level split — confirm that is intended for the saved sets.
    X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.20, random_state=1111)

    # ImageDataModule needs a validation split, but none is consumed here. The
    # previous code read X_val / y_val from notebook globals (the split of whatever
    # dataset was loaded last); the test arrays are passed instead so the function
    # depends only on its own arguments.
    data_module = ImageDataModule(X_train, y_train, X_test, y_test, X_test, y_test,
                                  batch_size=args.batch_size, augment_data=False)
    data_module.setup()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()  # embeddings must not depend on dropout / batch-norm training behaviour

    file_dir = f"/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/vision_baselines/trained_{model_name}"
    Path(file_dir).mkdir(parents=True, exist_ok=True)

    train_embeddings, train_labels = _collect_embeddings(
        model, data_module.train_dataloader(), device, "Extracting Train embeddings")
    torch.save(train_embeddings, os.path.join(file_dir, f"{city}_visiondata_train.pt"))
    torch.save(train_labels, os.path.join(file_dir, f"{city}_visionlabels_train.pt"))

    test_embeddings, test_labels = _collect_embeddings(
        model, data_module.test_dataloader(), device, "Extracting test embeddings")
    torch.save(test_embeddings, os.path.join(file_dir, f"{city}_visiondata_test.pt"))
    torch.save(test_labels, os.path.join(file_dir, f"{city}_visionlabels_test.pt"))

    # return train_embeddings, train_labels, test_embeddings, test_labels

# Retrieval Faces and NoFaces embeddings

In [None]:
from tqdm import tqdm

def _extract_embeddings(model, dataloader, device, desc):
    """Embed every batch of ``dataloader`` with ``model.extract_features``.

    Args:
        model: Object exposing ``extract_features(images)``; the caller is
            expected to have moved it to ``device`` already.
        dataloader: Iterable yielding ``(images, labels)`` tensor batches.
        device: Torch device the image batches are moved to before the
            forward pass.
        desc: Label shown on the tqdm progress bar.

    Returns:
        Tuple ``(embeddings, labels)`` of CPU tensors concatenated over all
        batches, in the order the dataloader produced them.
    """
    batch_embeddings = []
    batch_labels = []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc=desc):
            embeddings = model.extract_features(images.to(device))
            batch_embeddings.append(embeddings.cpu())
            # Labels are only stored, never fed to the model, so they do not
            # need a round-trip through the accelerator.
            batch_labels.append(labels.cpu())

    return torch.cat(batch_embeddings), torch.cat(batch_labels)


def save_face_embeddings(city, model, model_name, mode="face", args=args):
    """Save train/test embeddings with the test split restricted by face presence.

    Reads ``{city}_images.csv`` from ``args.data_dir``, drops vendors with a
    single row, re-indexes the remaining vendors to ``0..N-1``, and creates
    stratified train/val/test splits (80/20, then 95/5 of the train part, both
    with ``random_state=1111``). The test split is then filtered on the
    ``IF_FACE`` column before embeddings are extracted with
    ``model.extract_features`` and written as ``.pt`` files.

    Args:
        city: Prefix of the CSV file to read and of the saved ``.pt`` files.
        model: Model exposing ``extract_features``; moved to CUDA when
            available, otherwise CPU.
        model_name: Used only to build the output directory name.
        mode: ``"face"`` keeps test rows where ``IF_FACE == "yes"``; any other
            value keeps test rows where ``IF_FACE == "no"``. The value is also
            used as the last component of the output directory.
        args: Namespace providing ``data_dir``, ``nb_augmented_samples``,
            ``batch_size`` and ``augment_data``.

    Returns:
        None. Four files are written under the output directory:
        ``{city}_visiondata_{train,test}.pt`` and
        ``{city}_visionlabels_{train,test}.pt``.
    """
    # Construct the file path and read the CSV file
    file_path = os.path.join(args.data_dir, f"{city}_images.csv")
    df = pd.read_csv(file_path)

    # Removing vendors that have less than 2 ads
    vendor_counts = Counter(df["VENDOR"])
    vendors_of_interest = [vendor for vendor, count in vendor_counts.items() if count > 1]
    # .copy() so the column assignment below writes to an independent frame
    # rather than to a slice of the frame returned by read_csv.
    df = df[df["VENDOR"].isin(vendors_of_interest)].copy()

    # Remapping new vendor ids (contiguous, in order of first appearance)
    vendor_to_idx_dict = {vendor: idx for idx, vendor in enumerate(df["VENDOR"].unique())}
    df["VENDOR"] = df["VENDOR"].map(vendor_to_idx_dict)

    train_df, test_df = train_test_split(df, test_size=0.20, random_state=1111, stratify=df['VENDOR'])
    train_df, val_df = train_test_split(train_df, test_size=0.05, random_state=1111, stratify=train_df['VENDOR'])

    # Only the test split is restricted by face presence; train/val keep all rows.
    # The non-face branch previously also selected "yes", so both modes
    # produced identical face-only test embeddings.
    face_flag = "yes" if mode == "face" else "no"
    test_df = test_df[test_df['IF_FACE'] == face_flag]

    train_images, train_labels = load_images_and_labels(train_df, target_size=(224, 224), augment=False,
                                                        num_augmented_samples=args.nb_augmented_samples)
    val_images, val_labels = load_images_and_labels(val_df, target_size=(224, 224), augment=False,
                                                    num_augmented_samples=args.nb_augmented_samples)
    test_images, test_labels = load_images_and_labels(test_df, target_size=(224, 224), augment=False,
                                                      num_augmented_samples=args.nb_augmented_samples)

    data_module = ImageDataModule(train_images, train_labels, val_images, val_labels, test_images, test_labels,
                                  batch_size=args.batch_size, augment_data=args.augment_data)
    data_module.setup()

    train_dm = data_module.train_dataloader()
    test_dm = data_module.test_dataloader()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    file_dir = f"/workspace/persistent/HTClipper/models/pickled/embeddings/grouped-and-masked/error_analysis/vision_baselines/trained_{model_name}/{mode}"
    Path(file_dir).mkdir(parents=True, exist_ok=True)

    train_embeddings, train_labels = _extract_embeddings(model, train_dm, device, "Extracting Train embeddings")
    torch.save(train_embeddings, os.path.join(file_dir, f"{city}_visiondata_train.pt"))
    torch.save(train_labels, os.path.join(file_dir, f"{city}_visionlabels_train.pt"))

    test_embeddings, test_labels = _extract_embeddings(model, test_dm, device, "Extracting test embeddings")
    torch.save(test_embeddings, os.path.join(file_dir, f"{city}_visiondata_test.pt"))
    torch.save(test_labels, os.path.join(file_dir, f"{city}_visionlabels_test.pt"))

In [None]:
# Driver cell: for each listed backbone, load it once and run save_embeddings
# for every region. load_pretrained_model and save_embeddings are defined
# elsewhere in this notebook and must have been executed before this cell.
for model_name in ["vit_base_patch16_224"]:
    print(f"---------------------------------------{model_name}---------------------------------------------------")
    model = load_pretrained_model(model_name)
    # Put the model in eval mode once, before any region is processed
    # (presumably a torch module, where this disables dropout / batch-norm updates).
    model.eval()

    # The same model instance is reused for all four regions.
    for region in ["south", "northeast", "west", "midwest"]:
        print(f"Region:{region}")
        save_embeddings(region, model=model, model_name=model_name, args=args)

In [80]:
# Scratch load of the northeast image metadata for manual inspection.
# NOTE(review): relative path, so this depends on the kernel's working directory,
# unlike the absolute args.data_dir used by the embedding functions.
df_temp = pd.read_csv("../data/processed/northeast_images.csv")

In [81]:
# Show the loaded frame (columns include ID, IMAGE, VENDOR and IF_FACE).
df_temp

Unnamed: 0.1,Unnamed: 0,ID,IMAGE,VENDOR,IF_FACE
0,0,45885,/workspace/persistent/HTClipper/data/IMAGES/ny...,538,yes
1,1,45885,/workspace/persistent/HTClipper/data/IMAGES/ny...,538,yes
2,2,45885,/workspace/persistent/HTClipper/data/IMAGES/ny...,538,no
3,3,45886,/workspace/persistent/HTClipper/data/IMAGES/ny...,539,yes
4,4,45886,/workspace/persistent/HTClipper/data/IMAGES/ny...,539,yes
...,...,...,...,...,...
14742,14742,58748,/workspace/persistent/HTClipper/data/IMAGES/ny...,671,yes
14743,14743,58748,/workspace/persistent/HTClipper/data/IMAGES/ny...,671,yes
14744,14744,58748,/workspace/persistent/HTClipper/data/IMAGES/ny...,671,yes
14745,14745,58748,/workspace/persistent/HTClipper/data/IMAGES/ny...,671,yes
