In [1]:
# install for AWS
!pip install torch
!pip install pandas
!pip install scikit-image
!pip install scikit-learn
!pip install matplotlib
!pip install torchvision
!pip install s3fs
!pip install boto3
!pip install tqdm
!pip install fiftyone
!pip install pycocotools

[33mDEPRECATION: devscripts 2.20.2ubuntu2 has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of devscripts or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
[33mDEPRECATION: devscripts 2.20.2ubuntu2 has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of devscripts or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m

In [2]:
import os
import torch
import tarfile
import shutil
import torchvision
import random
import warnings
import boto3
import s3fs
import io
import time
import botocore.exceptions
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch.nn.functional as F
import getpass
import json

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms, utils, models, datasets
from torch import nn, optim
from torch.optim import lr_scheduler
from io import BytesIO
from tqdm import tqdm
from skimage import io, transform
from PIL import Image
from pycocotools import mask as maskUtils

warnings.filterwarnings("ignore")

plt.ion()   # interactive mode

import fiftyone as fo
import fiftyone.brain as fob
import fiftyone.zoo as foz
from fiftyone import ViewField as F

In [3]:
# Initialize S3 client
# Pinned to the us-west-2 region; no credentials are passed explicitly here.
s3_client = boto3.client('s3', region_name='us-west-2')

In [4]:
# Define the S3 bucket name and prefixes
bucket_name = 'w210facetdata'  # bucket read for both the annotations and the images
annotations_prefix = 'annotations/'  # key prefix of the annotation files
images_prefix = 'images/'  # key prefix of the image files

In [5]:
# Load CSV annotations from S3
# gt_df is the annotation table that the attribute helpers below index by row label.
annotations_s3_path = f's3://{bucket_name}/{annotations_prefix}'
gt_df = pd.read_csv(f'{annotations_s3_path}annotations.csv')

In [6]:
# Download every image under the images prefix into a local directory.
s3_client = boto3.client('s3', region_name='us-west-2')
bucket_name = 'w210facetdata'
images_prefix = 'images/'

local_images_dir = 'local_images_dir'
os.makedirs(local_images_dir, exist_ok=True)

# A single list call returns at most 1000 keys, so walk all result pages;
# otherwise larger prefixes are silently truncated.
paginator = s3_client.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket=bucket_name, Prefix=images_prefix):
    for obj in page.get('Contents', []):
        key = obj['Key']
        # Skip the prefix itself and any other "folder" marker keys, which have no file name.
        if key.endswith('/'):
            continue
        local_file_path = os.path.join(local_images_dir, os.path.basename(key))
        s3_client.download_file(bucket_name, key, local_file_path)

In [7]:
# Recreate the FiftyOne dataset from the downloaded images.
# Only delete a previous copy when one exists, so the cell also works on a first run.
if fo.dataset_exists('local_images_dir'):
    fo.delete_dataset('local_images_dir')

dataset = fo.Dataset(name='local_images_dir', persistent=True)
dataset.add_images_dir(local_images_dir)
# Image width/height from the metadata are needed later to normalize bounding boxes.
dataset.compute_metadata()



 100% |█████████████████| 999/999 [211.2ms elapsed, 0s remaining, 4.7K samples/s]      
Computing metadata...
 100% |█████████████████| 999/999 [2.7s elapsed, 0s remaining, 414.5 samples/s]      


In [8]:
# List the names of the datasets FiftyOne currently knows about.
existing_datasets = fo.list_datasets()
print(existing_datasets)

['FACET', 'local_images_dir']


In [9]:
# Print the dataset summary (name, media type, sample count, field schema).
print(dataset)

Name:        local_images_dir
Media type:  image
Num samples: 999
Persistent:  True
Tags:        []
Sample fields:
    id:       fiftyone.core.fields.ObjectIdField
    filepath: fiftyone.core.fields.StringField
    tags:     fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)


In [10]:
# Launch the FiftyOne App on the dataset and keep the returned session handle.
session = fo.launch_app(dataset)


In [11]:
# Columns of gt_df that are copied onto a detection as boolean flags.
BOOLEAN_PERSONAL_ATTRS = (
    "has_facial_hair",
    "has_tattoo",
    "has_cap",
    "has_mask",
    "has_headscarf",
    "has_eyeware",
)


def add_boolean_person_attributes(detection, row_index):
    """Copy each BOOLEAN_PERSONAL_ATTRS column of one gt_df row onto ``detection``.

    Args:
        detection: object supporting item assignment; receives one key per flag.
        row_index: row label in ``gt_df`` to read the flags from.
    """
    for flag_name in BOOLEAN_PERSONAL_ATTRS:
        raw_flag = gt_df.loc[row_index, flag_name]
        detection[flag_name] = raw_flag.astype(bool)

In [12]:
def get_hairtype(row_index):
    """Return the hair type flagged for a gt_df row, or None when no flag is set.

    Looks at the columns whose name starts with ``hairtype`` and returns the
    second ``_``-separated token of the first one whose value equals 1.
    """
    is_hairtype_col = gt_df.columns.str.startswith('hairtype')
    flags = gt_df.loc[row_index, is_hairtype_col]
    active_col = next((col for col, value in flags.items() if value == 1), None)
    if active_col is None:
        return None
    return active_col.split('_')[1]
def get_haircolor(row_index):
    """Return the hair color flagged for a gt_df row, or None when no flag is set.

    Looks at the columns whose name starts with ``hair_color`` and returns the
    third ``_``-separated token of the first one whose value equals 1.
    """
    is_haircolor_col = gt_df.columns.str.startswith('hair_color')
    flags = gt_df.loc[row_index, is_haircolor_col]
    active_col = next((col for col, value in flags.items() if value == 1), None)
    if active_col is None:
        return None
    return active_col.split('_')[2]

In [13]:
def add_person_attributes(detection, row_index):
    """Attach hair type, hair color and the boolean person flags of a row to ``detection``."""
    for field, getter in (("hairtype", get_hairtype), ("haircolor", get_haircolor)):
        detection[field] = getter(row_index)
    add_boolean_person_attributes(detection, row_index)

In [14]:
def get_perceived_gender_presentation(row_index):
    """Return the gender-presentation label flagged for a gt_df row, or None.

    Uses the first column starting with ``gender`` whose value equals 1, with the
    ``gender_presentation_`` prefix removed and underscores turned into spaces.
    """
    is_gender_col = gt_df.columns.str.startswith('gender')
    flags = gt_df.loc[row_index, is_gender_col]
    active_col = next((col for col, value in flags.items() if value == 1), None)
    if active_col is None:
        return None
    without_prefix = active_col.replace("gender_presentation_", "")
    return without_prefix.replace("_", " ")
def get_perceived_age_presentation(row_index):
    """Return the age-presentation label flagged for a gt_df row, or None.

    Uses the first column starting with ``age`` whose value equals 1 and returns
    its third ``_``-separated token.
    """
    is_age_col = gt_df.columns.str.startswith('age')
    flags = gt_df.loc[row_index, is_age_col]
    active_col = next((col for col, value in flags.items() if value == 1), None)
    if active_col is None:
        return None
    return active_col.split('_')[2]

In [15]:
def get_skintone(row_index):
    """Return a dict mapping every ``skin_tone*`` column name to its value in the row."""
    is_skin_tone_col = gt_df.columns.str.startswith('skin_tone')
    return gt_df.loc[row_index, is_skin_tone_col].to_dict()

In [16]:
def add_protected_attributes(detection, row_index):
    """Attach perceived age, perceived gender presentation and skin tone to ``detection``."""
    getters = (
        ("perceived_age_presentation", get_perceived_age_presentation),
        ("perceived_gender_presentation", get_perceived_gender_presentation),
        ("skin_tone", get_skintone),
    )
    for field, getter in getters:
        detection[field] = getter(row_index)

In [17]:
# Visibility columns of gt_df that add_other_attributes copies onto each detection as booleans.
VISIBILITY_ATTRS = ("visible_torso", "visible_face", "visible_minimal")


In [18]:
def get_lighting(row_index):
    """Return the lighting label flagged for a gt_df row, or None when no flag is set.

    Uses the first column starting with ``lighting`` whose value equals 1, with the
    ``lighting_`` prefix removed and underscores turned into spaces.
    """
    is_lighting_col = gt_df.columns.str.startswith('lighting')
    flags = gt_df.loc[row_index, is_lighting_col]
    active_col = next((col for col, value in flags.items() if value == 1), None)
    if active_col is None:
        return None
    return active_col.replace("lighting_", "").replace("_", " ")
def add_other_attributes(detection, row_index):
    """Attach the lighting label and the VISIBILITY_ATTRS boolean flags to ``detection``."""
    detection["lighting"] = get_lighting(row_index)
    for visibility_name in VISIBILITY_ATTRS:
        raw_flag = gt_df.loc[row_index, visibility_name]
        detection[visibility_name] = raw_flag.astype(bool)

In [19]:
def create_detection(row_index, sample):
    """Build a ``fo.Detection`` for one annotation row of ``gt_df``.

    The row's ``bounding_box`` column is parsed as JSON; its pixel box is divided by
    the sample's metadata width/height, ``cat1`` becomes the label and ``cat2`` is
    stored as ``class2`` unless it is the string ``'none'``. Person, protected and
    other attributes are then attached by the ``add_*`` helpers.

    Args:
        row_index: row label in ``gt_df``.
        sample: object exposing ``metadata.width`` and ``metadata.height``.

    Returns:
        The populated detection.
    """
    annotation = json.loads(gt_df.loc[row_index, "bounding_box"])
    categories = annotation["dict_attributes"]
    primary_class, secondary_class = categories["cat1"], categories["cat2"]

    frame_w, frame_h = sample.metadata.width, sample.metadata.height
    relative_box = [
        annotation["x"] / frame_w,
        annotation["y"] / frame_h,
        annotation["width"] / frame_w,
        annotation["height"] / frame_h,
    ]

    detection = fo.Detection(
        label=primary_class,
        bounding_box=relative_box,
        person_id=gt_df.loc[row_index, "person_id"],
    )
    if secondary_class != 'none':
        detection["class2"] = secondary_class

    for enrich in (add_person_attributes, add_protected_attributes, add_other_attributes):
        enrich(detection, row_index)
    return detection

In [20]:
def add_ground_truth_labels(dataset):
    """Attach a ``ground_truth`` detections field to every sample of ``dataset``.

    Each sample receives one detection per ``gt_df`` row whose ``filename`` equals
    the sample's filename (an empty list when there is none). Dynamic sample
    fields are registered on the dataset afterwards.

    Args:
        dataset: collection exposing ``iter_samples`` and ``add_dynamic_sample_fields``.
    """
    # Group the annotation row labels by filename once instead of scanning the
    # whole table again for every sample.
    rows_by_filename = gt_df.groupby('filename').groups
    for sample in dataset.iter_samples(autosave=True, progress=True):
        row_indices = rows_by_filename.get(sample.filename, ())
        detections = [create_detection(row_index, sample) for row_index in row_indices]
        sample["ground_truth"] = fo.Detections(detections=detections)
    dataset.add_dynamic_sample_fields()
## add all of the ground truth labels
# Writes the "ground_truth" field on every sample of `dataset`.
add_ground_truth_labels(dataset)



 100% |█████████████████| 999/999 [13.1s elapsed, 0s remaining, 82.5 samples/s]      


In [None]:
def add_coco_masks_to_dataset(dataset):
    """Attach a ``coco_masks`` detections field with instance masks to matching samples.

    Downloads ``annotations/coco_masks.json`` from the ``w210facetdata`` bucket and,
    for every sample whose filename appears in its ``images`` list and that has at
    least one annotation, stores one detection per annotation: the category name
    as label, the box in relative ``[x, y, w, h]`` form, the decoded segmentation
    mask cropped to the box, and the ``facet_person_id`` / ``ann_id`` values.
    Samples without a match are left untouched.

    Args:
        dataset: collection exposing ``iter_samples``; samples need ``filename``
            and ``metadata.width`` / ``metadata.height``.
    """
    s3_client = boto3.client('s3')
    bucket_name = 'w210facetdata'
    object_key = 'annotations/coco_masks.json'
    s3_object = s3_client.get_object(Bucket=bucket_name, Key=object_key)
    coco_masks = json.load(s3_object['Body'])

    filename_to_id = {img["file_name"]: img["id"] for img in coco_masks["images"]}
    cat_to_label = {cat["id"]: cat["name"] for cat in coco_masks["categories"]}

    # Index the annotations by image id once, instead of scanning the full
    # annotation list again for every sample.
    annos_by_image = {}
    for ann in coco_masks["annotations"]:
        annos_by_image.setdefault(ann["image_id"], []).append(ann)

    for sample in dataset.iter_samples(autosave=True, progress=True):
        fn = sample.filename
        if fn not in filename_to_id:
            continue
        sample_annos = annos_by_image.get(filename_to_id[fn])
        if not sample_annos:
            continue

        img_width, img_height = sample.metadata.width, sample.metadata.height
        coco_detections = []
        for ann in sample_annos:
            # Unpack into locals so the loaded annotation is not modified in place.
            x, y, w, h = ann['bbox']
            mask = Image.fromarray(255 * maskUtils.decode(ann["segmentation"]))
            # Image.crop expects absolute corner coordinates (left, upper, right, lower).
            cropped_mask = np.array(mask.crop((x, y, x + w, y + h))).astype(bool)
            coco_detections.append(
                fo.Detection(
                    label=cat_to_label[ann["category_id"]],
                    # Relative [x, y, w, h] coordinates.
                    bounding_box=[x / img_width, y / img_height, w / img_width, h / img_height],
                    person_id=ann['facet_person_id'],
                    ann_id=ann['ann_id'],
                    mask=cropped_mask,
                )
            )
        sample["coco_masks"] = fo.Detections(detections=coco_detections)
## add the masks
# Writes a "coco_masks" field on the samples of `dataset` that have mask annotations.
add_coco_masks_to_dataset(dataset)




  63% |██████████/------| 633/999 [40.7s elapsed, 21.0s remaining, 18.8 samples/s] 

In [None]:
# Load the 'yolov5m-coco-torch' model from the FiftyOne model zoo.
yolov5 = foz.load_zoo_model('yolov5m-coco-torch')


In [None]:
# The cells below build FiftyOne view expressions with F(...), so F must be
# FiftyOne's ViewField; Django's F is an unrelated ORM expression type.
from fiftyone import ViewField as F


In [None]:
dataset.apply_model(yolov5, label_field="yolov5m")
### Just retain the "person" detections
# fo.ViewField is spelled out instead of the alias `F`, which this notebook binds to
# several different objects. only_matches=False keeps samples without any person
# detection in the view, so the values line up one-to-one with the dataset's samples
# when they are written back with set_values.
people_view_values = dataset.filter_labels(
    "yolov5m", fo.ViewField("label") == "person", only_matches=False
).values("yolov5m")
dataset.set_values("yolov5m", people_view_values)
dataset.save()

In [None]:
## get a list of all 52 classes
# Distinct ground-truth detection labels found in the dataset.
facet_classes = dataset.distinct("ground_truth.detections.label")
## instantiate a CLIP model with these classes
# NOTE(review): presumably each class name is appended to text_prompt to form the CLIP prompt — confirm.
clip = foz.load_zoo_model(
    "clip-vit-base32-torch",
    text_prompt="A photo of a",
    classes=facet_classes,
)

In [None]:
# Build a patches view over the "ground_truth" field, run CLIP on it storing the
# predictions under "clip", then save the view under the name "patch_view".
patch_view = dataset.to_patches("ground_truth")
patch_view.apply_model(clip, label_field="clip")
dataset.save_view("patch_view", patch_view)

In [None]:

def _evaluate_detection_model(dataset, label_field):
    eval_key = "eval_" + label_field.replace("-", "_")
    dataset.evaluate_detections(label_field, "ground_truth", eval_key=eval_key, classwise=False)
    
    for sample in dataset.iter_samples(autosave=True, progress=True):
        for pred in sample[label_field].detections:
            iou_field = f"{eval_key}_iou"
            if iou_field not in pred:
                continue
            iou = pred[iou_field]
            for it in IOU_THRESHS:
                pred[f"{iou_field}_{str(it).replace('.', '')}"] = iou >= it

In [None]:
def _compute_detection_mAR(sample_collection, label_field):
    """Computes the mean average recall of the specified detection field.
    -- computed as the average over iou thresholds of the recall at
    each threshold.
    """
    eval_key = "eval_" + label_field.replace("-", "_")
    iou_recalls = []
    for it in IOU_THRESHS:
        field_str = f"{label_field}.detections.{eval_key}_iou_{str(it).replace('.', '')}"
        counts = sample_collection.count_values(field_str)
        tp, fn = counts.get(True, 0), counts.get(False, 0)
        recall = tp/float(tp + fn) if tp + fn > 0 else 0.0
        iou_recalls.append(recall)
    return np.mean(iou_recalls)

In [None]:
def get_concept_attr_detection_mAR(dataset, label_field, concept, attributes):
    """Mean average recall of ``label_field`` restricted to one concept and attribute set.

    Ground-truth labels are filtered to ``concept`` and then, one attribute at a
    time, to the requested values. Attribute names containing ``skin_tone`` are
    matched as "that skin-tone entry is non-zero"; the others by equality.

    Args:
        dataset: collection exposing ``filter_labels``.
        label_field: predictions field passed on to ``_compute_detection_mAR``.
        concept: ground-truth label to keep.
        attributes: mapping of attribute name to required value.
    """
    view = dataset.filter_labels("ground_truth", F("label") == concept)
    for name, value in attributes.items():
        if "skin_tone" in name:
            condition = F(f"skin_tone.{name}") != 0
        else:
            condition = F(name) == value
        view = view.filter_labels("ground_truth", condition)
    return _compute_detection_mAR(view, label_field)

In [None]:
# Example query: detection mAR of "yolov5m" for one concept/attribute combination.
concept = 'gymnast'
attributes = {"hairtype": "curly", "haircolor": "black"}
get_concept_attr_detection_mAR(dataset, "yolov5m", concept, attributes)
## 0.875

In [None]:
def _evaluate_classification_model(dataset, prediction_field):
    patch_view = dataset.load_saved_view("patch_view")
    eval_key = "eval_" + prediction_field
    
    for sample in patch_view.iter_samples(progress=True):
        sample[eval_key] = (
            sample.ground_truth.label == sample[prediction_field].label
        )
        sample.save()
    dataset.save_view("patch_view", patch_view, overwrite=True)

In [None]:
def _compute_classification_recall(patch_collection, label_field):
    eval_key = "eval_" + label_field.split("_")[0]
    counts = patch_collection.count_values(eval_key)
    tp, fn = counts.get(True, 0), counts.get(False, 0)
    recall = tp/float(tp + fn) if tp + fn > 0 else 0.0
    return recall

In [None]:
def get_concept_attr_classification_recall(dataset, label_field, concept, attributes):
    """Classification recall on the saved patch view for one concept and attribute set.

    Patches are matched on the ground-truth label and then, one attribute at a
    time, on the requested values. Attribute names containing ``skin_tone`` are
    matched as "that skin-tone entry is non-zero"; the others by equality.
    """
    patches = dataset.load_saved_view("patch_view").match(F("ground_truth.label") == concept)
    for name, value in attributes.items():
        if "skin_tone" in name:
            condition = F(f"ground_truth.skin_tone.{name}") != 0
        else:
            condition = F(f"ground_truth.{name}") == value
        patches = patches.match(condition)
    return _compute_classification_recall(patches, label_field)

In [None]:
# Reuses `concept` and the `attributes` dict defined for the detection example;
# the name `attribute` (singular) is not defined at notebook level.
get_concept_attr_classification_recall(dataset, "clip", concept, attributes)
## 0.6193353474320241

In [None]:
def get_concept_attr_recall(dataset, label_field, concept, attribute):
    """Dispatch to the detection or the classification recall for ``label_field``.

    When ``label_field`` is one of the dataset's sample fields the detection mAR
    is computed; otherwise the classification recall is used.
    """
    is_sample_field = label_field in dataset.get_field_schema().keys()
    compute_recall = (
        get_concept_attr_detection_mAR
        if is_sample_field
        else get_concept_attr_classification_recall
    )
    return compute_recall(dataset, label_field, concept, attribute)

In [None]:
def compute_disparity(dataset, label_field, concept, attribute1, attribute2):
    """Return recall(attribute1) - recall(attribute2) for ``concept`` and ``label_field``."""
    recall1, recall2 = [
        get_concept_attr_recall(dataset, label_field, concept, attrs)
        for attrs in (attribute1, attribute2)
    ]
    return recall1 - recall2

In [None]:
# Compare CLIP recall between curly and straight hair for a few concepts;
# each printed value is recall(attrs1) - recall(attrs2).
attrs1 = {"hairtype": "curly"}
attrs2 = {"hairtype": "straight"}
for concept in ["astronaut", "singer", "judge", "student"]:
    disparity = compute_disparity(dataset, "clip", concept, attrs1, attrs2)     
    print(f"{concept}: {disparity}")
#### OUTPUT ####
## astronaut: -0.8269230769230769
## singer: -0.0008051529790660261
## judge: -0.06666666666666667
## student: 0.16279069767441856

# NOTE(review): the author flagged the numbers above as not credible; treat them as unverified.


In [None]:
# Define the S3 bucket name and prefixes
# bucket_name and annotations_prefix repeat the values set near the top of the notebook;
# image_dir is the value later passed to CustomDataset.
bucket_name = 'w210facetdata'
annotations_prefix = 'annotations/'
image_dir = 'images/'

In [None]:
# The training cells read the annotations through `df`, which no earlier cell defines.
# NOTE(review): assumes `df` is meant to be the annotation table loaded as `gt_df` — confirm.
df = gt_df

# Extracting unique class labels from the 'class1' column
classes = df['class1'].unique()

# Creating a mapping from index to class label
idx_to_class = {i: j for i, j in enumerate(classes)}

# Creating a reverse mapping from class label to index
class_to_idx = {value: key for key, value in idx_to_class.items()}

# Creating a mapping from index to annotation column name starting from the 7th column
idx_to_annot = {i: j for i, j in enumerate(df.columns[6:])}


In [None]:
# Preprocessing applied to every image: resize to 256x256, convert to a tensor,
# then normalize each of the three channels with the given mean/std.
# Note: this rebinds the name `transform`, which the import cell also binds via
# `from skimage import io, transform`.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [None]:
# Split data into train, validation, and test sets
# 20% of the rows are held out for test; 10% of the remainder becomes validation.
# random_state is fixed so the split is reproducible.
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.1, random_state=42)


In [None]:
# toggle on/off subsetting
# Define the number of samples you want in your subset
subset_size = 1000  # Adjust the size as needed

# Create a smaller subset of your dataset
# Slicing keeps at most the first subset_size rows of each split and rebinds the split names.
train_data = train_data[:subset_size]
test_data = test_data[:subset_size]
val_data = val_data[:subset_size]

In [None]:
class CustomDataset(Dataset):
    """Dataset that reads images from S3 and pairs them with labels and annotations.

    Each item is ``(image, label, annotations)`` where ``image`` is the (optionally
    transformed) RGB image, ``label`` is the class index of the row's 4th column
    looked up in ``class_to_idx``, and ``annotations`` is a float16 column vector
    built from the row's columns from the 7th onwards.

    Args:
        split_data: DataFrame with one row per item (file name in the 3rd column).
        image_dir: key prefix, inside ``bucket_name``, under which the images live.
        transform: optional callable applied to the PIL image.
    """

    def __init__(self, split_data, image_dir, transform=None):
        self.data = split_data
        self.image_dir = image_dir
        self.transform = transform
        # Created on first use so that building the dataset does not touch S3.
        self._fs = None

    def __len__(self):
        return len(self.data)

    def _filesystem(self):
        """Return the S3 filesystem handle, creating it on first use."""
        if getattr(self, "_fs", None) is None:
            self._fs = s3fs.S3FileSystem()
        return self._fs

    def __getitem__(self, idx):
        img_name = self.data.iloc[idx, 2]
        # Use the prefix this dataset was constructed with rather than a notebook global.
        image_key = f'{self.image_dir}{img_name}'  # Construct S3 object key
        # Load image from S3
        with self._filesystem().open(f'{bucket_name}/{image_key}', 'rb') as file:
            img_data = file.read()

        # Open the image directly from the byte stream using PIL; force three
        # channels because the notebook's transform normalizes with 3-channel stats.
        image = Image.open(BytesIO(img_data)).convert('RGB')
        annotations = self.data.iloc[idx, 6:].values.astype(np.float16).reshape(-1, 1)
        label = class_to_idx[self.data.iloc[idx, 3]]

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(label), torch.from_numpy(annotations)

# Create custom datasets and data loaders
# The loaders are built before the hyperparameter cell further down runs, so the
# batch size has to be defined here; it uses the same value as that cell.
batch_size = 32

train_dataset = CustomDataset(split_data=train_data, image_dir=image_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Evaluation splits are not shuffled.
test_dataset = CustomDataset(split_data=test_data, image_dir=image_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

val_dataset = CustomDataset(split_data=val_data, image_dir=image_dir, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
class DeepNN(nn.Module):
    """Four-convolution network followed by a two-layer classifier.

    The first linear layer takes 65536 inputs, which matches 16 channels of
    64x64 feature maps, i.e. 256x256 input images after the two 2x2 poolings.

    Args:
        num_classes: size of the output layer.
    """

    def __init__(self, num_classes=52):
        super().__init__()
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)
        head = [
            nn.Linear(65536, 256),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(256, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        feature_maps = self.features(x)
        return self.classifier(feature_maps.flatten(start_dim=1))

In [None]:
class LightNN(nn.Module):
    """Two-convolution network followed by a two-layer classifier.

    The first linear layer takes 32768 inputs, which matches 8 channels of
    64x64 feature maps, i.e. 256x256 input images after the two 2x2 poolings.

    Args:
        num_classes: size of the output layer.
    """

    def __init__(self, num_classes=52):
        super().__init__()
        conv_stack = [
            nn.Conv2d(3, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(8, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)
        head = [
            nn.Linear(32768, 128),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(128, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        feature_maps = self.features(x)
        return self.classifier(feature_maps.flatten(start_dim=1))

In [None]:
# Prefer the GPU but fall back to the CPU so this cell also runs without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

criterion_teacher = nn.CrossEntropyLoss()  # Loss for teacher model
# Knowledge distillation loss. reduction='batchmean' divides the summed divergence
# by the batch size, which is the mathematically correct KL divergence; the default
# 'mean' averages over every element instead.
criterion_student = nn.KLDivLoss(reduction='batchmean')

# Instantiate the teacher and student models
teacher_model = DeepNN(num_classes=52).to(device)

student_model = LightNN(num_classes=52).to(device)


# Define one optimizer per model
optimizer_student = optim.Adam(student_model.parameters(), lr=.01)
optimizer_teacher = optim.Adam(teacher_model.parameters(), lr=.01)

In [None]:
# Hyperparameters. Several of these are not referenced by any other cell visible in
# this notebook (learning_rate, fine_tune_learning_rate, num_classes, fine_tune_epochs,
# disparity_weight); the optimizers and models above use literal values instead.
batch_size = 32
learning_rate = 0.001
fine_tune_learning_rate = learning_rate / 10
num_classes = 52
num_epochs = 2  # number of passes of the training loop below
fine_tune_epochs = 2
disparity_weight = 0.1
alpha = 0.5  # weight of the distillation loss; (1 - alpha) weights the cross-entropy loss
temperature = 5.0  # logits are divided by this before the softmax in the distillation loss
eps = 1e-8  # added to the teacher's softmax output in the distillation loss

In [None]:
# Train the student with knowledge distillation while the teacher learns from the labels.
torch.autograd.set_detect_anomaly(True)

# Lists to store loss values
kd_loss_values = []
ce_loss_values = []
disparity_loss_values = []
total_loss_values = []

# Move batches to whichever device the models already live on instead of assuming CUDA.
train_device = next(student_model.parameters()).device

try:
    # Training loop
    for epoch in range(num_epochs):
        pbar = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{num_epochs}')

        student_model.train()
        teacher_model.train()

        # `annotations` is part of every batch but is not used by this loop.
        for images, labels, annotations in pbar:
            images, labels = images.to(train_device), labels.to(train_device)

            optimizer_student.zero_grad()
            optimizer_teacher.zero_grad()

            # Forward pass
            teacher_outputs = teacher_model(images)
            student_outputs = student_model(images)

            # Knowledge distillation loss between the temperature-softened distributions.
            # torch.nn.functional is spelled out because the alias `F` is rebound to
            # other objects elsewhere in this notebook. The teacher logits are detached
            # so this loss only updates the student and cannot pull the teacher
            # towards the student.
            kd_loss = criterion_student(
                torch.nn.functional.log_softmax(student_outputs / temperature, dim=1),
                torch.nn.functional.softmax(teacher_outputs.detach() / temperature, dim=1) + eps,
            )
            ce_loss = criterion_teacher(student_outputs, labels)
            # The teacher learns from the labels through its own cross-entropy loss.
            # NOTE(review): the original code gave the teacher gradients only through
            # the distillation target; confirm that training it on the labels is intended.
            teacher_loss = criterion_teacher(teacher_outputs, labels)

            # Append the loss values for plotting
            kd_loss_values.append(kd_loss.item())
            ce_loss_values.append(ce_loss.item())

            # Combine the student's losses
            total_loss = alpha * kd_loss + (1 - alpha) * ce_loss

            # Append the total loss value for plotting
            total_loss_values.append(total_loss.item())

            # Perform the backward passes (the two graphs are independent)
            total_loss.backward()
            teacher_loss.backward()

            # Optimize the models
            optimizer_student.step()
            optimizer_teacher.step()

            # Show the loss values on the progress bar instead of printing per batch
            pbar.set_postfix(
                kd=f'{kd_loss.item():.4f}',
                ce=f'{ce_loss.item():.4f}',
                total=f'{total_loss.item():.4f}',
            )

        # No learning-rate scheduler is defined, so nothing is stepped per epoch;
        # the optimizers must not be stepped again here with stale gradients.
finally:
    # Disable anomaly detection when done, even if training fails
    torch.autograd.set_detect_anomaly(False)