# Facial prediction: fine tuning and an attempt using wandb

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Preview the Kaggle input tree: print at most the first 11 files of every directory.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names[:11]:
        print(os.path.join(folder, name))
        

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
try:
    from torchinfo import summary
except:
    print("[INFO] Couldn't find torchinfo ... installing it.")
    !pip install -q torchinfo
    from torchinfo import summary

## Content of the notebook
1. Rate facial beauty using a pretrained ViT model from Hugging Face Transformers
2. Denoise and refine the photo
3. Generate an ID photo

##  Import dataset, preprocessing, Composing dataloader

In [None]:
from tqdm.notebook import tqdm

BASE_DIR = "/kaggle/input/scut-fbp5500-v2-facial-beauty-scores"

# labels.txt is read line by line; each line is split on single spaces into
# the two columns of the DataFrame (file name, beauty score as a string).
with open(f'{BASE_DIR}/labels.txt', 'r', encoding='utf-8') as labels_file:
    labels = labels_file.readlines()
    data = [label.rstrip('\n').split(' ') for label in tqdm(labels)]

df = pd.DataFrame(data, columns=['filename', 'beauty_rate'])

In [None]:
import torchvision
# Image folder; the trailing slash is kept because paths are built by plain concatenation.
IMAGE_DIR = f'{BASE_DIR}/Images/Images/'

# Decode the second image listed in df into a tensor and display its shape.
image_tensor = torchvision.io.read_image(f'{IMAGE_DIR}{df.iloc[1, 0]}')
image_tensor.shape

In [None]:
## sample a few images to show
import matplotlib.pyplot as plt
from PIL import Image
import math
import torch


def plot_beauty(df, img_dir, num=5 ,random_seed=42):
    """Show ``num`` randomly chosen images from ``df`` in a 3-column grid.

    Args:
        df: table whose first column holds the image file name and whose
            second column holds the beauty label.
        img_dir: directory that contains the image files.
        num: number of images to draw; rows are sampled with replacement.
        random_seed: seed passed to ``torch.manual_seed`` so the same images
            are picked on every call.

    Note:
        ``torch.manual_seed`` reseeds torch's global RNG as a side effect.
    """
    fig = plt.figure(figsize=(9, 9))
    rows, cols = math.ceil(num / 3), 3
    torch.manual_seed(random_seed)
    for i in range(num):
        # Named `row_idx` rather than `id` so the builtin is not shadowed.
        row_idx = torch.randint(0, len(df), size=[1]).item()
        img_path, label = f'{img_dir}/{df.iloc[row_idx, 0]}', df.iloc[row_idx, 1]
        ax = fig.add_subplot(rows, cols, i + 1)
        # The context manager closes the file once matplotlib has copied the pixels.
        with Image.open(img_path) as im:
            ax.imshow(im)
        # f-string formatting also works when the label is numeric, not only str.
        ax.set_title(f'beauty label:{label}')
        ax.axis('off')
    

In [None]:
# Preview the default number of random faces (num=5) using a fixed seed.
plot_beauty(df, IMAGE_DIR, random_seed=68)

In [None]:
from torch.utils.data import Dataset, DataLoader, Subset
import os


class FaceData(Dataset):
    """Dataset of face images paired with their beauty score.

    Every item is a ``(image, score, image_path)`` triple, where ``score`` is
    a float32 scalar tensor built from the label column.
    """

    def __init__(self, df, img_dir, transform):
        # df: two-column table (file name, label); img_dir: folder with the
        # images; transform: callable applied to the PIL image, or a falsy
        # value to return the PIL image unchanged.
        self.df, self.img_dir, self.transform = df, img_dir, transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # The row must hold exactly two values: file name and label.
        filename, label = self.df.iloc[idx].values
        img_path = os.path.join(self.img_dir, filename)

        image = Image.open(img_path).convert("RGB")
        image = self.transform(image) if self.transform else image

        score = torch.tensor(float(label), dtype=torch.float32)
        return image, score, img_path
    
#     def get_path_by_idx(self, idx):
#         filename, label = self.df.iloc[idx].values
#         img_path = os.path.join(self.img_dir, filename)
#         return img_path
        

In [None]:
# from torchvision import transforms

# transform1 = transforms.Compose([
#     transforms.Resize((128,128)),
#     transforms.ToTensor()
# ])

# faceds = FaceData(df, f'{BASE_DIR}/Images/Images', transform = transform1)

In [None]:
#### 2. train / test split

# val_size = 0.2
# indices = list(range(len(df)))

# np.random.shuffle(indices)
# split = int(np.floor(val_size * len(df)))
# train_indices, val_indices = indices[split:], indices[:split]

# train_ds = Subset(faceds, train_indices)
# val_ds = Subset(faceds, val_indices)

# train_loader = DataLoader(train_ds, batch_size=32, shuffle = True)
# val_loader = DataLoader(val_ds, batch_size=32, shuffle = True)

## Using the keypoint detection model from pytorch
https://pytorch.org/vision/stable/models.html#keypoint-detection

In [None]:
from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights
from torchvision.io import read_image

# Run the pretrained Keypoint R-CNN on one sample image.
person_int = read_image(f'{IMAGE_DIR}{df.iloc[8, 0]}')

weights = KeypointRCNN_ResNet50_FPN_Weights.DEFAULT
# Preprocessing pipeline that ships with the selected weights.
transforms = weights.transforms()
person_float = transforms(person_int)

# eval() returns the module itself, so construction and mode switch can be chained.
model = keypointrcnn_resnet50_fpn(weights=weights, progress=False).eval()

outputs = model([person_float])
print(outputs)

In [None]:
# Keypoints and confidence scores of the detections found in the first (only) input image.
kpts, scores = (outputs[0][key] for key in ('keypoints', 'scores'))

print(kpts, scores, sep='\n')

In [None]:
import torchvision.transforms.functional as F

def show(imgs):
    """Display one image tensor, or a list of them, side by side without axis ticks."""
    images = imgs if isinstance(imgs, list) else [imgs]
    _, axs = plt.subplots(ncols=len(images), squeeze=False)
    # squeeze=False keeps axs two-dimensional, so the single row is axs[0].
    for ax, tensor in zip(axs[0], images):
        pil_img = F.to_pil_image(tensor.detach())
        ax.imshow(np.asarray(pil_img))
        ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])


In [None]:
# Keep only the detections whose confidence score exceeds the threshold.
detect_threshold = 0.75
idx = torch.where(scores > detect_threshold)  # indices of the detections that pass
keypoints = kpts[idx]

In [None]:
from torchvision.utils import draw_keypoints

# Draw the retained keypoints on the original uint8 image and display the result.
res = draw_keypoints(person_int, keypoints, colors="blue",radius=3)
show(res)

In [None]:
# dir(model)
# model.state_dict
# model

In [None]:
# Print a layer-by-layer torchinfo summary of `model` (the keypoint detector loaded above)
summary(model=model, 
        input_size=(32, 3,128,128), # make sure this is "input_size", not "input_shape"
        # col_names=["input_size"], # uncomment for smaller output
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"]
) 

## Use a pretrained ViT from Hugging Face Transformers and fine-tune it to predict the beauty score (regression)

In [None]:
!pip install transformers

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

from transformers import AutoImageProcessor, ViTForImageClassification, ViTFeatureExtractor

# Pretrained ViT checkpoint; ModifiedViT below reuses its `.vit` encoder as the backbone.
modelVit = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
# NOTE(review): feature_extractor is not referenced by any later cell shown here.
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
# image_processor supplies the size and mean/std used to build the torchvision transforms.
image_processor = AutoImageProcessor.from_pretrained('google/vit-base-patch16-224')


# Step 2: Modify the Model Architecture
class ModifiedViT(nn.Module):
    """ViT backbone followed by a small head that outputs one value per image.

    Head: Linear(hidden_size -> 256) -> ReLU -> Dropout(0.5) -> Linear(256 -> 1).
    """

    def __init__(self, base_model):
        super().__init__()
        self.base_model = base_model
        hidden_size = base_model.config.hidden_size
        self.fc1 = nn.Linear(hidden_size, 256)
        self.dropout1 = nn.Dropout(0.5)
        self.classifier = nn.Linear(256, 1)

    def forward(self, x):
        # Use the token at position 0 of the backbone's last hidden state
        # (for ViT this is the [CLS] token) as the image representation.
        first_token = self.base_model.vit(x).last_hidden_state[:, 0]
        hidden = torch.relu(self.fc1(first_token))
        return self.classifier(self.dropout1(hidden))

In [None]:
## Make full use of the GPUs available, since we have 2 T4 to use.

modifiedVit = ModifiedViT(modelVit)

gpu_count = torch.cuda.device_count()
print("Number of GPUs available:", gpu_count)

if gpu_count > 1:
    print("Using", gpu_count, "GPUs!")
    # Wrap the *instance*, never the class: rebinding the name `ModifiedViT`
    # to a DataParallel object would break every later `ModifiedViT(...)` call.
    # The wrapper gets its own name so that cells which reach into
    # `modifiedVit.base_model` keep working; both names share the same parameters.
    modifiedVit_parallel = nn.DataParallel(modifiedVit)

In [None]:
## Freeze the pretrained ViT so that only the newly added head layers stay trainable

# Discard the classification head that came with the pretrained checkpoint.
modifiedVit.base_model.classifier = None

# Module.requires_grad_ switches the flag on every parameter of the backbone in place.
modifiedVit.base_model.requires_grad_(False)

In [None]:
# NOTE(review): a bare expression is only rendered when it is the last line of a
# cell, so the first line presumably shows nothing; the summary call produces the output.
modifiedVit
summary(modifiedVit)

In [None]:
# Display the image processor's configuration; its size and mean/std are read in the next cell.
image_processor

In [None]:
## construct transformation for images

from torchvision.transforms import (
    CenterCrop,
    Compose,
    Normalize,
    RandomHorizontalFlip,
    RandomResizedCrop,
    Resize,
    ToTensor,
)

# Normalise with the statistics stored in the checkpoint's image processor.
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# Processors describe their target size either as {"height", "width"} or as
# {"shortest_edge"[, "longest_edge"]}; handle both so `size` and `crop_size`
# are always defined instead of failing later with a NameError.
processor_size = image_processor.size
if "height" in processor_size:
    size = (processor_size["height"], processor_size["width"])
    crop_size = size
    max_size = None
elif "shortest_edge" in processor_size:
    size = processor_size["shortest_edge"]
    crop_size = (size, size)
    max_size = processor_size.get("longest_edge")
else:
    raise ValueError(f"Unsupported image_processor.size: {processor_size!r}")

# Training pipeline: random crop + horizontal flip as augmentation, then tensor + normalise.
train_transforms = Compose(
    [
        RandomResizedCrop(crop_size),
        RandomHorizontalFlip(p=0.4),
        ToTensor(),
        normalize,
    ]
)


In [None]:
# Validation pipeline: deterministic resize + centre crop (no augmentation),
# followed by the same tensor conversion and normalisation as in training.
val_transforms = Compose(
    [
        Resize(size),
        CenterCrop(crop_size),
        ToTensor(),
        normalize,
    ]
)


In [None]:
## Create train and test dataset and dataloader 


# Hold out 20% of the rows for validation.
val_size = 0.2
indices = list(range(len(df)))

# NOTE: the shuffle uses NumPy's global RNG, so the split differs between sessions
# unless np.random.seed(...) is called beforehand.
np.random.shuffle(indices)
split = int(np.floor(val_size * len(df)))
train_indices, val_indices = indices[split:], indices[:split]

# Separate dataset objects so that each split gets its own transform pipeline.
train_ds = FaceData(df.iloc[train_indices, :], f'{BASE_DIR}/Images/Images', transform=train_transforms)
val_ds = FaceData(df.iloc[val_indices, :], f'{BASE_DIR}/Images/Images', transform=val_transforms)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
# No drop_last on validation: every held-out sample must contribute to the
# validation loss, which is averaged over the whole validation set.
# shuffle stays on because later cells draw a random batch with next(iter(val_loader)).
val_loader = DataLoader(val_ds, batch_size=32, shuffle=True)

In [None]:
# Debug cell: the type() result is not the last expression of the cell, so only the print is shown.
type(df.iloc[train_indices,:])
print( f'{BASE_DIR}/Images/Images')

In [None]:
## Use weight and bias to monitor and record the experiments
try:
    import wandb
except:
    print("[INFO] Couldn't find wandb, ... installing it.")
    !pip install wandb
    import wandb


In [None]:
from kaggle_secrets import UserSecretsClient

# Fetch the W&B API key from the Kaggle secret named "wandb_api_key" and log in with it,
# so the key never appears in the notebook itself.
user_secrets = UserSecretsClient()
kaggle_secret = user_secrets.get_secret("wandb_api_key") 
wandb.login(key=kaggle_secret)

In [None]:
# import wandb
# import random

# # start a new wandb run to track this script
# wandb.init(
#     # set the wandb project where this run will be logged
#     project="facial_beauty",
#     settings=wandb.Settings(start_method="fork"),
# #     id ="facial-240706",
# #     resume = "must",
# )

In [None]:
# # Log hyperparameters
# wandb.config = {
#     "learning_rate": 0.001,
#     "epochs": 5,
#     "batch_size": 32
# }


In [None]:
# Define hyperparameter space; the tuning loop below trains one model per combination.
epochs_options = [5, 8]
learning_rate_options = [0.0005]
# train_model selects Adam for 'adam' and falls back to SGD for any other value.
optimizer_options = ['adam', 'sgd']

In [None]:
def initialize_model():
    """Build a fresh ModifiedViT whose pretrained backbone is frozen.

    Returns:
        The new model. When more than one GPU is visible it is wrapped in
        ``nn.DataParallel``; otherwise the bare ``ModifiedViT`` is returned.
    """
    modelVit = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
    modifiedVit_ins = ModifiedViT(modelVit)

    gpu_count = torch.cuda.device_count()
    print("Number of GPUs available:", gpu_count)
    use_data_parallel = gpu_count > 1
    if use_data_parallel:
        print("Using", gpu_count, "GPUs!")

    ## freeze the parameters of pretrained ViT
    # This must happen before the DataParallel wrap: afterwards `base_model`
    # is only reachable through `.module`.
    modifiedVit_ins.base_model.classifier = None
    for param in modifiedVit_ins.base_model.parameters():
        param.requires_grad = False

    print(summary(modifiedVit_ins))

    if use_data_parallel:
        # The original assigned to a misspelt name (`modifiedViT_ins`), which
        # raised UnboundLocalError whenever this branch ran.
        modifiedVit_ins = nn.DataParallel(modifiedVit_ins)

    return modifiedVit_ins

In [None]:
# Step 4: Fine-Tune the Model


def train_model(num_epochs, learning_rate, optimizer):
    """Train a fresh model for one hyperparameter combination and log it to wandb.

    Args:
        num_epochs: number of passes over ``train_loader``.
        learning_rate: learning rate handed to the optimizer.
        optimizer: ``'adam'`` selects Adam; any other value selects SGD.

    Returns:
        The trained model. Callers that ignore the return value are unaffected.
    """
    # Start a new wandb run for each combination
    wandb.init(project="facial_beauty", id=f"config_{num_epochs}_{optimizer}",
               config={"epochs": num_epochs, "optimizer": optimizer,
                       "learning_rate": learning_rate})

    # try/finally guarantees the wandb run is closed even if training fails.
    try:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        criterion = nn.MSELoss()
        modifiedVit1 = initialize_model()
        modifiedVit1.to(device)

        # Separate local name so the `optimizer` argument (a string) is not overwritten.
        optimizer_cls = optim.Adam if optimizer == 'adam' else optim.SGD
        opt = optimizer_cls(modifiedVit1.parameters(), lr=learning_rate)

        for epoch in range(num_epochs):
            modifiedVit1.train()
            running_loss, train_seen = 0.0, 0
            for images, labels, _ in train_loader:
                images, labels = images.to(device), labels.to(device)

                opt.zero_grad()
                outputs = modifiedVit1(images)
                # Squeeze only the last dimension so a one-sample batch keeps
                # shape (1,) and matches `labels` without broadcasting.
                loss = criterion(outputs.squeeze(-1), labels)
                loss.backward()
                opt.step()

                batch_size = images.size(0)
                running_loss += loss.item() * batch_size
                train_seen += batch_size

            # Average over the samples actually processed, which stays correct
            # even if a loader drops its last incomplete batch.
            epoch_loss = running_loss / max(train_seen, 1)
            print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {epoch_loss:.4f}')

            modifiedVit1.eval()
            val_loss, val_seen = 0.0, 0
            with torch.no_grad():
                for images, labels, _ in val_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = modifiedVit1(images)
                    loss = criterion(outputs.squeeze(-1), labels)
                    batch_size = images.size(0)
                    val_loss += loss.item() * batch_size
                    val_seen += batch_size

            val_epoch_loss = val_loss / max(val_seen, 1)
            print(f'Epoch {epoch+1}/{num_epochs}, Val Loss: {val_epoch_loss:.4f}')

            # One log call per epoch keeps both metrics on the same wandb step.
            wandb.log({"epoch": epoch + 1,
                       "train_loss": epoch_loss,
                       "val_loss": val_epoch_loss})
    finally:
        # Finish the run
        wandb.finish()

    return modifiedVit1


## Hyperparameter tuning

In [None]:
# Grid search: one wandb-tracked training run per (epochs, learning rate, optimizer) combination.
for epochs in epochs_options:
    for learning_rate in learning_rate_options:
        for optimizer_choice in optimizer_options:
            
            # Train the model
            train_model(epochs, learning_rate, optimizer_choice)

In [None]:
# Predict one validation image with the ViT regressor and show prediction vs. label.
# NOTE(review): `modifiedVit` is the instance created in the earlier cell; train_model
# builds and trains its own instances, so this one still has an untrained head.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the regressor itself to the device. The original moved `model`, which is
# the keypoint detector, leaving the input and `modifiedVit` on different devices.
modifiedVit = modifiedVit.to(device)
# Set the model to evaluation mode
modifiedVit.eval()

index = 0
# Get the image and label
images, labels, _ = next(iter(val_loader))
image = images[index]
label = labels[index]

# Add a batch dimension (required for the model) and move it next to the model
image = image.unsqueeze(0).to(device)

# Make the prediction
with torch.no_grad():
    output = modifiedVit(image)
# The output holds a single value; convert it to a Python float
predicted_value = output.item()

# Display the image and the prediction
image = image.squeeze(0).cpu().numpy().transpose((1, 2, 0))
# Undo the Normalize() step so imshow receives values in [0, 1] instead of clipping them.
image = np.clip(
    image * np.array(image_processor.image_std) + np.array(image_processor.image_mean),
    0.0,
    1.0,
)
plt.imshow(image)
plt.title(f"Predicted: {predicted_value:.4f}, Actual: {label.item():.4f}")
plt.axis('off')
plt.show()


## Compute predictions for the validation set and plot the worst mispredictions to investigate their causes.

In [None]:
# Parallel lists: image paths, ground-truth labels and model predictions.
val_pred_lst1, val_pred_lst2, val_pred_lst3 = [], [], []

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Move the ViT regressor (not the keypoint `model`) to the device once, outside the loop.
modifiedVit = modifiedVit.to(device)
modifiedVit.eval()

# Iterate the loader itself so every validation batch is visited exactly once.
# Calling next(iter(val_loader)) inside the loop restarts the shuffled loader on
# each pass, which yields duplicated and missing samples.
for images, labels, paths in tqdm(val_loader):
    # Make the prediction
    with torch.no_grad():
        outputs = modifiedVit(images.to(device))

    # Flatten the model output to one value per sample; reshape(-1) keeps a
    # one-sample batch as a length-1 tensor, so tolist() always returns a list.
    outputs = outputs.reshape(-1).cpu()

    val_pred_lst1 += list(paths)
    val_pred_lst2 += labels.tolist()
    val_pred_lst3 += outputs.tolist()

In [None]:
# One row per predicted validation sample: image path, ground-truth label and model prediction.
val_pred = pd.DataFrame({
    'img_path': val_pred_lst1,
    'label': val_pred_lst2,
    'pred': val_pred_lst3
})

In [None]:
# Quick look at the first rows of the prediction table.
val_pred.head()

In [None]:
def plot_by_err(df, num_plots):
    """Plot the rows of ``df`` with the largest absolute prediction error.

    Args:
        df: DataFrame with the columns ``'img_path'``, ``'label'`` and
            ``'pred'``. An ``'AbsDiff'`` column (|label - pred|) is added to
            it in place, as in the original implementation.
        num_plots: maximum number of images to show; fewer are drawn when
            ``df`` has fewer rows.
    """
    # Absolute difference between label and prediction
    df['AbsDiff'] = (df['label'] - df['pred']).abs()

    # Largest errors first, then keep the top rows
    top_ = df.sort_values(by='AbsDiff', ascending=False).head(num_plots)
    print(top_)

    # head() may return fewer rows than requested; size the grid from what is
    # really there instead of indexing past the end of the frame.
    n_shown = len(top_)
    fig = plt.figure(figsize=(9, 9))
    rows, cols = math.ceil(n_shown / 3), 3
    # Columns are read by name, so extra or reordered columns do not break the unpacking.
    samples = zip(top_['img_path'], top_['label'], top_['pred'])
    for i, (img_path, label, pred) in enumerate(samples, start=1):
        ax = fig.add_subplot(rows, cols, i)
        # Close each file once matplotlib has copied the pixel data.
        with Image.open(img_path) as im:
            ax.imshow(im)
        ax.set_title(f'label:{label:.4f}|pred:{pred:.4f}')
        ax.axis('off')
    

In [None]:
# Show the 18 validation images with the largest absolute error.
plot_by_err(val_pred, 18)

## Widget to predict arbitrary photo!

In [None]:

import torch
from torchvision import transforms, models
from IPython.display import display, clear_output
from ipywidgets import widgets, VBox, Output
from PIL import Image
import io
import matplotlib.pyplot as plt


def preprocess_image(image):
    """Convert raw image bytes into a one-image batch tensor on ``device``.

    Args:
        image: encoded image file content (bytes), e.g. from an upload widget.

    Returns:
        The preprocessed image with a leading batch dimension, moved to the
        notebook-level ``device``.
    """
    pil_image = Image.open(io.BytesIO(image)).convert("RGB")
    # Reuse the validation pipeline so inference gets exactly the resize and
    # normalisation used during evaluation. The ImageNet mean/std that were
    # hard-coded here are not guaranteed to match image_processor's statistics.
    return val_transforms(pil_image).unsqueeze(0).to(device)

def predict_beauty_score(image):
    """Return the predicted beauty score (a float) for raw image bytes.

    Args:
        image: encoded image file content (bytes); see ``preprocess_image``.
    """
    batch = preprocess_image(image)
    # Use the ViT regressor. The notebook-level name `model` refers to the
    # keypoint detector, whose output is not a single scalar tensor.
    regressor = modifiedVit.to(device)
    regressor.eval()
    with torch.no_grad():
        return regressor(batch).item()

def upload_and_predict():
    """Render an upload widget plus a button that scores the uploaded image.

    Clicking the button runs ``predict_beauty_score`` on each uploaded file
    and shows the image with its score in the output area.
    """
    uploader = widgets.FileUpload(accept='image/*', multiple=False)
    result_button = widgets.Button(description="결과 보기")
    output = Output()

    def on_button_click(b):
        uploads = uploader.value
        # ipywidgets 7 exposes a dict {filename: {...}}; ipywidgets 8 exposes a
        # tuple of per-file dicts. Normalise both to an iterable of file dicts.
        files = uploads.values() if isinstance(uploads, dict) else uploads
        for file_info in files:
            # ipywidgets 8 hands the content out as a memoryview; bytes() is a
            # no-op copy for the bytes object that ipywidgets 7 provides.
            uploaded_image = bytes(file_info['content'])

            with output:
                clear_output(wait=True)
                # Predicting inside the output context makes any error show
                # up in the widget instead of being lost in the callback.
                score = predict_beauty_score(uploaded_image)
                img = Image.open(io.BytesIO(uploaded_image))
                plt.figure(figsize=(6, 6))
                plt.imshow(img)
                plt.title(f'Beauty Score: {score:.2f}')
                plt.axis('off')
                plt.show()

    result_button.on_click(on_button_click)
    display(VBox([uploader, result_button, output]))

# Render the upload widget in the notebook.
upload_and_predict()
