In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Root folder for the images and annotation CSVs (relative path)
data_dir = '../data'
# Image file extensions, written without the leading dot
img_ext = ['jpg', 'png', 'jpeg']

# Sub-folders of `annotation_data` that are scanned for annotation files
segmentation = ['face', 'body', 'clothing', 'environment']

In [3]:
# Collect the file names in `images_ps` whose extension is a known image type.
# os.path.splitext looks only at the final suffix, so entries without a dot no
# longer raise IndexError and names containing several dots are classified by
# their real extension. The comparison ignores case ('JPG' counts as 'jpg').
image_list = [
    image
    for image in os.listdir(f'{data_dir}/images_ps')
    if os.path.splitext(image)[1].lstrip('.').lower() in img_ext
]

len(image_list)

1126

In [4]:
# Initialize dataframe
# The face-identity annotation file supplies the image names used as the row index.
df = pd.read_csv(f'{data_dir}/annotation_data/face/data_faceIdentity.csv').set_index('imageName')

# One column per annotation CSV, named after the file (text before the first dot).
# os.listdir returns entries in arbitrary order, so each folder is sorted to keep
# the column order reproducible; entries that are not CSV files are ignored.
columns = [
    file.split('.')[0]
    for i in segmentation
    for file in sorted(os.listdir(f'{data_dir}/annotation_data/{i}'))
    if file.lower().endswith('.csv')
]

# Empty (all-NaN, object dtype) frame that a later cell fills in cell by cell
all_features = pd.DataFrame(index=df.index, columns=columns)

In [5]:
# put data into initialized dataframe and save it
for i in segmentation:
    for file in os.listdir(f'{data_dir}/annotation_data/{i}'):
        if not file.lower().endswith('.csv'):
            continue  # skip stray entries that are not annotation CSVs
        filename = file.split('.')[0]
        df = pd.read_csv(f'{data_dir}/annotation_data/{i}/{file}').set_index('imageName')
        for j, row in df.iterrows():
            # .at addresses exactly one cell, so the whole list is stored as a
            # single object instead of being interpreted as several values
            all_features.at[j, filename] = list(row)

# Written next to the per-segment folders, under the configured data directory
all_features.to_csv(f'{data_dir}/annotation_data/data_all.csv')

In [6]:
# define function to get the aggregated value of the ratings
def get_aggregated_value(filename):
    """Return the per-image mean of one trait-rating CSV.

    The trait name is the part of the file's base name before the first
    underscore. The CSV is read with ``imageName`` as the index and the
    remaining columns are averaged row-wise, ignoring missing entries.

    Parameters
    ----------
    filename : str
        Path to the rating CSV.

    Returns
    -------
    pandas.DataFrame
        A single column named after the trait, indexed by ``imageName``. Each
        value is the row sum divided by the number of non-missing entries in
        that row (NaN when a row has no entries at all).
    """
    # os.path.basename honours the platform's path separator, not only '/'
    trait_name = os.path.basename(filename).split('_')[0]
    ratings = pd.read_csv(filename).set_index('imageName')
    # Named `total` / `n_rated` so the builtin sum() is not shadowed
    total = ratings.sum(axis=1)
    n_rated = ratings.count(axis=1)
    return (total / n_rated).to_frame(name=trait_name)

In [7]:
# goes through the data for all ratings and collects the aggregated value for each image
trait_dir = '../data/rawdata_trait_rating/'
# os.listdir returns entries in arbitrary order; sorting makes the column order,
# and therefore the order of values inside the target vector, reproducible.
trait_files = sorted(name for name in os.listdir(trait_dir) if name.lower().endswith('.csv'))
trait_frames = [get_aggregated_value(trait_dir + name) for name in trait_files]

# create dataframe from the aggregated values of each image and then put them into one column training target
traits = pd.concat(trait_frames, axis=1)
trait_names = list(traits.columns)
# The target column is named after the list of traits it contains. The values are
# built from that same list, so the name and the value order always agree.
new_column = str(trait_names)
traits[new_column] = traits.apply(lambda row: [row[name] for name in trait_names], axis=1)
traits = traits.get([new_column])

In [8]:
# Join the feature frame and the target frame on their shared index.
# merge's default inner join keeps only the index values present in both.
all_data = pd.merge(all_features, traits, left_index=True, right_index=True)

# Define a function to convert list elements to floats
def convert_list_elements_to_float(cell):
    """Turn a list cell into a float32 NumPy array; return anything else unchanged."""
    return np.array(cell, dtype=np.float32) if isinstance(cell, list) else cell

# Run the converter over every cell so list-valued cells become float32 arrays
converted = all_data.map(convert_list_elements_to_float)
all_data = converted

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# separate the training data (every column except the last) from the target label
X = all_data.iloc[:, :-1]

# dataframe to store scaled data, sharing the row index of the features
scaled_X = pd.DataFrame(index=X.index)

# apply Min-Max scaling to training data, one feature column at a time
# NOTE(review): each scaler is fit on all rows, before any train/test split is made
for feature in X.columns:
    scaler = MinMaxScaler()
    # stack the per-image vectors of this feature into one 2-D matrix
    stacked = np.vstack(X[feature])
    scaled = scaler.fit_transform(stacked)
    # store each scaled row back as a plain list, one list per image
    scaled_X[feature] = [list(row) for row in scaled]

In [10]:
from sklearn.metrics.pairwise import cosine_similarity

def cosine_sim(a, b):
    """Return the cosine similarity of two vectors as a single number.

    Both inputs are flattened into one-row matrices because
    `cosine_similarity` works on 2-D inputs; the only entry of the resulting
    1x1 matrix is returned.
    """
    row_a = np.array(a).reshape(1, -1)
    row_b = np.array(b).reshape(1, -1)
    similarity_matrix = cosine_similarity(row_a, row_b)
    return similarity_matrix[0][0]

In [11]:
# get the labels: the last column of all_data, exposed under the name 'label'
y = all_data.iloc[:, -1].to_frame('label')

# concatenate all the features together into one flat vector per image.
# Only the feature columns are read, so re-running this cell does not fold an
# already created 'input' column back into itself. The nested comprehension
# flattens in linear time, whereas sum(lists, []) re-copies the accumulated
# list at every step.
feature_columns = [column for column in scaled_X.columns if column != 'input']
scaled_X['input'] = scaled_X[feature_columns].apply(
    lambda row: np.array([value for cell in row for value in cell]), axis=1
)
scaled_all = scaled_X.get(['input']).merge(y, left_index=True, right_index=True).reset_index()
scaled_all

Unnamed: 0,imageName,input,label
0,obj365_train_000000121119_000.jpg,"[0.92500126, 0.3313901, 0.48801482, 0.3691098,...","[2.1190476, 4.6666665, 4.2380953, 1.4761904, 3..."
1,COCO_train2014_000000015645.jpg,"[0.5208456, 0.74441016, 0.82499117, 0.7367672,...","[4.9761906, 5.452381, 5.2619047, 6.404762, 4.7..."
2,openimages_9375bad4f7fb7f07_000.jpg,"[0.3224957, 0.56463706, 0.54845953, 0.4655692,...","[5.418605, 4.604651, 3.1860466, 6.395349, 3.90..."
3,obj365_train_000000152196_000.jpg,"[0.41080356, 0.8314114, 0.8082274, 0.56753117,...","[5.142857, 4.642857, 3.7380953, 6.6904764, 4.0..."
4,obj365_train_000000124687_000_v2.jpg,"[0.66848725, 0.5300725, 0.5294587, 0.68622065,...","[2.139535, 3.6046512, 3.2790697, 1.3023256, 3...."
...,...,...,...
1120,openimages_760d6765c8fd2994_000.jpg,"[0.17559308, 0.38829413, 0.54677373, 0.5846894...","[2.4186046, 4.7906976, 3.5581396, 5.4418607, 3..."
1121,obj365_train_000000295170_000.jpg,"[0.63697493, 0.59561455, 0.36056772, 0.3848691...","[2.4651163, 4.581395, 5.023256, 1.3953488, 3.2..."
1122,obj365_train_000000634686_000.jpg,"[0.58380586, 0.7148745, 0.8758831, 0.69453996,...","[1.8837209, 4.2619047, 3.3953488, 1.6046512, 4..."
1123,COCO_val2014_000000509192.jpg,"[0.17919587, 0.6266786, 0.23679179, 0.39790532...","[3.511628, 4.5116277, 3.5581396, 6.0465117, 3...."


In [12]:
import json

def convert_numpy(obj):
    """Map NumPy values to plain Python equivalents.

    Arrays become (nested) lists, NumPy integers become `int` and NumPy
    floats become `float`. Objects of any other type are returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    return obj
    
# Write one JSON line per unordered image pair (i < j): both image names, both
# input vectors, and the element-wise label difference (first minus second).
# The columns are pulled out of the frame once up front instead of calling
# scaled_all.iloc[...] twice for every pair inside the quadratic loop.
image_names = list(scaled_all['imageName'])
input_vectors = [list(vector) for vector in scaled_all['input']]
label_vectors = list(scaled_all['label'])

with open('../data/all_image_pairs.jsonl', 'w') as file:
    for i in range(len(image_names)):
        for j in range(i + 1, len(image_names)):
            json_line = json.dumps({
                'imageNames': (image_names[i], image_names[j]),
                'input': (input_vectors[i], input_vectors[j]),
                'label': list(label_vectors[i] - label_vectors[j])
            }, default=convert_numpy)  # default= converts the NumPy scalars json cannot encode
            file.write(json_line + '\n')

In [None]:
# The file holds one JSON object per line, which pandas reads with `lines=True`
# (the previous `line=True` is not a read_json keyword and raised a TypeError).
pairs_df = pd.read_json('../data/all_image_pairs.jsonl', lines=True)
pairs_df

Unnamed: 0,imageNames,input,label
0,"('obj365_train_000000121119_000.jpg', 'COCO_tr...","(array([1.8500025 , 0.6627802 , 0.97602963, .....",[-2.857143 -0.7857146 -1.0238094 -4.928571...
1,"('obj365_train_000000121119_000.jpg', 'openima...","(array([1.8500025 , 0.6627802 , 0.97602963, .....",[-3.2995572 0.06201553 1.0520487 -4.919158...
2,"('obj365_train_000000121119_000.jpg', 'obj365_...","(array([1.8500025 , 0.6627802 , 0.97602963, .....",[-3.0238094 0.02380943 0.5 -5.214286...
3,"('obj365_train_000000121119_000.jpg', 'obj365_...","(array([1.8500025 , 0.6627802 , 0.97602963, .....",[-0.02048731 1.0620153 0.9590256 0.173864...
4,"('obj365_train_000000121119_000.jpg', 'COCO_tr...","(array([1.8500025 , 0.6627802 , 0.97602963, .....",[-1.2619047 0.9523809 0.40476203 0.142857...
...,...,...,...
632245,"('obj365_train_000000295170_000.jpg', 'COCO_va...","(array([1.2739499 , 1.1912291 , 0.72113544, .....",[-1.0465117 0.06976748 1.4651163 -4.651163...
632246,"('obj365_train_000000295170_000.jpg', 'COCO_tr...","(array([1.2739499 , 1.1912291 , 0.72113544, .....",[-1.0586932 0.31949043 1.8089702 -0.985603...
632247,"('obj365_train_000000634686_000.jpg', 'COCO_va...","(array([1.1676117 , 1.429749 , 1.7517662 , .....",[-1.627907 -0.24972296 -0.16279078 -4.44186 ...
632248,"('obj365_train_000000634686_000.jpg', 'COCO_tr...","(array([1.1676117 , 1.429749 , 1.7517662 , .....",[-1.6400886 0. 0.18106318 -0.776301...


In [None]:
# split the pairs into training (70%), validation (15%) and test (15%) sets
pair_inputs = pairs_df[['input']]
pair_labels = pairs_df[['label']]
X_train, X_test, y_train, y_test = train_test_split(pair_inputs, pair_labels, test_size=0.3, random_state=42)
# the 30% hold-out is halved: one half becomes validation, the other stays as test
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

# NOTE(review): the splits are single-column DataFrames at this point, while the
# tensor cell further down passes them straight to torch.tensor -- confirm which
# conversion to numeric arrays is intended before that step.

# peek at the first training input
X_train.iloc[0].loc['input']

ModuleNotFoundError: No module named 'numpy.dtype'

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f'Using device: {device}')

Using device: cuda


In [12]:
# Convert every split to a float32 tensor. Only the label tensors are moved to
# `device` here; the feature tensors stay where torch.tensor creates them.
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(split, dtype=torch.float32) for split in (X_train, X_val, X_test)
)
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.float32).to(device) for split in (y_train, y_val, y_test)
)

# create DataLoader: pair each feature tensor with its labels, then batch
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 32
# only the training data is reshuffled every epoch
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [13]:
# model definition
class NeuralNet(nn.Module):
    """Fully connected network with three regularised hidden blocks and a linear head.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout(0.5), with
    widths 128, 64 and 32. The final Linear maps 32 features to `output_size`.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        width1, width2, width3 = 128, 64, 32
        drop_prob = 0.5

        # Attribute names and creation order are kept as they were: they define
        # the state_dict keys and the order in which layers are registered.
        self.fc1 = nn.Linear(input_size, width1)
        self.bn1 = nn.BatchNorm1d(width1)
        self.dropout1 = nn.Dropout(drop_prob)

        self.fc2 = nn.Linear(width1, width2)
        self.bn2 = nn.BatchNorm1d(width2)
        self.dropout2 = nn.Dropout(drop_prob)

        self.fc3 = nn.Linear(width2, width3)
        self.bn3 = nn.BatchNorm1d(width3)
        self.dropout3 = nn.Dropout(drop_prob)

        self.fc4 = nn.Linear(width3, output_size)

    def forward(self, x):
        """Run `x` through the three hidden blocks, then the output layer."""
        hidden_blocks = (
            (self.fc1, self.bn1, self.dropout1),
            (self.fc2, self.bn2, self.dropout2),
            (self.fc3, self.bn3, self.dropout3),
        )
        for linear, norm, dropout in hidden_blocks:
            x = dropout(torch.relu(norm(linear(x))))
        return self.fc4(x)

# model definition
class NeuralNet2(nn.Module):
    """Plain three-layer perceptron: input -> 128 -> 64 -> output, ReLU between layers."""

    def __init__(self, input_size, output_size):
        super().__init__()
        first_width, second_width = 128, 64
        self.fc1 = nn.Linear(input_size, first_width)
        self.fc2 = nn.Linear(first_width, second_width)
        self.fc3 = nn.Linear(second_width, output_size)

    def forward(self, x):
        """Apply the two ReLU-activated hidden layers, then the linear output layer."""
        for hidden_layer in (self.fc1, self.fc2):
            x = torch.relu(hidden_layer(x))
        return self.fc3(x)
    
def weights_init(m):
    """Initialise an `nn.Linear` module in place; leave other modules untouched.

    Intended for `model.apply(weights_init)`. Linear weights get Xavier-uniform
    values and the bias, when the layer has one, is set to zero.
    """
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        # Layers built with bias=False carry `bias = None`, which zeros_ cannot handle
        if m.bias is not None:
            nn.init.zeros_(m.bias)

In [14]:
# Siamese Network Definition
class SiameseNetwork(nn.Module):
    """Encode both members of an input pair with one shared NeuralNet2."""

    def __init__(self, input_size, output_size):
        super().__init__()
        # A single sub-network is stored, so both branches use the same weights
        self.shared_net = NeuralNet2(input_size, output_size)

    def forward_once(self, x):
        """Encode a single input with the shared network."""
        embedding = self.shared_net(x)
        return embedding

    def forward(self, input1, input2):
        """Return the encodings of `input1` and `input2` as a 2-tuple, in that order."""
        first = self.forward_once(input1)
        second = self.forward_once(input2)
        return first, second

# Contrastive Loss Definition
class ContrastiveLoss(nn.Module):
    """Margin-based contrastive loss over pairs of embeddings.

    For every pair the Euclidean distance `d` between the two embeddings is
    computed. A pair with label 0 contributes `d**2`; a pair with label 1
    contributes `max(margin - d, 0)**2`. The result is the mean over all pairs.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        """Return the mean contrastive loss for the given embeddings and labels."""
        distance = F.pairwise_distance(output1, output2)
        # label 0: penalise any distance between the two embeddings
        pull_term = (1 - label) * distance.pow(2)
        # label 1: penalise only pairs that are closer than the margin
        shortfall = torch.clamp(self.margin - distance, min=0.0)
        push_term = label * shortfall.pow(2)
        return (pull_term + push_term).mean()

In [15]:
# Network dimensions are read from the tensors: size of axis 1 of the training
# features for the input layer, and of the training labels for the output layer
input_size, output_size = X_train_tensor.shape[1], y_train_tensor.shape[1]

In [16]:
def _split_into_halves(batch):
    """Return the first and second half of `batch`, trimmed to equal length.

    With an odd number of rows the last row is dropped so the two halves can
    be compared row by row without a shape mismatch.
    """
    half = batch.shape[0] // 2
    return batch[:half], batch[half:2 * half]


model = SiameseNetwork(input_size, output_size).to(device)
model.apply(weights_init)
criterion = ContrastiveLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Add learning rate scheduler: cut the LR tenfold after 30 epochs without a better validation loss
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=30)
best_loss = float('inf')  # Initialize to infinity

# Training loop
num_epochs = 500

# Early stopping parameters
early_stop_patience = 100
no_improvement_count = 0

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    train_batches = 0
    for X_batch, _ in train_loader:
        X_batch_1, X_batch_2 = _split_into_halves(X_batch.to(device))
        if X_batch_1.shape[0] == 0:
            continue  # a one-row batch cannot form a pair

        # NOTE(review): placeholder labels drawn at random, not derived from the data.
        # ContrastiveLoss treats 0 as "pull the pair together" and 1 as "push it apart".
        labels = torch.randint(0, 2, (X_batch_1.shape[0],)).to(device)

        output1, output2 = model(X_batch_1, X_batch_2)
        loss = criterion(output1, output2, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        train_batches += 1

    training_loss = epoch_loss / max(train_batches, 1)

    # Validation step
    model.eval()
    val_loss = 0.0
    val_batches = 0
    with torch.no_grad():
        for X_batch, _ in val_loader:
            X_batch_1, X_batch_2 = _split_into_halves(X_batch.to(device))
            if X_batch_1.shape[0] == 0:
                continue

            # NOTE(review): validation labels are random as well, so this loss --
            # which drives checkpointing, the scheduler and early stopping --
            # varies between epochs for reasons unrelated to the model.
            labels = torch.randint(0, 2, (X_batch_1.shape[0],)).to(device)

            output1, output2 = model(X_batch_1, X_batch_2)
            loss = criterion(output1, output2, labels)
            val_loss += loss.item()
            val_batches += 1

    validation_loss = val_loss / max(val_batches, 1)

    if (epoch + 1) % 20 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {training_loss:.4f}, Validation loss: {validation_loss}')

    # Save the model if it has the best loss so far
    if validation_loss < best_loss:
        best_loss = validation_loss
        torch.save(model.state_dict(), 'best_model_siamese.pth')
        no_improvement_count = 0  # Reset counter
    else:
        no_improvement_count += 1

    # Step the scheduler
    scheduler.step(validation_loss)

    # Early stopping
    if no_improvement_count >= early_stop_patience:
        print(f"Early stopping at epoch {epoch+1}")
        break

Epoch [20/500], Training Loss: 0.2719, Validation loss: 0.23856769502162933
Epoch [40/500], Training Loss: 0.2700, Validation loss: 0.29110442847013474
Epoch [60/500], Training Loss: 0.2835, Validation loss: 0.29173924028873444
Epoch [80/500], Training Loss: 0.2779, Validation loss: 0.28698649629950523
Epoch [100/500], Training Loss: 0.2739, Validation loss: 0.2808472290635109
Epoch [120/500], Training Loss: 0.2627, Validation loss: 0.26927024871110916
Epoch [140/500], Training Loss: 0.2677, Validation loss: 0.3036064878106117
Epoch [160/500], Training Loss: 0.2541, Validation loss: 0.2695915587246418
Early stopping at epoch 173


In [17]:
# import torch
# import torch.nn.functional as F
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc
# import numpy as np

# # Assume siamese_net is your trained Siamese network
# # input1, input2, and pair_labels are your test data and labels

# # Forward pass
# output1, output2 = siamese_net(input1, input2)

# # Calculate Euclidean distances
# distances = F.pairwise_distance(output1, output2)

# # Determine an optimal threshold
# # For simplicity, we can use a fixed threshold (e.g., 0.5), but in practice, you might want to tune this
# threshold = 0.5

# # Predict labels based on the threshold
# predicted_labels = (distances < threshold).float()

# # Convert tensors to numpy arrays for metric calculation
# pair_labels_np = pair_labels.cpu().numpy()
# predicted_labels_np = predicted_labels.cpu().numpy()

# # Calculate evaluation metrics
# accuracy = accuracy_score(pair_labels_np, predicted_labels_np)
# precision = precision_score(pair_labels_np, predicted_labels_np)
# recall = recall_score(pair_labels_np, predicted_labels_np)
# f1 = f1_score(pair_labels_np, predicted_labels_np)

# # Calculate ROC curve and AUC
# fpr, tpr, _ = roc_curve(pair_labels_np, distances.cpu().numpy())
# roc_auc = auc(fpr, tpr)

# # Print metrics
# print(f'Accuracy: {accuracy:.4f}')
# print(f'Precision: {precision:.4f}')
# print(f'Recall: {recall:.4f}')
# print(f'F1-Score: {f1:.4f}')
# print(f'AUC: {roc_auc:.4f}')

# # Plot ROC curve
# import matplotlib.pyplot as plt

# plt.figure()
# plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.4f})')
# plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('Receiver Operating Characteristic')
# plt.legend(loc='lower right')
# plt.show()


Ideal Loss: 0.01~0.1225