In [13]:
import pandas as pd
from torch.utils.data import Dataset, Subset, DataLoader
# from torchvision import datasets, models, transforms
import torch
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import time
from pathlib import Path

torch.manual_seed(42)

num_epochs = 30  # each epoch is one pass over the whole dataset
batch_size = 128  # how many samples per training step (32,64,128)
num_workers = 7  # how many CPU cores to use

n_features = 1000 # How many features per image



In [2]:
class FoodTuples(Dataset):
    """
    Class to load food tuples. Individual items consist of:
     - x: concatenated image features of a tuple (AB)
     - y: corresponding label (1: is similar 0: not similar, None: no label)
    """
    def __init__(self, features_file, triplets_file):
        print("initializing " + str(triplets_file) + " dataset...")
        self.img_features = pd.read_csv("data/" + features_file, header=None, index_col=0)
        self.triplets = pd.read_csv("data/" + triplets_file, sep=" ", header=None).to_numpy()
        
        n_triplets = self.triplets.shape[0]
        n_features = self.img_features.shape[1]
        
        self.tuples = np.zeros((2 * n_triplets, 2 * n_features ), dtype=np.int32)
        self.labels = np.zeros((2 * n_triplets, 1 ), dtype=np.bool)
        
         
        # For each triplet add 2  variations
        idx = 0
        for triplet in  self.triplets:
            a, b, c = triplet[0], triplet[1], triplet[2]
            a_features = self.img_features.loc[[a]].to_numpy(dtype=np.float32)
            b_features = self.img_features.loc[[b]].to_numpy(dtype=np.float32)
            c_features = self.img_features.loc[[c]].to_numpy(dtype=np.float32)

            # AB = 1, BA = 1
            if np.random.random() < 0.5:
                self.tuples[idx, :] =  np.squeeze(np.concatenate((a_features, b_features), axis=1))
                self.labels[idx,0] = 1
            else:
                self.tuples[idx, :] =  np.squeeze(np.concatenate((b_features, a_features), axis=1))
                self.labels[idx,0] = 1

            # AC = 0, CA = 0
            if np.random.random() < 0.5:
                self.tuples[idx+1, :] =  np.squeeze(np.concatenate((c_features, a_features), axis=1))
                self.labels[idx+1,0] = 0
            else:
                self.tuples[idx+1, :] =  np.squeeze(np.concatenate((a_features, c_features), axis=1))
                self.labels[idx+1,0] = 0
            idx += 2

        print("done")

    def __len__(self):
        return self.tuples.shape[0]

    def __getitem__(self, idx):
        features = self.tuples[idx]
        label = self.labels[idx]
        return {"x": features, "y": label}
    
 
    
class FoodTriplets(Dataset):
    """
    Class to load food triplets. Individual items consist of:
     - x: concatenated image features of a triplet (ABC)
     - y: corresponding label (1: B is more similar, 0: C is more similar, None: no label)
    """
    def __init__(self, features_file, triplets_file, is_labelled_data=False, train=False, extend=False):
        print("initializing " + str(triplets_file) + " dataset...")
        self.img_features = pd.read_csv("data/" + features_file, header=None, index_col=0)
        self.triplets = pd.read_csv("data/" + triplets_file, sep=" ", header=None).to_numpy()
        self.is_labelled_data = is_labelled_data
        self.labels = None
        self.extend = extend
 
        print("done")

    def __len__(self):
        return len(self.triplets)

    def __getitem__(self, idx):
        triplet = self.triplets[idx]
        a, b, c = triplet[0], triplet[1], triplet[2]
        a_features = self.img_features.loc[[a]].to_numpy(dtype=np.float32)
        b_features = self.img_features.loc[[b]].to_numpy(dtype=np.float32)
        c_features = self.img_features.loc[[c]].to_numpy(dtype=np.float32)
        features = np.squeeze(np.concatenate((a_features, b_features, c_features), axis=1))
        label = 0  # dummy label
        if self.is_labelled_data:
            label = np.array([self.labels[idx]])
        return {"x": features, "y": label}
    
 

 
class SimilarityNetwork(nn.Module):
    def __init__(self):
        super().__init__()

        # 2000
        # 1000
        # 500
        # 250
        # 1
        
        nin = 2000
        n1 = 1000
        n2 = 500
        n3 = 250
#         n4 = int(n_features/4)
#         n5 = int(n_features/8)
        nout = 1

        p_dropout1 = 0.3
        p_dropout2 = 0.5
        p_dropout3 = 0.5
        p_dropout4 = 0.5
        p_dropout5 = 0.2

        self.classifier = nn.Sequential(
            nn.Linear(in_features=nin, out_features=n1),
            nn.BatchNorm1d(n1),
            nn.ReLU(inplace=True),
            nn.Dropout(p=p_dropout1, inplace=False),
            nn.Linear(in_features=n1, out_features=n2),
            nn.BatchNorm1d(n2),
            nn.ReLU(inplace=True),
            nn.Dropout(p=p_dropout2, inplace=False),
            nn.Linear(in_features=n2, out_features=n3),
            nn.BatchNorm1d(n3),
            nn.ReLU(inplace=True),
            nn.Dropout(p=p_dropout3, inplace=False),
            nn.Linear(in_features=n3, out_features=nout),
#             nn.BatchNorm1d(n4),
#             nn.ReLU(inplace=True),
#             nn.Dropout(p=p_dropout4, inplace=False),
#             nn.Linear(in_features=n4, out_features=n5),
#             nn.ReLU(inplace=True),
#             nn.BatchNorm1d(n5),
#             nn.Dropout(p=p_dropout5, inplace=False),
#             nn.Linear(in_features=n5, out_features=nout),
            nn.Sigmoid()
        )


    def forward(self, x):
        logits = self.classifier(x)
        return logits
    
    
def get_model(device):
    return SimpleNetwork().to(device)

class SimpleNetwork(nn.Module):
    def __init__(self):
        super().__init__()

        nin = 3*n_features
        n1 = 2*n_features
        n2 = n_features
        n3 = int(n_features/2)
        n4 = int(n_features/4)
        n5 = int(n_features/8)
        nout = 1

        p_dropout1 = 0.3
        p_dropout2 = 0.5
        p_dropout3 = 0.5
        p_dropout4 = 0.5
        p_dropout5 = 0.2

        self.classifier = nn.Sequential(
            nn.Linear(in_features=nin, out_features=n1),
            nn.BatchNorm1d(n1),
            nn.ReLU(inplace=True),
            nn.Dropout(p=p_dropout1, inplace=False),
            nn.Linear(in_features=n1, out_features=n2),
            nn.BatchNorm1d(n2),
            nn.ReLU(inplace=True),
            nn.Dropout(p=p_dropout2, inplace=False),
            nn.Linear(in_features=n2, out_features=n3),
            nn.BatchNorm1d(n3),
            nn.ReLU(inplace=True),
            nn.Dropout(p=p_dropout3, inplace=False),
            nn.Linear(in_features=n3, out_features=n4),
            nn.BatchNorm1d(n4),
            nn.ReLU(inplace=True),
            nn.Dropout(p=p_dropout4, inplace=False),
            nn.Linear(in_features=n4, out_features=n5),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(n5),
            nn.Dropout(p=p_dropout5, inplace=False),
            nn.Linear(in_features=n5, out_features=nout),
            nn.Sigmoid()
        )
        
        
      
    def forward(self, x):
        logits = self.classifier(x)
        return logits

In [3]:
# Specify some file names
image_features_file = "train_image_features_mobilenet_v3_small.csv"
train_triplets_file = "train_triplets.txt" #"train_triplets.txt"
test_triplets_file = "test_triplets.txt" #"test_triplets.txt"

 
# initialize datasets
# train_data = FoodTriplets(image_features_file, train_triplets_file, is_labelled_data=True, train=True, extend=False)
# test_data = FoodTriplets(image_features_file, test_triplets_file, is_labelled_data=False, train=False, extend=False)

# initialize datasets
train_data = FoodTuples(image_features_file, train_triplets_file)

test_data = FoodTriplets(image_features_file, test_triplets_file, is_labelled_data=False, train=False, extend=False)



initializing train_triplets.txt dataset...
done
initializing test_triplets.txt dataset...
done


In [4]:
# Split train set into train and test set to assess accuracy on unused set
l_train = len(train_data)
val_size = int(0.7 * l_train + 1)
indices = list(range(l_train))
np.random.shuffle(indices)
val_indices, t_indices = indices[:val_size], indices[val_size:]

len(val_indices)
len(t_indices)


train_trainloader = DataLoader(torch.utils.data.Subset(train_data, t_indices), batch_size=batch_size, shuffle=True, num_workers=num_workers)
train_testloader = DataLoader(torch.utils.data.Subset(train_data, val_indices), batch_size=batch_size, shuffle=True, num_workers=num_workers)

test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False, num_workers=num_workers)



In [14]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# device = 'cpu'

model = SimilarityNetwork().to(device)


criterion = nn.BCELoss()
# optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.01, nesterov=True)
optimizer = optim.Adam(model.parameters(), lr=0.01)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [16]:
model.train()

num_epochs = 100  # each epoch is one pass over the whole dataset

for epoch in range(0,num_epochs):
    
    
    # TRAINING
    model.train()
    
    train_loss = []

    for i, data in enumerate(train_trainloader, 1):
        inputs =  data["x"].float()
        labels =  data["y"].float()
        
        inputs = inputs.to(device)
        labels = labels.to(device)
                
        # zero the parameter gradients
        optimizer.zero_grad()  # all the tensors have .grad attribute
        
        # forward propagation
        logits = model(inputs) # forward propagation
        loss = criterion(logits, labels) # computing the loss for predictions
        
        # Backward propagation
        loss.backward() # backpropgation
        # Optimization step.
        optimizer.step() # applying an optimization step
        
        train_loss.append(loss)
 
    
    
    # ACCURACY ON TRAINING DATA
    model.eval()
    
    train_accuracy = []
    
    for i, data in enumerate(train_trainloader, 1):
#         end = time.time()
        
        inputs =  data["x"].float()
        labels =  data["y"].float()
        inputs = inputs.to(device)
        labels = labels.to(device)

        bs = inputs.size(0)

        with torch.no_grad():
            logits = model(inputs)
            loss = criterion(logits, labels)

            acc = ((torch.round(logits) == labels).sum().float() / bs).float()
            train_accuracy.append(acc)
    
    # TESTING
    model.eval()
    
    test_accuracy = []
    
    for i, data in enumerate(train_testloader, 1):        
        inputs =  data["x"].float()
        labels =  data["y"].float()
        inputs = inputs.to(device)
        labels = labels.to(device)

        bs = inputs.size(0)

        with torch.no_grad():
            logits = model(inputs)
            loss = criterion(logits, labels)
            
            acc = ((torch.round(logits) == labels).sum().float() / bs).float()
            test_accuracy.append(acc)

    train_accuracy_epoch = torch.mean(torch.stack(train_accuracy)).item()
    test_accuracy_epoch = torch.mean(torch.stack(test_accuracy)).item()
    
    print(f'Epoch {epoch} \t Train_acc: {train_accuracy_epoch:.3f} \t Test_acc: {test_accuracy_epoch:.3f}')
    

Epoch 0 	 Train_acc: 0.884 	 Test_acc: 0.580
Epoch 1 	 Train_acc: 0.993 	 Test_acc: 0.627
Epoch 2 	 Train_acc: 0.980 	 Test_acc: 0.618
Epoch 3 	 Train_acc: 0.945 	 Test_acc: 0.609
Epoch 4 	 Train_acc: 0.733 	 Test_acc: 0.570
Epoch 5 	 Train_acc: 0.954 	 Test_acc: 0.604
Epoch 6 	 Train_acc: 0.919 	 Test_acc: 0.591
Epoch 7 	 Train_acc: 0.982 	 Test_acc: 0.633
Epoch 8 	 Train_acc: 0.979 	 Test_acc: 0.621
Epoch 9 	 Train_acc: 0.963 	 Test_acc: 0.607
Epoch 10 	 Train_acc: 0.993 	 Test_acc: 0.622
Epoch 11 	 Train_acc: 0.948 	 Test_acc: 0.601
Epoch 12 	 Train_acc: 0.926 	 Test_acc: 0.600
Epoch 13 	 Train_acc: 0.896 	 Test_acc: 0.602
Epoch 14 	 Train_acc: 0.983 	 Test_acc: 0.618
Epoch 15 	 Train_acc: 0.946 	 Test_acc: 0.603
Epoch 16 	 Train_acc: 0.958 	 Test_acc: 0.613
Epoch 17 	 Train_acc: 0.971 	 Test_acc: 0.604
Epoch 18 	 Train_acc: 0.981 	 Test_acc: 0.610
Epoch 19 	 Train_acc: 0.985 	 Test_acc: 0.625
Epoch 20 	 Train_acc: 0.679 	 Test_acc: 0.534
Epoch 21 	 Train_acc: 0.969 	 Test_acc: 0.60

In [20]:
model.eval()

n_testdata = len(test_data)
predictions = np.zeros(n_testdata)


for i, data in enumerate(test_dataloader): 
    
    inputs =  data["x"].float()
    inputs = inputs.to(device)

    A_tensor = inputs[0][:1000]
    B_tensor = inputs[0][1000:2000]
    C_tensor = inputs[0][2000:3000]
    
    
    # AB
    AB_tensor = torch.cat((A_tensor, B_tensor), 0)
    AB_tensor = AB_tensor.reshape(1,-1)
        
#     print(AB_tensor.shape)
    
    AC_tensor = torch.cat((A_tensor, C_tensor), 0)
    AC_tensor = AC_tensor.reshape(1,-1)

    output_AB = model(AB_tensor)
    output_AC = model(AC_tensor)
    
#     print(output_AB[0][0].item(),output_AC[0][0].item())
    
#     if i > 20:
#         break
#     break
    if output_AB > output_AC:
        predictions[i] = 1
#         print('AB')
    else:
#         print('AC')
        predictions[i] = 0
    
    if i % 1000 == 0:
        print(f"Predicted: {i}/{n_testdata}")
        
         
        
 
# res = pd.DataFrame(predictions).astype(int)
res = pd.DataFrame(predictions).astype(int)
print("saving predictions...")
res.to_csv('data/predictions_mobilenet_AB_AC.csv', index=False, header=False)
print("done")

res.head(100)



Predicted: 0/59544
Predicted: 1000/59544
Predicted: 2000/59544
Predicted: 3000/59544
Predicted: 4000/59544
Predicted: 5000/59544
Predicted: 6000/59544
Predicted: 7000/59544
Predicted: 8000/59544
Predicted: 9000/59544
Predicted: 10000/59544
Predicted: 11000/59544
Predicted: 12000/59544
Predicted: 13000/59544
Predicted: 14000/59544
Predicted: 15000/59544
Predicted: 16000/59544
Predicted: 17000/59544
Predicted: 18000/59544
Predicted: 19000/59544
Predicted: 20000/59544
Predicted: 21000/59544
Predicted: 22000/59544
Predicted: 23000/59544
Predicted: 24000/59544
Predicted: 25000/59544
Predicted: 26000/59544
Predicted: 27000/59544
Predicted: 28000/59544
Predicted: 29000/59544
Predicted: 30000/59544
Predicted: 31000/59544
Predicted: 32000/59544
Predicted: 33000/59544
Predicted: 34000/59544
Predicted: 35000/59544
Predicted: 36000/59544
Predicted: 37000/59544
Predicted: 38000/59544
Predicted: 39000/59544
Predicted: 40000/59544
Predicted: 41000/59544
Predicted: 42000/59544
Predicted: 43000/59544
P

Unnamed: 0,0
0,1
1,0
2,1
3,0
4,1
...,...
95,1
96,1
97,0
98,1
