Thinking of APR. Are we doing the dual optimization with the classification model + the pertubation added?

To do: think of how to split data to train/test/ tune 80/10/10

In [1]:
#import relevant library

import math
import os
import random
import pickle
import argparse
from collections import deque
import time
from datetime import timedelta
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
# from torchvision import transforms
from torch.utils.data import IterableDataset, DataLoader, get_worker_info
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split



In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [3]:
## time the process
def get_time_dif(start_time):
    """get the running time"""
    end_time = time.time()
    time_dif = end_time - start_time
    return timedelta(seconds=int(round(time_dif)))

In [None]:

## set up the u,i,j triplet for BPR framework
class GetTriplePair(IterableDataset):
    # for ml-1m we load in 3760 item 6040 user and 994169 train pair
    def __init__(self, item_size, user_list, pair, shuffle, num_epochs):
        self.item_size = item_size
        self.user_list = user_list
        self.pair = pair
        self.shuffle = shuffle
        self.num_epochs = num_epochs

    def __iter__(self):
        self.example_size = self.num_epochs * len(self.pair)
        self.example_index_queue = deque([])
        self.seed = 0
        self.start_list_index = None
        self.num_workers = 1
        self.index = 0
        return self

    def __next__(self):
        if self.index >= self.example_size:
            raise StopIteration
        # If `example_index_queue` is used up, replenish this list.
        while len(self.example_index_queue) == 0:
            index_list = list(range(len(self.pair)))
            if self.shuffle:
                random.Random(self.seed).shuffle(index_list)
                self.seed += 1
            if self.start_list_index is not None:
                index_list = index_list[self.start_list_index::self.num_workers]

                # Calculate next start index
                self.start_list_index = (self.start_list_index + (self.num_workers - (len(self.pair) % self.num_workers))) % self.num_workers
            self.example_index_queue.extend(index_list)
        result = self._example(self.example_index_queue.popleft())
        self.index += self.num_workers
        return result

    def _example(self, idx):
        # in a train pair, format = (u,i), j = a random item which does not exist in user u's list of items
        u = self.pair[idx][0]
        i = self.pair[idx][1]
        j = np.random.randint(self.item_size)
        while j in self.user_list[u]:
            j = np.random.randint(self.item_size)
        return u, i, j


In [None]:

## chunk to define matrix factorization part
class fair_reprogram(nn.Module):
    def __init__(self, user_size, item_size, dim, reg, reg_adv, eps):
        super().__init__()
        ##init the embedding for U and I
        self.W = nn.Parameter(torch.empty(user_size, dim))  # User embedding
        self.H = nn.Parameter(torch.empty(item_size, dim))  # Item embedding
        nn.init.xavier_normal_(self.W.data)
        nn.init.xavier_normal_(self.H.data)
        self.reg = reg
        self.user_size = user_size
        self.item_size = item_size
        self.dim = dim
        self.reg_adv = reg_adv
        self.eps = eps
        self.update_u = None
        self.update_i = None
        self.update_j = None

## forward cal, this is the part that
    def forward(self, u, i, j, epoch):

        ##u,i,j respectively, each is a vector of dim embedding (default = 64)
        u = self.W[u, :]
        i = self.H[i, :]
        j = self.H[j, :]

        ## Enables this Tensor to have their grad populated during backward(), convert any non-leaf tensor into a leaf tensor,
        ##https://stackoverflow.com/questions/73698041/how-retain-grad-in-pytorch-works-i-found-its-position-changes-the-grad-result
        u.retain_grad()
        u_clone = u.data.clone()
        i.retain_grad()
        i_clone = i.data.clone()
        j.retain_grad()
        j_clone = j.data.clone()

        ## mf, dot product of user with pos/neg item
        x_ui = torch.mul(u, i).sum(dim=1)
        x_uj = torch.mul(u, j).sum(dim=1)


        #similar to clip value, find diff between ui and uj
        x_uij =torch.clamp(x_ui - x_uj,min=-80.0,max=1e8)
        #logsigmoid this is equivalent to equation 1 in the paper (classic loss of bpr)
        log_prob = F.logsigmoid(x_uij).sum()
        # regularization = lambda * l2 norm of u, i, j
        regularization = self.reg * (u.norm(dim=1).pow(2).sum() + i.norm(dim=1).pow(2).sum() + j.norm(dim=1).pow(2).sum())

        ## original bpr loss,
        loss = -log_prob + regularization

        return loss
        # add adv training after a certain number of epochs, here is the part which we add hypernet module
        if epoch not in range(args.epochs, args.adv_epoch + args.epochs):
            """Normal training"""
            loss.backward()
            return loss

        else:
            """Adversarial training:
                    1.Backward to get grads
                    2.Construct adversarial perturbation
                    3.Add adversarial perturbation to embeddings
                    4.Calculate APR loss
            """
            # Backward to get grads
            # this would be the part we change in defining delta, delta = HPN (phi)

            # should we calculate based on gradient of the adv_loss instead of the loss function?, originally, computed based on loss function
            loss.backward(retain_graph=True) ## need to retain graph here so as to we can backprop the adv_loss
            ##recheck this
            grad_u = u.grad
            grad_i = i.grad
            grad_j = j.grad

            # Construct adversarial perturbation based on gradient of loss function, and normalize it with epsilon * norm
            if grad_u is not None:
                delta_u = nn.functional.normalize(grad_u, p=2, dim=1, eps=self.eps)
            else:
                delta_u = torch.rand(u.size())
            if grad_i is not None:
                delta_i = nn.functional.normalize(grad_i, p=2, dim=1, eps=self.eps)
            else:
                delta_i = torch.rand(i.size())
            if grad_j is not None:
                delta_j = nn.functional.normalize(grad_j, p=2, dim=1, eps=self.eps)
            else:
                delta_j = torch.rand(j.size())

            # Add adversarial perturbation to embeddings, now we have q+delta, p+delta
            x_ui_adv = torch.mul(u + delta_u, i + delta_i).sum(dim=1)
            x_uj_adv = torch.mul(u + delta_u, j + delta_j).sum(dim=1)

            # find difference between pos and neg item, then clip value
            x_uij_adv = torch.clamp(x_ui_adv - x_uj_adv,min=-80.0,max=1e8)

            # Calculate APR loss with logsigmoid
            log_prob = F.logsigmoid(x_uij_adv).sum()
            adv_loss = self.reg_adv *(-log_prob) + loss # this is adversarial loss (equation 4 in paper)
            adv_loss.backward()

            return adv_loss

In [None]:
def fairness_reprogramming(self, u, i, j):
    """Reprogramming phase:
        1.Freeze the user and item embedding -- done by saving checkpoint
        2.Calculate the perturbation to achieve fairness objective
        3.Add perturbation to the alr frozen embedding
        4.Calculate the overall loss function after update
    """

    # Initialize a fix random perturbation
    perturbation = torch.rand(1)
    
    #load user and item embedding, which has been trained in BPR
    u = list(model1.items())[0][1]
    i = list(model1.items())[1][1][i]
    j = list(model1.items())[1][1][j]
        
    # Add adversarial perturbation to embeddings, now we have q+delta, p+delta
    x_ui_adv = torch.mul(u , i + perturbation).sum(dim=1)
    x_uj_adv = torch.mul(u , j + perturbation).sum(dim=1)

    # find difference between pos and neg item, then clip value
    x_uij_adv = torch.clamp(x_ui_adv - x_uj_adv,min=-80.0,max=1e8)

    # Calculate loss with perturbed embedding with logsigmoid
    log_prob = F.logsigmoid(x_uij_adv).sum()
            
    #set up an adversary to identify group of items
    adversary_rs        
            
    # modify the adversarial loss here
    adv_loss = self.reg_adv *(-log_prob) + loss # this is adversarial loss (equation 4 in paper)
    adv_loss.backward()

    return adv_loss

In [142]:
#load the results of BPR
model1 = (torch.load('models/01_pytorch_workflow_model_1.pth'))
list(model1.items())[0][1].size()

torch.Size([6040, 64])

In [94]:
list(model1.items())[0][1]

tensor([[ 9.3768e-04, -4.2642e-02,  2.0659e-02,  ..., -2.2226e-02,
          3.1339e-02, -3.3840e-02],
        [ 1.5390e-04, -3.8164e-03,  1.1938e-02,  ..., -1.9504e-03,
         -5.5167e-03,  2.5199e-02],
        [-1.7836e-03, -3.0050e-02, -3.0792e-02,  ...,  1.1926e-02,
         -1.7736e-02,  6.9641e-03],
        ...,
        [-3.4588e-02, -1.5128e-03, -5.1930e-02,  ...,  3.3526e-02,
         -3.9308e-03,  1.3962e-02],
        [ 1.6954e-05, -1.2272e-02,  3.1606e-02,  ..., -2.2976e-02,
         -1.5850e-03, -2.3360e-02],
        [ 1.0627e-02, -2.0485e-02, -3.4491e-02,  ..., -1.7362e-02,
          3.7945e-03,  7.2581e-03]])

In [156]:
#goal = input y and yhat and return the group of the item?

# Build model
class adversary_rs(nn.Module):
    def __init__(self, input_features, output_features, hidden_units=8):
        """Initializes all required hyperparameters for a multi-class classification model.

        Args:
            input_features (int): Number of input features to the model.
            out_features (int): Number of output features of the model
              (how many classes there are).
            hidden_units (int): Number of hidden units between layers, default 8.
        """
        super().__init__()
        self.linear_layer_stack = nn.Sequential(
            nn.Linear(in_features=input_features, out_features=hidden_units),
            # nn.ReLU(), # <- does our dataset require non-linear layers? (try uncommenting and see if the results change)
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            # nn.ReLU(), # <- does our dataset require non-linear layers? (try uncommenting and see if the results change)
            nn.Linear(in_features=hidden_units, out_features=output_features), # how many classes are there?
        )
    
    def forward(self, x):
        return self.linear_layer_stack(x)

# Create an instance of BlobModel and send it to the target device
mlp = adversary_rs(input_features=1, 
                    output_features=6, 
                    hidden_units=500).to(device)


In [152]:
 # Define the loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)
  # Prepare CIFAR-10 dataset
trainloader = torch.utils.data.DataLoader(X, batch_size=512, shuffle=True, num_workers=1)


In [153]:
# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100 
    return acc

In [154]:
# Fit the model
torch.manual_seed(42)

# Set number of epochs
epochs = 1000

test_df = pd.read_csv('test_adversary2.dat', sep=',', encoding="utf-8",engine='python')
test_df = test_df[['genres', 'rating']]

X = torch.tensor(test_df['rating'].values).type(torch.float)
Y = torch.tensor(test_df['genres'].astype('category').cat.codes).type(torch.LongTensor)   


X_blob_train, X_blob_test, y_blob_train, y_blob_test = train_test_split(X,
    Y,
    test_size=0.2,
    random_state=181
)

# Put data to target device
X_blob_train, y_blob_train = X_blob_train.to(device).view(-1,1), y_blob_train.to(device)
X_blob_test, y_blob_test = X_blob_test.to(device).view(-1,1), y_blob_test.to(device)

for epoch in range(epochs):
    ### Training
    mlp.train()

    # 1. Forward pass
    y_logits = mlp(X_blob_train) # model outputs raw logits 
    y_pred = torch.softmax(y_logits, dim=1).argmax(dim=1) # go from logits -> prediction probabilities -> prediction labels
    # print(y_logits)
    # 2. Calculate loss and accuracy
    loss = loss_fn(y_logits, y_blob_train) 
    acc = accuracy_fn(y_true=y_blob_train,
                      y_pred=y_pred)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    ### Testing
    mlp.eval()
    with torch.inference_mode():
      # 1. Forward pass
        test_logits = mlp(X_blob_test)
        test_pred = torch.softmax(test_logits, dim=1).argmax(dim=1)
      # 2. Calculate test loss and accuracy
        test_loss = loss_fn(test_logits, y_blob_test)
        test_acc = accuracy_fn(y_true=y_blob_test,
                             y_pred=test_pred)

    # Print out what's happening
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Acc: {acc:.2f}% | Test Loss: {test_loss:.5f}, Test Acc: {test_acc:.2f}%") 

Epoch: 0 | Loss: 1.73429, Acc: 40.30% | Test Loss: 1.47605, Test Acc: 44.84%
Epoch: 10 | Loss: 1.42034, Acc: 44.80% | Test Loss: 1.40173, Test Acc: 44.84%
Epoch: 20 | Loss: 1.39443, Acc: 45.17% | Test Loss: 1.38122, Test Acc: 45.13%
Epoch: 30 | Loss: 1.37248, Acc: 45.83% | Test Loss: 1.35702, Test Acc: 46.02%
Epoch: 40 | Loss: 1.35933, Acc: 47.01% | Test Loss: 1.34779, Test Acc: 46.02%
Epoch: 50 | Loss: 1.35134, Acc: 48.19% | Test Loss: 1.34180, Test Acc: 48.67%
Epoch: 60 | Loss: 1.34569, Acc: 47.82% | Test Loss: 1.33921, Test Acc: 48.38%
Epoch: 70 | Loss: 1.34156, Acc: 47.97% | Test Loss: 1.33752, Test Acc: 48.97%
Epoch: 80 | Loss: 1.33857, Acc: 48.12% | Test Loss: 1.33625, Test Acc: 48.67%
Epoch: 90 | Loss: 1.33634, Acc: 48.12% | Test Loss: 1.33610, Test Acc: 48.67%
Epoch: 100 | Loss: 1.33465, Acc: 48.12% | Test Loss: 1.33587, Test Acc: 48.67%
Epoch: 110 | Loss: 1.33335, Acc: 48.12% | Test Loss: 1.33594, Test Acc: 48.67%
Epoch: 120 | Loss: 1.33233, Acc: 48.12% | Test Loss: 1.33612, T

In [158]:

for name, param in mlp.state_dict().items():
    print(name, param.size())

linear_layer_stack.0.weight torch.Size([500, 1])
linear_layer_stack.0.bias torch.Size([500])
linear_layer_stack.1.weight torch.Size([500, 500])
linear_layer_stack.1.bias torch.Size([500])
linear_layer_stack.2.weight torch.Size([6, 500])
linear_layer_stack.2.bias torch.Size([6])


In [6]:
test_df = pd.read_csv('test_adversary2.dat', sep=',', encoding="utf-8",engine='python')
test_df.head

<bound method NDFrame.head of       Unnamed: 0  movie_id    genres    rating
0              4         5    Comedy  3.006757
1              8         9    Action  2.656863
2             13        14     Drama  3.542484
3             17        18  Thriller  3.337580
4             18        19    Comedy  2.480720
...          ...       ...       ...       ...
1689        3700      3947  Thriller  3.472727
1690        3701      3948    Comedy  3.635731
1691        3702      3949     Drama  4.115132
1692        3703      3950     Drama  3.666667
1693        3704      3951     Drama  3.900000

[1694 rows x 4 columns]>

In [129]:
X_blob.shape

torch.Size([1000, 2])

In [135]:
torch.unique(Y)

tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
         14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
         28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
         42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
         56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,
         84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
         98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
        126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
        140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
        154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
        168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 1

In [5]:
import pandas as pd
