In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import numpy as np
import sys
sys.path.append("./drive/MyDrive/287 Project")

In [None]:
import torchtext
from torchtext.data import get_tokenizer
from torchtext import data, datasets

In [None]:
from typing import *
import torch
from torch import nn, optim, Tensor
from torch.nn import CrossEntropyLoss
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence, pad_sequence
from torch.utils.data import DataLoader, Dataset

In [None]:
import torch.nn.functional as F
from itertools import chain

In [None]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.13.0-py3-none-any.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 9.9 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 61.2 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 54.1 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)
[K     |████████████████████████████████| 61 kB 682 kB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 50.4 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attem

In [None]:
from blog_data_reader import get_dataset
from example import get_hidden_size

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/426k [00:00<?, ?B/s]

In [None]:
# Load the Dataset
train, dev, test = get_dataset()

In [None]:
class BlogDataset(Dataset):
    """Map-style dataset that turns blog examples into tensors.

    Every wrapped example is expected to expose ``get_tokenized()``,
    ``get_label()`` and ``get_aux_labels()`` (the first two auxiliary labels
    are used).
    """

    def __init__(self, dataset):
        # Any indexable, sized collection of examples.
        self.dataset = dataset

    def __len__(self) -> int:
        return len(self.dataset)

    def __getitem__(self, idx: int):
        """Return (tokens, topic label, aux label 0, aux label 1, token count).

        The training loops in this notebook unpack the two auxiliary labels as
        ``age`` and ``gender`` respectively.
        """
        example = self.dataset[idx]
        # Tokenize once and reuse the result for both the ids and the length.
        tokens = example.get_tokenized()
        aux_labels = example.get_aux_labels()
        return (
            torch.tensor(tokens),
            torch.tensor(example.get_label()),
            torch.tensor(aux_labels[0]),
            torch.tensor(aux_labels[1]),
            torch.tensor(len(tokens)),
        )

In [None]:
# Wrap each split returned by get_dataset() so it can be indexed by a DataLoader.
train_ds = BlogDataset(train)
val_ds = BlogDataset(dev)
test_ds = BlogDataset(test)

In [None]:
def pad_collate_classifier(batch):
    """Collate a list of 5-field samples into batched tensors.

    The first field (variable-length token ids) is right-padded with 0 to the
    longest sequence in the batch; the remaining four fields are stacked
    along a new leading batch dimension.

    Returns:
        A 5-tuple: (padded tokens, field 2, field 3, field 4, field 5).
    """
    tokens, labels, first_aux, second_aux, lengths = zip(*batch)
    padded_tokens = pad_sequence(tokens, batch_first=True, padding_value=0)
    stacked = tuple(
        torch.stack(column, dim=0)
        for column in (labels, first_aux, second_aux, lengths)
    )
    return (padded_tokens, *stacked)

# Training batches are shuffled and the last partial batch is dropped;
# validation and test loaders keep the original order and every example.
# All three pad their token sequences via pad_collate_classifier.
train_dl = DataLoader(train_ds, batch_size=256, shuffle=True, drop_last=True, collate_fn=pad_collate_classifier) 
val_dl = DataLoader(val_ds, batch_size=256, shuffle=False, drop_last=False, collate_fn=pad_collate_classifier)
test_dl = DataLoader(test_ds, batch_size=256, shuffle=False, drop_last=False, collate_fn=pad_collate_classifier) 

In [None]:
# Base LSTM Class
class EncoderLSTM(nn.Module):
    """Embedding + single-layer bidirectional LSTM over padded token ids.

    Args:
        input_size: Number of rows in the embedding table (vocabulary size).
        hidden_size: Width of both the embeddings and the LSTM hidden state.
        padding_idx: Token id used for padding; it marks the end of a sequence.
    """

    def __init__(self, input_size: int, hidden_size: int, padding_idx: int):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.padding_idx = padding_idx

        self.dropout = nn.Dropout(0.2)
        self.embeddings = nn.Embedding(self.input_size, self.hidden_size, padding_idx=self.padding_idx)
        self.lstm = nn.LSTM(self.hidden_size, self.hidden_size, num_layers=1, batch_first=True, bidirectional=True)

    def _sequence_lengths(self, input_seqs: Tensor) -> Tensor:
        """Count the tokens that precede the first padding token in each row.

        The cumulative product of the "is not padding" mask stays 1 until the
        first padding token and is 0 from there on, so its row sum is the
        sequence length. This replaces a per-element Python loop. The result is
        moved to the CPU because ``pack_padded_sequence`` requires CPU lengths.
        """
        not_padding = (input_seqs != self.padding_idx).long()
        return not_padding.cumprod(dim=1).sum(dim=1).cpu()

    def forward(self, input_seqs: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
        """Encode a batch of padded sequences.

        Args:
            input_seqs: Integer tensor of token ids, indexed (batch, position).

        Returns:
            ``(ht[0], ct[0], ht[1], ct[1])``: the final hidden and cell states
            for each of the two LSTM directions.

        Note:
            A row whose first token is padding has length 0, which
            ``pack_padded_sequence`` rejects.
        """
        input_lengths = self._sequence_lengths(input_seqs)

        embeddings = self.dropout(self.embeddings(input_seqs))

        # enforce_sorted=False lets the batch arrive in any length order.
        inputs = pack_padded_sequence(embeddings, input_lengths, batch_first=True, enforce_sorted=False)
        _, (ht, ct) = self.lstm(inputs)
        return (ht[0], ct[0], ht[1], ct[1])

In [None]:
# LSTM encoder wrapper (this class contains no variational component)
class Encoder(nn.Module):
    """Run EncoderLSTM and flatten its four final states into one vector.

    ``embedding_size``, ``num_layers`` and ``bidirectional`` are accepted and
    (except ``num_layers``) stored, but the wrapped EncoderLSTM is built only
    from ``input_size``, ``hidden_size`` and ``padding_idx``. The ``dropout``
    module is created but not applied in ``encode``.
    """

    def __init__(self, input_size: int, embedding_size: int, hidden_size: int, padding_idx: int, dropout=0.0, num_layers=1, bidirectional=True):
        super().__init__()
        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.padding_idx = padding_idx
        self.bidirectional = bidirectional
        self.dropout = nn.Dropout(dropout)
        # Underlying embedding + LSTM stack.
        self.encoder = EncoderLSTM(self.input_size, self.hidden_size, self.padding_idx)

    def encode(self, input_sequence):
        """Concatenate the encoder's four state tensors along dim 1."""
        first_h, first_c, second_h, second_c = self.encoder(input_sequence)
        states = (first_h, first_c, second_h, second_c)
        return torch.cat(states, dim=1)

    def forward(self, input_sequence):
        """Alias for :meth:`encode`."""
        return self.encode(input_sequence)

In [None]:
# Base classifier network
class Classifier(torch.nn.Module):
  """Stack of linear layers that maps a hidden vector to class logits.

  Args:
    dims: Layer widths; ``dims[i] -> dims[i+1]`` defines linear layer ``i``.
    dropout: Dropout probability applied after every layer except the last.
    activation: Optional module applied after every layer except the last.
      With the default ``None`` the stack is a composition of affine maps,
      exactly as before; pass e.g. ``nn.ReLU()`` to make it non-linear.
  """

  def __init__(self, dims=(2000, 1024, 512, 20), dropout=0.0, activation=None):
    super().__init__()
    # Copy so the caller's sequence (or the default) is never shared or mutated.
    dims = list(dims)
    self.latent_size = dims[0]
    self.output_size = dims[-1]

    self.classifier = nn.ModuleDict({
            f'layer_{i}':nn.Linear(dims[i],dims[i+1]) 
            for i in range(len(dims)-1)
        })
    self.dropout = nn.Dropout(p=dropout)
    self.activation = activation

  def forward(self, hidden):
    """Return logits for ``hidden``.

    Dropout is deliberately skipped on the final layer: zeroing and rescaling
    logits during training corrupts the loss signal without regularising any
    downstream weights.
    """
    hid = hidden
    last_index = len(self.classifier) - 1
    for index, layer in enumerate(self.classifier.values()):
        hid = layer(hid)
        if index < last_index:
            if self.activation is not None:
                hid = self.activation(hid)
            hid = self.dropout(hid)
    return hid

State-of-the-Art Model: Coavoux et al. (2018)

In [None]:
# Baseline #1 State of the Art Model - LSTM Encoder + Adversarial Learning
class SOTA(nn.Module):
  """Adversarial baseline: LSTM encoder, target classifier and one adversary.

  The adversary is trained to predict the sensitive attribute from the encoder
  output; the encoder and target classifier are trained to predict the target
  label while *increasing* the adversary's loss.
  """

  def __init__(self, num_target_labels, num_sensitive_labels):
    super().__init__()

    self.num_target_labels = num_target_labels
    self.num_sensitive_labels = num_sensitive_labels
    self.vocab_size = get_hidden_size()
    self.padding_idx = 0

    self.embedding_size = 400
    self.hidden_size = 16

    # Loss weights used by the compute_* methods. train_all() replaces them
    # with the values from its train_params argument.
    self.loss_weights = {'target_server_weight': 1, 'target_adversary_weight': 1}

    self.encoder_layers = Encoder(self.vocab_size, self.embedding_size, self.hidden_size, self.padding_idx, dropout=0.2)

    # The encoder concatenates four states of width hidden_size (4 * 16 = 64).
    self.target_server = Classifier(dims=[64, 32, num_target_labels], dropout=0.2)
    self.target_adversary = Classifier(dims=[64, 32, num_sensitive_labels], dropout=0.2)

    self.loss_fn = nn.CrossEntropyLoss()

  def _device(self):
    """Device of the model's parameters (CPU if the model has none)."""
    first_param = next(self.parameters(), None)
    return first_param.device if first_param is not None else torch.device('cpu')

  def encoder(self, x):
    """Return the encoder's hidden representation of ``x``."""
    return self.encoder_layers(x)

  def forward_adversaries(self, x):
    """First training step: logits of the adversary on the encoded input."""
    hidden = self.encoder(x)
    return self.target_adversary(hidden)

  def forward_all(self, x):
    """Return (target classifier logits, adversary logits) for ``x``."""
    hidden = self.encoder(x)
    target_server_logits = self.target_server(hidden)
    target_adversary_logits = self.target_adversary(hidden)

    return target_server_logits, target_adversary_logits

  def compute_loss_adversaries(self, target_adversary_logits, y_target, y_sensitive):
    """Weighted cross-entropy of the adversary against the sensitive labels.

    ``y_target`` is accepted for signature compatibility and is not used.
    """
    target_adversary_loss = self.loss_fn(target_adversary_logits, y_sensitive)
    return self.loss_weights['target_adversary_weight'] * target_adversary_loss

  def compute_classifier_loss(self, target_server_logits, target_adversary_logits, y_target, y_sensitive):
    """Return (unweighted target loss, weighted adversary loss)."""
    target_adversary_loss = self.compute_loss_adversaries(target_adversary_logits, y_target, y_sensitive)
    target_server_loss = self.loss_fn(target_server_logits, y_target)
    return target_server_loss, target_adversary_loss

  def compute_total_loss(self, target_server_logits, target_adversary_logits, y_target, y_sensitive):
    """Objective for the encoder + target classifier step.

    The adversary loss is subtracted so that minimising this objective makes
    the sensitive attribute harder to recover from the encoder output. It is
    already weighted by compute_loss_adversaries, so no second weight is
    applied here.
    """
    target_server_loss, target_adversary_loss = self.compute_classifier_loss(target_server_logits, target_adversary_logits, y_target, y_sensitive)
    return self.loss_weights['target_server_weight'] * target_server_loss - target_adversary_loss

  def test_metrics(self, dl, adversary=False):
    """Evaluate one head on ``dl``.

    Args:
      dl: Iterable of (inputs, targets, age, gender, lengths) batches.
      adversary: If True, score the adversary against ``gender``; otherwise
        score the target classifier against ``targets``.

    Returns:
      (mean cross-entropy loss, accuracy) over all examples.

    Raises:
      ValueError: If ``dl`` yields no examples.
    """
    self.eval()
    device = self._device()
    head = self.target_adversary if adversary else self.target_server
    loss_fn = nn.CrossEntropyLoss()
    correct = 0
    total = 0
    sum_loss = 0.0

    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
      for inputs, targets, age, gender, lengths in dl:
        labels = (gender if adversary else targets).to(device)
        logits = head(self.encoder(inputs.to(device)))

        batch_size = labels.shape[0]
        sum_loss += loss_fn(logits, labels).item() * batch_size
        # argmax over logits equals argmax over their softmax.
        correct += (logits.argmax(dim=1) == labels).sum().item()
        total += batch_size

    if total == 0:
      raise ValueError("test_metrics received an empty data loader")
    return sum_loss / total, correct / total

  def train_all(self, train_params, train_loader=None, val_loader=None):
    """Alternate adversary and encoder/classifier updates for each batch.

    Args:
      train_params: Dict with 'epochs', 'adv_lr', 'adv_weight_decay', 'dis_lr',
        'dis_weight_decay', 'target_server_weight', 'target_adversary_weight'.
      train_loader: Training batches; defaults to the notebook's ``train_dl``.
      val_loader: Validation batches; defaults to the notebook's ``val_dl``.
    """
    if train_loader is None:
      train_loader = train_dl
    if val_loader is None:
      val_loader = val_dl

    # Use the weights that were passed in rather than a module-level global.
    self.loss_weights = {
        'target_server_weight': train_params['target_server_weight'],
        'target_adversary_weight': train_params['target_adversary_weight'],
    }
    device = self._device()

    # Two optimizers, one for each step of the alternating scheme.
    adversary_optimizer = optim.Adam(self.target_adversary.parameters(), lr=train_params['adv_lr'], weight_decay=train_params['adv_weight_decay'])

    disentangler_parameters = chain(self.encoder_layers.parameters(), self.target_server.parameters())
    disentangler_optimizer = optim.Adam(disentangler_parameters, lr=train_params['dis_lr'], weight_decay=train_params['dis_weight_decay'])

    epochs = train_params['epochs']
    for epoch in range(epochs):
      self.train()
      for inputs, targets, age, gender, lengths in train_loader:
        sensitive = gender.to(device)
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Step 1: improve the adversary on the current representation.
        adversary_optimizer.zero_grad()
        target_adversary_logits = self.forward_adversaries(inputs)
        adversary_loss = self.compute_loss_adversaries(target_adversary_logits, targets, sensitive)

        adversary_loss.backward()
        adversary_optimizer.step()

        # Step 2: update encoder + target classifier against the adversary.
        disentangler_optimizer.zero_grad()
        target_server_logits, target_adversary_logits = self.forward_all(inputs)
        loss = self.compute_total_loss(target_server_logits, target_adversary_logits, targets, sensitive)
        loss.backward()
        disentangler_optimizer.step()

      server_loss, server_acc = self.test_metrics(val_loader, adversary= False)
      adv_loss, adv_acc = self.test_metrics(val_loader, adversary= True)
      
      print(f"Target - Epoch {epoch}: SL {server_loss}, SA {server_acc}, AL {adv_loss}, AA {adv_acc}")
      print()
      print()

In [None]:
# Baseline with 10 target classes and 2 sensitive classes, moved to the GPU.
d = SOTA(10, 2).cuda()
# Hyperparameters
# SOTA reads 'epochs', the adv_*/dis_* optimizer settings and the two target_*
# weights; 'kl_weight' and the sensitive_* weights are not referenced by it.
train_params = {
    'epochs': 100,
    'adv_lr': 1e-4,
    'adv_weight_decay': 1e-3,
    'dis_lr': 1e-3,
    'dis_weight_decay': 1e-4,
    'kl_weight': 1e-3,
    'target_server_weight': 1,
    'target_adversary_weight': 1,
    'sensitive_server_weight': 1,
    'sensitive_adversary_weight': 1
}
d.train_all(train_params)

Baseline: Our model without the Variational Encoder

In [35]:
# Our entire network
class DisentanglerNoVE(nn.Module):
  """Disentangling model without a variational encoder.

  The encoder output is split into a "server" half and a "sensitive" half.
  Four heads are attached:

  * ``target_server``: target label from the server half.
  * ``target_adversary``: sensitive label from the server half.
  * ``sensitive_server``: target label from the sensitive half.
  * ``sensitive_adversary``: sensitive label from the sensitive half.

  ``target_adversary`` and ``sensitive_server`` are trained in the first
  (adversary) step; the encoder and the other two heads in the second step.
  """

  # Width of each half of the encoder output (4 * hidden_size = 128 in total).
  HALF_LATENT_SIZE = 64

  def __init__(self, num_target_labels, num_sensitive_labels):
    super().__init__()

    self.num_target_labels = num_target_labels
    self.num_sensitive_labels = num_sensitive_labels
    self.vocab_size = get_hidden_size()
    self.padding_idx = 0

    self.embedding_size = 400
    self.hidden_size = 32

    self.mu = None
    self.sigma = None

    # Loss weights used by the compute_* methods. train_all() replaces them
    # with the values from its train_params argument.
    self.loss_weights = {
        'target_server_weight': 1,
        'target_adversary_weight': 1,
        'sensitive_server_weight': 1,
        'sensitive_adversary_weight': 1,
    }

    self.encoder_layers = Encoder(self.vocab_size, self.embedding_size, self.hidden_size, self.padding_idx, dropout=0.2)

    self.target_server = Classifier(dims=[64, 32, num_target_labels], dropout=0.2)
    self.target_adversary = Classifier(dims=[64, 32, num_sensitive_labels], dropout=0.2)

    self.sensitive_server = Classifier(dims=[64, 32, num_target_labels], dropout=0.2)
    self.sensitive_adversary = Classifier(dims=[64, 32, num_sensitive_labels], dropout=0.2)

    self.loss_fn = nn.CrossEntropyLoss()

  def _device(self):
    """Device of the model's parameters (CPU if the model has none)."""
    first_param = next(self.parameters(), None)
    return first_param.device if first_param is not None else torch.device('cpu')

  def _split_hidden(self, hidden):
    """Split the encoder output into (server half, sensitive half)."""
    server_hidden, adv_hidden = torch.split(hidden, self.HALF_LATENT_SIZE, dim=1)
    return server_hidden, adv_hidden

  def encoder(self, x):
    """Return the encoder's hidden representation of ``x``."""
    return self.encoder_layers(x)

  def forward_adversaries(self, x):
    """First training step: logits of the two adversarial heads.

    Returns (target_adversary logits, sensitive_server logits).
    """
    server_hidden, adv_hidden = self._split_hidden(self.encoder(x))

    target_adversary_logits = self.target_adversary(server_hidden)
    sensitive_server_logits = self.sensitive_server(adv_hidden)

    return target_adversary_logits, sensitive_server_logits

  def forward_all(self, x):
    """Run all four heads.

    Returns (target_server, target_adversary, sensitive_server,
    sensitive_adversary) logits.
    """
    server_hidden, adv_hidden = self._split_hidden(self.encoder(x))

    target_server_logits = self.target_server(server_hidden)
    target_adversary_logits = self.target_adversary(server_hidden)

    sensitive_server_logits = self.sensitive_server(adv_hidden)
    sensitive_adversary_logits = self.sensitive_adversary(adv_hidden)

    return target_server_logits, target_adversary_logits, sensitive_server_logits, sensitive_adversary_logits

  def compute_loss_adversaries(self, target_adversary_logits, sensitive_server_logits, y_target, y_sensitive):
    """Weighted sum of the two adversarial heads' cross-entropy losses."""
    target_adversary_loss = self.loss_fn(target_adversary_logits, y_sensitive)
    sensitive_server_loss = self.loss_fn(sensitive_server_logits, y_target)
    return (self.loss_weights['target_adversary_weight'] * target_adversary_loss
            + self.loss_weights['sensitive_server_weight'] * sensitive_server_loss)

  def compute_classifier_loss(self, target_server_logits, target_adversary_logits, sensitive_server_logits, sensitive_adversary_logits, y_target, y_sensitive):
    """Return (target_server loss, combined adversary loss, sensitive_adversary loss).

    Only the combined adversary loss is weighted at this point.
    """
    combined_adversary_loss = self.compute_loss_adversaries(target_adversary_logits, sensitive_server_logits, y_target, y_sensitive)
    target_server_loss = self.loss_fn(target_server_logits, y_target)
    sensitive_adversary_loss = self.loss_fn(sensitive_adversary_logits, y_sensitive)
    return target_server_loss, combined_adversary_loss, sensitive_adversary_loss

  def compute_total_loss(self, target_server_logits, target_adversary_logits, sensitive_server_logits, sensitive_adversary_logits, y_target, y_sensitive):
    """Objective for the second step: own-task losses minus the adversaries' loss."""
    target_server_loss, combined_adversary_loss, sensitive_adversary_loss = self.compute_classifier_loss(target_server_logits, target_adversary_logits, sensitive_server_logits, sensitive_adversary_logits, y_target, y_sensitive)

    return (self.loss_weights['target_server_weight'] * target_server_loss
            + self.loss_weights['sensitive_adversary_weight'] * sensitive_adversary_loss
            - combined_adversary_loss)

  def test_metrics(self, dl, adversary=False, sensitive=False):
    """Evaluate one of the four heads on ``dl``.

    Head, input half and labels by flag combination:

    * adversary=False, sensitive=False: target_server, server half, targets
    * adversary=True,  sensitive=False: target_adversary, server half, gender
    * adversary=True,  sensitive=True:  sensitive_server, sensitive half, targets
    * adversary=False, sensitive=True:  sensitive_adversary, sensitive half, gender

    Returns:
      (mean cross-entropy loss, accuracy) over all examples.

    Raises:
      ValueError: If ``dl`` yields no examples.
    """
    self.eval()
    device = self._device()
    if adversary:
      model = self.sensitive_server if sensitive else self.target_adversary
    else:
      model = self.sensitive_adversary if sensitive else self.target_server
    # Target labels are used exactly when both flags agree (see table above).
    use_target_labels = (adversary == sensitive)

    loss_fn = nn.CrossEntropyLoss()
    correct = 0
    total = 0
    sum_loss = 0.0

    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
      for inputs, targets, age, gender, lengths in dl:
        labels = (targets if use_target_labels else gender).to(device)

        server_hidden, adv_hidden = self._split_hidden(self.encoder(inputs.to(device)))
        # Feed the half that the selected head was trained on.
        hidden = adv_hidden if sensitive else server_hidden
        logits = model(hidden)

        batch_size = labels.shape[0]
        sum_loss += loss_fn(logits, labels).item() * batch_size
        # argmax over logits equals argmax over their softmax.
        correct += (logits.argmax(dim=1) == labels).sum().item()
        total += batch_size

    if total == 0:
      raise ValueError("test_metrics received an empty data loader")
    return sum_loss / total, correct / total

  def train_all(self, train_params, train_loader=None, val_loader=None):
    """Alternate adversary and encoder/classifier updates for each batch.

    Args:
      train_params: Dict with 'epochs', 'adv_lr', 'adv_weight_decay', 'dis_lr',
        'dis_weight_decay' and the four '*_weight' loss weights.
      train_loader: Training batches; defaults to the notebook's ``train_dl``.
      val_loader: Validation batches; defaults to the notebook's ``val_dl``.
    """
    if train_loader is None:
      train_loader = train_dl
    if val_loader is None:
      val_loader = val_dl

    # Use the weights that were passed in rather than a module-level global.
    self.loss_weights = {name: train_params[name] for name in (
        'target_server_weight', 'target_adversary_weight',
        'sensitive_server_weight', 'sensitive_adversary_weight')}
    device = self._device()

    # Two optimizers, one for each step of the alternating scheme.
    adversary_parameters = chain(self.target_adversary.parameters(), self.sensitive_server.parameters())
    adversary_optimizer = optim.Adam(adversary_parameters, lr=train_params['adv_lr'], weight_decay=train_params['adv_weight_decay'])

    disentangler_parameters = chain(self.encoder_layers.parameters(), self.target_server.parameters(), self.sensitive_adversary.parameters())
    disentangler_optimizer = optim.Adam(disentangler_parameters, lr=train_params['dis_lr'], weight_decay=train_params['dis_weight_decay'])

    epochs = train_params['epochs']
    for epoch in range(epochs):
      self.train()
      for inputs, targets, age, gender, lengths in train_loader:
        sensitive = gender.to(device)
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Step 1: improve the adversarial heads on the current representation.
        adversary_optimizer.zero_grad()
        target_adversary_logits, sensitive_server_logits = self.forward_adversaries(inputs)
        adversary_loss = self.compute_loss_adversaries(target_adversary_logits, sensitive_server_logits, targets, sensitive)

        adversary_loss.backward()
        adversary_optimizer.step()

        # Step 2: update the encoder and the remaining two heads.
        disentangler_optimizer.zero_grad()
        target_server_logits, target_adversary_logits, sensitive_server_logits, sensitive_adversary_logits = self.forward_all(inputs)
        loss = self.compute_total_loss(target_server_logits, target_adversary_logits, sensitive_server_logits, sensitive_adversary_logits, targets, sensitive)
        loss.backward()
        disentangler_optimizer.step()

      server_loss, server_acc = self.test_metrics(val_loader, adversary= False)
      adv_loss, adv_acc = self.test_metrics(val_loader, adversary= True)
      
      server_loss2, server_acc2 = self.test_metrics(val_loader, adversary= True, sensitive=True)
      adv_loss2, adv_acc2 = self.test_metrics(val_loader, adversary= False, sensitive=True)

      print(f"Target - Epoch {epoch}: SL {server_loss}, SA {server_acc}, AL {adv_loss}, AA {adv_acc}")
      print(f"Sensitive - Epoch {epoch}: SL {server_loss2}, SA {server_acc2}, AL {adv_loss2}, AA {adv_acc2}")
      print()

In [None]:
# Disentangler with 10 target classes and 2 sensitive classes, moved to the GPU.
d = DisentanglerNoVE(10, 2).cuda()
# Hyperparameters
# 'kl_weight' is not referenced by DisentanglerNoVE (it has no KL term).
train_params = {
    'epochs': 100,
    'adv_lr': 1e-4,
    'adv_weight_decay': 1e-3,
    'dis_lr': 1e-3,
    'dis_weight_decay': 1e-4,
    'kl_weight': 1e-3,
    'target_server_weight': 1,
    'target_adversary_weight': 1,
    'sensitive_server_weight': 1,
    'sensitive_adversary_weight': 1
}
d.train_all(train_params)

Single Classification Problem: No Obfuscation

In [None]:
class LSTMClassifier(torch.nn.Module):
    """EncoderLSTM followed by a three-layer linear head with dropout.

    Args:
        input_size: Vocabulary size passed to the encoder.
        hidden_size: Encoder hidden width; the head consumes 4 * hidden_size.
        output_size: Number of output logits.
        padding_idx: Padding token id passed to the encoder.
    """

    def __init__(self, input_size: int, hidden_size: int, output_size: int, padding_idx: int):
        super().__init__()
        self.padding_idx = padding_idx
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.output_size = output_size

        self.encoder = EncoderLSTM(self.input_size, self.hidden_size, self.padding_idx)

        # Head: 4*hidden -> 64 -> 32 -> output, dropout after the first two layers.
        self.fc1 = nn.Linear(4 * hidden_size, 64)
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(64, 32)
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(32, output_size)

    def forward(self, input_seqs: Tensor) -> Tensor:
        """Return logits for a batch of padded token-id sequences."""
        ht0, ct0, ht1, ct1 = self.encoder(input_seqs)
        features = torch.cat((ht0, ct0, ht1, ct1), dim=1)
        first = self.dropout1(self.fc1(features))
        second = self.dropout2(self.fc2(first))
        return self.fc3(second)

In [None]:
def test_metrics(model, test_dl, adversary=False):
  model.eval()
  correct = 0
  total = 0
  sum_loss = 0.0
  loss_fn = nn.CrossEntropyLoss()
  
  for inputs, targets, age, gender, lengths in test_dl:
    torch.cuda.empty_cache()
    inputs = inputs.cuda()
    if adversary:
      targets = gender.cuda()
    else:
      targets=targets.cuda()
    logits = model(inputs)
    
    loss = loss_fn(logits.squeeze(), targets)
    sigmoid= torch.softmax(logits, dim=1)
    pred = torch.argmax(sigmoid, dim=1)
    correct += (pred.squeeze() == targets).sum()
    total += targets.shape[0]
    sum_loss += loss.item()*targets.shape[0]
  return sum_loss/total, correct.item()/total

Sensitive Performance: No Obfuscation

In [None]:
# Per-epoch metric history for the sensitive-attribute (gender) classifier.
train_losses = []
train_acces = []
val_losses = []
val_acces = []

# Two output classes: the classifier predicts gender directly from the text.
adv = LSTMClassifier(get_hidden_size(), 16, 2, 0)
adv.to('cuda')
# Hand only trainable parameters to the optimizer (the list was previously
# built but never used).
parameters = [p for p in adv.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(parameters, lr=0.001, weight_decay=1e-3)
# Built once; the loss module is stateless, so one instance serves every step.
loss_fn = nn.CrossEntropyLoss()
for epoch in range(100):
  adv.train()
  for inputs, targets, age, gender, lengths in train_dl:
    inputs = inputs.cuda()
    gender = gender.cuda()
    optimizer.zero_grad()
    logits = adv(inputs)
    # Logits are already (batch, classes); no squeeze is needed.
    loss = loss_fn(logits, gender)
    loss.backward()
    optimizer.step()
  val_loss, val_acc = test_metrics(adv, val_dl, adversary=True)
  val_losses.append(val_loss)
  val_acces.append(val_acc)
  train_loss, train_acc = test_metrics(adv, train_dl, adversary=True)
  train_losses.append(train_loss)
  train_acces.append(train_acc)
  print(f"Epoch {epoch + 1}: " + "train loss %.3f, train_acc %.3f, val loss %.3f, val accuracy %.3f" % (train_loss, train_acc, val_loss, val_acc))

Target Performance: No Obfuscation

In [None]:
# Per-epoch metric history for the target (topic) classifier.
train_losses = []
train_acces = []
val_losses = []
val_acces = []

# Ten output classes: despite the variable name, this model predicts the
# target label, not the sensitive attribute.
adv = LSTMClassifier(get_hidden_size(), 16, 10, 0)
adv.to('cuda')
# Hand only trainable parameters to the optimizer (the list was previously
# built but never used).
parameters = [p for p in adv.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(parameters, lr=0.001, weight_decay=1e-3)
# Built once; the loss module is stateless, so one instance serves every step.
loss_fn = nn.CrossEntropyLoss()
for epoch in range(100):
  adv.train()
  for inputs, targets, age, gender, lengths in train_dl:
    inputs = inputs.cuda()
    targets = targets.cuda()
    optimizer.zero_grad()
    logits = adv(inputs)
    # Logits are already (batch, classes); no squeeze is needed.
    loss = loss_fn(logits, targets)
    loss.backward()
    optimizer.step()
  val_loss, val_acc = test_metrics(adv, val_dl, adversary=False)
  val_losses.append(val_loss)
  val_acces.append(val_acc)
  train_loss, train_acc = test_metrics(adv, train_dl, adversary=False)
  train_losses.append(train_loss)
  train_acces.append(train_acc)
  print(f"Epoch {epoch + 1}: " + "train loss %.3f, train_acc %.3f, val loss %.3f, val accuracy %.3f" % (train_loss, train_acc, val_loss, val_acc))