# new Training classifier

This script can be used to train the classifier on original and adversarial samples.

In [1]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import os
import time
import importlib
from copy import copy
from tqdm import tqdm 

device = torch.device('cuda:7' if torch.cuda.is_available() else 'cpu')
print(device) 

cuda:7


# Loading and transforming data into logits differences

The first step is transforming our dataframe into logits differences for each original and adversarial sentence. For this, it is required to execute the model for each sentence with substitutions as explained in the paper.

In [2]:
# Print available setups for testing
for i in os.listdir('../../Generating Adversarial Samples/Data'):
    if not i.startswith('.'): # Don't print system files
        print(i)

ag-news_alzantot_distilbert.csv
rotten-tomatoes_alzantot_distilbert.csv
rotten-tomatoes_alzantot_distilbert_logits_ordering.csv
imdb_bae_distilbert_logits.csv
imdb_pwws_distilbert_logits.csv
imdb_pwws_distilbert.csv
rotten-tomatoes_alzantot_distilbert_logits.csv
ag-news_textfooler_distilbert_logits_ordering.csv
ag-news_textfooler_distilbert_logits.csv
ag-news_alzantot_distilbert_logits_ordering.csv
imdb_bae_distilbert.csv
ag-news_textfooler_distilbert.csv
imdb_bae_distilbert_logits_ordering.csv
ag-news_alzantot_distilbert_logits.csv


In [3]:
# Select the configuration for training
test_config = 'imdb_pwws_distilbert_logits.csv' # or 'agnews_pwws_distilbert.csv'

# Loading data

Read into a dataframe your original and adversarial samples.

In [4]:
# Read the desired csv file previously generated
# no index column is needed for the csv file to be read correctly 
df = pd.read_csv(f'../../Generating Adversarial Samples/Data/{test_config}')
print(df.shape) 
df

(14000, 514)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,504,505,506,507,508,509,510,511,y_label,sentence
0,-1.624298,-1.267812,-1.124180,-1.105392,-1.083280,-0.992652,-0.926316,-0.936030,-0.940352,-0.852728,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,ane just didn't get this movie...Was it a musi...
1,-5.831964,-2.938801,4.312030,4.541301,5.168174,5.381170,5.540419,5.673717,5.725666,5.801096,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,This is an absolute expectant show. Jessica Al...
2,-3.836682,-3.758868,-2.357473,-1.965015,-1.683539,-1.404211,-1.350199,-1.286282,-0.964777,-0.876890,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Yet another venture into the realm of the teen...
3,-4.120091,-3.577592,-3.717308,-3.684467,-3.174225,-2.286835,-2.090955,-2.013556,-1.765473,-1.695364,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,and parading around a 14-year-old girl in a th...
4,-2.934218,1.903705,3.633454,4.177796,4.378959,4.511499,4.483936,4.764768,4.819135,4.857956,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,This movie has remained in my mind for years a...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13995,-3.221572,-3.158003,-2.434023,-2.247374,-1.787426,-1.584601,-1.340585,-1.395013,-1.304172,-1.318179,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,"It commencement quite good, but after a while ..."
13996,1.234591,3.442929,3.721999,3.771651,4.012804,4.220390,4.520671,4.600287,4.602266,4.625516,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"if you are going to see this movie, by all mea..."
13997,5.727832,6.308583,6.333835,6.478086,6.474565,6.465682,6.460819,6.475653,6.426850,6.507056,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"First ever viewing: July 21, 2008 Very impres..."
13998,5.004248,5.579744,5.742004,5.888197,5.970396,5.962745,6.121992,6.133130,6.116953,6.176139,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,In answer to the person who made the comment a...


In [5]:
# Divide train and test set
df_train = df.head(3000)
df_test = df.tail(1360 * 2)
print(df_train.shape, df_test.shape) 

(3000, 514) (2720, 514)


In [6]:
y_train = df_train['y_label'].values
x_train = df_train.drop(columns=['y_label', 'sentence']).values

y_test = df_test['y_label'].values
x_test = df_test.drop(columns=['y_label', 'sentence']).values

# Model training and comparison

We train different models and compare their performance.

### Random forest

In [7]:
from sklearn.ensemble import RandomForestClassifier

# Create the model using best parameters found
model = RandomForestClassifier(n_estimators=1600,
                               min_samples_split=10,
                               min_samples_leaf=2,
                               # max_features='auto',
                               max_depth=None, 
                               bootstrap = True)
# Fit on training data
model.fit(x_train, y_train)

In [8]:
# Actual class predictions
rf_predictions = model.predict(x_test)

In [9]:
np.sum(rf_predictions==y_test)/len(y_test)

0.9213235294117647

In [10]:
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_test, rf_predictions, digits=3))
print(confusion_matrix(y_test, rf_predictions))

              precision    recall  f1-score   support

         0.0      0.927     0.915     0.921      1367
         1.0      0.915     0.928     0.921      1353

    accuracy                          0.921      2720
   macro avg      0.921     0.921     0.921      2720
weighted avg      0.921     0.921     0.921      2720

[[1251  116]
 [  98 1255]]


### XGBoost

Best performing model. Hyperparamter tuning done with Dataiku.

In [11]:
import xgboost as xgb

In [12]:
xgb_classifier = xgb.XGBClassifier(
                    max_depth=3,
                    learning_rate=0.34281802,
                    gamma=0.6770816,
                    min_child_weight=2.5520658,
                    max_delta_step=0.71469694,
                    subsample=0.61460966,
                    colsample_bytree=0.73929816,
                    colsample_bylevel=0.87191725,
                    reg_alpha=0.9064181,
                    reg_lambda=0.5686102,
                    n_estimators=29,
                    silent=0,
                    nthread=4,
                    scale_pos_weight=1.0,
                    base_score=0.5,
                    # missing=None,
                  )

In [13]:
xgb_classifier.fit(x_train, y_train)

Parameters: { "silent" } are not used.



In [14]:
xgb_predictions = xgb_classifier.predict(x_test)

In [15]:
print(classification_report(y_test, xgb_predictions, digits=3))
print(confusion_matrix(y_test, xgb_predictions))

              precision    recall  f1-score   support

         0.0      0.942     0.918     0.930      1367
         1.0      0.919     0.943     0.931      1353

    accuracy                          0.931      2720
   macro avg      0.931     0.931     0.931      2720
weighted avg      0.931     0.931     0.931      2720

[[1255  112]
 [  77 1276]]


### AdaBoost classifier

In [16]:
from sklearn.ensemble import AdaBoostClassifier

In [17]:
abc = AdaBoostClassifier()

In [18]:
abc.fit(x_train, y_train)



In [19]:
abc_predictions = abc.predict(x_test)

In [20]:
np.sum(abc_predictions==y_test)/len(y_test)

0.9279411764705883

In [21]:
print(classification_report(y_test, abc_predictions, digits=3))
print(confusion_matrix(y_test, abc_predictions))

              precision    recall  f1-score   support

         0.0      0.939     0.917     0.927      1367
         1.0      0.918     0.939     0.928      1353

    accuracy                          0.928      2720
   macro avg      0.928     0.928     0.928      2720
weighted avg      0.928     0.928     0.928      2720

[[1253  114]
 [  82 1271]]


### LightGBM

In [22]:
import lightgbm as lgb

In [23]:
parameters = {
    'objective': 'binary',
    'application': 'binary',
    'metric': ['binary_logloss'],
    'num_leaves': 35,
    'learning_rate': 0.13,
    'verbose': 1
}

In [24]:
train_data = lgb.Dataset(x_train, label=y_train)
test_data = lgb.Dataset(x_test, label=y_test)

In [25]:
lgbm_classifier = lgb.train(parameters,
                       train_data,
                       valid_sets=test_data,
                       num_boost_round=300)

[LightGBM] [Info] Number of positive: 1488, number of negative: 1512
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021106 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 72373
[LightGBM] [Info] Number of data points in the train set: 3000, number of used features: 512
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.496000 -> initscore=-0.016000
[LightGBM] [Info] Start training from score -0.016000


In [26]:
y_hat = lgbm_classifier.predict(x_test)

In [27]:
y_hat.round()

array([0., 1., 0., ..., 0., 0., 1.])

In [28]:
np.sum(y_hat.round()==y_test)/len(y_test)

0.9231617647058824

In [29]:
print(classification_report(y_test, y_hat.round(), digits=3))
print(confusion_matrix(y_test, y_hat.round()))

              precision    recall  f1-score   support

         0.0      0.929     0.917     0.923      1367
         1.0      0.918     0.929     0.923      1353

    accuracy                          0.923      2720
   macro avg      0.923     0.923     0.923      2720
weighted avg      0.923     0.923     0.923      2720

[[1254  113]
 [  96 1257]]


### SVM

In [30]:
from sklearn.svm import SVC
svm_clf = SVC(C=9.0622635,
          kernel='rbf',
          gamma='scale',
          coef0=0.0,
          tol=0.001,
          probability=True,
          max_iter=-1)

In [31]:
svm_clf.fit(x_train, y_train)

In [32]:
svm_pred = svm_clf.predict(x_test)

In [33]:
np.sum(svm_pred.round()==y_test)/len(y_test)

0.9268382352941177

In [34]:
print(classification_report(y_test, svm_pred.round(), digits=3))
print(confusion_matrix(y_test, svm_pred.round()))

              precision    recall  f1-score   support

         0.0      0.937     0.916     0.926      1367
         1.0      0.917     0.938     0.927      1353

    accuracy                          0.927      2720
   macro avg      0.927     0.927     0.927      2720
weighted avg      0.927     0.927     0.927      2720

[[1252  115]
 [  84 1269]]


### Perceptron NN

In [35]:
from torch.utils.data import Dataset, DataLoader
import sys
from torch.autograd import Variable

class Text(Dataset):
    def __init__(self, x , y):
        self.y = y
        self.x = x

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        data = torch.tensor(self.x[idx].astype('float32')).to(device)
        y = torch.tensor(self.y[idx].astype('float32')).unsqueeze(0).to(device)
        return data, y

In [36]:
train_ds = Text(x_train, y_train)
train_loader = DataLoader(dataset=train_ds, batch_size=128, shuffle=True)

In [37]:
import torch.nn as nn
import torch.nn.functional as F

class BasicModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(BasicModel, self).__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim  = output_dim

        self.fc1 = torch.nn.Linear(self.input_dim, self.hidden_dim)
        self.fc2 = torch.nn.Linear(self.hidden_dim, 1)
        self.sigmoid = torch.nn.Sigmoid()
        
    def forward(self, x):
        x = x.reshape(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return self.sigmoid(x)

In [38]:
basic_classifier = BasicModel(input_dim=512*1, hidden_dim=50, output_dim=1).to(device)
c = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(basic_classifier.parameters(), lr=0.001)

train_loss_history = []
val_acc_history = []

In [39]:
iter_per_epoch = len(train_loader)
num_epochs = 3
initial_epoch = 1
log_nth = 2
storing_frequency = 15
checkpoints_path = "/content/drive/MyDrive/ExplainableAI/Model/Saliency/checkpoints"

for epoch in range(initial_epoch, initial_epoch+num_epochs):
    basic_classifier.train()
    epoch_losses = []
    for i, (data, y_label) in enumerate(train_loader):
      optimizer.zero_grad()
      out = basic_classifier(data)
      loss = c(out, y_label)
      epoch_losses.append(loss.item())
      loss.backward()
      optimizer.step()

      if (i+1) % log_nth == 0:        
          print ('Epoch [{}/{}], Step [{}/{}], Loss for last {} batches: {:.4f}' 
                  .format(epoch, num_epochs, i+1, iter_per_epoch, log_nth, np.mean(np.array(epoch_losses[-log_nth:]))))
          #print_time()
      
      if (i+1) % storing_frequency == 0:        
          print('Storing with loss for last {} batches = {}'.format(storing_frequency, np.mean(np.array(epoch_losses[-storing_frequency:]))))
          #print_time()
          #torch.save(basic_classifier.state_dict(), checkpoints_path+"/final_model_epoch_{}_{}.checkpoint".format(epoch, i+1))
  
    # Store after whole epoch
    print ('Epoch [{}/{}] finished with loss = {:.4f}'.format(epoch, num_epochs, np.mean(np.array(epoch_losses))))
    #torch.save(basic_classifier.state_dict(), checkpoints_path+"/final_model_epoch_{}.checkpoint".format(epoch))

Epoch [1/3], Step [2/24], Loss for last 2 batches: 0.7410
Epoch [1/3], Step [4/24], Loss for last 2 batches: 0.6339
Epoch [1/3], Step [6/24], Loss for last 2 batches: 0.6426
Epoch [1/3], Step [8/24], Loss for last 2 batches: 0.6257
Epoch [1/3], Step [10/24], Loss for last 2 batches: 0.6419
Epoch [1/3], Step [12/24], Loss for last 2 batches: 0.6313
Epoch [1/3], Step [14/24], Loss for last 2 batches: 0.6197
Storing with loss for last 15 batches = 0.6459575653076172
Epoch [1/3], Step [16/24], Loss for last 2 batches: 0.6191
Epoch [1/3], Step [18/24], Loss for last 2 batches: 0.6119
Epoch [1/3], Step [20/24], Loss for last 2 batches: 0.6138
Epoch [1/3], Step [22/24], Loss for last 2 batches: 0.5929
Epoch [1/3], Step [24/24], Loss for last 2 batches: 0.5964
Epoch [1/3] finished with loss = 0.6309
Epoch [2/3], Step [2/24], Loss for last 2 batches: 0.6034
Epoch [2/3], Step [4/24], Loss for last 2 batches: 0.5924
Epoch [2/3], Step [6/24], Loss for last 2 batches: 0.5954
Epoch [2/3], Step [8/24

In [40]:
nn_pred = basic_classifier(torch.tensor(x_test.astype('float32')).to(device))

In [41]:
nn_pred = nn_pred.flatten().detach().cpu().numpy().round()

In [42]:
np.sum(nn_pred==y_test)/len(y_test)

0.8827205882352941

In [43]:
print(classification_report(y_test, nn_pred, digits=3))
print(confusion_matrix(y_test, nn_pred))

              precision    recall  f1-score   support

         0.0      0.842     0.944     0.890      1367
         1.0      0.936     0.820     0.874      1353

    accuracy                          0.883      2720
   macro avg      0.889     0.882     0.882      2720
weighted avg      0.889     0.883     0.882      2720

[[1291   76]
 [ 243 1110]]
