<a href="https://colab.research.google.com/github/manashpratim/Bosch-Summer-Internship/blob/master/TextAttackV1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


### Data is available at https://drive.google.com/drive/folders/1NFYIaXjL8V5kvZo3g9JEafLQ3scslWic?usp=sharing

## **Loading Data**

In [None]:
# Mount Google Drive at /content/drive (Colab-only API).
# Later cells read the pickled transcripts/labels from '/content/drive/My Drive/mosi_data'.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Download the raw CMU-MOSI archive and save it as /content/mosi.zip.
# NOTE(review): the URL is plain http and certificate checking is disabled - confirm the source is trusted.
!wget --no-check-certificate \
      "http://immortal.multicomp.cs.cmu.edu/raw_datasets/CMU_MOSI.zip"\
      -O "/content/mosi.zip"

In [None]:
# Unzip the dataset quietly (-q) into the current working directory.
# The later cell expects the transcripts under /content/Raw/Transcript/Segmented.
!unzip -q '/content/mosi.zip'

In [None]:
# Function to get the file names. Inputs are the directory path and the name of the file to be saved
def get_file_names(mypath,savefile):
  """Collect the base names of the regular files in `mypath` and write them to `savefile`.

  The base name is everything before the FIRST '.' in the file name; a name
  without any '.' is kept whole (the previous slicing dropped its last character).
  Sub-directories are ignored. Names are sorted so the result does not depend on
  the arbitrary order returned by `os.listdir`.

  Args:
    mypath: directory to scan.
    savefile: path of the text file to write, one name per line.

  Returns:
    The sorted list of base names.
  """
  from os import listdir
  from os.path import isfile, join
  onlyfiles = sorted(
      name.split('.', 1)[0] for name in listdir(mypath) if isfile(join(mypath, name))
  )
  with open(savefile, 'w') as out:
    out.writelines(item + '\n' for item in onlyfiles)
  return onlyfiles

In [None]:
# Specify the transcript directory and collect the file base names.
# `files` supplies the keys used to index the pickled `dic` and `label` dictionaries in later cells;
# the names are also written to 'textfile.txt' in the working directory.
mypath = '/content/Raw/Transcript/Segmented'
files = get_file_names(mypath,'textfile.txt')

In [None]:
# Load the processed transcripts and the labels
# `label` and `dic` are indexed by the names in `files` in the following cells.
# NOTE(review): pickle.load can execute arbitrary code - only load these files from a trusted source.
import pickle
with open('/content/drive/My Drive/mosi_data/labels_joined.pickle', 'rb') as handle:
    label= pickle.load(handle)

with open('/content/drive/My Drive/mosi_data/text_data_joined.pickle', 'rb') as handle:
    dic = pickle.load(handle)

## **Preprocessing Data**

In [None]:
# Flatten the per-file segment collections, in the order given by `files`, into one numpy array
import numpy as np
review = np.array([segment for key in files for segment in dic[key]])

In [None]:
# Flatten the per-file labels, in the order given by `files`, into one numpy array
import numpy as np
y = np.array([value for key in files for value in label[key]])

# Convert labels to binary: positive -> 1, negative -> 0, anything else is left as it is,
# then cast the result to integers.
y = np.where(y > 0, 1, np.where(y < 0, 0, y)).astype(int)

In [None]:
# Function to generate a train-test split. Arguments are the text data, the labels and split_size
# (0.8 means an 80:20 train-test split); `seed` optionally makes the shuffle reproducible.
def split_data(text,labels,split_size=0.8,seed=None):
  """Shuffle `text` and `labels` together and split them into train and validation parts.

  Args:
    text: array-like of samples.
    labels: array-like of labels, same length as `text`.
    split_size: fraction of the samples that goes into the training part.
    seed: optional integer seed. When None (the default) the global numpy
      random state is used, as before.

  Returns:
    (text_train, text_val, labels_train, labels_val) as numpy arrays.

  Raises:
    ValueError: if `text` and `labels` differ in length.
  """
  import numpy as np
  text = np.asarray(text)
  labels = np.asarray(labels)
  if len(text) != len(labels):
    raise ValueError(
        f"text and labels must have the same length, got {len(text)} and {len(labels)}"
    )
  train_length = int(len(labels)*split_size)
  if seed is None:
    idx = np.random.permutation(len(labels))
  else:
    idx = np.random.default_rng(seed).permutation(len(labels))
  # One shared permutation keeps every sample aligned with its label.
  text = text[idx]
  labels = labels[idx]

  return text[:train_length], text[train_length:], labels[:train_length], labels[train_length:]

In [None]:
# Get the train-test split: 90% of the shuffled samples for training, the remaining 10% are held out.
# The held-out part (val_reviews / val_labels) is what the attack section later perturbs.
train_reviews,  val_reviews, train_labels, val_labels = split_data(review,y,0.9)

In [None]:
# Preprocess the text data. Similar to the audio data, segments of the text data are padded to have the same length
import tensorflow as tf
import numpy as np

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Every sequence is padded/truncated at the end ('post') to max_length tokens.
max_length = 581
trunc_type='post'
padding_type='post'
oov_tok = "<OOV>"
vocab_len=5000

# NOTE(review): the tokenizer is fit on ALL reviews (train and held-out), so the vocabulary
# includes words that only occur in the held-out split - confirm this is intended.
tokenizer = Tokenizer(num_words=vocab_len+1,oov_token=oov_tok)
tokenizer.fit_on_texts(review)

# vocab_size is the number of distinct entries in the fitted word index;
# it sizes the embedding matrix built in a later cell.
word_index = tokenizer.word_index
vocab_size=len(word_index)
print('Size of Vocabulary: ',vocab_size)

train_sequences = tokenizer.texts_to_sequences(train_reviews)
train_padded = pad_sequences(train_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

val_sequences = tokenizer.texts_to_sequences(val_reviews)
val_padded = pad_sequences(val_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

# Add a trailing axis so each label becomes a length-1 row.
train_labels=np.expand_dims(train_labels, axis=1)
val_labels=np.expand_dims(val_labels, axis=1)

In [None]:
# Data Statistics: shapes of the padded token matrices and of the label arrays.
# Note that the held-out split is labelled "Test" on the second line and "Validation" on the fourth;
# both refer to the same val_* split.
print('Dimension of Training  Text Data: ',train_padded.shape)
print('Dimension of Test Text Data: ',val_padded.shape)
print('Dimension of Training Labels: ',train_labels.shape)
print('Dimension of Validation Labels: ',val_labels.shape)

In [None]:
# Download the GloVe 6B embeddings archive into Google Drive (saved under the name 'globe6B.zip').
# NOTE(review): plain http with certificate checking disabled - confirm the source is trusted.
!wget --no-check-certificate \
      "http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip"\
      -O "/content/drive/My Drive/mosi_data/globe6B.zip"

In [None]:
# Unzip the downloaded embeddings quietly into the current working directory
# (the next cell reads /content/glove.6B.300d.txt).
!unzip -q '/content/drive/My Drive/mosi_data/globe6B.zip'

In [None]:
# Load the embeddings. There are 4 dimensions to choose from. I used 300 dimensional embeddings.
# Each line of the file is "<word> <v1> <v2> ...": the first field is the word, the rest its vector.
embeddings_index = {}
# The encoding is given explicitly so that reading does not depend on the platform's default locale.
with open('/content/glove.6B.300d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

In [None]:
# Build the embedding matrix: row k holds the pre-trained vector of the word whose tokenizer index is k.
# Words without a pre-trained vector keep an all-zero row.
embedding_dim = 300
embeddings_matrix = np.zeros((vocab_size+1, embedding_dim))
for token, row in word_index.items():
    vector = embeddings_index.get(token)
    if vector is None:
        continue
    embeddings_matrix[row] = vector

## **Training**

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class MultiHeadSelfAttention(layers.Layer):
    """Multi-head scaled dot-product self-attention over a sequence.

    The input is projected to queries, keys and values of width `embed_dim`,
    split into `num_heads` heads of width `embed_dim // num_heads`, attended
    per head, concatenated and passed through a final dense projection.

    Args:
        embed_dim: width of the input/output feature axis.
        num_heads: number of attention heads; must divide `embed_dim`.
        **kwargs: forwarded to `layers.Layer` (e.g. `name`, `dtype`).

    Raises:
        ValueError: if `embed_dim` is not divisible by `num_heads`.
    """

    def __init__(self, embed_dim, num_heads=8, **kwargs):
        super(MultiHeadSelfAttention, self).__init__(**kwargs)
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.projection_dim = embed_dim // num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        """Return (attended values, attention weights) for per-head query/key/value tensors."""
        score = tf.matmul(query, key, transpose_b=True)
        # Cast the scale to the score's own dtype so the division also works
        # when the layer computes in a dtype other than float32.
        dim_key = tf.cast(tf.shape(key)[-1], score.dtype)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        """Reshape (batch, seq_len, embed_dim) to (batch, num_heads, seq_len, projection_dim)."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        """Apply self-attention to `inputs` of shape (batch_size, seq_len, embed_dim)."""
        batch_size = tf.shape(inputs)[0]
        query = self.separate_heads(self.query_dense(inputs), batch_size)
        key = self.separate_heads(self.key_dense(inputs), batch_size)
        value = self.separate_heads(self.value_dense(inputs), batch_size)
        # Each of query/key/value: (batch_size, num_heads, seq_len, projection_dim)
        attention, _ = self.attention(query, key, value)
        attention = tf.transpose(
            attention, perm=[0, 2, 1, 3]
        )  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(
            attention, (batch_size, -1, self.embed_dim)
        )  # (batch_size, seq_len, embed_dim)
        return self.combine_heads(concat_attention)  # (batch_size, seq_len, embed_dim)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from its config."""
        config = super(MultiHeadSelfAttention, self).get_config()
        config.update({"embed_dim": self.embed_dim, "num_heads": self.num_heads})
        return config






In [None]:
class Transformer(layers.Layer):
    """Sentiment classifier body: frozen pre-trained embeddings, a residual
    self-attention step, two bidirectional LSTMs, average pooling and a dense head.

    Args:
        maxlen: padded sequence length (kept for interface compatibility; the
            layers infer the length from their input).
        embed_dim: embedding width; must equal `embeddings_matrix.shape[1]`.
        vocab_size: tokenizer vocabulary size; the embedding table has
            `vocab_size + 1` rows.
        embeddings_matrix: array of shape (vocab_size + 1, embed_dim) holding
            the pre-trained vectors.
        num_heads: number of attention heads.
        **kwargs: forwarded to `layers.Layer`.

    Raises:
        ValueError: if `embeddings_matrix` does not have shape
            (vocab_size + 1, embed_dim).
    """

    def __init__(self, maxlen, embed_dim, vocab_size,embeddings_matrix,num_heads, **kwargs):
        super(Transformer, self).__init__(**kwargs)

        expected_shape = (vocab_size + 1, embed_dim)
        if tuple(embeddings_matrix.shape) != expected_shape:
            raise ValueError(
                f"embeddings_matrix has shape {tuple(embeddings_matrix.shape)}, expected {expected_shape}"
            )
        self.maxlen = maxlen

        # A constant initializer replaces the deprecated `weights=` / `input_length=` arguments.
        self.embed = tf.keras.layers.Embedding(
            vocab_size+1,
            embed_dim,
            embeddings_initializer=tf.keras.initializers.Constant(embeddings_matrix),
            trainable=False,
        )
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)

        # tf.keras.layers.LSTM replaces the GPU-only compat.v1 CuDNNLSTM so the model also builds on CPU.
        # The variable layout differs from CuDNNLSTM, so checkpoints saved with the old layer
        # would need converting (none are loaded in this notebook).
        self.lstm1 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128,return_sequences=True))
        self.lstm2 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128,return_sequences=True))
        self.dropout1 = tf.keras.layers.Dropout(0.2)
        self.dropout2 = tf.keras.layers.Dropout(0.2)
        self.dropout3 = tf.keras.layers.Dropout(0.2)
        self.dropout4 = tf.keras.layers.Dropout(0.4)
        # NOTE(review): dropout5 is created but never applied in call(); kept so the
        # attribute set is unchanged - confirm whether it was meant to follow dense1.
        self.dropout5 = tf.keras.layers.Dropout(0.5)
        self.dropout6 = tf.keras.layers.Dropout(0.4)
        self.pool     =  tf.keras.layers.GlobalAveragePooling1D()
        self.dense1 = tf.keras.layers.Dense(128, activation="relu")
        self.dense2 = tf.keras.layers.Dense(64, activation="relu")
        self.out = tf.keras.layers.Dense(2, activation="softmax")

    def call(self, inputs, training=None):
        """Map integer token ids of shape (batch, seq_len) to 2-class softmax probabilities.

        `training` is forwarded to the dropout layers so they are active only during training.
        """
        x = self.embed(inputs)
        x = self.dropout6(x, training=training)
        # Residual connection around the self-attention step.
        x = x + self.att(x)
        x = self.dropout1(x, training=training)
        x = self.lstm1(x)
        x = self.lstm2(x)
        x = self.dropout2(x, training=training)
        x = self.pool(x)
        x = self.dropout3(x, training=training)
        x = self.dense1(x)
        x = self.dropout3(x, training=training)  # same 0.2-rate layer applied a second time, as before
        x = self.dense2(x)
        x = self.dropout4(x, training=training)
        return self.out(x)

In [None]:
# Model hyper-parameters are derived from the preprocessing outputs instead of being hard-coded,
# so the embedding table always matches the matrix that is loaded into it.
vocab_size = embeddings_matrix.shape[0] - 1  # the matrix has vocab_size+1 rows
maxlen = max_length                          # padded sequence length used by pad_sequences
embed_dim = embeddings_matrix.shape[1]       # Embedding size for each token
num_heads = 10  # Number of attention heads (must divide embed_dim)

inputs = layers.Input(shape=(maxlen,))
transformer_block = Transformer(maxlen, embed_dim, vocab_size,embeddings_matrix,num_heads)
outputs = transformer_block(inputs)
model = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Training setup.
# - `reduce`: multiplies the learning rate by 0.1 when val_loss has not improved for 10 epochs.
# - `earlystopping`: stops when val_accuracy has not improved by at least 1e-4 for 10 epochs
#   and restores the best weights.
# NOTE(review): both callbacks use patience=10, so the learning-rate reduction may rarely
# take effect before training stops - confirm this is intended.
reduce = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, mode='auto')
earlystopping = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True,monitor='val_accuracy', min_delta=1e-4,mode='max')
# Integer class labels with a 2-way softmax output, hence the sparse categorical loss.
model.compile(optimizer="rmsprop", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
# 15% of the training data is held out for the validation metrics the callbacks monitor.
history = model.fit(train_padded, train_labels, batch_size=32, epochs=50, validation_split=0.15,shuffle=True,callbacks=[reduce,earlystopping])

In [None]:
# Test accuracy on the held-out split (val_padded / val_labels).
# `results` holds the values returned by model.evaluate for the compiled loss and the accuracy metric.
results = model.evaluate(x=val_padded,y=val_labels)
print('Test Set Performance: ',results)

## **Attack**

In [None]:
# Download the NLTK resources and build the English stop-word set.
# `stop_words` is used by evaluate_word_saliency to skip stop words when scoring word importance.
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('stopwords')
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize 
stop_words = set(stopwords.words('english'))
from absl import logging
from tqdm import tqdm
import tensorflow as tf

import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
import sklearn
# Load the Universal Sentence Encoder from TF Hub; the attack uses it to compare an
# original sentence with its perturbed version.
module_url = "https://tfhub.dev/google/universal-sentence-encoder-large/5" #@param ["https://tfhub.dev/google/universal-sentence-encoder/4", "https://tfhub.dev/google/universal-sentence-encoder-large/5"]
mode = hub.load(module_url)
print ("module %s loaded" % module_url)

def embed(input):
  """Return the loaded encoder's output for `input` (called with a list of sentences in this notebook)."""
  return mode(input)

In [None]:
# Load the word vectors used to propose replacement words (model1.most_similar in find_similar_words).
from gensim.models import KeyedVectors
# Alternative kept for reference: word2vec GoogleNews vectors instead of the fastText wiki-news vectors.
#model1 = KeyedVectors.load_word2vec_format('/content/drive/My Drive/GoogleNews-vectors-negative300.bin.gz (Unzipped Files)/GoogleNews-vectors-negative300.bin', binary=True)
!unzip -q '/content/drive/My Drive/mosi_data/fasttext_wiki.zip'
# The .vec file is in the textual word2vec format, hence binary=False.
model1 = KeyedVectors.load_word2vec_format('/content/wiki-news-300d-1M.vec', binary=False)

In [None]:
def evaluate_word_saliency(model,val_reviews,val_padded,val_labels):
    """Find, for each correctly classified review, the words the prediction depends on most.

    Each non-stop-word position is zeroed out in the padded token sequence in turn
    and the model is re-run. A word's importance is the drop in the score of the
    originally predicted class, plus the gain of the new class when the prediction
    flips. Only words with positive importance are kept.

    Args:
        model: classifier exposing `predict`, returning per-class scores.
        val_reviews: iterable of raw sentences.
        val_padded: 2-D array of padded token ids, one row per sentence.
        val_labels: true class per sentence (scalar or length-1 row).

    Returns:
        dict mapping the sentence index to its (at most 3) most important words,
        most important first. Misclassified sentences and sentences without any
        positive-importance word are absent.
    """
    candidates = {}
    seq_len = val_padded.shape[1]  # taken from the data instead of a hard-coded 581
    # Wrapping the iterable lets tqdm close the progress bar when the loop finishes.
    progress = tqdm(val_reviews, total=len(val_reviews), desc='Evaluating Word Saliency', position=0)
    for j, sent in enumerate(progress):
        words = sent.split()
        tokens = val_padded[j]
        # verbose=0 avoids one progress line per prediction.
        score = model.predict(tokens.reshape(1,-1), verbose=0)[0]
        pred = np.argmax(score)
        if pred != np.ravel(val_labels[j])[0]:
            continue  # only attack sentences the model gets right

        # Positions beyond the padded length were truncated and cannot be masked.
        usable = min(len(words), seq_len)
        mask = np.zeros(seq_len)
        mask[:usable] = 1
        saliency = {}
        # NOTE(review): this assumes the i-th whitespace-separated word is the i-th token
        # of the padded row; verify against the tokenizer's filtering.
        for i, word in enumerate(words[:usable]):
            if word in stop_words:
                continue
            mask[i] = 0
            score_new = model.predict((mask*tokens).reshape(1,-1), verbose=0)[0]
            mask[i] = 1
            pred_new = np.argmax(score_new)
            imp = score[pred] - score_new[pred]
            if pred_new != pred:
                imp += score_new[pred_new] - score[pred_new]
            if imp > 0:
                # A repeated word keeps its highest importance rather than the last one seen.
                saliency[word] = max(imp, saliency.get(word, imp))
        if saliency:
            ranked = sorted(saliency, key=saliency.get, reverse=True)
            candidates[j] = ranked[:3]

    return candidates

In [None]:
def find_similar_words(model,val_reviews,val_padded,val_labels,candidates,model1,embed):
    """Propose replacement words for the salient words of each candidate sentence.

    For every salient word, the 50 nearest neighbours from `model1` are tried one
    at a time. A neighbour is kept when it flips the model's prediction and the
    perturbed sentence stays similar to the original (cosine similarity of the
    `embed` vectors above 0.6). In addition, the non-flipping neighbour that
    lowers the original class score the most is kept if it passes the same
    similarity check.

    Args:
        model: classifier exposing `predict`.
        val_reviews: iterable of raw sentences.
        val_padded: 2-D array of padded token ids, one row per sentence.
        val_labels: unused; kept for interface compatibility.
        candidates: dict {sentence index: list of salient words}.
        model1: word-vector model exposing `most_similar(word, topn=...)`.
        embed: callable mapping a list of sentences to their embeddings.

    Returns:
        dict {sentence index: {salient word: [replacement words]}}; sentences and
        words without any accepted replacement are absent.
    """
    # Imported explicitly: `import sklearn` alone does not guarantee the metrics submodule is loaded.
    from sklearn.metrics.pairwise import cosine_similarity

    seq_len = val_padded.shape[1]  # taken from the data instead of a hard-coded 581

    def similarity(changed, reference):
        # Cosine similarity between the embeddings of two tokenized sentences.
        vectors = embed([" ".join(changed), " ".join(reference)])
        first = np.array(vectors[0]).reshape(1,-1)
        second = np.array(vectors[1]).reshape(1,-1)
        return cosine_similarity(first, second)[0][0]

    new_candidates = {}

    outer = tqdm(total=len(candidates), desc='Finding Similar Words', position=0)
    for j,sent in enumerate(val_reviews):
        if j not in candidates:
            continue
        outer.update(1)
        sent = sent.split()
        dic = {}
        score = model.predict(val_padded[j].reshape(1,-1), verbose=0)[0]
        pred = np.argmax(score)
        for i,word in enumerate(sent):
            if word not in candidates[j]:
                continue
            try:
                neighbours = model1.most_similar(word,topn=50)
            except KeyError:
                # The word is not in the word-vector vocabulary. Skip it instead of
                # silently reusing the neighbour list of the previous word.
                continue

            accepted = []
            # Replacements are lower-cased, so track what was tried to avoid
            # re-testing duplicates and the word itself in a different case.
            tried = {word.lower()}
            mini = float('inf')
            we = None
            for neighbour, _ in neighbours:
                replacement = neighbour.lower()
                if replacement in tried:
                    continue
                tried.add(replacement)
                arr = sent[:]
                arr[i] = replacement
                k = tokenizer.texts_to_sequences([" ".join(arr)])
                inp = pad_sequences(k, maxlen=seq_len, padding='post', truncating='post')
                score_new = model.predict(inp, verbose=0)[0]
                if np.argmax(score_new) != pred:
                    if similarity(arr, sent) > 0.6:
                        accepted.append(replacement)
                elif score_new[pred] < mini:
                    # Remember the non-flipping replacement that hurts the original class most.
                    mini = score_new[pred]
                    we = replacement
            if we:
                arr = sent[:]
                arr[i] = we
                if similarity(arr, sent) > 0.6:
                    accepted.append(we)
            if accepted:
                dic[word] = accepted
        if dic:
            new_candidates[j] = dic
    outer.close()

    return new_candidates

In [None]:
def generate_adversary(model,val_reviews,val_padded,val_labels,new_candidates,embed):
    """Build one adversarial sentence per entry of `new_candidates`.

    For each sentence index present in `new_candidates`, the replacement words
    proposed for its salient words are substituted into a working copy (`arr`)
    and the model is re-run. The search for that sentence stops at the first
    replacement that passes the similarity check after either flipping the
    prediction or changing the original class score by more than 0.4.
    Otherwise the replacement giving the lowest original-class score is kept
    for that position and the search moves on to the next salient word.

    Relies on the module-level `tokenizer`, `pad_sequences`, `sklearn`, `np` and `tqdm`.
    `val_labels` is not used.

    Returns:
        A list with one sentence per element of `val_reviews`: the adversarial
        sentence where one was generated, the original sentence otherwise.
    """
    adversary = {}

    # NOTE(review): the progress-bar label says 'Training' although nothing is trained here.
    outer = tqdm(total=len(new_candidates), desc='Training', position=0)
    for j,sent in enumerate(val_reviews):
      if j in new_candidates:
        outer.update(1)
        sent = sent.split()
        score = model.predict(val_padded[j].reshape(1,-1))[0]
        pred = np.argmax(score)
        a = []  # never read afterwards
        arr = sent[:]  # working copy that accumulates substitutions
        flag = 0  # set to 1 once a replacement has been accepted for this sentence
        for i,word in enumerate(sent):
          if word in new_candidates[j]:
            
            mini = float('inf')
            we = None
            
            for w in new_candidates[j][word]:
              if w:
                s = arr[i]
                arr[i] = w.lower()
                # The model is evaluated on the cumulative copy `arr`...
                k = tokenizer.texts_to_sequences([" ".join(arr)])
                # NOTE(review): 581 duplicates the padded length used during preprocessing.
                inp = pad_sequences(k, maxlen=581, padding='post', truncating='post')
                score_new = model.predict(inp)[0]
                pred_new = np.argmax(score_new)
                if pred_new !=pred:
                  # ...while similarity is measured on `arr1`, the original sentence with only this one substitution.
                  arr1 = sent[:]
                  arr1[i] = w.lower()
                  message_embeddings = embed([" ".join(arr1)," ".join(sent)])
                  score1 =sklearn.metrics.pairwise.cosine_similarity(np.array(message_embeddings[0]).reshape(1,-1),np.array(message_embeddings[1]).reshape(1,-1))[0][0]
                  if score1>0.6:
                      # Accept: earlier substitutions are discarded, only this one is kept.
                      # NOTE(review): the flip was observed on the cumulative sentence, not on this
                      # single-substitution sentence - confirm the result still flips the prediction.
                      arr = sent[:]
                      arr[i] = w.lower()
                      flag=1
                      break
                  # NOTE(review): when the similarity check fails, arr[i] is not restored to `s`.
                else:
                    
                    # Prediction unchanged: accept anyway if the original class score moved by more than 0.4.
                    if  abs(score[pred] - score_new[pred])>0.4:
                        arr1 = sent[:]
                        arr1[i] = w.lower()
                        message_embeddings = embed([" ".join(arr1)," ".join(sent)])
                        score1 =sklearn.metrics.pairwise.cosine_similarity(np.array(message_embeddings[0]).reshape(1,-1),np.array(message_embeddings[1]).reshape(1,-1))[0][0]
                        if score1>0.6:
                              arr = sent[:]
                              arr[i] = w.lower()
                              flag = 1
                              break
                        # NOTE(review): as above, arr[i] is not restored when the similarity check fails.
                    else:
                      # Undo the trial substitution and remember the replacement that
                      # lowers the original class score the most.
                      arr[i] = s
                      if score_new[pred]<mini:
                          mini = score_new[pred]
                          we = w.lower()
            if flag:
              break
            if we:
              # No replacement was accepted outright: keep the most damaging one and continue.
              arr[i] = we
        adversary[j] = " ".join(arr)

    adver = []

    # Sentences without an adversarial version are passed through unchanged.
    for i in range(len(val_reviews)):
      if i in adversary:
        adver.append(adversary[i])
      else:
        adver.append(val_reviews[i])

    return adver

In [None]:
# Run the attack pipeline on the held-out split:
# 1) pick the most salient words per sentence, 2) collect replacement words for them,
# 3) build one adversarial sentence per attackable review (other sentences are returned unchanged).
candidates = evaluate_word_saliency(model,val_reviews,val_padded,val_labels)
new_candidates = find_similar_words(model,val_reviews,val_padded,val_labels,candidates,model1,embed)
adversary = generate_adversary(model,val_reviews,val_padded,val_labels,new_candidates,embed)

In [None]:
# Tokenize and pad the adversarial sentences with the same settings as the clean data
# (max_length / padding_type / trunc_type from the preprocessing cell) instead of repeated literals.
val_seq = tokenizer.texts_to_sequences(adversary)
val_pad = pad_sequences(val_seq, maxlen=max_length, padding=padding_type, truncating=trunc_type)

In [None]:
# Baseline: performance on the clean held-out sentences, for comparison with the attacked ones.
results = model.evaluate(x=val_padded,y=val_labels)
print('Test Set Performance: ',results)

In [None]:
# Performance on the adversarial sentences (val_pad) against the unchanged true labels.
results = model.evaluate(x=val_pad,y=val_labels)
# The comma between the message and `results` was missing, which made this cell a SyntaxError.
print('Test Set Performance after Attack: ',results)