We created a separate notebook for each of our experiments. This is a cleaned-up version containing all the cells needed to run all of our experiments; for each experiment we essentially copied and pasted these cells into a different notebook and changed the "data", "size" and "p" parameters.

Note that although we included code in the GitHub repository to run experiments from the command line, we ended up using Colab so that we could examine the augmented data and see progress (e.g. time until dataset augmentation is completed). We only verified that the Colab method of carrying out experiments runs correctly.

## Setup

In [None]:
## Mount to drive
# Makes Google Drive available under /content/drive. Later cells change into
# the project sources below this mount point and export trained models to it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
## requirements
## run this cell and restart runtime
# Installs/upgrades the packages used by the experiments and downloads the
# NLTK 'stopwords' and 'wordnet' resources.
!pip install -Uqq fastai fastbook
!pip install nltk
import nltk  # imported only after the install line above has run
nltk.download('stopwords')
!pip install "pandas>=1.2.0"
!pip install nlpaug
nltk.download('wordnet')

In [None]:
# Work from the project's src/ directory so the local `utils` and `data`
# modules below can be imported.
%cd /content/drive/MyDrive/COMP550/Final Project (Git)/src/

# Star imports: the names used throughout the notebook (get_dataset, the
# augmentation functions, TextDataLoaders, pd, ...) are expected to come from
# these modules.
# NOTE(review): which module provides which name is not visible here - confirm.
from utils import *
from data import *
from fastai.text.all import*

# Fix the random seeds before any augmentation or training is run.
# NOTE(review): set_seed is presumably the fastai helper brought in by the star
# import above, with True requesting reproducible behaviour - confirm.
import random
set_seed(10, True)
random.seed(10)

# Individual DA Techniques


In [None]:
# Experiment configuration read by every augmentation cell in this section.
#   data: 'imdb', or 'amz' (amazon), 'agnews', 'yelp', 'yahoo', 'sogou'
#   size: 's', or 'm' (medium), 'l' (large)
#   p:    DA probability handed to the augmentation functions
data, size, p = 'imdb', 's', 0.2

## Random Swap

In [None]:
#augment
# Build a second copy of the training set in which every text has gone
# through random_swap with probability parameter p.
train_df, test_df = get_dataset(data, size)
swapped = train_df.copy()
swapped['text'] = swapped['text'].apply(lambda row: random_swap(row, p))
# Original rows followed by augmented rows; exact duplicate rows are dropped.
train_augmented = pd.concat([train_df, swapped]).drop_duplicates()

# Language-model data loaders over the augmented text (labels are not used here).
train_lm = TextDataLoaders.from_df(train_augmented, text_col='text', is_lm=True)
train_lm.show_batch()

# train
# Stage 1: fine-tune the AWD_LSTM language model on the augmented corpus.
learn = language_model_learner(train_lm, AWD_LSTM, metrics=[accuracy, Perplexity()], wd=0.1).to_fp16()

learn.fit_one_cycle(1, 1e-2)
learn.unfreeze()
learn.fit_one_cycle(5, 1e-3)

# Keep the fine-tuned encoder so the classifier below can load it.
learn.save_encoder('finetuned')

# Stage 2: classifier data loaders that reuse the language model's vocabulary.
train_class = TextDataLoaders.from_df(train_augmented, text_col='text', label_col='label' ,text_vocab=train_lm.vocab)
train_class.show_batch()

learn = text_classifier_learner(train_class, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
learn = learn.load_encoder('finetuned')

# Gradual unfreezing: one cycle after freeze_to(-2), one after freeze_to(-3),
# each with a sliced learning-rate range.
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2/(2.6**4),1e-2))

learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(5e-3/(2.6**4),5e-3))

#test
# Index 1 of validate()'s result is taken as the accuracy on the test set.
test_dl = learn.dls.test_dl(test_df, with_labels=True)
acc = learn.validate(dl = test_dl)[1]
print(acc)

#save trained model
# The file name is built from the active configuration so the exported model
# is labelled with the dataset, size and probability it was trained with,
# rather than a fixed name that would mislabel and overwrite other runs.
learn.export(fname=f'/content/drive/MyDrive/COMP550/Final Project/trained_models/{data}_{size}_swapped_{p}.pkl')

## Random Deletion

In [None]:
#augment
# Build a second copy of the training set in which every text has gone
# through random_deletion with probability parameter p.
train_df, test_df = get_dataset(data, size)
deleted = train_df.copy()
deleted['text'] = deleted['text'].apply(lambda row: random_deletion(row, p))
# Original rows followed by augmented rows. Exact duplicate rows (e.g. texts
# the augmentation left unchanged) are dropped, matching the random-swap,
# synthetic-noise and EDA cells and the probability sweep in "Getting Plots".
train_augmented = pd.concat([train_df, deleted]).drop_duplicates()

# Language-model data loaders over the augmented text (labels are not used here).
train_lm = TextDataLoaders.from_df(train_augmented, text_col='text', is_lm=True)
train_lm.show_batch()

#train
# Stage 1: fine-tune the AWD_LSTM language model on the augmented corpus.
learn = language_model_learner(train_lm, AWD_LSTM, metrics=[accuracy, Perplexity()], wd=0.1).to_fp16()

learn.fit_one_cycle(1, 1e-2)
learn.unfreeze()
learn.fit_one_cycle(5, 1e-3)

# Keep the fine-tuned encoder so the classifier below can load it.
learn.save_encoder('finetuned')

# Stage 2: classifier data loaders that reuse the language model's vocabulary.
train_class = TextDataLoaders.from_df(train_augmented, text_col='text', label_col='label' ,text_vocab=train_lm.vocab)
train_class.show_batch()

learn = text_classifier_learner(train_class, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
learn = learn.load_encoder('finetuned')

# Gradual unfreezing: one cycle after freeze_to(-2), one after freeze_to(-3).
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2/(2.6**4),1e-2))

learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(5e-3/(2.6**4),5e-3))

#test
# Index 1 of validate()'s result is taken as the accuracy on the test set.
test_dl = learn.dls.test_dl(test_df, with_labels=True)
acc = learn.validate(dl = test_dl)[1]
print(acc)

## Random Insertion

In [None]:
# augment
# Build a second copy of the training set in which every text has gone
# through random_insertion with probability parameter p.
train_df, test_df = get_dataset(data, size)
randin = train_df.copy()
randin['text'] = randin['text'].apply(lambda row: random_insertion(row, p))
# Original rows followed by augmented rows. Exact duplicate rows are dropped,
# matching the random-swap, synthetic-noise and EDA cells and the probability
# sweep in "Getting Plots".
train_augmented = pd.concat([train_df, randin]).drop_duplicates()

# Language-model data loaders over the augmented text (labels are not used here).
train_lm = TextDataLoaders.from_df(train_augmented, text_col='text', is_lm=True)
train_lm.show_batch()

# train
# Stage 1: fine-tune the AWD_LSTM language model on the augmented corpus.
learn = language_model_learner(train_lm, AWD_LSTM, metrics=[accuracy, Perplexity()], wd=0.1).to_fp16()

learn.fit_one_cycle(1, 1e-2)
learn.unfreeze()
learn.fit_one_cycle(5, 1e-3)

# Keep the fine-tuned encoder so the classifier below can load it.
learn.save_encoder('finetuned')

# Stage 2: classifier data loaders that reuse the language model's vocabulary.
train_class = TextDataLoaders.from_df(train_augmented, text_col='text', label_col='label' ,text_vocab=train_lm.vocab)
train_class.show_batch()

learn = text_classifier_learner(train_class, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
learn = learn.load_encoder('finetuned')

# Gradual unfreezing: one cycle after freeze_to(-2), one after freeze_to(-3).
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2/(2.6**4),1e-2))

learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(5e-3/(2.6**4),5e-3))

# test
# Index 1 of validate()'s result is taken as the accuracy on the test set.
test_dl = learn.dls.test_dl(test_df, with_labels=True)
acc = learn.validate(dl = test_dl)[1]
print(acc)

## Synonym Replacement

In [None]:
# augment
# Build a second copy of the training set in which every text has gone
# through synonym_replacement with probability parameter p.
train_df, test_df = get_dataset(data, size)
synrep = train_df.copy()
synrep['text'] = synrep['text'].apply(lambda row: synonym_replacement(row, p))
# Original rows followed by augmented rows. Exact duplicate rows are dropped,
# matching the random-swap, synthetic-noise and EDA cells and the probability
# sweep in "Getting Plots".
train_augmented = pd.concat([train_df, synrep]).drop_duplicates()

# Language-model data loaders over the augmented text (labels are not used here).
train_lm = TextDataLoaders.from_df(train_augmented, text_col='text', is_lm=True)
train_lm.show_batch()

# train
# Stage 1: fine-tune the AWD_LSTM language model on the augmented corpus.
learn = language_model_learner(train_lm, AWD_LSTM, metrics=[accuracy, Perplexity()], wd=0.1).to_fp16()

learn.fit_one_cycle(1, 1e-2)
learn.unfreeze()
learn.fit_one_cycle(5, 1e-3)

# Keep the fine-tuned encoder so the classifier below can load it.
learn.save_encoder('finetuned')

# Stage 2: classifier data loaders that reuse the language model's vocabulary.
train_class = TextDataLoaders.from_df(train_augmented, text_col='text', label_col='label' ,text_vocab=train_lm.vocab)
train_class.show_batch()

learn = text_classifier_learner(train_class, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
learn = learn.load_encoder('finetuned')

# Gradual unfreezing: one cycle after freeze_to(-2), one after freeze_to(-3).
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2/(2.6**4),1e-2))

learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(5e-3/(2.6**4),5e-3))

# test
# Index 1 of validate()'s result is taken as the accuracy on the test set.
test_dl = learn.dls.test_dl(test_df, with_labels=True)
acc = learn.validate(dl = test_dl)[1]
print(acc)

## Synthetic Noise

In [None]:
# --- augment: one synthetic-noise variant per training text ---
train_df, test_df = get_dataset(data, size)
synnoise = train_df.copy()
synnoise['text'] = [syntheticnoise(text, p) for text in train_df['text']]
# originals first, noisy copies after; exact duplicate rows removed
train_augmented = pd.concat([train_df, synnoise]).drop_duplicates()

# --- stage 1: language-model fine-tuning on the augmented text ---
train_lm = TextDataLoaders.from_df(train_augmented, text_col='text', is_lm=True)
train_lm.show_batch()

learn = language_model_learner(
    train_lm,
    AWD_LSTM,
    metrics=[accuracy, Perplexity()],
    wd=0.1,
).to_fp16()
learn.fit_one_cycle(1, 1e-2)
learn.unfreeze()
learn.fit_one_cycle(5, 1e-3)
learn.save_encoder('finetuned')  # reloaded by the classifier below

# --- stage 2: classifier sharing the language model's vocabulary ---
train_class = TextDataLoaders.from_df(
    train_augmented,
    text_col='text',
    label_col='label',
    text_vocab=train_lm.vocab,
)
train_class.show_batch()

learn = text_classifier_learner(train_class, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
learn = learn.load_encoder('finetuned')

# gradual unfreezing, one cycle per step
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2 / 2.6**4, 1e-2))
learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(5e-3 / 2.6**4, 5e-3))

# --- test: entry 1 of validate()'s result is reported as accuracy ---
test_dl = learn.dls.test_dl(test_df, with_labels=True)
acc = learn.validate(dl=test_dl)[1]
print(acc)

## EDA

In [None]:
# --- augment: EDA applies one randomly chosen operation to each text ---
train_df, test_df = get_dataset(data, size)
# Same order as the "del" / "ins" / "swap" / "syn" choices.
eda_ops = [random_deletion, random_insertion, random_swap, synonym_replacement]
train_copy = train_df.assign(
    text=[random.choice(eda_ops)(text, p) for text in train_df['text']]
)
# originals first, augmented copies after; exact duplicate rows removed
train_augmented = pd.concat([train_df, train_copy]).drop_duplicates()

# --- stage 1: language-model fine-tuning on the augmented text ---
train_lm = TextDataLoaders.from_df(train_augmented, text_col='text', is_lm=True)
train_lm.show_batch()

learn = language_model_learner(
    train_lm,
    AWD_LSTM,
    metrics=[accuracy, Perplexity()],
    wd=0.1,
).to_fp16()
learn.fit_one_cycle(1, 1e-2)
learn.unfreeze()
learn.fit_one_cycle(5, 1e-3)
learn.save_encoder('finetuned')  # reloaded by the classifier below

# --- stage 2: classifier sharing the language model's vocabulary ---
train_class = TextDataLoaders.from_df(
    train_augmented,
    text_col='text',
    label_col='label',
    text_vocab=train_lm.vocab,
)
train_class.show_batch()

learn = text_classifier_learner(train_class, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
learn = learn.load_encoder('finetuned')

# gradual unfreezing, one cycle per step
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2 / 2.6**4, 1e-2))
learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(5e-3 / 2.6**4, 5e-3))

# --- test: entry 1 of validate()'s result is reported as accuracy ---
test_dl = learn.dls.test_dl(test_df, with_labels=True)
acc = learn.validate(dl=test_dl)[1]
print(acc)

## Contextual Word Embedding

In [None]:
# Dependencies for the model-based augmentations; compared with the Setup cell
# this additionally installs pickle5 and transformers.
!pip install -Uqq fastai fastbook
!pip install nltk nlpaug pickle5 transformers
!pip install "pandas>=1.2.0"
from tqdm import tqdm
from utils import Language, backtranslation, contextual_word_embeddings
# Enables the progress_apply call used by the two cells that follow.
tqdm.pandas()

In [None]:
# augment
# Load the data in this cell (like every other experiment cell) instead of
# relying on train_df / test_df left over from whichever cell ran before.
train_df, test_df = get_dataset(data, size)
augmentation = train_df.copy()
# progress_apply needs tqdm.pandas() to have been called (see the cell above).
augmentation['text'] = augmentation.progress_apply(lambda row: contextual_word_embeddings(row['text']), axis=1)
# Train on original + augmented rows. Using train_df alone here would silently
# discard the augmentation and reduce this experiment to the baseline.
train_augmented = pd.concat([train_df, augmentation]).drop_duplicates()

# Language-model data loaders over the augmented text (labels are not used here).
train_lm = TextDataLoaders.from_df(train_augmented, text_col='text', is_lm=True)
train_lm.show_batch()

# train
# Stage 1: fine-tune the AWD_LSTM language model on the augmented corpus.
learn = language_model_learner(train_lm, AWD_LSTM, metrics=[accuracy, Perplexity()], wd=0.1).to_fp16()

learn.fit_one_cycle(1, 1e-2)
learn.unfreeze()
learn.fit_one_cycle(5, 1e-3)

# Keep the fine-tuned encoder so the classifier below can load it.
learn.save_encoder('finetuned')

# Stage 2: classifier data loaders that reuse the language model's vocabulary.
train_class = TextDataLoaders.from_df(train_augmented, text_col='text', label_col='label' ,text_vocab=train_lm.vocab)
train_class.show_batch()

learn = text_classifier_learner(train_class, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
learn = learn.load_encoder('finetuned')

# Gradual unfreezing: one cycle after freeze_to(-2), one after freeze_to(-3).
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2/(2.6**4),1e-2))

learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(5e-3/(2.6**4),5e-3))

# test
# Index 1 of validate()'s result is taken as the accuracy on the test set.
test_dl = learn.dls.test_dl(test_df, with_labels=True)

acc = learn.validate(dl = test_dl)[1]
print(acc)

## Backtranslation

In [None]:
# augment
# Load the data in this cell (like every other experiment cell) instead of
# relying on train_df / test_df left over from whichever cell ran before.
train_df, test_df = get_dataset(data, size)
augmentation = train_df.copy()
# progress_apply needs tqdm.pandas() to have been called beforehand.
augmentation['text'] = augmentation.progress_apply(lambda row: backtranslation(row['text'], Language.German), axis=1)
# Train on original + augmented rows. Using train_df alone here would silently
# discard the augmentation and reduce this experiment to the baseline.
train_augmented = pd.concat([train_df, augmentation]).drop_duplicates()

# Language-model data loaders over the augmented text (labels are not used here).
train_lm = TextDataLoaders.from_df(train_augmented, text_col='text', is_lm=True)
train_lm.show_batch()

# train
# Stage 1: fine-tune the AWD_LSTM language model on the augmented corpus.
learn = language_model_learner(train_lm, AWD_LSTM, metrics=[accuracy, Perplexity()], wd=0.1).to_fp16()

learn.fit_one_cycle(1, 1e-2)
learn.unfreeze()
learn.fit_one_cycle(5, 1e-3)

# Keep the fine-tuned encoder so the classifier below can load it.
learn.save_encoder('finetuned')

# Stage 2: classifier data loaders that reuse the language model's vocabulary.
train_class = TextDataLoaders.from_df(train_augmented, text_col='text', label_col='label' ,text_vocab=train_lm.vocab)
train_class.show_batch()

learn = text_classifier_learner(train_class, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
learn = learn.load_encoder('finetuned')

# Gradual unfreezing: one cycle after freeze_to(-2), one after freeze_to(-3).
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2/(2.6**4),1e-2))

learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(5e-3/(2.6**4),5e-3))

# test
# Index 1 of validate()'s result is taken as the accuracy on the test set.
test_dl = learn.dls.test_dl(test_df, with_labels=True)

acc = learn.validate(dl = test_dl)[1]
print(acc)

# DA Combinations

In [None]:
# Configuration for the combination experiments below:
# dataset name, dataset size and DA probability.
data, size, p = 'agnews', 's', 0.2

## Combinations that augment an augmented dataset

This results in a training set double the original size. The example we give adds synthetic noise on top of a copy of the dataset that has already been augmented by EDA.

In [None]:
train_df, test_df = get_dataset(data, size)

# first augmentation here. The example given is EDA: one randomly chosen
# operation per text (same order as the "del" / "ins" / "swap" / "syn" choices)
eda_ops = [random_deletion, random_insertion, random_swap, synonym_replacement]
augmented = [random.choice(eda_ops)(text, p) for text in train_df['text']]

# second augmentation here, applied to the output of the first.
# The example given is synthetic noise
train_copy = train_df.assign(text=[syntheticnoise(text, p) for text in augmented])

# originals first, doubly-augmented copies after; exact duplicate rows removed
train_augmented = pd.concat([train_df, train_copy]).drop_duplicates()

# --- stage 1: language-model fine-tuning on the augmented text ---
train_lm = TextDataLoaders.from_df(train_augmented, text_col='text', is_lm=True)
train_lm.show_batch()

learn = language_model_learner(
    train_lm,
    AWD_LSTM,
    metrics=[accuracy, Perplexity()],
    wd=0.1,
).to_fp16()
learn.fit_one_cycle(1, 1e-2)
learn.unfreeze()
learn.fit_one_cycle(5, 1e-3)
learn.save_encoder('finetuned')  # reloaded by the classifier below

# --- stage 2: classifier sharing the language model's vocabulary ---
train_class = TextDataLoaders.from_df(
    train_augmented,
    text_col='text',
    label_col='label',
    text_vocab=train_lm.vocab,
)
train_class.show_batch()

learn = text_classifier_learner(train_class, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
learn = learn.load_encoder('finetuned')

# gradual unfreezing, one cycle per step
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2 / 2.6**4, 1e-2))
learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(5e-3 / 2.6**4, 5e-3))

# --- test: entry 1 of validate()'s result is reported as accuracy ---
test_dl = learn.dls.test_dl(test_df, with_labels=True)
acc = learn.validate(dl=test_dl)[1]
print(acc)

## Combinations that augment the training set twice using 2 DA techniques
and combine the original data with both augmented copies, so that the augmented training set is triple the original size. The example given is again EDA and noise.

In [None]:
train_df, test_df = get_dataset(data, size)

# augmentation set 1: EDA, one randomly chosen operation per text
# (same order as the "del" / "ins" / "swap" / "syn" choices)
eda_ops = [random_deletion, random_insertion, random_swap, synonym_replacement]
train_copy = train_df.assign(
    text=[random.choice(eda_ops)(text, p) for text in train_df['text']]
)

# original training data + augmentation set 1
first_augment = pd.concat([train_df, train_copy]).drop_duplicates()

# augmentation set 2: synthetic noise applied to the original texts
synnoise = train_df.copy()
synnoise['text'] = [syntheticnoise(text, p) for text in train_df['text']]

# total: (original + augmentation 1) + augmentation 2, duplicates removed
train_augmented = pd.concat([first_augment, synnoise]).drop_duplicates()

# --- stage 1: language-model fine-tuning on the augmented text ---
train_lm = TextDataLoaders.from_df(train_augmented, text_col='text', is_lm=True)
train_lm.show_batch()

learn = language_model_learner(
    train_lm,
    AWD_LSTM,
    metrics=[accuracy, Perplexity()],
    wd=0.1,
).to_fp16()
learn.fit_one_cycle(1, 1e-2)
learn.unfreeze()
learn.fit_one_cycle(5, 1e-3)
learn.save_encoder('finetuned')  # reloaded by the classifier below

# --- stage 2: classifier sharing the language model's vocabulary ---
train_class = TextDataLoaders.from_df(
    train_augmented,
    text_col='text',
    label_col='label',
    text_vocab=train_lm.vocab,
)
train_class.show_batch()

learn = text_classifier_learner(train_class, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
learn = learn.load_encoder('finetuned')

# gradual unfreezing, one cycle per step
learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(1e-2 / 2.6**4, 1e-2))
learn.freeze_to(-3)
learn.fit_one_cycle(1, slice(5e-3 / 2.6**4, 5e-3))

# --- test: entry 1 of validate()'s result is reported as accuracy ---
test_dl = learn.dls.test_dl(test_df, with_labels=True)
acc = learn.validate(dl=test_dl)[1]
print(acc)

# Getting Plots

In [None]:
def _eda_augment(row, p):
  """Apply one randomly chosen EDA operation to a single text.

  Args:
    row: the text to augment.
    p: DA probability forwarded to the chosen augmentation function.

  Returns:
    The result of random_deletion, random_insertion, random_swap or
    synonym_replacement applied to `row`.
  """
  choice = random.choice(["del", "ins", "swap", "syn"])
  if choice == "del":
    return random_deletion(row, p)
  if choice == "ins":
    return random_insertion(row, p)
  if choice == "swap":
    return random_swap(row, p)
  return synonym_replacement(row, p)


def _train_and_score(train_augmented, test_df):
  """Fine-tune a language model, train a classifier on it and score it.

  Args:
    train_augmented: DataFrame with 'text' and 'label' columns used for both
      the language model and the classifier.
    test_df: DataFrame the finished classifier is validated on.

  Returns:
    Entry 1 of learn.validate(...) on the test data, which this notebook
    records as the accuracy.
  """
  # Stage 1: language-model fine-tuning on the augmented text.
  train_lm = TextDataLoaders.from_df(train_augmented, text_col='text', is_lm=True)
  train_lm.show_batch()
  learn = language_model_learner(train_lm, AWD_LSTM, metrics=[accuracy, Perplexity()], wd=0.1).to_fp16()
  learn.fit_one_cycle(1, 1e-2)
  learn.unfreeze()
  learn.fit_one_cycle(5, 1e-3)
  learn.save_encoder('finetuned')

  # Stage 2: classifier that reuses the language model's vocabulary and encoder.
  train_class = TextDataLoaders.from_df(train_augmented, text_col='text', label_col='label' ,text_vocab=train_lm.vocab)
  train_class.show_batch()
  learn = text_classifier_learner(train_class, AWD_LSTM, drop_mult=0.5, metrics=accuracy)
  learn = learn.load_encoder('finetuned')
  learn.freeze_to(-2)
  learn.fit_one_cycle(1, slice(1e-2/(2.6**4),1e-2))
  learn.freeze_to(-3)
  learn.fit_one_cycle(1, slice(5e-3/(2.6**4),5e-3))

  test_dl = learn.dls.test_dl(test_df, with_labels=True)
  return learn.validate(dl = test_dl)[1]


# One accuracy list per technique, filled in the same order as `probs`.
ranswap_accs = []
randel_accs = []
randin_accs = []
synrep_accs = []
eda_accs = []
probs = [0.1, 0.2, 0.3, 0.4, 0.5]

# The sweep is always run on the small IMDB dataset.
data = 'imdb'
size = 's'

# (augmentation function, list receiving its accuracies), in run order.
techniques = [
  (random_swap, ranswap_accs),
  (random_deletion, randel_accs),
  (random_insertion, randin_accs),
  # Each technique trains on its own augmented copy; in particular synonym
  # replacement no longer reuses the random-insertion frame by mistake.
  (synonym_replacement, synrep_accs),
  (_eda_augment, eda_accs),
]

for p in probs:
  for augment, accs in techniques:
    # The dataset is re-loaded for every run, as in the individual cells.
    train_df, test_df = get_dataset(data, size)
    augmented_df = train_df.copy()
    augmented_df['text'] = augmented_df['text'].apply(lambda row: augment(row, p))
    train_augmented = pd.concat([train_df, augmented_df]).drop_duplicates()
    accs.append(_train_and_score(train_augmented, test_df))

In [None]:
import matplotlib.pyplot as plt

# One accuracy-vs-probability curve per technique, drawn in legend order.
curves = [
    (randel_accs, 'Random Deletion'),
    (randin_accs, 'Random Insertion'),
    (ranswap_accs, 'Random Swap'),
    (synrep_accs, 'Synonym Replacement'),
    (eda_accs, 'EDA'),
]
for accs, label in curves:
    plt.plot(probs, accs, label=label)

plt.xlabel('DA probability p')
plt.ylabel('Accuracy')
plt.title('IMDB S Accuracies for Individual DA with Various Probabilities')
plt.legend()