# Training Sequence Classifier Head

In [None]:
from adaptnlp import EasyDocumentEmbeddings, SequenceClassifierTrainer
from flair.datasets import TREC_6

### 1. Initialize corpus, output directory for the model, and document embeddings

In [None]:
# Directory that receives the learning-rate log and the trained model files.
OUTPUT_DIR = "Path/to/model/output/directory"

# TREC_6 comes from flair.datasets; a path to a directory containing
# train.csv, test.csv and dev.csv ("Path/to/data/directory") can be used instead.
corpus = TREC_6()

# Document embeddings built on "bert-base-cased" with the "rnn" method.
doc_embeddings = EasyDocumentEmbeddings("bert-base-cased", methods=["rnn"])

### 2. Initialize Sequence Classification Trainer

In [None]:
# Keyword arguments for the trainer, gathered in one place so they are easy to tweak.
sc_configs = dict(
    corpus=corpus,
    encoder=doc_embeddings,
    # presumably only consulted when `corpus` is a CSV directory — TODO confirm
    column_name_map={0: "text", 1: "label"},
    corpus_in_memory=True,
    predictive_head="flair",
)

# Build the trainer from the settings above.
sc_trainer = SequenceClassifierTrainer(**sc_configs)

### 3. Find Learning Rate with automated LR finder

In [None]:
# Settings for the learning-rate search: the range swept, the number of
# iterations, and where the results file is written.
sc_lr_configs = dict(
    output_dir=OUTPUT_DIR,
    file_name="learning_rate.tsv",
    start_learning_rate=1e-8,
    end_learning_rate=10,
    iterations=100,
    mini_batch_size=32,
    stop_early=True,
    smoothing_factor=0.8,
    plot_learning_rate=True,
)

# The value returned here is fed into the training step as its learning rate.
learning_rate = sc_trainer.find_learning_rate(**sc_lr_configs)

### 4. Train Sequence Classifier

In [None]:
# Training settings; `learning_rate` is the value produced by the LR finder.
sc_train_configs = dict(
    output_dir=OUTPUT_DIR,
    learning_rate=learning_rate,
    mini_batch_size=32,
    anneal_factor=0.5,
    patience=5,
    max_epochs=150,
    plot_weights=False,
    batch_growth_annealing=False,
)

# Run training with the settings above.
sc_trainer.train(**sc_train_configs)

### 5. Load and Predict

In [None]:
from pathlib import Path

from adaptnlp import EasySequenceClassifier

# Example question to classify with the model trained above.
example_text = '''Where was the Queen's wedding held? '''

classifier = EasySequenceClassifier()

# Load the model from the directory this notebook trained into, rather than a
# hard-coded path inside another tutorial's resources folder.
# NOTE(review): assumes training leaves "final-model.pt" in OUTPUT_DIR
# (flair's usual final-model file name) — confirm against the trainer output.
model_path = Path(OUTPUT_DIR) / "final-model.pt"

sentences = classifier.tag_text(example_text, model_name_or_path=str(model_path))
print("Label output:\n")
for sentence in sentences:
    print(sentence.labels)

# Training Sequence Classifier on Finetuned Model

In [None]:
# Output location for this run, and the directory holding the fine-tuned model.
OUTPUT_DIR = "Path/to/model/output/directory"
FINETUNED_MODEL_DIR = "Path/to/finetuned/model/directory"

# TREC_6 comes from flair.datasets; a path to a directory containing
# train.csv, test.csv and dev.csv ("Path/to/data/directory") can be used instead.
corpus = TREC_6()

# Same "rnn" document embeddings as before, but built from the fine-tuned model directory.
doc_embeddings = EasyDocumentEmbeddings(FINETUNED_MODEL_DIR, methods=["rnn"])

In [None]:
# Trainer keyword arguments; the encoder is the embeddings object built from
# the fine-tuned model directory.
sc_configs = dict(
    corpus=corpus,
    encoder=doc_embeddings,
    # presumably only consulted when `corpus` is a CSV directory — TODO confirm
    column_name_map={0: "text", 1: "label"},
    corpus_in_memory=True,
    predictive_head="flair",
)

# Build the trainer from the settings above.
sc_trainer = SequenceClassifierTrainer(**sc_configs)

In [None]:
# Settings for the learning-rate search. No `file_name` is passed here, so the
# trainer's own default applies.
sc_lr_configs = dict(
    output_dir=OUTPUT_DIR,
    start_learning_rate=1e-8,
    end_learning_rate=10,
    iterations=100,
    mini_batch_size=32,
    stop_early=True,
    smoothing_factor=0.8,
    plot_learning_rate=True,
)

# The value returned here is fed into the training step as its learning rate.
learning_rate = sc_trainer.find_learning_rate(**sc_lr_configs)

In [None]:
# Training settings; `learning_rate` is the value produced by the LR finder.
sc_train_configs = dict(
    output_dir=OUTPUT_DIR,
    learning_rate=learning_rate,
    mini_batch_size=32,
    anneal_factor=0.5,
    patience=5,
    max_epochs=150,
    plot_weights=False,
    batch_growth_annealing=False,
)

# Run training with the settings above.
sc_trainer.train(**sc_train_configs)