[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/tobhey/NoRBERT/blob/master/Code/Task4_relabeled_Promise_NFR_dataset/Task4_Classify_Functional_and_Quality_aspects.ipynb)

# Binary Classification of Functional Aspects in Requirements Elements of Traceability Link Recovery Datasets

This notebook includes all code needed to train and evaluate binary classifiers for predicting the functional aspects present in a requirement element.

Note: some cells are hidden and only the title is shown. To display the code, double-click the cell to switch the display mode.

## Prepare
Install required libraries and import

In [None]:
#@title Install needed libraries {display-mode: "form"}
!pip install fastai==1.0.61 fastcore==1.3.29 fastprogress==1.0.3 pytorch-transformers==1.2.0 sklearn==0.0 spacy==3.6.1

In [2]:
#@title Import python packages
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import os

from fastai import *
from fastai.text import *
from fastai.callbacks import *
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, precision_recall_fscore_support
from sklearn.utils.multiclass import unique_labels

from pytorch_transformers import BertTokenizer, BertPreTrainedModel, BertModel, BertConfig
from pytorch_transformers import AdamW

from fastprogress import master_bar, progress_bar
from datetime import datetime

In [None]:
#@title Check, if and what kind of GPU is used
def get_memory_usage():
    """Return the GPU memory currently allocated by PyTorch, in units of 10^6 bytes.

    Returns:
        The allocated memory on the module-level ``device`` divided by
        1,000,000, or ``0.0`` when CUDA is not available.
    """
    # Without CUDA, ``device`` is a CPU device; querying CUDA memory for it can
    # raise on some PyTorch versions, so report zero instead of touching it.
    if not torch.cuda.is_available():
        return 0.0
    return torch.cuda.memory_allocated(device)/1000000

def get_memory_usage_str():
    """Format the value of ``get_memory_usage()`` as a printable line with two decimals."""
    template = 'Memory usage: {:.2f} MB'
    megabytes = get_memory_usage()
    return template.format(megabytes)

# Detect whether a CUDA GPU is usable and, if so, print the name of the current one.
cuda_available = torch.cuda.is_available()
if cuda_available:
    curr_device = torch.cuda.current_device()
    print(torch.cuda.get_device_name(curr_device))
# `device` is read by get_memory_usage(); falls back to the CPU when no GPU is present.
device = torch.device("cuda" if cuda_available else "cpu")
# Bare expression: shows the selected device as the cell output.
device

### Define configuration used in this experiment run

Create config and set hyperparameters.
One can configure:


*   BERT model to use (model_name)
*   Learning Rate to use (max_lr)
*   Momentum (moms)
*   Epoch number for training (epochs)
*   Batch size for training (bs)
*   Weight decay for training (weight_decay)
*   Maximal sequence length (max_seq_len)
*   Train size used for both test/train and train/validation split (train_size)
*   Loss function used for training (loss_func)
*   The random seed used for shuffling, sampling and splitting (seed)
*   Whether or not to use early stopping (es)
*   The minimal delta used to indicate early stopping (min_delta)
*   The number of epochs without an improvement of at least min_delta after which training is stopped early (patience)
*   The way of folding used for this experiment (either test/train split (No), ten-fold cross validation (TenFold), or project specific folding (ProjFold))
*   Which kind of sampling to use (either OverSampling minority class, UnderSampling majority class, or NoSampling at all)

*   Which class to predict (clazz)

Further one can configure, where to get the dataset from and where to save log, result and model files.
By setting the classes Array one can decide which binary classifiers to train and evaluate in one experiment run.
One boolean is provided to decide whether to save the model file.



In [4]:
class Config(dict):
    """Dictionary of settings whose entries are also available as attributes.

    Every keyword passed to the constructor is stored both as a dict item
    (``cfg['epochs']``) and as an instance attribute (``cfg.epochs``).
    Assigning an attribute (``cfg.epochs = 3``) or calling ``set`` updates
    both views, so code that reads the dict and code that reads attributes
    always agree.
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __setattr__(self, key, val):
        """Store ``val`` as attribute ``key`` and mirror it into the dict."""
        # Without this mirror, ``cfg.key = val`` would leave ``cfg['key']`` stale
        # (or missing), which breaks readers that go through the dict interface.
        self[key] = val
        super().__setattr__(key, val)

    def set(self, key, val):
        """Set ``key`` to ``val`` in both the dict and the attribute view."""
        self[key] = val
        setattr(self, key, val)

class Fold(Enum):
  """Evaluation strategy selected through ``config.fold``."""
  # Single stratified train/test split.
  No = 1
  # Stratified ten-fold cross validation.
  TenFold = 2
  # One fold per entry of ``config_data.project_fold`` (rows selected by ProjectID).
  ProjFold = 3

class Sampling(Enum):
  """Class-balancing strategy applied to the training split (see ``split_dataframe``)."""
  # Leave the training split as it is.
  NoSampling = 1
  # Reduce the majority class to the size of the minority class.
  UnderSampling = 2
  # Repeat minority-class rows until both classes have the same size.
  OverSampling = 3

# Hyperparameters and experiment settings for one run.
config = Config(
    num_labels = 2, # overwritten by load_all_data() with the number of distinct label values
    model_name="bert-large-cased", # alternatives noted by the author: bert_base_uncased, bert_large_cased, bert_large_uncased (NOTE(review): the active value is spelled with hyphens - confirm spelling before switching)
    max_lr=2e-5, # default: 2e-5
    moms=(0.8, 0.7), # default: (0.8, 0.7); alt.(0.95, 0.85)
    epochs=16, # 10, 16, 32, 50
    bs=16, # default: 16
    weight_decay = 0.01,
    max_seq_len=128, # 50, 128
    train_size=0.75, # 0.8
    loss_func=nn.CrossEntropyLoss(),
    seed=904727489, #default: 904727489, 42 (as in Dalpiaz) or None
    es = False, # True enables the EarlyStoppingCallback in create_learner()
    min_delta = 0.01, # only used when es is True
    patience = 3, # only used when es is True
    fold = Fold.ProjFold, # Fold.No, Fold.TenFold, Fold.ProjFold
    sampling = Sampling.OverSampling, #Sampling.UnderSampling, Sampling.NoSampling, Sampling.OverSampling
)

# Used for the log/result/model file names and as the initial label column;
# the training loop later replaces label_column with each entry of config_data.classes.
clazz = 'F' # 'F', 'Q', 'OnlyF', 'OnlyQ' class to train classification on

# Locations of the dataset, log, result and model files plus the fold/class selection.
config_data = Config(
    root_folder = '.', # where is the root folder? Keep it that way if you want to load from Google Drive
    data_folder = '/', # where is the folder containing the datasets; relative to root
    train_data = ['classification_dataset.csv'],
    label_column = clazz,
    log_folder_name = '/log/',
    log_file = clazz + '_' + Fold(config.fold).name + '_' + Sampling(config.sampling).name + '_classifierPredictions_' + datetime.now().strftime('%Y%m%d-%H%M') + '.txt', # log-file name (the log folder is created by initLog() if missing)
    result_file = clazz + '_' + Fold(config.fold).name + '_' + Sampling(config.sampling).name + '_classifierResults_' + datetime.now().strftime('%Y%m%d-%H%M') + '.txt', # result-file name (written into the log folder)
    model_path = '/models/', # where is the folder for the model(s); relative to the root
    model_name = 'NoRBERT.pkl', # what is the model name? 
   
    # Project split to use, either p-fold (as in Dalpiaz) or loPo
    #project_fold = [[3, 9, 11], [1, 5, 12], [6, 10, 13], [1, 8, 14], [3, 12, 15], [2, 5, 11], [6, 9, 14], [7, 8, 13], [2, 4, 15], [4, 7, 10] ], # p-fold
    project_fold = [['eTour'], ['iTrust'], ['SMOS'], ['eAnci'], ['LibEST'] ], # loPo; each inner list holds the ProjectID values used as test set of one fold
    classes= ['F', 'Function', 'Behavior', 'Data' , 'UserRelated'], # this array defines which classes are trained and evaluated in one run
   
)

save_model = False # True, if you want to save the model file (the models folder is created if missing)
input_col = 'text' # name of the dataframe column that holds the requirement text

In [5]:
#@title Prepare data loading:  {display-mode: "form"}
# First configured dataset file, as a string and as a Path object.
# NOTE(review): neither name is referenced by the other cells visible here; load_data() builds its own path.
data_path = config_data.train_data[0]
data_file = Path(data_path)


In [None]:
#@title Define logging functions and seed generation {display-mode: "form"}
def initLog(firstRun):
    """Make sure the log folder exists and write the run header to the log file.

    Args:
        firstRun: when truthy the log file is created/truncated, otherwise
            the header line is appended to the existing file.
    """
    logfolder = config_data.root_folder + config_data.log_folder_name

    if not os.path.isdir(logfolder):
      print("Log folder does not exist, trying to create folder.")
      try:
        os.mkdir(logfolder)
      except OSError:
        # Best effort only: the failure is reported and execution continues.
        print ("Creation of the directory %s failed" % logfolder)
      else:
        print ("Successfully created the directory %s" % logfolder)

    # Header line: timestamp followed by the hyperparameter summary.
    header = datetime.now().strftime('%Y-%m-%d %H:%M') + ' ' + get_info()
    mode = 'w' if firstRun else 'a'
    with open(logfolder + config_data.log_file, mode) as log:
        log.write(header + '\n')
    

def logLine(line):
    """Append ``line`` plus a newline to the prediction log file."""
    target = ''.join([config_data.root_folder, config_data.log_folder_name, config_data.log_file])
    with open(target, 'a') as handle:
        handle.write(line + '\n')

def logResult(result):
    """Append the hyperparameter summary and then ``result`` to the result file."""
    target = ''.join([config_data.root_folder, config_data.log_folder_name, config_data.result_file])
    with open(target, 'a') as handle:
        # Each entry is preceded by the settings it was produced with.
        handle.write(get_info() + '\n')
        handle.write(result + '\n')

def get_info():
    """Return a one-line summary of the current hyperparameters and data settings."""
    template = 'model: {}, max_lr: {}, epochs: {}, bs: {}, train_size: {}, weight decay: {},  Seed: {}, Data: {}, Column: {}, EarlyStopping: {}:{};pat:{}'
    values = (
        config.model_name,
        config.max_lr,
        config.epochs,
        config.bs,
        config.train_size,
        config.weight_decay,
        config.seed,
        config_data.train_data,
        config_data.label_column,
        config.es,
        config.min_delta,
        config.patience,
    )
    return template.format(*values)

def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch and return the seed that was used.

    Args:
        seed: integer seed, or ``None`` to draw one with ``random.randint(0, 2**31)``.

    Returns:
        The seed that was applied.
    """
    if seed is None:
        seed = random.randint(0, 2**31)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    # Ask cuDNN for deterministic behaviour and switch off its benchmark mode.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    return seed

# Seed all random number generators with the configured seed; the seed that was applied is shown as the cell output.
set_seed(config.seed)

## Learner


In [7]:
#@title Create proper tokenizer for our data (adapting FastAiTokenizer to use BertTokenizer) {display-mode: "form"}
class FastAiBertTokenizer(BaseTokenizer):
    """Adapter that lets fast.ai tokenize text with a pretrained ``BertTokenizer``."""
    def __init__(self, tokenizer: BertTokenizer, max_seq_len: int=512, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.max_seq_len = max_seq_len
        self._pretrained_tokenizer = tokenizer

    def __call__(self, *args, **kwargs):
        # Calling the instance returns the instance itself, whatever the arguments.
        # Presumably this lets an instance be passed where fast.ai expects a tokenizer factory - TODO confirm.
        return self

    def tokenizer(self, t:str):
        """Tokenize ``t``, truncate to ``max_seq_len - 2`` tokens and wrap in [CLS] ... [SEP]."""
        # Two positions are reserved for the [CLS] and [SEP] markers.
        budget = self.max_seq_len - 2
        word_pieces = self._pretrained_tokenizer.tokenize(t)
        return ["[CLS]", *word_pieces[:budget], "[SEP]"]



Now, we can create our own databunch using the tokenizer above. Notice we're passing the include_bos=False and include_eos=False options. This is to prevent fastai from adding its own SOS/EOS tokens that will interfere with BERT's SOS/EOS tokens.

We can pass our own list of Preprocessors to the databunch.

In [8]:
#@title Define Processors and Databunch {display-mode: "form"}
class BertTokenizeProcessor(TokenizeProcessor):
    """Tokenize processor with the bos/eos options disabled; the tokenizer adds [CLS]/[SEP] itself."""
    def __init__(self, tokenizer):
        marker_flags = dict(include_bos=False, include_eos=False)
        super().__init__(tokenizer=tokenizer, **marker_flags)

class BertNumericalizeProcessor(NumericalizeProcessor):
    """Numericalize processor that uses the BERT vocabulary instead of one built from the data."""
    def __init__(self, *args, vocab=None, **kwargs):
        """Create the processor.

        Args:
            vocab: vocabulary to use. When omitted, the vocabulary is built from a
                module-level ``bert_tok`` tokenizer, as before.
        """
        if vocab is None:
            # NOTE(review): no module-level `bert_tok` is defined in the cells visible
            # here (create_databunch only has a local one), so this fallback raises
            # NameError unless the caller defines it. Prefer passing `vocab=` explicitly.
            vocab = Vocab(list(bert_tok.vocab.keys()))
        super().__init__(*args, vocab=vocab, **kwargs)

def get_bert_processor(tokenizer:Tokenizer=None, vocab:Vocab=None):
    """Return the processor list for BERT input: tokenization first, then numericalization with ``vocab``."""
    tokenize_step = BertTokenizeProcessor(tokenizer=tokenizer)
    numericalize_step = NumericalizeProcessor(vocab=vocab)
    return [tokenize_step, numericalize_step]

class BertDataBunch(TextDataBunch):
    """``TextDataBunch`` variant whose ``from_df`` uses the processors from ``get_bert_processor``."""
    @classmethod
    def from_df(cls, path:PathOrStr, train_df:DataFrame, valid_df:DataFrame, test_df:Optional[DataFrame]=None,
              tokenizer:Tokenizer=None, vocab:Vocab=None, classes:Collection[str]=None, text_cols:IntsOrStrs=1,
              label_cols:IntsOrStrs=0, **kwargs) -> DataBunch:
        "Create a `TextDataBunch` from DataFrames."
        # Split off the keyword arguments that get_bert_processor accepts; the rest goes to databunch().
        p_kwargs, kwargs = split_kwargs_by_func(kwargs, get_bert_processor)
        # use our custom processors while taking tokenizer and vocab as kwargs
        processor = get_bert_processor(tokenizer=tokenizer, vocab=vocab, **p_kwargs)
        # With several label columns and no explicit classes, the column names serve as classes.
        if classes is None and is_listy(label_cols) and len(label_cols) > 1: classes = label_cols
        # Training and validation text lists share the same processor list.
        src = ItemLists(path, TextList.from_df(train_df, path, cols=text_cols, processor=processor),
                      TextList.from_df(valid_df, path, cols=text_cols, processor=processor))
        src = src.label_for_lm() if cls==TextLMDataBunch else src.label_from_df(cols=label_cols, classes=classes)
        # The optional test set is attached as a TextList built without an explicit processor.
        if test_df is not None: src.add_test(TextList.from_df(test_df, path, cols=text_cols))
        return src.databunch(**kwargs)

In [9]:
#@title Define own BertTextClassifier class{display-mode: "form"}
class BertTextClassifier(BertPreTrainedModel):
    """Pretrained BERT encoder followed by dropout and a linear classification layer."""
    def __init__(self, model_name, num_labels):
        """Load the pretrained encoder and create the classification head.

        Args:
            model_name: name passed to ``BertConfig.from_pretrained`` and
                ``BertModel.from_pretrained``.
            num_labels: number of output units of the linear classifier.
        """
        config = BertConfig.from_pretrained(model_name)
        super().__init__(config)
        self.num_labels = num_labels

        self.bert = BertModel.from_pretrained(model_name, config=config)

        self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
        self.classifier = nn.Linear(self.config.hidden_size, num_labels)

    def forward(self, tokens, labels=None, position_ids=None, token_type_ids=None, attention_mask=None, head_mask=None):
        """Return the classification logits for ``tokens``.

        ``labels`` is accepted but not used; the remaining optional arguments
        are forwarded to the BERT encoder unchanged.
        """
        outputs = self.bert(tokens, position_ids=position_ids, token_type_ids=token_type_ids, attention_mask=attention_mask, head_mask=head_mask)

        # The second element of the encoder output is used as the sequence representation.
        pooled_output = outputs[1]

        dropout_output = self.dropout(pooled_output)
        # Raw logits are returned: the configured loss (nn.CrossEntropyLoss) expects
        # unnormalized scores, so no softmax is computed here.
        return self.classifier(dropout_output)

## Data

Load the dataset

In [10]:
#@title Define functions to load data {display-mode: "form"}
def load_data(filename):
    """Read one dataset CSV from the configured data folder.

    The file's header row is replaced by the fixed column names below, rows
    containing missing values are dropped, and the previous index is kept as an
    ``index`` column by ``reset_index()``.
    """
    fpath = config_data.root_folder + config_data.data_folder + filename
    print(fpath)
    column_names = ['ProjectID', 'file', 'ID', 'text', 'functional', 'Function', 'Behavior', 'Data', 'OnlyF', 'F', 'OnlyQ', 'Q', 'UserRelated']
    frame = pd.read_csv(fpath, delimiter=',', header=0, encoding='utf8', names=column_names, dtype={'UserRelated': int})
    return frame.dropna().reset_index()

def load_all_data(filenames, label_column):
    """Load and combine several dataset files, shuffle them and record the label count.

    Args:
        filenames: file names relative to the configured data folder; at least one.
        label_column: column whose number of distinct values becomes ``config.num_labels``.

    Returns:
        The shuffled, combined dataframe.
    """
    df = load_data(filenames[0])
    if len(filenames) > 1:
        # pd.concat replaces DataFrame.append, which is no longer available in
        # current pandas releases; original index labels are kept, as before.
        df = pd.concat([df] + [load_data(name) for name in filenames[1:]])

    # shuffle the dataset a bit and get the amount of classes
    df = df.sample(frac=1, axis=0, random_state = config.seed)
    # config.set keeps the dict entry and the attribute of the Config in sync.
    config.set('num_labels', df[label_column].nunique())

    print(df.shape)
    print(df[label_column].value_counts())
    return df

In [None]:
#@title Actually load the dataset{display-mode: "form"}
# Load the first configured dataset file (load_all_data() is not used here,
# so config.num_labels keeps its configured value).
df = load_data(config_data.train_data[0])
# shuffle the dataset a bit; reproducible through the configured seed
df = df.sample(frac=1, axis=0, random_state = config.seed)

print(df.shape)
display(df)

In [None]:
#@title Prepare dataframe to put results in {display-mode: "form"}
# Copy of the dataset whose label columns are reset to 0; predict_and_log_result()
# later writes each prediction into the column of the class being evaluated.
df_result = df.copy()
cols = ['functional', 'Function', 'Behavior', 'Data', 'OnlyF', 'F', 'OnlyQ', 'Q', 'UserRelated']
df_result = df_result.assign(**{c:0 for c in cols})
display(df_result)

In [13]:
#@title Create the dictionary that contains the labels along with their indices. This is useful for evaluation and similar. {display-mode: "form"}
def create_label_indices(df):
    """Map 0 to ``'not_<label_column>'`` and 1 to ``'<label_column>'`` for the current label column.

    ``df`` is accepted for interface compatibility and is not used.
    """
    positive = config_data.label_column
    return dict(enumerate(['not_' + positive, positive]))

In [14]:
#@title Define functions for under-/oversample dataset {display-mode: "form"}
def undersample(df_trn, major_label, minor_label):
  """Shrink the majority class of ``df_trn`` to the size of the minority class.

  Majority rows are drawn without replacement through ``np.random.choice``;
  the result is shuffled with the configured seed and its label counts are printed.
  """
  label_col = config_data.label_column
  minority_rows = df_trn[df_trn[label_col] == minor_label]
  majority_indices = df_trn[df_trn[label_col] == major_label].index
  random_indices = np.random.choice(majority_indices, len(minority_rows), replace=False)
  # pd.concat replaces DataFrame.append, which is no longer available in current
  # pandas releases, and matches what oversample() already does.
  df_trn = pd.concat([df_trn.loc[random_indices], minority_rows])
  df_trn = df_trn.sample(frac=1, axis=0, random_state = config.seed)
  print(df_trn[label_col].value_counts())
  return df_trn

def oversample(df_trn, major_label, minor_label):
  """Repeat minority-class rows of ``df_trn`` until both classes have the same size.

  The minority class is repeated in whole copies; the remainder is filled with
  rows drawn without replacement through ``np.random.choice``. The result gets a
  fresh integer index, is shuffled with the configured seed and its label counts
  are printed.
  """
  label_col = config_data.label_column
  minority_rows = df_trn[df_trn[label_col] == minor_label]
  minor_size = sum(df_trn[label_col] == minor_label)
  major_size = sum(df_trn[label_col] == major_label)
  # Number of whole copies of the minority class that fit into the majority class ...
  multiplier = major_size//minor_size
  # ... and the number of extra rows still missing after those copies.
  diff = major_size - (multiplier * minor_size)
  random_indices = np.random.choice(minority_rows.index, diff, replace=False)
  # df_trn already contains one copy of the minority class, hence multiplier - 1 more.
  pieces = [df_trn.loc[random_indices], df_trn]
  pieces.extend(minority_rows for _ in range(multiplier - 1))
  balanced = pd.concat(pieces, ignore_index=True)
  balanced = balanced.sample(frac=1, axis=0, random_state = config.seed)
  print(balanced[label_col].value_counts())
  return balanced

In [15]:
#@title Function to split dataframe according to Sampling strategy and train size {display-mode: "form"}
def split_dataframe(df, train_size = 0.8, random_state = None):
    """Split ``df`` into a training and a validation part and balance the training part.

    The split is stratified on the current label column. Depending on
    ``config.sampling`` the training part is under-sampled, over-sampled or left
    unchanged; the validation part is never resampled.

    Returns:
        Tuple ``(df_trn, df_valid)``.
    """
    label_col = config_data.label_column
    df_trn, df_valid = train_test_split(df, stratify = df[label_col], train_size = train_size, random_state = random_state)

    # Label 1 counts as the majority only when it is strictly more frequent than label 0.
    sizeOne = sum(df_trn[label_col] == 1)
    sizeZero = sum(df_trn[label_col] == 0)
    major_label, minor_label = (1, 0) if sizeOne > sizeZero else (0, 1)

    strategy = config.sampling
    if strategy == Sampling.UnderSampling:
      return undersample(df_trn, major_label, minor_label), df_valid
    if strategy == Sampling.OverSampling:
      return oversample(df_trn, major_label, minor_label), df_valid
    return df_trn, df_valid

## Predictor


In [16]:
#@title Create a predictor class{display-mode: "form"}
class Predictor:
    """Thin wrapper that turns a classifier's prediction into the predicted class value."""
    def __init__(self, classifier):
        self.classifier = classifier
        # Class values in the order known to the classifier's data object.
        self.classes = self.classifier.data.classes

    def predict(self, text):
        """Return the entry of ``self.classes`` selected by the classifier for ``text``."""
        # The second element of the prediction is used as position in the class list.
        class_position = self.classifier.predict(text)[1]
        return self.classes[class_position]

## Create and train the learner/classifier


In [17]:
#@title Define functions to create databunch, learner and actual classifier{display-mode: "form"}
def create_databunch(config, df_trn, df_valid):
    """Build the ``BertDataBunch`` for the given training and validation dataframes.

    Tokenizer and vocabulary come from the pretrained BERT model named in
    ``config.model_name``; text and label columns are taken from the
    module-level ``input_col`` and ``config_data.label_column``.
    """
    bert_tok = BertTokenizer.from_pretrained(config.model_name)
    tok_func = FastAiBertTokenizer(bert_tok, max_seq_len=config.max_seq_len)
    # No fast.ai pre/post rules are applied on top of the BERT tokenizer.
    fastai_tokenizer = Tokenizer(tok_func=tok_func, pre_rules=[], post_rules=[])
    fastai_bert_vocab = Vocab(list(bert_tok.vocab.keys()))
    # Pad at the end of each sequence with index 0.
    collate = partial(pad_collate, pad_first=False, pad_idx=0)
    return BertDataBunch.from_df(
        ".",
        train_df=df_trn,
        valid_df=df_valid,
        tokenizer=fastai_tokenizer,
        vocab=fastai_bert_vocab,
        bs=config.bs,
        text_cols=input_col,
        label_cols=config_data.label_column,
        collate_fn=collate,
    )


def create_learner(config, databunch):
    """Create the fast.ai ``Learner`` around a fresh ``BertTextClassifier``.

    The learner uses AdamW, the configured weight decay and loss function and
    reports F1 (``FBeta(beta=1)``). With ``config.es`` enabled, an
    ``EarlyStoppingCallback`` monitoring ``f_beta`` is added.
    """
    model = BertTextClassifier(config.model_name, config.num_labels)
    optimizer = partial(AdamW)

    learner_kwargs = dict(
        wd=config.weight_decay,
        metrics=FBeta(beta=1), #accuracy, (metric to optimize on)
        loss_func=config.loss_func,
    )
    if config.es:
      # Only passed when early stopping is requested, so the Learner default applies otherwise.
      learner_kwargs['callback_fns'] = [partial(EarlyStoppingCallback, monitor='f_beta', min_delta=config.min_delta, patience=config.patience)]

    return Learner(databunch, model, optimizer, **learner_kwargs)

# Create the classifier
def create_classifier(config, df):
  """Split ``df`` into train/validation parts and return an untrained learner for them."""
  splits = split_dataframe(df, train_size = config.train_size, random_state = config.seed)
  return create_learner(config, create_databunch(config, *splits))

In [18]:
#@title Define predict loop {display-mode: "form"}
def predict_and_log_result(classifier, df_eval):
  """Predict every row of ``df_eval``, log each prediction and report the metrics.

  Each prediction is also written into the module-level ``df_result`` (matched on
  its ``index`` column). The classification report is printed and appended to the
  result file.

  Returns:
      Tuple ``(flat_predictions, flat_true_labels)`` in row order.
  """
  predictor = Predictor(classifier)
  label_col = config_data.label_column
  # +1 because itertuples() puts the dataframe index in front of the columns.
  label_pos = df_eval.columns.get_loc(label_col) + 1
  flat_predictions, flat_true_labels = [], []
  for row in progress_bar(df_eval.itertuples(), total=len(df_eval)):
      class_text = row.text
      class_label = row[label_pos]
      flat_true_labels.append(class_label)
      prediction = predictor.predict(class_text)
      flat_predictions.append(prediction)
      # row.index is the dataframe's 'index' column, not the tuple method.
      df_result.loc[df_result['index'] == row.index, label_col] = prediction

      logLine('PID: {}, {}, {} -> {}'.format(row.ProjectID, class_text, label_indices.get(class_label), label_indices.get(prediction)))

  # get labels in correct order
  test_labels = np.sort(unique_labels(flat_true_labels, flat_predictions))
  target_names = [label_indices.get(x) for x in test_labels]

  result = classification_report(flat_true_labels, flat_predictions, target_names=target_names, digits = 5)
  logResult(result)
  print(result)
  return flat_predictions, flat_true_labels

In [19]:
#@title Define train and test loop{display-mode: "form"}
def train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results):
  """Train a new classifier on ``df_train`` and evaluate it on ``df_eval``.

  The three accumulator lists are extended in place: predictions and true labels
  of this fold, plus the four arrays returned by
  ``precision_recall_fscore_support`` for the labels present in ``df_eval``.

  Returns:
      Tuple ``(classifier, overall_flat_predictions, overall_flat_true_labels, results)``.
  """
  classifier = create_classifier(config, df_train)
  # Train the classifier on train set
  fit_output = classifier.fit_one_cycle(config.epochs, max_lr=config.max_lr, moms=config.moms, wd=config.weight_decay)
  print(fit_output)
  #Predict on test set
  flat_predictions, flat_true_labels = predict_and_log_result(classifier, df_eval)
  overall_flat_predictions.extend(flat_predictions)
  overall_flat_true_labels.extend(flat_true_labels)
  test_labels = np.sort(df_eval[config_data.label_column].unique())
  fold_metrics = precision_recall_fscore_support(flat_true_labels, flat_predictions, labels = test_labels)
  results.extend(fold_metrics)
  return classifier, overall_flat_predictions, overall_flat_true_labels, results

In [20]:
#@title Define function to calculate averaged metric results {display-mode: "form"}
def calcAverageMetrics(results):
  """Average precision, recall and f-score over all folds.

  Args:
      results: flat list with four entries per fold, in the order precision,
          recall, f-score, support; the first three are added element-wise,
          the fourth is ignored.

  Returns:
      Tuple ``(precision, recall, fscore)`` of per-label averages.
  """
  n_folds = int(len(results)/4)
  precisions = [results[4*k] for k in range(n_folds)]
  recalls = [results[4*k + 1] for k in range(n_folds)]
  fscores = [results[4*k + 2] for k in range(n_folds)]

  def _mean(per_fold):
    # Element-wise sum over the folds, then division by the number of folds.
    total = [0]*len(per_fold[0])
    for values in per_fold:
      total = total + values
    return total / n_folds

  return _mean(precisions), _mean(recalls), _mean(fscores)

In [None]:
#@title Decide how to fold and train the classifier {display-mode: "form"}
# Run the train/eval loop once for every class listed in config_data.classes.
# Relies on the module-level names df, df_result, config and config_data defined in earlier cells.
firstRun = True
for cl in config_data.classes:
    # Re-seed so every class starts from the same random state.
    set_seed(config.seed)
    config_data.label_column = cl
    # build the index -> label-name dictionary for the current class
    label_indices = create_label_indices(df)
    print(label_indices)

    # Accumulators filled by train_and_predict() across all folds of this class.
    overall_flat_predictions, overall_flat_true_labels, results = [], [], []
    # The log file is truncated for the first class only and appended to afterwards.
    initLog(firstRun)
    if config.fold == Fold.TenFold:
        skf = StratifiedKFold(n_splits=10)
        fold_number = 1
        for train, test in skf.split(df, df[config_data.label_column]):
            # skf.split yields positional indices, hence iloc.
            df_train = df.iloc[train]
            df_eval = df.iloc[test]
            log_text = '/////////////////////// Fold: {} of {} /////////////////////////////'.format(fold_number,10)
            logLine(log_text)
            classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)
            fold_number = fold_number + 1
    elif config.fold == Fold.ProjFold:     
        for k in config_data.project_fold:
            # Rows whose ProjectID is listed in k form the test set; all other rows are used for training.
            train = df.loc[~df['ProjectID'].isin(k)].index
            test = df.loc[df['ProjectID'].isin(k)].index
            # NOTE(review): the configured project_fold entries are lists, so this comparison
            # with a string is False for them and the branch is not taken - confirm whether it is still needed.
            if k == "UserRelated":
                k = ["UserRelated", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
                train = df.loc[~df['ProjectID'].isin(k)].index
            # train/test hold index labels here, hence loc.
            df_train = df.loc[train]
            df_eval = df.loc[test]
            log_text = '/////////////////////// Test-Projects: {} /////////////////////////////'.format(k)
            logLine(log_text)
            classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)
    else:
        # Fold.No: a single stratified train/test split.
        df_train, df_eval = train_test_split(df,stratify=df[config_data.label_column], train_size=config.train_size, random_state= config.seed)
        classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)

    # Label names for the report, ordered by the sorted label values found in the whole dataset.
    target_names = []
    test_labels = df[config_data.label_column].unique()

    test_labels = np.sort(test_labels)
    for x in test_labels:
        target_names.append(label_indices.get(x))

    # Report 1: metrics computed over the concatenated predictions of all folds.
    print('/////////////////////// Aggregated Predictions Result /////////////////////////////')
    resultText = '/////////////////////// Aggregated Predictions Result /////////////////////////////\n'
    result = classification_report(overall_flat_true_labels, overall_flat_predictions, target_names=target_names, digits = 5)
    resultText += result
    print(result)
    # Report 2: per-fold metrics averaged over the folds.
    print('/////////////////////// Averaged Metrics Result /////////////////////////////')
    resultText += '/////////////////////// Averaged Metrics Result /////////////////////////////\n'
    precision, recall, fscore = calcAverageMetrics(results)
    print("              precision    recall  f1-score")
    resultText += "              precision    recall  f1-score\n"
    for i in range(len(precision)):
        print('{:<14}'.format(target_names[i]) + '  {:.5f}'.format(precision[i]) + '   {:.5f}'.format(recall[i]) + '   {:.5f}'.format(fscore[i]))
        resultText += '{:<14}'.format(target_names[i]) + '  {:.5f}'.format(precision[i]) + '   {:.5f}'.format(recall[i]) + '   {:.5f}'.format(fscore[i]) + '\n'
    logResult(resultText)
    firstRun = False

In [None]:
#@title Display and log overall evaluation results {display-mode: "form"}
# Repeats the aggregated and averaged reports using the variables left over from the
# last iteration of the training loop (last class, last fold).
# NOTE(review): unlike the loop, the label names are taken from df_eval (the last
# evaluation fold) rather than from the whole dataset, and the result is logged a second time.
target_names = []
test_labels = df_eval[config_data.label_column].unique()

test_labels = np.sort(test_labels)
for x in test_labels:
  target_names.append(label_indices.get(x))

print('/////////////////////// Aggregated Predictions Result /////////////////////////////')
resultText = '/////////////////////// Aggregated Predictions Result /////////////////////////////\n'
result = classification_report(overall_flat_true_labels, overall_flat_predictions, target_names=target_names, digits = 5)
resultText += result
print(result)
print('/////////////////////// Averaged Metrics Result /////////////////////////////')
resultText += '/////////////////////// Averaged Metrics Result /////////////////////////////\n'
precision, recall, fscore = calcAverageMetrics(results)
print("              precision    recall  f1-score")
resultText += "              precision    recall  f1-score\n"
for i in range(len(precision)):
    print('{:<14}'.format(target_names[i]) + '  {:.5f}'.format(precision[i]) + '   {:.5f}'.format(recall[i]) + '   {:.5f}'.format(fscore[i]))
    resultText += '{:<14}'.format(target_names[i]) + '  {:.5f}'.format(precision[i]) + '   {:.5f}'.format(recall[i]) + '   {:.5f}'.format(fscore[i]) + '\n'
logResult(resultText)

# Save Result

In [23]:
#@title Save result dataframe {display-mode: "form"}
def create_result_csv_name():
    """Return the result file name (including the ``.csv`` suffix) for the current configuration."""
    parts = dict(
        fold=Fold(config.fold).name,
        model=config.model_name,
        epochs=str(config.epochs),
        sampling=Sampling(config.sampling).name,
        clasz=clazz,
    )
    return 'classification_result_{fold}_{model}_e{epochs}_{sampling}_{clasz}.csv'.format(**parts)

# Order the rows by index label (the dataset was shuffled after loading) and remove the
# helper 'index' column. errors='ignore' keeps the cell re-runnable once the column is gone.
df_result = df_result.sort_index().drop(columns=['index'], errors='ignore')
# create_result_csv_name must be *called*, and the name it returns already ends in '.csv'.
df_result.to_csv(config_data.root_folder + config_data.data_folder + create_result_csv_name(), index=False)

## Save Model

In [24]:
#@title Save the model along with its config
def create_model_name():
    """Return the base name (without extension) used for the saved model and its config file."""
    template = 'NoRBERT_{clasz}_e{epochs}_{sampling}'
    sampling_name = Sampling(config.sampling).name
    return template.format(clasz=clazz, epochs=str(config.epochs), sampling=sampling_name)

def save_config(model_save_path, model_name):
    """Write every setting of ``config`` as an ``item=value,`` line to ``<model_name>.config``.

    Args:
        model_save_path: folder prefix, concatenated as-is with the file name.
        model_name: base name of the file; ``.config`` is appended.
    """
    # Setting names come from the instance attributes, values from the dict entries.
    lines = ['{item}={value},\n'.format(item=item, value=config[item]) for item in config.__dict__]
    save_path = model_save_path + model_name + '.config'
    with open(save_path, 'w', encoding='utf-8') as out:
        out.write(''.join(lines))

# Export the classifier left over from the last training run, together with its settings.
if save_model:
  model_name = create_model_name()
  model_save_path = config_data.root_folder + config_data.model_path
  if not os.path.isdir(model_save_path):
    print("Models folder does not exist, trying to create folder.")
    try:
      os.mkdir(model_save_path)
    except OSError:
      # Best effort only: the failure is reported and the save is still attempted.
      print ("Creation of the directory %s failed" % model_save_path)
    else:
      print ("Successfully created the directory %s" % model_save_path)
  # Write the hyperparameters next to the model file.
  save_config(model_save_path, model_name)
  model_save_file = model_save_path + model_name + '.pkl'
  # `classifier` is the learner returned by the last train_and_predict() call.
  classifier.export(file = model_save_file)