In [None]:
from datasets import Dataset
from transformers import (TextClassificationPipeline,
                          RobertaTokenizerFast, RobertaForSequenceClassification)
import pickle
import pandas as pd
from tqdm.auto import tqdm

Initialize the models

In [None]:
# Locations of the four RoBERTa sequence-classification checkpoints.
# Each `...` is a placeholder that must be replaced with a real path (or model
# identifier) before the notebook is run; every value is handed to
# `from_pretrained` for both the model and its tokenizer.
general_model_path = ...
discourse_model_path = ...
grammar_model_path = ...
lexical_model_path = ...

In [None]:
def _load_classifier(checkpoint, **pipeline_kwargs):
    """Build a text-classification pipeline from a single checkpoint.

    The RoBERTa model and its fast tokenizer are both loaded from
    ``checkpoint``; any extra keyword arguments are forwarded unchanged to
    ``TextClassificationPipeline``.
    """
    return TextClassificationPipeline(
        model=RobertaForSequenceClassification.from_pretrained(checkpoint),
        tokenizer=RobertaTokenizerFast.from_pretrained(checkpoint),
        **pipeline_kwargs,
    )


# The general classifier is the only one built with top_k=None: the prediction
# loop reads the score of every general class, not just the best one.
general = _load_classifier(general_model_path, top_k=None)

# The fine-grained classifiers keep the pipeline's default output.
discourse = _load_classifier(discourse_model_path)
grammar = _load_classifier(grammar_model_path)
lexical = _load_classifier(lexical_model_path)

Fetch data to be predicted

In [None]:
# Load the pickled training data. The context manager closes the file handle
# as soon as unpickling is done; the previous bare `open(...)` never closed it.
# NOTE: `path/to/train` is a placeholder for the real location of the pickle.
# SECURITY: `pickle.load` can execute arbitrary code - only load trusted files.
with open(path/to/train, 'rb') as train_file:
    train = pickle.load(train_file)

data = pd.DataFrame(train, columns=['text', 'label'])
dataset = Dataset.from_pandas(data[['text', 'label']])
# A fixed seed keeps the 80/20 split identical from run to run.
dataset = dataset.train_test_split(test_size=0.2, seed=42)
# Only the held-out 20% is used for the predictions below.
df = dataset['test'].to_pandas()
df

In [None]:
# Parallel lists: sents[i] is a test sentence and labels[i] is its entry from
# the 'label' column.
sents = df['text'].tolist()
labels = df['label'].tolist()

In the original Grammar model, the `id2label` mapping does not match the actual classes. The dictionary below translates the model's predicted labels into the actual classes; if the model is trained anew, the dictionary is no longer needed.

In [None]:
# Label correction table for the grammar model:
# key = label reported by the model, value = the actual class it stands for.
real = dict(
    Verb_pattern='Verb_pattern',
    Confusion_of_structures='Confusion_of_structures',
    Comparison_degree='Voice',
    Formational_affixes='Comparison_degree',
    Prepositions='Formational_affixes',
    Category_confusion='Prepositions',
    Agreement_errors='Category_confusion',
    Numerals='Agreement_errors',
    Tense_form='Numerals',
    Voice='Relative_clause',
    Relative_clause='Tense_form',
)

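Predict the general class of each sentence. If the top general score exceeds the certainty threshold (0.85), the prediction is 'certain': run only the matching fine-grained model and store one label and one score. Otherwise the prediction is 'uncertain': run all three fine-grained models and store the three general scores together with the three predicted labels and their scores.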

In [None]:
certain_answers = []
uncertain_answers = []

# First letter of the general label -> fine-grained classifier for that class.
# Any other general label falls through to the grammar classifier.
specialists = {'l': lexical, 'd': discourse}

for sent, target in zip(tqdm(sents), labels):
    # Scores for the general classes; entry 0 is treated as the predicted class.
    ranking = general(sent)[0]
    best = ranking[0]
    general_label = best['label']

    # certainty threshold: 0.85
    if best['score'] > 0.85:
        # Certain: ask only the classifier that matches the general class.
        classifier = specialists.get(general_label[0], grammar)
        pred = classifier(sent)[0]
        label, score = pred['label'], pred['score']
        if classifier is grammar:
            # Grammar labels go through the `real` correction table.
            label = real[label]

        certain_answers.append(
            (sent, target, general_label, best['score'], label, score)
        )
        continue

    # Uncertain: ask all three classifiers and keep every score so the best
    # combination can be chosen later.
    lex = lexical(sent)[0]
    disc = discourse(sent)[0]
    gram = grammar(sent)[0]

    gen_scores = {entry['label']: entry['score'] for entry in ranking}

    uncertain_answers.append((
        sent, target,
        gen_scores['lexical'],
        gen_scores['discourse'],
        gen_scores['gram'],
        lex['label'], lex['score'],
        disc['label'], disc['score'],
        real[gram['label']], gram['score'],
    ))

In [None]:
# Column names follow the order of the tuples collected in the prediction
# loop: 'error' is the sentence and 'target' is its entry from `labels`.
certain_columns = ['error', 'target', 'general', 'general score', 'predicted', 'score']
uncertain_columns = [
    'error', 'target',
    'lexical gen', 'discourse gen', 'grammar gen',
    'lexical pred', 'lexical score',
    'discourse pred', 'discourse score',
    'grammar pred', 'grammar score',
]

certain = pd.DataFrame(certain_answers, columns=certain_columns)
uncertain = pd.DataFrame(uncertain_answers, columns=uncertain_columns)

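For uncertain errors, choose the best fit: multiply each general-class score by the score of the corresponding fine-grained prediction, then take the class with the highest product.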

In [None]:
# likelihood = general-class score * score of that class's fine-grained prediction
for likelihood_col, gen_col, score_col in (
    ('disc likelihood', 'discourse gen', 'discourse score'),
    ('grammar likelihood', 'grammar gen', 'grammar score'),
    ('lexical likelihood', 'lexical gen', 'lexical score'),
):
    uncertain[likelihood_col] = uncertain[gen_col] * uncertain[score_col]

In [None]:
# Slice the three likelihood columns once and reuse the slice for both results.
likelihoods = uncertain[['disc likelihood', 'grammar likelihood', 'lexical likelihood']]

# Name of the winning likelihood column, and the winning value itself.
uncertain['candidate class'] = likelihoods.idxmax(axis=1)
uncertain['candidate prob'] = likelihoods.max(axis=1)

In [None]:
def candidate(r):
    """Return the fine-grained prediction that belongs to the winning class.

    ``r`` is one row of ``uncertain``. The first letter of its
    'candidate class' value selects the source column: 'd' -> 'discourse pred',
    'g' -> 'grammar pred', anything else -> 'lexical pred'.
    """
    source_by_initial = {'d': 'discourse pred', 'g': 'grammar pred'}
    source_column = source_by_initial.get(r['candidate class'][0], 'lexical pred')
    return r[source_column]

In [None]:
# Row by row, copy the label proposed by the winning class into 'predicted'.
# NOTE(review): when `uncertain` has no rows, `apply(..., axis=1)` may return a
# DataFrame instead of a Series - confirm this cell still works in that case.
uncertain['predicted'] = uncertain.apply(candidate, axis=1)

Flag the predictions we are not sure about: each prediction is marked as accepted only if its score exceeds the respective threshold.

In [None]:
# Acceptance cut-offs (strict '>' comparisons are applied further down).
# Compared against the fine-grained 'score' of each certain prediction.
certain_threshold = 0.7
# Compared against 'candidate prob' (general score * fine-grained score) of
# each uncertain prediction.
uncertain_threshold = 0.63

In [None]:
# Vectorised comparison instead of a row-wise `apply` with a lambda: same
# boolean result per row, no Python-level loop, and it always yields a boolean
# Series - also when `uncertain` has no rows.
uncertain['accepted'] = uncertain['candidate prob'] > uncertain_threshold

In [None]:
# Vectorised comparison instead of a row-wise `apply` with a lambda: same
# boolean result per row, no Python-level loop, and it always yields a boolean
# Series - also when `certain` has no rows.
certain['accepted'] = certain['score'] > certain_threshold

Save predictions.

In [None]:
# Write each result frame to its own workbook (uncertain first, then certain).
for frame, filename in ((uncertain, 'uncertain.xlsx'), (certain, 'certain.xlsx')):
    frame.to_excel(filename)

Two files are saved. In both, the column called 'predicted' contains the prediction, and the column called 'accepted' contains a Boolean value indicating whether the prediction passes the respective threshold or not.