In [1]:
import os
import pandas as pd

# Summary of all trained models

In [2]:
# Project root directory and the sub-directory that holds the trained models.
path_working_dir = "/content/drive/MyDrive/NLP/innoscripta"
path_models_dir = os.path.join(path_working_dir, "models")
# Show the models directory path followed by the names of its entries.
print(f"{path_models_dir}:\n{os.listdir(path_models_dir)}")

/content/drive/MyDrive/NLP/innoscripta/models:
['deepset-gbert-base', 'deepset-gbert-base-epochs2', 'distilbert-base-german-cased', 'distilbert-base-german-cased-epochs3', 'deepset-gbert-base-epochs3', 'deepset-gbert-base-test1']


In [3]:
def print_file(file_path, encoding="utf-8"):
    """Print the full text content of a file to stdout.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale setting.
    """
    # Explicit encoding: open() otherwise falls back to the locale default.
    with open(file_path, "r", encoding=encoding) as f:
        print(f.read())

In [4]:
# Print the stored evaluation results of every model folder.
for model_dir in os.listdir(path_models_dir):
    model_path = os.path.join(path_models_dir, model_dir)
    # Skip plain files in the models folder: os.listdir() on a non-directory
    # raises NotADirectoryError and would abort the whole summary.
    if not os.path.isdir(model_path):
        continue
    print("../" + model_dir)
    for file in os.listdir(model_path):
        # Only the evaluation summary file(s) of the model are printed.
        if file.endswith("eval_results.txt"):
            results = os.path.join(model_path, file)
            print_file(results)

../deepset-gbert-base
eval_loss = 0.21460278927521548
f1_score = 0.8010357500835282
precision = 0.7567866161616161
recall = 0.8507806955287438

../deepset-gbert-base-epochs2
eval_loss = 0.1716558334064386
f1_score = 0.8548085106382978
precision = 0.8130160271976687
recall = 0.9011304503857887

../distilbert-base-german-cased
eval_loss = 0.29372823588197877
f1_score = 0.6966057441253264
precision = 0.6860963483627636
recall = 0.7074421071239173

../distilbert-base-german-cased-epochs3
eval_loss = 0.29535324154889797
f1_score = 0.7427078064407351
precision = 0.7099113618049959
recall = 0.7786812798302988

../deepset-gbert-base-epochs3
eval_loss = 0.24315613271401348
f1_score = 0.8371616428998377
precision = 0.7986314760508308
recall = 0.879598062085053

../deepset-gbert-base-test1
eval_loss = 0.40298482527212703
f1_score = 0.8401639344262295
precision = 0.795601552393273
recall = 0.8900144717800289



# Load a trained model, evaluate it, and run inference

In [None]:
!pip install simpletransformers
from simpletransformers.ner import NERModel, NERArgs
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the model stored in the "deepset-gbert-base-epochs3" folder of the models directory.
model_dir = os.path.join(path_models_dir, "deepset-gbert-base-epochs3")
model = NERModel("bert", model_name=model_dir)

In [None]:
# Location and file name of the pickled test DataFrame.
path_data_dir = os.path.join(path_working_dir, "data")
test_data_name = "test_df.pkl"
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
df_test = pd.read_pickle(os.path.join(path_data_dir, test_data_name))
# Display (rows, columns) as a quick sanity check of the loaded frame.
df_test.shape

(31422, 3)

In [None]:
# Evaluate the loaded model on the test DataFrame; eval_model returns three values.
# `result` is the metrics dict displayed below (eval_loss, f1_score, precision, recall).
# NOTE(review): model_outputs / preds_list presumably hold the raw model outputs and
# the predicted tag sequences — confirm against the simpletransformers documentation.
result, model_outputs, preds_list = model.eval_model(df_test)
result

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=6265.0), HTML(value='')))




HBox(children=(HTML(value='Running Evaluation'), FloatProgress(value=0.0, max=784.0), HTML(value='')))




{'eval_loss': 0.24315613271401348,
 'f1_score': 0.8371616428998377,
 'precision': 0.7986314760508308,
 'recall': 0.879598062085053}

In [None]:
# Sample German text used for a quick manual check of the model's predictions.
my_text = "Gebäudevermessung von Mehrfamilienhäusern Einkaufszentren Außenanlagen und bereiten ihn auf 3D Scan"
# predict() is given a list of input strings; a single text is passed here.
prediction, model_output = model.predict([my_text])
# Displayed result: one list per input text, holding one {word: tag} dict per word.
prediction

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(HTML(value='Running Prediction'), FloatProgress(value=0.0, max=1.0), HTML(value='')))




[[{'Gebäudevermessung': 'B-KEY'},
  {'von': 'O'},
  {'Mehrfamilienhäusern': 'O'},
  {'Einkaufszentren': 'O'},
  {'Außenanlagen': 'O'},
  {'und': 'O'},
  {'bereiten': 'O'},
  {'ihn': 'O'},
  {'auf': 'O'},
  {'3D': 'B-KEY'},
  {'Scan': 'I-KEY'}]]