In [1]:
import sys
# print(sys.executable)

In [2]:
import os
from pathlib import Path
import pickle
import json
import re
import pandas as pd
import numpy as np
from gensim.models import KeyedVectors
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow import keras
from keras import layers
from keras.models import model_from_json
from tensorflow.keras.layers import Embedding, Dense, TimeDistributed, SpatialDropout1D, LSTM, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping
from seqeval.metrics import classification_report
from sklearn.model_selection import KFold
from sklearn.metrics import precision_score, recall_score, f1_score
import matplotlib.pyplot as plt


2023-09-07 17:46:52.708951: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


#### Check device placement: GPU / CPU

In [3]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
if not tf.config.experimental.list_physical_devices('GPU'):
    raise SystemError("GPU device not found!")
    
    
print(tf.config.list_physical_devices('GPU'), '\n')

Num GPUs Available:  1
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')] 



2023-09-07 17:46:54.225972: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-09-07 17:46:54.242326: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-09-07 17:46:54.242461: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


# Load NER dataset

In [4]:
import os
from pathlib import Path
import pandas as pd


In [5]:
data_path= Path("/data/")
display(os.listdir(data_path ))

['ner-data.csv',
 'unique_tokens.pkl',
 'raw-data.csv',
 'README.md',
 'Duplex_A_20110907.ifc',
 'ifcner-paper-data.csv',
 'word_index_cbow_d100_w10.pkl',
 'ifcner-paper-data-revised.csv',
 'embeddings']

In [6]:
df=pd.read_csv(data_path / 'ifcner-paper-data-revised.csv')
df

Unnamed: 0,token,tag
0,highlight,O
1,all,B_quantity
2,slab,B_built_obj
3,highlight all slab,sentence
4,pick,O
...,...,...
23631,modeled,O
23632,within,O
23633,room,B_loc_space
23634,100,B_name


In [7]:
df['tag'].value_counts()

O              12930
sentence        2380
B_mep_obj       1763
B_quantity      1511
I_mep_obj       1015
B_loc_space      880
B_built_obj      810
B_name           700
B_loc_level      625
B_ordinal        375
B_number         250
I_built_obj      235
I_loc_space      101
I_ordinal         61
Name: tag, dtype: int64

In [8]:
df_tokens = df.loc[(df['tag'])!='sentence']
df_tokens


Unnamed: 0,token,tag
0,highlight,O
1,all,B_quantity
2,slab,B_built_obj
4,pick,O
5,each,B_quantity
...,...,...
23630,",",O
23631,modeled,O
23632,within,O
23633,room,B_loc_space


In [9]:
unique_tokens = []

for token in df_tokens['token']:
    if token not in unique_tokens:
        unique_tokens.append(token)

len(unique_tokens)

698

## Prepare data for 10-fold cross-val 

In [10]:
df = df.reset_index(drop=True)

sentence_idx=df[df['tag'] == 'sentence' ].index
data_sentences=[df['token'][idx] for idx in sentence_idx]

data_tags=[]
for i in range(len(sentence_idx)):
    if i==0:
        data_tags.append(df['tag'][0:sentence_idx[i]].to_list())
    else:
        data_tags.append(df['tag'][sentence_idx[i-1]+1 :sentence_idx[i]].to_list())

        
        
if len(data_sentences)==len(data_tags):
    print('### %d tagged data_sentences found ###\n' % (len(data_tags)))


for s,t in zip(data_sentences[:10], data_tags[:10]) :
    print(s,'\n', t, '\n')

### 2380 tagged data_sentences found ###

highlight all slab 
 ['O', 'B_quantity', 'B_built_obj'] 

pick each plates items 
 ['O', 'B_quantity', 'B_built_obj', 'O'] 

find 61 railings 
 ['O', 'B_quantity', 'B_built_obj'] 

show me 100 doorway objects 
 ['O', 'O', 'B_quantity', 'B_built_obj', 'O'] 

for 61 staircases 
 ['O', 'B_quantity', 'B_built_obj'] 

for 8 footings objects 
 ['O', 'B_quantity', 'B_built_obj', 'O'] 

find me column 
 ['O', 'O', 'B_built_obj'] 

choose staircases objects 
 ['O', 'B_built_obj', 'O'] 

show two beam 
 ['O', 'B_quantity', 'B_built_obj'] 

find 5 beams instances 
 ['O', 'B_quantity', 'B_built_obj', 'O'] 



In [11]:
data = pd.DataFrame({'sentence': data_sentences, 'tags':data_tags})

data['word_labels'] = data['tags'].transform(lambda x: ','.join(x))
data = data.drop(columns=['tags'])
data

Unnamed: 0,sentence,word_labels
0,highlight all slab,"O,B_quantity,B_built_obj"
1,pick each plates items,"O,B_quantity,B_built_obj,O"
2,find 61 railings,"O,B_quantity,B_built_obj"
3,show me 100 doorway objects,"O,O,B_quantity,B_built_obj,O"
4,for 61 staircases,"O,B_quantity,B_built_obj"
...,...,...
2375,contained inside level four give me the number...,"O,O,B_loc_level,B_number,O,O,O,O,O,B_mep_obj,I..."
2376,contained inside floor ten what is the number ...,"O,O,B_loc_level,B_number,O,O,O,O,O,O,O,B_mep_o..."
2377,"contained in level 1 , give me the total numbe...","O,O,B_loc_level,B_number,O,O,O,O,O,O,O,O,O,B_m..."
2378,at story 8 find me the quantity of stack termi...,"O,B_loc_level,B_number,O,O,O,O,O,B_mep_obj,I_m..."


In [12]:
tags_labels=[tag for tag in list(df['tag'].unique()) if type(tag)==str and tag!='sentence']
tag_index={tag:idx for idx,tag in enumerate(tags_labels)}
tag_dic = {v:k for k,v in tag_index.items()}
print(tag_dic)

## save tag dictionary
# f = open("tag_dic.pkl", "wb")
# pickle.dump(tag_dic, f)
# f.close()

# with open("tag_dic.json", "w") as tag_file:
#     json.dump(tag_dic, tag_file, indent = 4)

{0: 'O', 1: 'B_quantity', 2: 'B_built_obj', 3: 'I_built_obj', 4: 'B_mep_obj', 5: 'I_mep_obj', 6: 'B_ordinal', 7: 'B_loc_level', 8: 'I_ordinal', 9: 'B_number', 10: 'B_loc_space', 11: 'I_loc_space', 12: 'B_name'}


### Embeddings utils:  *load/create word_index & embedding_matrix*

In [13]:
embeddings_path = Path(str(data_path) + '/embeddings')

display(os.listdir(embeddings_path))

['glove_6B_100d_w2v.txt',
 'archive',
 'cbow_d100_w10.bin',
 'glove_42B_300d_w2v.txt',
 'skip_d300_w10.bin',
 'glove_6B_300d_w2v.txt',
 'cbow_d300_w10_neg10.bin']

In [14]:
## transform Glove to word2vec format for Gensim compatibility

from gensim.test.utils import datapath, get_tmpfile
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec

glove_file = datapath(embeddings_path / 'glove.42B.300d.txt')
tmp_file = get_tmpfile(embeddings_path / "glove.42B.300d-w2v.txt")
_ = glove2word2vec(glove_file, tmp_file)
embeddings_glove_42B_300d = KeyedVectors.load_word2vec_format(tmp_file)


glove_file = datapath(embeddings_path / 'glove.6B.300d.txt')
tmp_file = get_tmpfile(embeddings_path / "glove-6B-300d-w2v.txt")
_ = glove2word2vec(glove_file, tmp_file)
embeddings_glove_42B_300d = KeyedVectors.load_word2vec_format(tmp_file)


glove_file = datapath(embeddings_path / 'glove.6B.100d.txt')
tmp_file = get_tmpfile(embeddings_path / "glove-6B-100d-w2v.txt")
_ = glove2word2vec(glove_file, tmp_file)
embeddings_glove_42B_300d = KeyedVectors.load_word2vec_format(tmp_file)


# CBOW_d100 experiments

In [13]:
cbow_d100 = KeyedVectors.load_word2vec_format(
    embeddings_path / 'cbow_d100_w10.bin',
    binary=True,
    unicode_errors='ignore'
)

print("cbow_d100: {:,}".format(len(cbow_d100.key_to_index)))

cbow_d100: 1,497,376


In [17]:
## create word_index

word_index_cbow_d100_50k={'':0, '[UNK]':1}
for i,(k,v) in enumerate(cbow_d100.key_to_index.items()):
    if i < 10000:
        word_index_cbow_d100_50k[k]=i+2  

        

## add unique tokens and list missing tokens

cbow_d100_50k_misses=[]

for i,token in enumerate(unique_tokens):
    if token.lower() not in list(word_index_cbow_d100_50k.keys()):
        if token.lower() in list(cbow_d100.key_to_index.keys()): 
            word_index_cbow_d100_50k[token.lower()]=i+len(word_index_cbow_d100_50k) 
        else:
            cbow_d100_50k_misses.append(token)
        
df[df['token'].isin(cbow_d100_50k_misses)]['tag'].value_counts()

B_name         700
B_quantity     638
B_ordinal      200
B_number       131
B_mep_obj       15
B_loc_space      8
Name: tag, dtype: int64

In [18]:
num_tokens =len(word_index_cbow_d100_50k)
embedding_dim =cbow_d100.vector_size
not_found = []


embedding_matrix_cbow_d100 = np.zeros((num_tokens, embedding_dim))


for word, i in word_index_cbow_d100_50k.items(): 
    if word.lower() in cbow_d100.index_to_key and i<num_tokens:       # important: convert word to lower-case
        embedding_matrix_cbow_d100[i]= cbow_d100[word.lower()]
           
    if word.lower() not in cbow_d100.index_to_key:   
        embedding_matrix_cbow_d100[i]= np.zeros((embedding_dim))
        not_found.append(word.lower())

        
print(' embedding_matrix_cbow_d100 shape:  ', embedding_matrix_cbow_d100.shape)

 embedding_matrix_cbow_d100 shape:   (10107, 100)


In [22]:
def sentence_encoder(sentence=str, vocabs=[]):
    encoded_sentence=[]
    for token in sentence.split():
        if token.lower() in vocabs:
            encoded_sentence.append(word_index_cbow_d100_50k[token.lower()])
        else:
            encoded_sentence.append(1)
    return encoded_sentence


vocabs=list(word_index_cbow_d100_50k.keys())



X=[]

for s in data_sentences:
    X.append(sentence_encoder(s, vocabs))

    
X=pad_sequences(maxlen=150,padding='post',sequences=X)


y=[[tag_index[tag] for tag in item] for item in data_tags]
y=pad_sequences(maxlen=150,padding='post',sequences=y)
y=to_categorical(y)


print('data ==> ', 'X shape: ', X.shape, 'Y shape: ',y.shape)



data ==>  X shape:  (2380, 150) Y shape:  (2380, 150, 13)


In [23]:
data_sentences[0]

'highlight all slab'

In [24]:
word_index_cbow_d100_50k['slab']

10004

In [25]:
X[0]

array([ 7671,    63, 10004,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,     0,     0,     0,     0,     0,     0,     0,
           0,     0,

In [26]:
y[0].shape

(150, 13)

In [27]:
num_tokens =len(word_index_cbow_d100_50k)
n_tags=len(tag_index)
embedding_dim=embedding_matrix_cbow_d100.shape[1]


embedding_layer = Embedding(
    num_tokens,
    embedding_dim,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix_cbow_d100),
    trainable=False,
)


int_sequences_input = keras.Input(shape=(None,), dtype="int64")
embedded_sequences = embedding_layer(int_sequences_input)
x = SpatialDropout1D(0.1)(embedded_sequences)
x = Bidirectional(LSTM(units=150,return_sequences=True, recurrent_dropout=0.1))(x)
preds = TimeDistributed(Dense(n_tags, activation="softmax"))(x)



2023-09-06 19:57:52.688821: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-09-06 19:57:52.704932: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory
2023-09-06 19:57:52.704954: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1934] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2023-09-06 19:57:52.705615: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neur

### Train NER model

In [30]:
data

Unnamed: 0,sentence,word_labels
0,highlight all slab,"O,B_quantity,B_built_obj"
1,pick each plates items,"O,B_quantity,B_built_obj,O"
2,find 61 railings,"O,B_quantity,B_built_obj"
3,show me 100 doorway objects,"O,O,B_quantity,B_built_obj,O"
4,for 61 staircases,"O,B_quantity,B_built_obj"
...,...,...
2375,contained inside level four give me the number...,"O,O,B_loc_level,B_number,O,O,O,O,O,B_mep_obj,I..."
2376,contained inside floor ten what is the number ...,"O,O,B_loc_level,B_number,O,O,O,O,O,O,O,B_mep_o..."
2377,"contained in level 1 , give me the total numbe...","O,O,B_loc_level,B_number,O,O,O,O,O,O,O,O,O,B_m..."
2378,at story 8 find me the quantity of stack termi...,"O,B_loc_level,B_number,O,O,O,O,O,B_mep_obj,I_m..."


In [34]:
not_test_data_idx = not_test_data.index
len(not_test_data_idx)

2142

In [42]:

seeds = [0, 42, 123]
n_splits = 10

for seed in seeds:

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    
    for i, (not_test_idx, test_idx) in enumerate(kf.split(data)):
        
        val_size = len(test_data)
        val_idx = not_test_idx[:val_size]
        train_idx = not_test_idx[val_size:]
        
        print("\n\n"+"*"*10, f'\tCROSS-VAL iteration with fold {i+1}/{n_splits} (seed {seed})\t', "*"*10+"\n")
#         print("Train size:", len(train_idx))
#         print("Validation size:", len(val_idx))
#         print("Test size:", len(test_idx))
        
        
        X_train, X_val, X_test = X[train_idx], X[val_idx], X[test_idx]
        y_train, y_val, y_test = y[train_idx], y[val_idx], y[test_idx]
 


        print('train data shape ==> ', 'X_train: ', X_train.shape, 'y_train: ',y_train.shape)
        print('validation data shape==> ', 'X_val: ', X_val.shape, 'Y_val: ',y_val.shape)
        print('test data shape==> ', 'X_test: ', X_test.shape, 'Y_test: ',y_test.shape)

        




********** 	CROSS-VAL iteration with fold 1/10 (seed 0)	 **********

train data shape ==>  X_train:  (1904, 150) y_train:  (1904, 150, 13)
validation data shape==>  X_val:  (238, 150) Y_val:  (238, 150, 13)
test data shape==>  X_test:  (238, 150) Y_test:  (238, 150, 13)


********** 	CROSS-VAL iteration with fold 2/10 (seed 0)	 **********

train data shape ==>  X_train:  (1904, 150) y_train:  (1904, 150, 13)
validation data shape==>  X_val:  (238, 150) Y_val:  (238, 150, 13)
test data shape==>  X_test:  (238, 150) Y_test:  (238, 150, 13)


********** 	CROSS-VAL iteration with fold 3/10 (seed 0)	 **********

train data shape ==>  X_train:  (1904, 150) y_train:  (1904, 150, 13)
validation data shape==>  X_val:  (238, 150) Y_val:  (238, 150, 13)
test data shape==>  X_test:  (238, 150) Y_test:  (238, 150, 13)


********** 	CROSS-VAL iteration with fold 4/10 (seed 0)	 **********

train data shape ==>  X_train:  (1904, 150) y_train:  (1904, 150, 13)
validation data shape==>  X_val:  (238, 

In [24]:
%%time



# Set up early stopping
early_stopping = EarlyStopping(
    monitor='val_loss',  
    patience=0,
    mode="auto",
    verbose=1,           
    restore_best_weights=True  
)



seeds = [0, 42, 123]
n_splits = 10



eval_reports, model_histories, trained_epochs = [], [], []





for seed in seeds:

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    
    for i, (not_test_idx, test_idx) in enumerate(kf.split(data)):
        
        val_size = len(test_data)
        val_idx = not_test_idx[:val_size]
        train_idx = not_test_idx[val_size:]
        
        print("\n\n"+"*"*10, f'\tCROSS-VAL iteration with fold {i+1}/{n_splits} (seed {seed})\t', "*"*10+"\n")

        
        X_train, X_val, X_test = X[train_idx], X[val_idx], X[test_idx]
        y_train, y_val, y_test = y[train_idx], y[val_idx], y[test_idx]


        print('train data shape ==> ', 'X_train: ', X_train.shape, 'y_train: ',y_train.shape)
        print('validation data shape==> ', 'X_val: ', X_val.shape, 'Y_val: ',y_val.shape)
        print('test data shape==> ', 'X_test: ', X_test.shape, 'Y_test: ',y_test.shape)




        # re-initialize model for new folds
        model = keras.Model(int_sequences_input, preds)
        model.summary()
        model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
     
    
        # train model
        history = model.fit(
            X_train_fold, y_train_fold,
            validation_data=(X_valid_fold, y_valid_fold),
            epochs=100,
            batch_size=4,
            callbacks=[early_stopping],
            verbose=1
        )
        
        
        model_histories.append(history)
        trained_epochs.append(early_stopping.stopped_epoch)
        
        
        
        # Predict classes (instead of probabilities)
        y_pred = np.argmax(model.predict(X_test), axis=-1)
        
        
        # Assing true and predicted labels 
        true_labels, pred_labels = [], []

        for item in y_test:
            tag_indices = np.argmax(item, axis=-1)           
            true_labels += [tag_dic[index] for index in tag_indices]

        for item in y_pred:
            pred_labels += [tag_dic[index] for index in item]

        
        
        # Compute metrics for the current fold and seed
        eval_reports.append(classification_report([true_labels], [pred_labels],  digits=4))  
    

Starting iteration with seed 0 and fold 1
Model: "model_31"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 100)         1010700   
                                                                 
 spatial_dropout1d (SpatialD  (None, None, 100)        0         
 ropout1D)                                                       
                                                                 
 bidirectional (Bidirectiona  (None, None, 300)        301200    
 l)                                                              
                                                                 
 time_distributed (TimeDistr  (None, None, 13)         3913      
 ibuted)                                                         
                

                                                                 
 time_distributed (TimeDistr  (None, None, 13)         3913      
 ibuted)                                                         
                                                                 
Total params: 1,315,813
Trainable params: 305,113
Non-trainable params: 1,010,700
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 2: early stopping
Starting iteration with seed 0 and fold 6
Model: "model_36"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 100)         1010700   
                                                                 
 spatial_dropout1d (SpatialD  (None, None, 100)        0         
 ropout1D)        

Epoch 2: early stopping
Starting iteration with seed 0 and fold 10
Model: "model_40"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 100)         1010700   
                                                                 
 spatial_dropout1d (SpatialD  (None, None, 100)        0         
 ropout1D)                                                       
                                                                 
 bidirectional (Bidirectiona  (None, None, 300)        301200    
 l)                                                              
                                                                 
 time_distributed (TimeDistr  (None, None, 13)         3913      
 ibuted)                                                 

 l)                                                              
                                                                 
 time_distributed (TimeDistr  (None, None, 13)         3913      
 ibuted)                                                         
                                                                 
Total params: 1,315,813
Trainable params: 305,113
Non-trainable params: 1,010,700
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 2: early stopping
Starting iteration with seed 42 and fold 5
Model: "model_45"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 100)         1010700   
                                                                 
 spatial_dropout1

Epoch 2: early stopping
Starting iteration with seed 42 and fold 9
Model: "model_49"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 100)         1010700   
                                                                 
 spatial_dropout1d (SpatialD  (None, None, 100)        0         
 ropout1D)                                                       
                                                                 
 bidirectional (Bidirectiona  (None, None, 300)        301200    
 l)                                                              
                                                                 
 time_distributed (TimeDistr  (None, None, 13)         3913      
 ibuted)                                                 

 ropout1D)                                                       
                                                                 
 bidirectional (Bidirectiona  (None, None, 300)        301200    
 l)                                                              
                                                                 
 time_distributed (TimeDistr  (None, None, 13)         3913      
 ibuted)                                                         
                                                                 
Total params: 1,315,813
Trainable params: 305,113
Non-trainable params: 1,010,700
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 2: early stopping
Starting iteration with seed 123 and fold 4
Model: "model_54"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                

In [25]:
def extract_weighted_avg(eval_reports=[]):
    precision_scores = []
    recall_scores = []
    f1_scores = []

    
    pattern = r'weighted avg\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)'
    
    
    for report in eval_reports:
        match = re.search(pattern, report)
        
        if match:           
            precision_scores.append(float(match.group(1)))
            recall_scores.append(float(match.group(2)))
            f1_scores.append(float(match.group(3)))
        else:
            print('Erro encountered when parsing evaluation report str')
            
        scores= {'precision':precision_scores, 'recall':recall_scores, 'f1':f1_scores}
 
        
    return scores



scores = extract_weighted_avg(eval_reports)

precision_avg = np.mean(scores['precision'])
precision_std = np.std(scores['precision'])

recall_avg = np.mean(scores['recall'])
recall_std = np.std(scores['recall'])

f1_avg = np.mean(scores['f1'])
f1_std = np.std(scores['f1'])


print(f"Precision: {precision_avg:.4f} ± {precision_std:.4f}")
print(f"Recall: {recall_avg:.4f} ± {recall_std:.4f}")
print(f"F1 Score: {f1_avg:.4f} ± {f1_std:.4f}")

Precision: 0.9814 ± 0.0047
Recall: 0.9813 ± 0.0057
F1 Score: 0.9811 ± 0.0050
