In [2]:
import os

# Step up one directory so that relative paths (e.g. 'ner_results/...') are
# looked up from the parent folder. The move is relative to the current
# directory, so running this cell again climbs one more level.
os.chdir(os.pardir)

In [3]:
# Show the absolute path of the directory the kernel is currently working in
print(os.path.abspath("."))

/home/admin-user/swardi/drone-definition


In [40]:
import csv
import pandas as pd
# Column names expected in every prediction CSV
WORD_COLUMN = 'words'
ACTUAL_LABEL_COLUMN = 'actual_class'
PREDICTED_LABEL_COLUMN = 'predicted_class'


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0


def _precision_recall_f1(tp, fp, fn):
    """Return (precision, recall, F1) as fractions; an undefined score is 0."""
    precision = _safe_ratio(tp, tp + fp)
    recall = _safe_ratio(tp, tp + fn)
    f1_score = _safe_ratio(2 * precision * recall, precision + recall)
    return precision, recall, f1_score


def _collect_spans(predicted_df):
    """Group the token rows into gold spans and their aligned predictions.

    A span opens at every gold ``B-`` tag and is extended by each following
    gold ``I-`` tag. Rows whose gold tag is neither are skipped; they do not
    close the span that is currently open (only the next ``B-`` tag or the end
    of the data does).

    Returns:
        (actual_spans, predicted_spans): two lists of equal length. Each span
        is a list of ``(word, label)`` tuples with the ``B-``/``I-`` prefix
        removed; the predicted span holds the predicted label for the same
        words as the gold span at the same position.
    """
    actual_spans = []
    predicted_spans = []
    current_actual_span = []
    current_predicted_span = []

    token_rows = zip(
        predicted_df[WORD_COLUMN],
        predicted_df[ACTUAL_LABEL_COLUMN],
        predicted_df[PREDICTED_LABEL_COLUMN],
    )
    for word, actual_label, predicted_label in token_rows:
        # Drop the two-character B-/I- prefix so labels compare by entity type
        if predicted_label != 'O':
            predicted_label = predicted_label[2:]

        # A gold B- tag closes the span collected so far
        if actual_label.startswith('B-') and current_actual_span:
            actual_spans.append(current_actual_span)
            predicted_spans.append(current_predicted_span)
            current_actual_span = []
            current_predicted_span = []

        # Gold entity tokens extend the open span
        if actual_label.startswith(('B-', 'I-')):
            current_actual_span.append((word, actual_label[2:]))
            current_predicted_span.append((word, predicted_label))

    # Flush the span that was still open when the rows ran out
    if current_actual_span:
        actual_spans.append(current_actual_span)
        predicted_spans.append(current_predicted_span)

    return actual_spans, predicted_spans


def span_evaluation(filename):
    """Print span-level precision, recall and F1 for one prediction CSV.

    The file is read once and its rows are grouped into gold entity spans.
    Counting rules:

    * TP - the predicted ``(word, label)`` sequence equals the gold span;
      counted under the entity type of the gold span's first token.
    * FN - any other gold span, counted under the same gold type.
    * FP - for a span whose predictions include entity types other than the
      gold type (restricted to types that occur in the gold data), the most
      frequent such type receives one false positive, unless the predicted
      span is identical to some gold span in the file.

    Args:
        filename: Path to a CSV containing the columns named by WORD_COLUMN,
            ACTUAL_LABEL_COLUMN and PREDICTED_LABEL_COLUMN. The model name in
            the report is the text before the first '-' of the second
            '/'-separated path component (of the whole path if it has no '/').

    Returns:
        None. The report is written to stdout; per-entity sections are printed
        in sorted entity-type order.
    """
    path_parts = filename.split('/')
    model_dir = path_parts[1] if len(path_parts) > 1 else path_parts[0]
    model_name = model_dir.split('-')[0]

    # Read every cell as a plain string: with pandas' defaults, tokens such as
    # "NA" or "null" and empty cells become float NaN, which has no .startswith
    predicted_df = pd.read_csv(filename, dtype=str, keep_default_na=False)
    actual_spans, predicted_spans = _collect_spans(predicted_df)

    entity_types = set()
    entity_tp = {}
    entity_fp = {}
    entity_fn = {}

    # True positives and false negatives, keyed by the gold entity type
    for actual_span, predicted_span in zip(actual_spans, predicted_spans):
        actual_entity = actual_span[0][1]
        entity_types.add(actual_entity)
        if actual_span == predicted_span:
            entity_tp[actual_entity] = entity_tp.get(actual_entity, 0) + 1
        else:
            entity_fn[actual_entity] = entity_fn.get(actual_entity, 0) + 1

    # False positives. This needs the complete set of gold entity types, so it
    # runs as a second pass. Hashable copies of the gold spans make the
    # "is this prediction some gold span?" check a set lookup.
    actual_span_keys = {tuple(span) for span in actual_spans}
    for actual_span, predicted_span in zip(actual_spans, predicted_spans):
        predicted_entities = {}
        for _, entity_type in predicted_span:
            # Ignore 'O' and any label that never occurs as a gold type
            if entity_type in entity_types:
                predicted_entities[entity_type] = predicted_entities.get(entity_type, 0) + 1

        # Leave the gold type out of the vote so a partial match cannot win it
        predicted_entities.pop(actual_span[0][1], None)
        if not predicted_entities:
            continue

        # Majority vote over the remaining predicted types
        predicted_entity = max(predicted_entities, key=predicted_entities.get)
        if tuple(predicted_span) not in actual_span_keys:
            entity_fp[predicted_entity] = entity_fp.get(predicted_entity, 0) + 1

    # Micro-averaged scores over all entity types
    micro_tp = sum(entity_tp.values())
    micro_fp = sum(entity_fp.values())
    micro_fn = sum(entity_fn.values())
    micro_precision, micro_recall, micro_f1_score = _precision_recall_f1(micro_tp, micro_fp, micro_fn)
    # Same F1 written directly in terms of the counts; guarded for empty input
    micro_f1_swardi = _safe_ratio(micro_tp, micro_tp + ((micro_fp + micro_fn) / 2))

    print(f"Model: {model_name}")
    print(f"TP: {entity_tp}")
    print(f"FP: {entity_fp}")
    print(f"FN: {entity_fn}")
    # Sorted so the report order is the same on every run
    for entity_type in sorted(entity_types):
        entity_precision, entity_recall, entity_f1_score = _precision_recall_f1(
            entity_tp.get(entity_type, 0),
            entity_fp.get(entity_type, 0),
            entity_fn.get(entity_type, 0),
        )

        print(f"Entity Type: {entity_type}")
        print(f"Precision: {(entity_precision*100):.3f}")
        print(f"Recall: {(entity_recall*100):.3f}")
        print(f"F1-Score: {(entity_f1_score*100):.3f}")
        print()

    print(f"Micro-Average Precision: {(micro_precision*100):.3f}")
    print(f"Micro-Average Recall: {(micro_recall*100):.3f}")
    print(f"Micro-Average F1-Score: {(micro_f1_score*100):.3f}")
    print(f"Micro-Average F1-Sward: {(micro_f1_swardi*100):.3f}")
    print()


In [41]:
# Prediction CSVs to score, one per fine-tuned model
filenames = [
    'ner_results/xlnet-base-cased/prediction_xlnet-base-cased.csv',
    'ner_results/bert-base-cased/prediction_bert-base-cased.csv',
    'ner_results/distilbert-base-cased/prediction_distilbert-base-cased.csv',
    'ner_results/distilroberta-base/prediction_distilroberta-base.csv',
    'ner_results/electra-base/prediction_electra-base-discriminator.csv',
    'ner_results/roberta/prediction_roberta-base.csv',
]

# Print the span-level report for each file in turn
for filename in filenames:
    span_evaluation(filename)

Model: xlnet
TP: {'COMPONENT': 208, 'STATE': 60, 'PARAMETER': 220, 'FUNCTION': 102, 'ISSUE': 253, 'ACTION': 108}
FP: {'ISSUE': 3, 'COMPONENT': 10, 'STATE': 2, 'PARAMETER': 5, 'FUNCTION': 6, 'ACTION': 1}
FN: {'FUNCTION': 9, 'COMPONENT': 5, 'ACTION': 16, 'STATE': 4, 'ISSUE': 9, 'PARAMETER': 3}
Entity Type: FUNCTION
Precision: 94.444
Recall: 91.892
F1-Score: 93.151

Entity Type: COMPONENT
Precision: 95.413
Recall: 97.653
F1-Score: 96.520

Entity Type: PARAMETER
Precision: 97.778
Recall: 98.655
F1-Score: 98.214

Entity Type: ACTION
Precision: 99.083
Recall: 87.097
F1-Score: 92.704

Entity Type: ISSUE
Precision: 98.828
Recall: 96.565
F1-Score: 97.683

Entity Type: STATE
Precision: 96.774
Recall: 93.750
F1-Score: 95.238

Micro-Average Precision: 97.239
Micro-Average Recall: 95.386
Micro-Average F1-Score: 96.304
Micro-Average F1-Sward: 96.304

Model: bert
TP: {'COMPONENT': 208, 'STATE': 60, 'PARAMETER': 218, 'FUNCTION': 102, 'ACTION': 108, 'ISSUE': 253}
FP: {'ISSUE': 5, 'FUNCTION': 9, 'COMPON

In [33]:
import pandas as pd

# Experiment results: per-entity counts and micro-averaged scores for each model
results = [
    {
        'Model': 'xlnet',
        'TP': {'COMPONENT': 208, 'STATE': 60, 'PARAMETER': 220, 'FUNCTION': 102, 'ISSUE': 253, 'ACTION': 108},
        'FP': {'ISSUE': 3, 'COMPONENT': 10, 'STATE': 2, 'PARAMETER': 5, 'FUNCTION': 6, 'ACTION': 1},
        'FN': {'FUNCTION': 9, 'COMPONENT': 5, 'ACTION': 16, 'STATE': 4, 'ISSUE': 9, 'PARAMETER': 3},
        'Precision': 97.239,
        'Recall': 95.386,
        'F1': 96.304,
    },
    {
        'Model': 'bert',
        'TP': {'COMPONENT': 208, 'STATE': 60, 'PARAMETER': 218, 'FUNCTION': 102, 'ACTION': 108, 'ISSUE': 253},
        'FP': {'ISSUE': 5, 'FUNCTION': 9, 'COMPONENT': 11, 'STATE': 1, 'PARAMETER': 2, 'ACTION': 2},
        'FN': {'FUNCTION': 9, 'ISSUE': 9, 'ACTION': 16, 'STATE': 4, 'PARAMETER': 5, 'COMPONENT': 5},
        'Precision': 96.936,
        'Recall': 95.186,
        'F1': 96.053,
    },
    {
        'Model': 'distilbert',
        'TP': {'COMPONENT': 207, 'STATE': 60, 'FUNCTION': 99, 'PARAMETER': 218, 'ISSUE': 253, 'ACTION': 107},
        'FP': {'FUNCTION': 9, 'COMPONENT': 10, 'ACTION': 2, 'STATE': 3, 'ISSUE': 6, 'PARAMETER': 6},
        'FN': {'ISSUE': 9, 'ACTION': 17, 'STATE': 4, 'FUNCTION': 12, 'PARAMETER': 5, 'COMPONENT': 6},
        'Precision': 96.327,
        'Recall': 94.684,
        'F1': 95.498,
    },
    {
        'Model': 'distilroberta',
        'TP': {'COMPONENT': 208, 'STATE': 60, 'PARAMETER': 217, 'FUNCTION': 100, 'ACTION': 108, 'ISSUE': 252},
        'FP': {'ISSUE': 6, 'COMPONENT': 9, 'FUNCTION': 9, 'STATE': 5, 'ACTION': 1},
        'FN': {'FUNCTION': 11, 'ISSUE': 10, 'ACTION': 16, 'STATE': 4, 'PARAMETER': 6, 'COMPONENT': 5},
        'Precision': 96.923,
        'Recall': 94.784,
        'F1': 95.842,
    },
    {
        'Model': 'electra',
        'TP': {'COMPONENT': 173, 'PARAMETER': 180, 'FUNCTION': 94, 'ISSUE': 231, 'STATE': 51, 'ACTION': 98},
        'FP': {'ISSUE': 69, 'PARAMETER': 14, 'COMPONENT': 16, 'FUNCTION': 19, 'ACTION': 14, 'STATE': 3},
        'FN': {'STATE': 13, 'FUNCTION': 17, 'COMPONENT': 40, 'ACTION': 26, 'ISSUE': 31, 'PARAMETER': 43},
        'Precision': 85.967,
        'Recall': 82.949,
        'F1': 84.431,
    },
    {
        'Model': 'roberta',
        'TP': {'COMPONENT': 211, 'STATE': 60, 'FUNCTION': 101, 'PARAMETER': 215, 'ISSUE': 252, 'ACTION': 108},
        'FP': {'COMPONENT': 11, 'FUNCTION': 9, 'STATE': 2, 'ISSUE': 2, 'PARAMETER': 1, 'ACTION': 2},
        'FN': {'ACTION': 16, 'STATE': 4, 'FUNCTION': 10, 'ISSUE': 10, 'PARAMETER': 8, 'COMPONENT': 2},
        'Precision': 97.228,
        'Recall': 94.985,
        'F1': 96.093,
    },
]

# One summary row per model: the per-entity counts are added up into totals,
# the three scores are carried over unchanged
recap_data = [
    {
        'Model': entry['Model'],
        'TP': sum(entry['TP'].values()),
        'FP': sum(entry['FP'].values()),
        'FN': sum(entry['FN'].values()),
        'Precision': entry['Precision'],
        'Recall': entry['Recall'],
        'F1': entry['F1'],
    }
    for entry in results
]

recap_df = pd.DataFrame(recap_data)

# Show the summary table
print(recap_df)

           Model   TP   FP   FN  Precision  Recall      F1
0          xlnet  951   27   46     97.239  95.386  96.304
1           bert  949   30   48     96.936  95.186  96.053
2     distilbert  944   36   53     96.327  94.684  95.498
3  distilroberta  945   30   52     96.923  94.784  95.842
4        electra  827  135  170     85.967  82.949  84.431
5        roberta  947   27   50     97.228  94.985  96.093


In [47]:
import pandas as pd

# Experiment results: only the xlnet entry is broken down per entity type here
results = [
    {
        'Model': 'xlnet',
        'TP': {'COMPONENT': 208, 'STATE': 60, 'PARAMETER': 220, 'FUNCTION': 102, 'ISSUE': 253, 'ACTION': 108},
        'FP': {'ISSUE': 3, 'COMPONENT': 10, 'STATE': 2, 'PARAMETER': 5, 'FUNCTION': 6, 'ACTION': 1},
        'FN': {'FUNCTION': 9, 'COMPONENT': 5, 'ACTION': 16, 'STATE': 4, 'ISSUE': 9, 'PARAMETER': 3},
        'Precision': 97.239,
        'Recall': 95.386,
        'F1-Score': 96.304,
    }
]

# Table rows are collected here, one per entity type, in the order listed below
recap_data = []
entity_types = ['FUNCTION', 'COMPONENT', 'PARAMETER', 'ACTION', 'ISSUE', 'STATE']

for entity_type in entity_types:
    # Look up the three counts for this type; a missing entry counts as zero
    tp, fp, fn = (results[0][count_key].get(entity_type, 0) for count_key in ('TP', 'FP', 'FN'))

    # Scores fall back to 0 whenever their denominator would be zero
    predicted_total = tp + fp
    actual_total = tp + fn
    precision = tp / predicted_total if predicted_total > 0 else 0
    recall = tp / actual_total if actual_total > 0 else 0
    if (precision + recall) > 0:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0

    # Scores are stored as percentages
    recap_data.append({
        'Entity Type': entity_type,
        'TP': tp,
        'FP': fp,
        'FN': fn,
        'Precision': (precision*100),
        'Recall': recall*100,
        'F1-Score': f1_score*100,
    })

# Build the table, then render the three score columns as 3-decimal strings
df = pd.DataFrame(recap_data)
for column in ('Precision', 'Recall', 'F1-Score'):
    df[column] = df[column].map('{:.3f}'.format)

# Print the DataFrame
print(df)

  Entity Type   TP  FP  FN Precision  Recall F1-Score
0    FUNCTION  102   6   9    94.444  91.892   93.151
1   COMPONENT  208  10   5    95.413  97.653   96.520
2   PARAMETER  220   5   3    97.778  98.655   98.214
3      ACTION  108   1  16    99.083  87.097   92.704
4       ISSUE  253   3   9    98.828  96.565   97.683
5       STATE   60   2   4    96.774  93.750   95.238
