In [1]:
def read_conll_file(file_path):
    """Reads a CoNLL format file and returns a list of token-label pairs for each sentence."""
    data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        sentence = []
        for line in f:
            line = line.strip()
            if not line:
                if sentence:
                    data.append(sentence)
                    sentence = []
            else:
                token, label = line.split('\t')
                sentence.append((token, label))
        if sentence:
            data.append(sentence)
    return data

def convert_json_to_conll(system_output_json):
    """Converts system output from JSON format to CoNLL format."""
    sentence = [(token, label) for label, token in system_output_json.items()]
    return [sentence]

def convert_annotation_to_conll(annotation):
    """Converts annotation from the provided format to CoNLL format."""
    sentences = []
    sentence = []
    for line in annotation.strip().split('\n'):
        if line:
            parts = line.split()
            label = parts[1]
            start, end = int(parts[2]), int(parts[3])
            token = parts[4]
            sentence.append((token, label))
        else:
            if sentence:
                sentences.append(sentence)
                sentence = []
    if sentence:
        sentences.append(sentence)
    return sentences

def compute_metrics(system_output, annotations):
    """Computes precision, recall, F1 score, and accuracy."""
    true_positives = sum(1 for s1, s2 in zip(system_output, annotations) for t1, l1 in s1 for t2, l2 in s2 if t1 == t2 and l1 == l2)
    predicted_entities = sum(len(s) for s in system_output)
    true_entities = sum(len(s) for s in annotations)

    precision = true_positives / predicted_entities if predicted_entities > 0 else 0
    recall = true_positives / true_entities if true_entities > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
    accuracy = true_positives / sum(len(s) for s in annotations) if annotations else 0

    return precision, recall, f1_score, accuracy

# Sample system output in JSON format
system_output_json = {
    "task": "statistical sentence generation",
    "dataset": None,
    "method": "packed sets of trees"
}

# Sample annotation in the provided format
annotation_sample = """
T1	Generic 27 35	approach
T2	Task 40 71	statistical sentence generation
T3	OtherScientificTerm 139 144	trees
T4	OtherScientificTerm 151 158	forests
T5	Generic 220 234	representation
T6	OtherScientificTerm 301 322	syntactic information
T7	Generic 325 327	It
T8	Method 361 380	statistical ranking
T9	Generic 398 406	approach
T10	Task 411 433	statistical generation
"""

# Convert the system output JSON to CoNLL format
system_output_conll = convert_json_to_conll(system_output_json)

# Convert annotation to CoNLL format
annotations_conll = convert_annotation_to_conll(annotation_sample)

# Compute metrics
precision, recall, f1_score, accuracy = compute_metrics(system_output_conll, annotations_conll)

# Print the computed metrics
print('Precision:', precision)
print('Recall:', recall)
print('F1 Score:', f1_score)
print('Accuracy:', accuracy)


Precision: 0.0
Recall: 0.0
F1 Score: 0
Accuracy: 0.0


In [2]:
system_output_conll

[[('statistical sentence generation', 'task'),
  (None, 'dataset'),
  ('packed sets of trees', 'method')]]

In [3]:
annotations_conll

[[('approach', 'Generic'),
  ('statistical', 'Task'),
  ('trees', 'OtherScientificTerm'),
  ('forests', 'OtherScientificTerm'),
  ('representation', 'Generic'),
  ('syntactic', 'OtherScientificTerm'),
  ('It', 'Generic'),
  ('statistical', 'Method'),
  ('approach', 'Generic'),
  ('statistical', 'Task')]]