In [9]:
import csv
import sys
import os
import pandas as pd
from tabulate import tabulate
from sklearn.metrics import classification_report, confusion_matrix

In [12]:
def read_file(file):
    """Load a predictions CSV and return its gold and predicted label columns.

    The file is parsed as comma-separated UTF-8 text using ``|`` as the quote
    character. Default NaN detection is switched off so that cell values such
    as "NA" or the empty string are kept as plain strings, and blank lines are
    not skipped by the parser.

    Args:
        file: Path (or file-like object) of the CSV; it must contain the
            columns ``predict`` and ``gold``.

    Returns:
        A ``(gold_labels, predictions)`` tuple holding the ``gold`` and
        ``predict`` columns, in that order.

    Raises:
        KeyError: If the ``predict`` or ``gold`` column is missing.
    """
    table = pd.read_csv(
        file,
        sep=',',
        encoding="utf-8",
        keep_default_na=False,
        quotechar='|',
        skip_blank_lines=False,
    )
    predicted_column, gold_column = table['predict'], table['gold']

    # Note the order: gold first, predictions second.
    return gold_column, predicted_column
    
def generate_confusion_matrix(predictions, gold_labels):
    """Generate a confusion matrix as a labelled DataFrame.

    Args:
        predictions: Iterable of predicted labels.
        gold_labels: Iterable of gold (true) labels, aligned with
            ``predictions``.

    Returns:
        A square ``pandas.DataFrame`` of counts whose index and columns are
        the sorted labels. Following scikit-learn's convention, rows are gold
        labels and columns are predicted labels.
    """

    # Build the label set from BOTH gold and predicted labels. When only the
    # gold labels are passed, scikit-learn discards every sample whose
    # predicted label never occurs in the gold data, so those errors would
    # silently vanish from the matrix and rows would no longer sum to the
    # gold support.
    labels = sorted(set(gold_labels) | set(predictions))
    cf_matrix = confusion_matrix(gold_labels, predictions, labels=labels)
    # transform confusion matrix into a dataframe
    df_cf_matrix = pd.DataFrame(cf_matrix, index=labels, columns=labels)

    return df_cf_matrix

def calculate_precision_recall_f1_score(predictions, gold_labels, digits=3):
    """Build a rounded per-label precision/recall/F1 table.

    Args:
        predictions: Iterable of predicted labels.
        gold_labels: Iterable of gold (true) labels, aligned with
            ``predictions``.
        digits: Number of decimal places the scores are rounded to.

    Returns:
        A ``pandas.DataFrame`` with one row per entry of the classification
        report (minus the ``accuracy`` and ``weighted avg`` entries) and one
        column per metric; the ``support`` column is cast to int.

    Raises:
        KeyError: If the report lacks an ``accuracy`` or ``weighted avg`` entry.
    """

    # Dictionary-form report; undefined scores are reported as 0.
    metrics = classification_report(gold_labels, predictions, zero_division=0, output_dict=True)

    # Drop the summary entries that should not appear in the table.
    for unwanted in ('accuracy', 'weighted avg'):
        metrics.pop(unwanted)

    # One row per report entry, scores rounded to the requested precision.
    table = pd.DataFrame(metrics).T.round(digits)
    # Support is a count, so show it as an integer rather than a float.
    table['support'] = table['support'].astype(int)

    return table

def evaluate_classifier(file, name):
    """Print the confusion matrix and metric report for one prediction file.

    Args:
        file: Path of the CSV holding the ``gold`` and ``predict`` columns.
        name: Classifier name used in the heading; underscores are shown as
            spaces.
    """
    gold_labels, predictions = read_file(file)

    heading = name.replace('_', ' ')
    # The empty `end` line reproduces the blank line after the heading.
    print(f"Evaluating {heading}: ", end="\n\n")

    # Compute both tables before printing anything.
    cf_matrix = generate_confusion_matrix(predictions, gold_labels)
    report = calculate_precision_recall_f1_score(predictions, gold_labels)

    # Each table is followed by one blank line. To paste a table into
    # Overleaf, print `table.to_latex()` instead of `table`.
    for table in (cf_matrix, report):
        print(table)
        print()

# Prediction files to evaluate, one classifier per CSV.
selected_files = [
    'arg_classification.csv',
    'pred_identification.csv',
    'arg_identification.csv',
]
for file in selected_files:
    # The classifier name is the file name with the '.csv' part removed.
    name = file.replace('.csv', '')
    evaluate_classifier(file, name)

Evaluating arg classification: 

            ARG0  ARG1  ARG1-DSP  ARG2  ARG3  ARG4  ARG5  ARGA  ARGM-ADJ  \
ARG0         116    72         0     3     0     0     0     0         0   
ARG1          58   227         0    14     0     0     0     0         0   
ARG1-DSP       0     0         0     0     0     0     0     0         0   
ARG2           9    25         0    20     1     0     0     0         0   
ARG3           0     1         0     2     1     0     0     0         0   
ARG4           0     0         0     1     0     0     0     0         0   
ARG5           0     0         0     0     0     0     0     0         0   
ARGA           0     0         0     0     0     0     0     0         0   
ARGM-ADJ       0     0         0     0     0     0     0     0         4   
ARGM-ADV       3     6         0     0     0     0     0     0         0   
ARGM-CAU       0     1         0     0     0     0     0     0         0   
ARGM-COM       0     0         0     0     0     0     