In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json

In [2]:
# load json test_data_annotations.json
with open('test_data_annotations.json') as f:
    data = json.load(f)
data

{'0': ['3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes'],
 '1': ['3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d_printed_clothes',
  '3d

In [3]:
# load predictions.jsonl from current folder
with open("predictions.jsonl") as f:
    preds = [json.loads(line) for line in f]
preds = {list(p.keys())[0]:p[list(p.keys())[0]] for p in preds}
new_rows = []
for article_id, section_predictions in preds.items():
    for section_pred in section_predictions:
        new_row = {
            "article_id": article_id,
            "section_id": section_pred["section_id"],
            "section_category": section_pred["category"],
            "text": section_pred["text"],
            "reasoning": section_pred["reasoning"],
            "explanation": section_pred["explanation"],
        }
        new_rows.append(new_row)
predictions = pd.DataFrame(new_rows)
# sort by article id and section id
predictions = predictions.sort_values(by=["article_id", "section_id"]).reset_index(drop=True)
predictions.head(n=10)

Unnamed: 0,article_id,section_id,section_category,text,reasoning,explanation
0,0,0,3d_printed_apparel,When the fashion trio threeASFOUR debuted its...,The section clearly discusses the debut collec...,{'1': {'prediction_1': {'category': '3d_printe...
1,0,1,3d_printed_apparel,When the fashion trio threeASFOUR debuted its ...,The section discusses the debut of a collectio...,"{'0': {'prediction_1': {'category': 'unsure', ..."
2,0,7,3d_printed_apparel,The trio at its helm — Gabi Asfour and his des...,The article segment clearly discusses the crea...,{'1': {'prediction_1': {'category': '3d_printe...
3,0,8,3d_printed_apparel,They wanted to do the opposite: stretch clothi...,The section describes the ambition to 3D-print...,{'1': {'prediction_1': {'category': '3d_printe...
4,0,9,3d_printed_apparel,They dreamed of 3D-printing textiles that were...,The article segment discusses the dream of cre...,{'1': {'prediction_1': {'category': '3d_printe...
5,0,11,sustainable_fabrics,At a moment when Silicon Valley’s elite are bu...,The article section discusses the efficiency a...,{'0': {'prediction_1': {'category': 'sustainab...
6,0,12,3d_printed_apparel,But where hoarding cans of Doomsday beans is w...,The section discusses the challenges and innov...,{'1': {'prediction_1': {'category': '3d_printe...
7,0,13,3d_printed_apparel,Thousands of years of refinement have allowed ...,The article segment emphasizes the potential t...,{'1': {'prediction_1': {'category': '3d_printe...
8,0,14,3d_printed_apparel,But that hasn’t stopped the team behind threeA...,The section discusses the potential to control...,{'0': {'prediction_1': {'category': '3d_printe...
9,0,15,3d_printed_apparel,“With fashion there is the potential to contro...,The article segment discusses the potential of...,{'1': {'prediction_1': {'category': '3d_printe...


In [4]:
dfs = []
for article_id, true_labels in data.items():
    predictions_sub = predictions[predictions["article_id"] == article_id].copy()
    predictions_sub.loc[:, "true_section_category"] = true_labels
    dfs.append(predictions_sub)
predictions = pd.concat(dfs)
predictions.head(n=10)
# rename all elements of "3d_printed_clothes" to "3d_printed_apparel" in true_section_category
predictions["true_section_category"] = predictions["true_section_category"].apply(lambda x: "3d_printed_apparel" if x == "3d_printed_clothes" else x)

In [5]:
true_categories = predictions["true_section_category"].unique()
# get all unique predictions that are not in true_categories
unique_non_existing_predictions = predictions[~predictions["section_category"].isin(true_categories)]["section_category"].unique()
predictions["section_category"] = predictions["section_category"].apply(lambda x: x if x in true_categories else "other")
unique_non_existing_predictions

array(['3d_printed_fashion', '3d_printing', 'none', '3d_printed_food',
       'public_transport', 'autonomous transport',
       'smart_collaboration_tools', 'smart_energy_grids',
       'autonomous_driving', 'low_impact_living', 'low_energy_demand',
       'sustainable_water_production_treatment', 'sustainable_forestry',
       'sustainable_transport', 'sharing_economy',
       'sustainable_agriculture_production', 'carbon_price',
       'chemicals_production', 'solar_energy', 'sustainable_washing',
       'dematerialisation', 'sustainable_construction_materials',
       'nanotechnology', 'drones', 'modular_design', 'unsure',
       'scope_for_avoided_emissions', 'air_quality_pollution_control',
       'health_outcomes_measurement', 'videoconferencing', 'teleworking',
       'telemedicine', 'blockchain', 'universal_basic_income',
       'digital_solutions_to_regulate_food_intake_and_nutrition',
       'healthcare_technology', 'electric_vehicles', 'circular_economy',
       'electric c

In [6]:
confusion_matrix = pd.crosstab(predictions["true_section_category"], predictions["section_category"])
# compute precision, recall, and f1 score and accuracy for each category
precisions = {}
recalls = {}
f1_scores = {}
accuracies = {}
for category in true_categories:
    tp = confusion_matrix.loc[category, category]
    fp = confusion_matrix.loc[category, :].sum() - tp
    fn = confusion_matrix.loc[:, category].sum() - tp
    tn = confusion_matrix.loc[:, :].sum().sum() - tp - fp - fn
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1_score = 2 * (precision * recall) / (precision + recall)
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    precisions[category] = precision
    recalls[category] = recall
    f1_scores[category] = f1_score
    accuracies[category] = accuracy
# pretty print all results
for category in true_categories:
    print(f"Category: {category}")
    print(f"Precision: {precisions[category]:.2f}")
    print(f"Recall: {recalls[category]:.2f}")
    print(f"F1 Score: {f1_scores[category]:.2f}")
    print(f"Accuracy: {accuracies[category]:.2f}")
    print()

Category: 3d_printed_apparel
Precision: 0.79
Recall: 0.97
F1 Score: 0.87
Accuracy: 0.98

Category: autonomous_transport
Precision: 0.87
Recall: 0.87
F1 Score: 0.87
Accuracy: 0.97

Category: biking
Precision: 0.88
Recall: 0.98
F1 Score: 0.93
Accuracy: 0.99

Category: capsule_wardrobe
Precision: 0.94
Recall: 1.00
F1 Score: 0.97
Accuracy: 1.00

Category: car_sharing
Precision: 0.68
Recall: 1.00
F1 Score: 0.81
Accuracy: 0.98

Category: cement_production
Precision: 0.83
Recall: 1.00
F1 Score: 0.91
Accuracy: 1.00

Category: drone
Precision: 0.64
Recall: 1.00
F1 Score: 0.78
Accuracy: 0.98

Category: e_health
Precision: 0.82
Recall: 1.00
F1 Score: 0.90
Accuracy: 0.97

Category: electric_car
Precision: 0.78
Recall: 1.00
F1 Score: 0.88
Accuracy: 0.99

Category: fiber_optic_cables
Precision: 0.62
Recall: 1.00
F1 Score: 0.76
Accuracy: 0.97

Category: shipping
Precision: 0.86
Recall: 0.91
F1 Score: 0.88
Accuracy: 0.99

Category: clothes_designed_for_a_circular_economy
Precision: 0.22
Recall: 1.00
F