In [6]:
import json
import os
import numpy as np
import pandas as pd
# This is the base directory where your fold reports are located.
dataset_name = 'phee'
model_name='bioclinicalbert'
base_dir = f'analysis/{model_name}/{dataset_name}/reports'  # Replace with your actual path

# Initialize dictionaries to hold all the metric values from each fold.
binary_metrics = {
    'O': {'precision': [], 'recall': [], 'f1-score': []},
    'I': {'precision': [], 'recall': [], 'f1-score': []},
    'macro avg': {'precision': [], 'recall': [], 'f1-score': []},
    'weighted avg': {'precision': [], 'recall': [], 'f1-score': []}
}

multiclass_metrics = {
    "macro avg": {'precision': [], 'recall': [], 'f1-score': []},
    "weighted avg": {'precision': [], 'recall': [], 'f1-score': []},
    "macro_wo_O": {'precision': [], 'recall': [], 'f1-score': []}

}

# Process each fold
for fold in range(5):
    fold_dir = os.path.join(base_dir, f'fold{fold}')

    # Load binary classification report
    with open(os.path.join(fold_dir, 'binary_classification_report.json'), 'r') as f:
        binary_report = json.load(f)
        for category in binary_metrics.keys():
            for metric in binary_metrics[category].keys():
                binary_metrics[category][metric].append(binary_report[category][metric])

    # Load multiclass classification report
    multi_df = pd.read_json(os.path.join(fold_dir, 'multiclass_classification_report.json'))
  
    # with open(os.path.join(fold_dir, 'multiclass_classification_report.json'), 'r') as f:
    multiclass_report = multi_df.T.to_dict()
    print(multiclass_report)

    for category in multiclass_metrics.keys():

        for metric in multiclass_metrics[category].keys():
            multiclass_metrics[category][metric].append(multiclass_report[category][metric])

# Calculate mean and standard deviation for binary metrics
binary_results = {}
for category, metrics in binary_metrics.items():
 
    binary_results[category] = {}
    for metric, values in metrics.items():
        binary_results[category][metric] = {
            'mean': round(np.mean(values), 3),
            'std': round(np.std(values), 3)
        }

# Do the same for multiclass metrics
multiclass_results = {}
for category, metrics in multiclass_metrics.items():
    multiclass_results[category] = {}
    for metric, values in metrics.items():
        multiclass_results[category][metric] = {
            # round to 3 decimal places
            'mean': round(np.mean(values), 3),
            'std': round(np.std(values), 3)
        }

# Now you have binary_results and multiclass_results with the mean and std dev of each metric.
print('Binary results:')
# dict to dataframe
binary_results = pd.DataFrame(binary_results)    
binary_results.T




{'I-Background': {'precision': 0.871, 'recall': 0.835, 'f1-score': 0.853, 'support': 776.0}, 'I-Other': {'precision': 0.723, 'recall': 0.781, 'f1-score': 0.751, 'support': 274.0}, 'I-Problem': {'precision': 0.878, 'recall': 0.899, 'f1-score': 0.888, 'support': 3889.0}, 'I-Test': {'precision': 0.5710000000000001, 'recall': 0.5750000000000001, 'f1-score': 0.5730000000000001, 'support': 160.0}, 'I-Treatment': {'precision': 0.861, 'recall': 0.874, 'f1-score': 0.867, 'support': 2680.0}, 'O': {'precision': 0.9410000000000001, 'recall': 0.932, 'f1-score': 0.936, 'support': 12477.0}, 'accuracy': {'precision': 0.909, 'recall': 0.909, 'f1-score': 0.909, 'support': 0.909}, 'macro avg': {'precision': 0.808, 'recall': 0.8160000000000001, 'f1-score': 0.811, 'support': 20256.0}, 'weighted avg': {'precision': 0.91, 'recall': 0.909, 'f1-score': 0.909, 'support': 20256.0}, 'macro_wo_O': {'precision': 0.781, 'recall': 0.793, 'f1-score': 0.786, 'support': 7779.0}}
{'I-Background': {'precision': 0.837, 're

Unnamed: 0,precision,recall,f1-score
O,"{'mean': 0.938, 'std': 0.004}","{'mean': 0.932, 'std': 0.005}","{'mean': 0.935, 'std': 0.003}"
I,"{'mean': 0.889, 'std': 0.008}","{'mean': 0.9, 'std': 0.007}","{'mean': 0.894, 'std': 0.005}"
macro avg,"{'mean': 0.914, 'std': 0.004}","{'mean': 0.916, 'std': 0.004}","{'mean': 0.915, 'std': 0.004}"
weighted avg,"{'mean': 0.92, 'std': 0.004}","{'mean': 0.919, 'std': 0.004}","{'mean': 0.92, 'std': 0.004}"


In [8]:
print('Multiclass results:')
# dict to dataframe
multiclass_results = pd.DataFrame(multiclass_results)
multiclass_results.T

Multiclass results:


Unnamed: 0,precision,recall,f1-score
macro avg,"{'mean': 0.81, 'std': 0.004}","{'mean': 0.811, 'std': 0.008}","{'mean': 0.809, 'std': 0.006}"
weighted avg,"{'mean': 0.907, 'std': 0.003}","{'mean': 0.906, 'std': 0.003}","{'mean': 0.906, 'std': 0.004}"
macro_wo_O,"{'mean': 0.784, 'std': 0.004}","{'mean': 0.787, 'std': 0.011}","{'mean': 0.784, 'std': 0.007}"


In [5]:
import os
import json
import pandas as pd

# Define the model names and dataset names
model_names = ['scibert-mt', 'bluebert-mt', 'bert-mt', 'bertclinical-mt', 'bioclinicalbert-mt']
dataset_names = ['mtsamples', 'mtsamples2', 'doc-patient', 'cross-validation']

# Initialize an empty list to store the results
results = []

# Iterate over each model and dataset to read the respective JSON files
for model_name in model_names:
    for dataset_name in dataset_names:
        file_path = f'analysis/{model_name}/{dataset_name}/reports/fold0/multiclass_classification_report.json'
        if os.path.exists(file_path):
            with open(file_path, 'r') as file:
                try:
                    report = json.load(file)
                    # Extract the macro average values for precision, recall, and f1-score
                    macro_avg = {
                        'Model': model_name,
                        'Dataset': dataset_name,
                        'Macro Precision': report['precision']['macro_wo_O'],
                        'Macro Recall': report['recall']['macro_wo_O'],
                        'Macro F1-Score': report['f1-score']['macro_wo_O']
                    }
                    results.append(macro_avg)
                except json.JSONDecodeError:
                    print(f"Error decoding JSON from file: {file_path}")
        else:
            print(f"File not found: {file_path}")

# Convert the results list to a DataFrame
results_df = pd.DataFrame(results)

# Display the results DataFrame
results_df


File not found: analysis/scibert-mt/mtsamples/reports/fold0/multiclass_classification_report.json
File not found: analysis/scibert-mt/mtsamples2/reports/fold0/multiclass_classification_report.json
File not found: analysis/scibert-mt/doc-patient/reports/fold0/multiclass_classification_report.json
File not found: analysis/bluebert-mt/mtsamples/reports/fold0/multiclass_classification_report.json
File not found: analysis/bluebert-mt/mtsamples2/reports/fold0/multiclass_classification_report.json
File not found: analysis/bluebert-mt/doc-patient/reports/fold0/multiclass_classification_report.json
File not found: analysis/bert-mt/mtsamples/reports/fold0/multiclass_classification_report.json
File not found: analysis/bert-mt/mtsamples2/reports/fold0/multiclass_classification_report.json
File not found: analysis/bert-mt/doc-patient/reports/fold0/multiclass_classification_report.json
File not found: analysis/bertclinical-mt/mtsamples/reports/fold0/multiclass_classification_report.json
File not fou

Unnamed: 0,Model,Dataset,Macro Precision,Macro Recall,Macro F1-Score
0,scibert-mt,cross-validation,0.814,0.762,0.782
1,bluebert-mt,cross-validation,0.799,0.732,0.762
2,bert-mt,cross-validation,0.766,0.691,0.719
3,bertclinical-mt,cross-validation,0.798,0.786,0.787
4,bioclinicalbert-mt,cross-validation,0.791,0.758,0.771


# Binary Classification Table


In [4]:
import os
import json
import pandas as pd

# Define the model names and dataset names
model_names = ['scibert-mt', 'bluebert-mt', 'bert-mt', 'bertclinical-mt', 'bioclinicalbert-mt']
dataset_names = ['mtsamples', 'mtsamples2', 'doc-patient', 'cross-validation']

# Initialize an empty list to store the results
results = []

# Iterate over each model and dataset to read the respective JSON files
for model_name in model_names:
    for dataset_name in dataset_names:
        file_path = f'analysis/{model_name}/{dataset_name}/reports/fold0/binary_classification_report.json'
        if os.path.exists(file_path):
            with open(file_path, 'r') as file:
                try:
                    report = json.load(file)
                    # Extract the macro average values for precision, recall, and f1-score
                    macro_avg = {
                        'Model': model_name,
                        'Dataset': dataset_name,
                        'Macro Precision': report['macro avg']['precision'],
                        'Macro Recall': report['macro avg']['recall'],
                        'Macro F1-Score': report['macro avg']['f1-score']
                    }
                    results.append(macro_avg)
                except json.JSONDecodeError:
                    print(f"Error decoding JSON from file: {file_path}")
        else:
            print(f"File not found: {file_path}")

# Convert the results list to a DataFrame
results_df = pd.DataFrame(results)

# Display the results DataFrame
results_df


File not found: analysis/scibert-mt/mtsamples/reports/fold0/binary_classification_report.json
File not found: analysis/scibert-mt/mtsamples2/reports/fold0/binary_classification_report.json
File not found: analysis/scibert-mt/doc-patient/reports/fold0/binary_classification_report.json
File not found: analysis/bluebert-mt/mtsamples/reports/fold0/binary_classification_report.json
File not found: analysis/bluebert-mt/mtsamples2/reports/fold0/binary_classification_report.json
File not found: analysis/bluebert-mt/doc-patient/reports/fold0/binary_classification_report.json
File not found: analysis/bert-mt/mtsamples/reports/fold0/binary_classification_report.json
File not found: analysis/bert-mt/mtsamples2/reports/fold0/binary_classification_report.json
File not found: analysis/bert-mt/doc-patient/reports/fold0/binary_classification_report.json
File not found: analysis/bertclinical-mt/mtsamples/reports/fold0/binary_classification_report.json
File not found: analysis/bertclinical-mt/mtsamples2/

Unnamed: 0,Model,Dataset,Macro Precision,Macro Recall,Macro F1-Score
0,scibert-mt,cross-validation,0.917757,0.914834,0.916176
1,bluebert-mt,cross-validation,0.910461,0.905167,0.907449
2,bert-mt,cross-validation,0.914517,0.909594,0.91174
3,bertclinical-mt,cross-validation,0.909428,0.907194,0.908237
4,bioclinicalbert-mt,cross-validation,0.911766,0.911766,0.911766


# Table with mean and std

In [7]:
import os
import json
import pandas as pd
import numpy as np

# Define the model names and dataset names
model_names = ['scibert-mt', 'bluebert-mt', 'bert-mt', 'bertclinical-mt', 'bioclinicalbert-mt']
dataset_names = ['cross-validation']

# Initialize an empty list to store the results
results = []

# Iterate over each model and dataset to read the respective JSON files
for model_name in model_names:
    for dataset_name in dataset_names:
        precisions = []
        recalls = []
        f1_scores = []
        for fold in range(10):
            file_path = f'analysis/{model_name}/{dataset_name}/reports/fold{fold}/multiclass_classification_report.json'
            if os.path.exists(file_path):
                with open(file_path, 'r') as file:
                    try:
                        report = json.load(file)
                        precisions.append(report['precision']['macro_wo_O'])
                        recalls.append(report['recall']['macro_wo_O'])
                        f1_scores.append(report['f1-score']['macro_wo_O'])
                    except json.JSONDecodeError:
                        print(f"Error decoding JSON from file: {file_path}")
            else:
                print(f"File not found: {file_path}")
        
        if precisions and recalls and f1_scores:
            # Calculate mean and std for each metric
            precision_mean = np.mean(precisions)
            precision_std = np.std(precisions)
            recall_mean = np.mean(recalls)
            recall_std = np.std(recalls)
            f1_mean = np.mean(f1_scores)
            f1_std = np.std(f1_scores)
            
            # Store the results in the list
            results.append({
                'Model': model_name,
                'Dataset': dataset_name,
                'Macro Precision': f"{precision_mean:.4f} ± {precision_std:.4f}",
                'Macro Recall': f"{recall_mean:.4f} ± {recall_std:.4f}",
                'Macro F1-Score': f"{f1_mean:.4f} ± {f1_std:.4f}"
            })

# Convert the results list to a DataFrame
results_df = pd.DataFrame(results)
results_df


Unnamed: 0,Model,Dataset,Macro Precision,Macro Recall,Macro F1-Score
0,scibert-mt,cross-validation,0.7769 ± 0.0398,0.7570 ± 0.0355,0.7590 ± 0.0224
1,bluebert-mt,cross-validation,0.7680 ± 0.0373,0.7353 ± 0.0354,0.7424 ± 0.0222
2,bert-mt,cross-validation,0.7357 ± 0.0288,0.7006 ± 0.0433,0.7100 ± 0.0302
3,bertclinical-mt,cross-validation,0.7813 ± 0.0337,0.7588 ± 0.0350,0.7617 ± 0.0278
4,bioclinicalbert-mt,cross-validation,0.7911 ± 0.0222,0.7381 ± 0.0331,0.7569 ± 0.0170
