In [13]:
# 1. Import training data
# split into train and test sets
import csv
# Path is needed for model_path below; importing it in this cell keeps the
# cell runnable on a fresh kernel instead of relying on a later cell's import.
from pathlib import Path

import pandas as pd

from train_inlegalbert_xgboost import load_text_data, split_data

# load_text_data returns four values, unpacked here as the train/test texts
# and their corresponding train/test labels.
train_texts, test_texts, train_labels, test_labels = load_text_data('complete_training_data_7200_cases.csv')

# Location of the saved classifier; read later by the ML classification cell.
model_path = Path('inlegal_bert_xgboost_classifier.json')

In [14]:
# 2. Write test data to file
# Path to the output CSV file
csv_file_path = 'test_set_1451.csv'

# Ensure both lists have the same length; zip() below would otherwise silently
# drop the unmatched tail and leave sentences without their labels.
if len(test_texts) != len(test_labels):
    raise ValueError("Both lists must have the same length")

# Writing to the CSV file.
# - newline='' lets the csv module control line endings itself.
# - encoding='utf-8' makes the file independent of the platform's default
#   encoding, matching the UTF-8 default that pandas.read_csv uses.
with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Write the header
    writer.writerow(["sentence", "ground_truth"])
    # Write one (sentence, label) row per test example
    writer.writerows(zip(test_texts, test_labels))

print(f"Data written to {csv_file_path} successfully.")

Data written to test_set_1451.csv successfully.


In [15]:
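# 3. Run the dependency-parser (rule-based) classification on the test set.
#    This step is performed offline, outside this notebook, with the rule-based-classification.py script.
#    NOTE(review): its output is presumably the rule_classified_1451.csv file loaded in the next step — confirm.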

In [16]:
# 4. Load classified results
# Read the CSV holding the rule-based classification results into a DataFrame.
rule_results_csv = 'rule_classified_1451.csv'
classified_df = pd.read_csv(rule_results_csv)

In [17]:
# 5. Print performance metrics
from performance_metrics import print_performance_metrics

# Pull both label columns out as plain Python lists before scoring.
ground_truth_labels = classified_df['ground_truth'].tolist()
rule_based_predictions = classified_df['regulatory_according_to_rule'].tolist()

# NOTE(review): [0, 1] is presumably the list of class labels and None an
# optional argument left unset — confirm against performance_metrics.
print_performance_metrics(ground_truth_labels, rule_based_predictions, None, [0, 1])

   recall  precision    f1
0    0.89       0.76  0.82
1    0.70       0.86  0.77
                    score
accuracy             0.80
krippendorff alpha   0.59
        Predicted:     
                 0    1
True: 0        663   80
      1        211  497


In [18]:
# 6. Add ML model classification results as well
from train_inlegalbert_xgboost import class_names
import json
from pathlib import Path
import numpy as np
import pandas as pd
from classify_text_with_inlegal_bert_xgboost import classify_texts
from dianna.utils.tokenizers import SpacyTokenizer

class StatementClassifier:
    """Callable wrapper around ``classify_texts`` for the saved classifier.

    Calling an instance with one sentence or a list of sentences returns a
    two-column array with one row per sentence.
    """

    def __init__(self):
        # Tokenizer is stored on the instance; __call__ itself does not use it.
        self.tokenizer = SpacyTokenizer(name='en_core_web_sm')

    def __call__(self, sentences):
        """Classify ``sentences`` and return an (n, 2) score array.

        Args:
            sentences: a single string or a list of strings. A bare string is
                wrapped into a one-element list so there is always a batch axis.

        Returns:
            Array whose first column is column 0 of the probabilities returned
            by ``classify_texts`` and whose second column is its complement
            (1 - first column).
        """
        batch = [sentences] if isinstance(sentences, str) else sentences

        # model_path is the notebook-level path to the saved classifier.
        probabilities = classify_texts(batch, model_path, return_proba=True)

        first_column = probabilities[:, 0]
        return np.column_stack((first_column, 1 - first_column))

In [19]:
# Instantiate the classifier wrapper (its constructor builds the spaCy tokenizer).
model_runner = StatementClassifier()

In [21]:
# Score every sentence of the rule-classified set with the ML model.
sentences_to_classify = classified_df['sent'].tolist()
prediction = model_runner(sentences_to_classify)

# The predicted class of each row is the index of its highest-scoring column.
predicted_class_indices = np.argmax(prediction, axis=1)
ml_model_classification_results = list(predicted_class_indices)

Some weights of the model checkpoint at law-ai/InLegalBERT were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Creating features: 100%|███████████████████████████████████████████████| 1451/1451 [01:00<00:00, 23.89it/s]


In [22]:
# Attach the ML model predictions next to the rule-based results.
classified_df['ml_model_result'] = ml_model_classification_results

# Persist the combined table without the DataFrame index column.
combined_results_csv = 'all_models_and_algorithms_combined_results_test_set_1451.csv'
classified_df.to_csv(combined_results_csv, index=False)