In [2]:
# Make the parent of the current working directory importable so that the
# `src.*` and `analysis.*` packages imported by the following cell resolve.
import os
import sys

PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
# Notebook cells get re-run often; only add the entry once so sys.path does
# not accumulate duplicates of the same directory.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [3]:
from src.jointlearning.model import JointCausalModel
from src.jointlearning.config import MODEL_CONFIG, id2label_bio, id2label_rel, id2label_cls
import torch
from transformers import AutoTokenizer
import json
import pandas as pd
from analysis.causal_eval import evaluate, display_results
from src.causal_pseudo_labeling.llm2doccano import convert_llm_output_to_doccano

In [4]:
# Read the expert test split from CSV and keep only the contents of its
# 'text' column as a plain Python list; that list is what gets fed to the
# model in the inference cells.
# NOTE(review): despite the name, `test_set_dir` is a file path, and it is an
# absolute, user-specific location — adjust before running elsewhere.
test_set_dir = "C:\\Users\\norouzin\\Desktop\\JointLearning\\datasets\\expert_multi_task_data\\test.csv"
test_sets = pd.read_csv(filepath_or_buffer=test_set_dir)
texts = test_sets.loc[:, 'text'].tolist()
print("Number of test samples:", len(texts))

Number of test samples: 452


In [6]:
# Directory that the Doccano-format prediction files are written to by the
# conversion cells and read back from by the evaluation cells.
# NOTE(review): absolute, user-specific path — change it before running on
# another machine.
SAVE_DIR = r"C:\Users\norouzin\Desktop\JointLearning\predictions"

In [7]:
# Checkpoint file whose contents are handed to load_state_dict below.
MODEL_PATH = r"C:\Users\norouzin\Desktop\JointLearning\src\jointlearning\expert_bert_softmax\expert_bert_softmax_model.pt"  # <-- CHANGE THIS to your model path or repo

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Tokenizer for the encoder named in the shared model configuration.
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_CONFIG["encoder_name"])

# Build the model from MODEL_CONFIG, then restore the saved weights, mapping
# the stored tensors onto DEVICE while loading.
model = JointCausalModel(**MODEL_CONFIG)
# NOTE(review): torch.load unpickles the file, so only point MODEL_PATH at a
# checkpoint from a trusted source (consider weights_only=True if the
# installed torch version supports it).
trained_weights = torch.load(MODEL_PATH, map_location=DEVICE)
model.load_state_dict(trained_weights)
model.to(DEVICE)
# Switch to evaluation mode; the trailing ";" keeps the notebook from
# echoing the module repr as the cell output.
model.eval();

MRO for JointCausalModel: (<class 'src.jointlearning.model.JointCausalModel'>, <class 'torch.nn.modules.module.Module'>, <class 'huggingface_hub.hub_mixin.PyTorchModelHubMixin'>, <class 'huggingface_hub.hub_mixin.ModelHubMixin'>, <class 'object'>)


# CLS+SPAN Scenario

In [22]:
# Run the model over every test sentence in fixed-size chunks and collect the
# per-sentence predictions in `all_results`, using the "cls+span" setting for
# `cause_decision`.
batch_size = 32  # sentences per predict() call; tune to available memory
num_batches = -(-len(texts) // batch_size)  # ceiling division
all_results = []

# Options shared by every predict() call in this scenario.
predict_options = dict(
    tokenizer=TOKENIZER,
    rel_mode="auto",            # NOTE(review): other accepted values are not shown in this notebook
    rel_threshold=0.5,          # adjust as needed
    cause_decision="cls+span",  # alternatives used below: "cls_only", "span_only"
)

for i, start in enumerate(range(0, len(texts), batch_size)):
    batch = texts[start : start + batch_size]
    batch_results = model.predict(batch, **predict_options)
    all_results += batch_results
    print(f"Processed batch {i+1}/{num_batches} ({len(batch)} sentences)")

Processed batch 1/15 (32 sentences)
Processed batch 2/15 (32 sentences)
Processed batch 3/15 (32 sentences)
Processed batch 4/15 (32 sentences)
Processed batch 5/15 (32 sentences)
Processed batch 6/15 (32 sentences)
Processed batch 7/15 (32 sentences)
Processed batch 8/15 (32 sentences)
Processed batch 9/15 (32 sentences)
Processed batch 10/15 (32 sentences)
Processed batch 11/15 (32 sentences)
Processed batch 12/15 (32 sentences)
Processed batch 13/15 (32 sentences)
Processed batch 14/15 (32 sentences)
Processed batch 15/15 (4 sentences)


In [23]:
# Pretty-print a short preview of the predictions as JSON (non-ASCII
# characters are kept as-is). Printing all of `all_results` would flood the
# cell output and bloat the saved notebook, so only the first few are shown.
PREVIEW_COUNT = 3  # number of predictions to display
print(json.dumps(all_results[:PREVIEW_COUNT], indent=2, ensure_ascii=False))

[
  {
    "text": "in an explicit task, social impression assessment is more accurate, participants are more confident, and they are quicker in responding to noncooperative proper-round pictures.;;",
    "causal": true,
    "relations": [
      {
        "cause": "explicit",
        "effect": "social impression assessment",
        "type": "Rel_CE"
      },
      {
        "cause": "explicit",
        "effect": "accurate,",
        "type": "Rel_CE"
      },
      {
        "cause": "explicit",
        "effect": "participants",
        "type": "Rel_CE"
      },
      {
        "cause": "explicit",
        "effect": "confident",
        "type": "Rel_CE"
      },
      {
        "cause": "explicit",
        "effect": "quicker in responding to noncooperative proper-round pictures",
        "type": "Rel_CE"
      }
    ]
  },
  {
    "text": "broadening the motivation to cooperate: revisiting the role of sanctions in social dilemmas.;;",
    "causal": false,
    "relations": [],
    "spans"

In [34]:
# Write the cls+span predictions to a Doccano-format JSONL file under SAVE_DIR.
# The path is built with os.path.join instead of concatenating "/" by hand,
# which produced mixed separators ("...\predictions/expert_...") on Windows.
# Create the output directory up front in case the converter does not do so
# itself; exist_ok makes this a no-op when it is already there.
os.makedirs(SAVE_DIR, exist_ok=True)
# The converter's return value is kept in `seat`; it is not used by the cells
# visible in this notebook.
seat = convert_llm_output_to_doccano(
    all_results,
    os.path.join(SAVE_DIR, "expert_bert_softmax_cls+span_doccano.jsonl"),
)

Converting 452 samples from list input to Doccano format...
Processing sample 0/452
Writing 452 samples to output file...

CONVERSION COMPLETED SUCCESSFULLY
Total samples processed: 452
Causal samples: 195 (43.1%)
Non-causal samples: 257 (56.9%)
Error samples: 0 (0.0%)
Total entities created: 1259
Total relations created: 376
Output saved to: C:\Users\norouzin\Desktop\JointLearning\predictions/expert_bert_softmax_cls+span_doccano.jsonl


In [8]:
# Evaluate the cls+span prediction file against the Doccano test file under
# both scenarios ('A' and 'B'); the scenario semantics are defined by
# analysis.causal_eval.evaluate.
# NOTE(review): `test_dir` / `pred_dir` are file paths despite their names;
# `test_dir` is presumably the gold annotation file — confirm.
test_dir = "C:\\Users\\norouzin\\Desktop\\JointLearning\\datasets\\expert_multi_task_data\\doccano_test.jsonl"
# os.path.join keeps the separator consistent with the platform instead of
# hard-coding "/" after a Windows-style directory.
pred_dir = os.path.join(SAVE_DIR, "expert_bert_softmax_cls+span_doccano.jsonl")
results_A = evaluate(test_dir, pred_dir, scenario='A')
results_B = evaluate(test_dir, pred_dir, scenario='B')

In [9]:
# Print the Scenario A metric tables (Task1, Task2, ...) computed by
# evaluate() for the cls+span prediction file.
display_results(results_A, scenario="A")


                          Scenario A Results                          

【 Task1 】
------------------------------------------------------------
Metric         |           Value
------------------------------------------------------------
TP             |             166
FP             |              29
FN             |              55
TN             |             202
Precision      |          0.8513
Recall         |          0.7511
F1             |          0.7981
Accuracy       |          0.8142
N              |             452
------------------------------------------------------------

【 Task2 】
------------------------------------------------------------
Metric      |           Value
------------------------------------------------------------
cause       |
  - Precision |          0.7008
  - Recall |          0.6284
  - F1     |          0.6626
  - TP_precision |             171
  - TP_recall |             164
  - FP     |              73
  - FN     |              97
effect      

In [10]:
# Print the Scenario B metric tables (Task1, Task2, ...) computed by
# evaluate() for the cls+span prediction file.
display_results(results_B, scenario="B")


                          Scenario B Results                          

【 Task1 】
------------------------------------------------------------
Metric         |           Value
------------------------------------------------------------
TP             |             166
FP             |              29
FN             |              55
TN             |             202
Precision      |          0.8513
Recall         |          0.7511
F1             |          0.7981
Accuracy       |          0.8142
N              |             452
------------------------------------------------------------

【 Task2 】
------------------------------------------------------------
Metric      |           Value
------------------------------------------------------------
cause       |
  - Precision |          0.8104
  - Recall |          0.8079
  - F1     |          0.8092
  - TP_precision |             171
  - TP_recall |             164
  - FP     |              40
  - FN     |              39
effect      

# SPAN ONLY Scenario

In [5]:
# Run the model over every test sentence in fixed-size chunks and collect the
# per-sentence predictions in `all_results`, this time with
# `cause_decision="span_only"`.
batch_size = 32  # sentences per predict() call; tune to available memory
num_batches = -(-len(texts) // batch_size)  # ceiling division
all_results = []

# Options shared by every predict() call in this scenario.
predict_options = dict(
    tokenizer=TOKENIZER,
    rel_mode="auto",             # NOTE(review): other accepted values are not shown in this notebook
    rel_threshold=0.5,           # adjust as needed
    cause_decision="span_only",  # alternatives used elsewhere: "cls+span", "cls_only"
)

for i, start in enumerate(range(0, len(texts), batch_size)):
    batch = texts[start : start + batch_size]
    batch_results = model.predict(batch, **predict_options)
    all_results += batch_results
    print(f"Processed batch {i+1}/{num_batches} ({len(batch)} sentences)")

Processed batch 1/15 (32 sentences)
Processed batch 2/15 (32 sentences)
Processed batch 3/15 (32 sentences)
Processed batch 4/15 (32 sentences)
Processed batch 5/15 (32 sentences)
Processed batch 6/15 (32 sentences)
Processed batch 7/15 (32 sentences)
Processed batch 8/15 (32 sentences)
Processed batch 9/15 (32 sentences)
Processed batch 10/15 (32 sentences)
Processed batch 11/15 (32 sentences)
Processed batch 12/15 (32 sentences)
Processed batch 13/15 (32 sentences)
Processed batch 14/15 (32 sentences)
Processed batch 15/15 (4 sentences)


In [6]:
# Write the span_only predictions to a Doccano-format JSONL file under SAVE_DIR.
# The path is built with os.path.join instead of concatenating "/" by hand,
# which produced mixed separators ("...\predictions/expert_...") on Windows.
# Create the output directory up front in case the converter does not do so
# itself; exist_ok makes this a no-op when it is already there.
os.makedirs(SAVE_DIR, exist_ok=True)
# The converter's return value is kept in `seat`; it is not used by the cells
# visible in this notebook.
seat = convert_llm_output_to_doccano(
    all_results,
    os.path.join(SAVE_DIR, "expert_bert_softmax_span_only_doccano.jsonl"),
)

Converting 452 samples from list input to Doccano format...
Processing sample 0/452
Writing 452 samples to output file...

CONVERSION COMPLETED SUCCESSFULLY
Total samples processed: 452
Causal samples: 206 (45.6%)
Non-causal samples: 246 (54.4%)
Error samples: 0 (0.0%)
Total entities created: 1274
Total relations created: 392
Output saved to: C:\Users\norouzin\Desktop\JointLearning\predictions/expert_bert_softmax_span_only_doccano.jsonl


In [8]:
# Evaluate the span_only prediction file against the Doccano test file under
# both scenarios ('A' and 'B'); the scenario semantics are defined by
# analysis.causal_eval.evaluate.
# NOTE(review): `test_dir` / `pred_dir` are file paths despite their names;
# `test_dir` is presumably the gold annotation file — confirm.
test_dir = "C:\\Users\\norouzin\\Desktop\\JointLearning\\datasets\\expert_multi_task_data\\doccano_test.jsonl"
# os.path.join keeps the separator consistent with the platform instead of
# hard-coding "/" after a Windows-style directory.
pred_dir = os.path.join(SAVE_DIR, "expert_bert_softmax_span_only_doccano.jsonl")
results_A = evaluate(test_dir, pred_dir, scenario='A')
results_B = evaluate(test_dir, pred_dir, scenario='B')

In [9]:
# Print the Scenario A metric tables (Task1, Task2, ...) computed by
# evaluate() for the span_only prediction file.
display_results(results_A, scenario="A")


                          Scenario A Results                          

【 Task1 】
------------------------------------------------------------
Metric         |           Value
------------------------------------------------------------
TP             |             172
FP             |              34
FN             |              49
TN             |             197
Precision      |          0.8350
Recall         |          0.7783
F1             |          0.8056
Accuracy       |          0.8164
N              |             452
------------------------------------------------------------

【 Task2 】
------------------------------------------------------------
Metric      |           Value
------------------------------------------------------------
cause       |
  - Precision |          0.6969
  - Recall |          0.6513
  - F1     |          0.6733
  - TP_precision |             177
  - TP_recall |             170
  - FP     |              77
  - FN     |              91
effect      

In [10]:
# Print the Scenario B metric tables (Task1, Task2, ...) computed by
# evaluate() for the span_only prediction file.
display_results(results_B, scenario="B")


                          Scenario B Results                          

【 Task1 】
------------------------------------------------------------
Metric         |           Value
------------------------------------------------------------
TP             |             172
FP             |              34
FN             |              49
TN             |             197
Precision      |          0.8350
Recall         |          0.7783
F1             |          0.8056
Accuracy       |          0.8164
N              |             452
------------------------------------------------------------

【 Task2 】
------------------------------------------------------------
Metric      |           Value
------------------------------------------------------------
cause       |
  - Precision |          0.8157
  - Recall |          0.8095
  - F1     |          0.8126
  - TP_precision |             177
  - TP_recall |             170
  - FP     |              40
  - FN     |              40
effect      

# CLS ONLY Scenario

In [5]:
# Run the model over every test sentence in fixed-size chunks and collect the
# per-sentence predictions in `all_results`, this time with
# `cause_decision="cls_only"`.
# NOTE(review): the outputs recorded in this notebook for the cls_only run are
# identical to those of the cls+span run (195 causal samples, 1259 entities,
# 376 relations, same Task1/Task2 numbers) — confirm that model.predict
# really distinguishes these two `cause_decision` settings.
batch_size = 32  # sentences per predict() call; tune to available memory
num_batches = -(-len(texts) // batch_size)  # ceiling division
all_results = []

# Options shared by every predict() call in this scenario.
predict_options = dict(
    tokenizer=TOKENIZER,
    rel_mode="auto",            # NOTE(review): other accepted values are not shown in this notebook
    rel_threshold=0.5,          # adjust as needed
    cause_decision="cls_only",  # alternatives used elsewhere: "cls+span", "span_only"
)

for i, start in enumerate(range(0, len(texts), batch_size)):
    batch = texts[start : start + batch_size]
    batch_results = model.predict(batch, **predict_options)
    all_results += batch_results
    print(f"Processed batch {i+1}/{num_batches} ({len(batch)} sentences)")

Processed batch 1/15 (32 sentences)
Processed batch 2/15 (32 sentences)
Processed batch 3/15 (32 sentences)
Processed batch 4/15 (32 sentences)
Processed batch 5/15 (32 sentences)
Processed batch 6/15 (32 sentences)
Processed batch 7/15 (32 sentences)
Processed batch 8/15 (32 sentences)
Processed batch 9/15 (32 sentences)
Processed batch 10/15 (32 sentences)
Processed batch 11/15 (32 sentences)
Processed batch 12/15 (32 sentences)
Processed batch 13/15 (32 sentences)
Processed batch 14/15 (32 sentences)
Processed batch 15/15 (4 sentences)


In [6]:
# Write the cls_only predictions to a Doccano-format JSONL file under SAVE_DIR.
# The path is built with os.path.join instead of concatenating "/" by hand,
# which produced mixed separators ("...\predictions/expert_...") on Windows.
# Create the output directory up front in case the converter does not do so
# itself; exist_ok makes this a no-op when it is already there.
os.makedirs(SAVE_DIR, exist_ok=True)
# The converter's return value is kept in `seat`; it is not used by the cells
# visible in this notebook.
seat = convert_llm_output_to_doccano(
    all_results,
    os.path.join(SAVE_DIR, "expert_bert_softmax_cls_only_doccano.jsonl"),
)

Converting 452 samples from list input to Doccano format...
Processing sample 0/452
Writing 452 samples to output file...

CONVERSION COMPLETED SUCCESSFULLY
Total samples processed: 452
Causal samples: 195 (43.1%)
Non-causal samples: 257 (56.9%)
Error samples: 0 (0.0%)
Total entities created: 1259
Total relations created: 376
Output saved to: C:\Users\norouzin\Desktop\JointLearning\predictions/expert_bert_softmax_cls_only_doccano.jsonl


In [7]:
# Evaluate the cls_only prediction file against the Doccano test file under
# both scenarios ('A' and 'B'); the scenario semantics are defined by
# analysis.causal_eval.evaluate.
# NOTE(review): `test_dir` / `pred_dir` are file paths despite their names;
# `test_dir` is presumably the gold annotation file — confirm.
test_dir = "C:\\Users\\norouzin\\Desktop\\JointLearning\\datasets\\expert_multi_task_data\\doccano_test.jsonl"
# os.path.join keeps the separator consistent with the platform instead of
# hard-coding "/" after a Windows-style directory.
pred_dir = os.path.join(SAVE_DIR, "expert_bert_softmax_cls_only_doccano.jsonl")
results_A = evaluate(test_dir, pred_dir, scenario='A')
results_B = evaluate(test_dir, pred_dir, scenario='B')

In [8]:
# Print the Scenario A metric tables (Task1, Task2, ...) computed by
# evaluate() for the cls_only prediction file.
display_results(results_A, scenario="A")


                          Scenario A Results                          

【 Task1 】
------------------------------------------------------------
Metric         |           Value
------------------------------------------------------------
TP             |             166
FP             |              29
FN             |              55
TN             |             202
Precision      |          0.8513
Recall         |          0.7511
F1             |          0.7981
Accuracy       |          0.8142
N              |             452
------------------------------------------------------------

【 Task2 】
------------------------------------------------------------
Metric      |           Value
------------------------------------------------------------
cause       |
  - Precision |          0.7008
  - Recall |          0.6284
  - F1     |          0.6626
  - TP_precision |             171
  - TP_recall |             164
  - FP     |              73
  - FN     |              97
effect      

In [9]:
# Print the Scenario B metric tables (Task1, Task2, ...) computed by
# evaluate() for the cls_only prediction file.
display_results(results_B, scenario="B")


                          Scenario B Results                          

【 Task1 】
------------------------------------------------------------
Metric         |           Value
------------------------------------------------------------
TP             |             166
FP             |              29
FN             |              55
TN             |             202
Precision      |          0.8513
Recall         |          0.7511
F1             |          0.7981
Accuracy       |          0.8142
N              |             452
------------------------------------------------------------

【 Task2 】
------------------------------------------------------------
Metric      |           Value
------------------------------------------------------------
cause       |
  - Precision |          0.8104
  - Recall |          0.8079
  - F1     |          0.8092
  - TP_precision |             171
  - TP_recall |             164
  - FP     |              40
  - FN     |              39
effect      