# Fine-tuning on the CDCP dataset for ACC (argument component classification)

## Libraries

In [1]:
# Run this cell only once to install LLaMA-Factory

# %cd ..
# %rm -rf LLaMA-Factory
# !git clone https://github.com/hiyouga/LLaMA-Factory.git
# %cd LLaMA-Factory
# %ls
# !pip install -e .[torch,bitsandbytes]

In [2]:
# !pip uninstall -y pydantic
# !pip install pydantic==1.10.9 # 

# !pip uninstall -y gradio
# !pip install gradio==3.48.0

# !pip uninstall -y bitsandbytes
# !pip install --upgrade bitsandbytes

# !pip install tqdm
# !pip install ipywidgets
# !pip install scikit-learn

# Restart kernel afterwards.

In [1]:
import os
import ast
import sys
import json
import torch
import pickle
import subprocess

sys.path.append('../')

import pandas as pd

from tqdm.notebook import tqdm
from llamafactory.chat import ChatModel
from llamafactory.extras.misc import torch_gc
from sklearn.metrics import classification_report
from utils.post_processing import post_process_acc

In [2]:
# Warn early when no CUDA device is visible.
# A plain `if` is used rather than `assert` inside try/except: assert statements
# are removed when Python runs with -O, which would silently disable the warning.
if not torch.cuda.is_available():
    print("Please set up a GPU before using LLaMA Factory...")

## Parameters

In [3]:
# Absolute path of the parent of the current working directory; every other
# path in this notebook is resolved relative to it.
ROOT_DIR = os.path.abspath(os.pardir)

In [4]:
# Dataset directory, resolved under ROOT_DIR (not referenced by any other cell visible here).
# NOTE(review): the recorded OUTPUT_DIR output shows ROOT_DIR already ending in "cdcp",
# so this resolves to "<...>/cdcp/cdcp/datasets" - confirm that is the intended layout.
DATASET_DIR = os.path.join(ROOT_DIR, "cdcp/datasets")

In [5]:
# LLaMA-Factory directory expected directly under ROOT_DIR
# (not referenced by any other cell visible here).
LLAMA_FACTORY_DIR = os.path.join(ROOT_DIR, "LLaMA-Factory")

In [16]:
# Base model identifier in "<prefix>/<model name>" form; the part after the
# first "/" is used to name OUTPUT_DIR.
BASE_MODEL = "unsloth/llama-3-8b-Instruct"

In [17]:
# Task tag embedded in the output directory name and in the results file name.
TASK = "acc"

In [18]:
# TAGS = 1
# TAGS = "wtags" if TAGS == 1 else "wotags"

In [19]:
# CONTEXT = "essay" # essay or paragraph

In [20]:
# Directory holding the artefacts of this task/model pair:
#   <ROOT_DIR>/finetuned_models_run2/CDCP_<TASK>_<second "/"-separated part of BASE_MODEL>
OUTPUT_DIR = os.path.join(
    ROOT_DIR,
    "finetuned_models_run2",
    "CDCP_{}_{}".format(TASK, BASE_MODEL.split("/")[1]),
)

In [21]:
# Display the resolved output directory.
OUTPUT_DIR

'/Utilisateurs/umushtaq/am_work/coling_2025/cdcp/finetuned_models_run2/CDCP_acc_llama-3-8b-Instruct'

In [23]:
# Number of epochs (as the name suggests); in this notebook it selects the results
# file CDCP_<TASK>_results_<NB_EPOCHS>.pickle read in the post-processing section.
NB_EPOCHS = 5

### Training Args

## Post-processing

In [37]:
# Load the saved results for the configured task and epoch count.
# NOTE: unpickling can execute arbitrary code - only open files produced by this project.
results_path = os.path.join(OUTPUT_DIR, f"CDCP_{TASK}_results_{NB_EPOCHS}.pickle")
with open(results_path, "rb") as fh:
    results = pickle.load(fh)

In [38]:
# Show the full path of the results file that was loaded.
print(os.path.join(OUTPUT_DIR, f"""CDCP_{TASK}_results_{NB_EPOCHS}.pickle"""))

/Utilisateurs/umushtaq/am_work/coling_2025/cdcp/finetuned_models_run2/CDCP_acc_llama-3-8b-Instruct/CDCP_acc_results_5.pickle


In [39]:
# Predictions are kept as stored for now; they are unwrapped and decoded in the next cells.
preds = results["predictions"]

# Each ground truth is a JSON string; keep only its list of component types.
grounds = [
    json.loads(raw_truth)["component_types"]
    for raw_truth in results["ground_truths"]
]

In [41]:
# Keep only the "content" field of each stored prediction record.
# Reading from `results` instead of from `preds` itself keeps this cell re-runnable:
# rebinding `preds` from its own previous value fails on a second execution, because
# the entries are then no longer records.
preds = [record["content"] for record in results["predictions"]]

In [48]:
#preds[75] = '{"component_types": ["value", "value", "fact", "value", "value", "value", "value", "fact", "fact", "fact", "value", "value", "policy", "value", "value", "value", "value", "value", "policy", "policy", "policy", "policy", "policy", "policy", "policy", "policy", "policy", "policy", "policy", "value", "value", "fact", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", 
"value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value", "value"]}'

In [49]:
# Decode every raw model response into its list of predicted component types.
# Generated text is not guaranteed to be well-formed JSON, so a failure reports the
# index of the offending response instead of a bare decoder error. `preds` is only
# rebound once every entry has been decoded, which leaves it intact for a manual
# repair of that entry followed by a re-run of this cell.
parsed_preds = []
for pred_idx, raw_pred in enumerate(preds):
    try:
        parsed_preds.append(json.loads(raw_pred)["component_types"])
    except (json.JSONDecodeError, KeyError) as err:
        raise ValueError(
            f"preds[{pred_idx}] is not a JSON object with a 'component_types' key: "
            f"{raw_pred[:200]!r}"
        ) from err
preds = parsed_preds

In [51]:
def opposite_acc(component_type):
    """Return a substitute label that differs from ``component_type``.

    The substitution table is: fact -> value, value -> policy,
    policy -> value, testimony -> fact, reference -> policy.

    Args:
        component_type: The label to replace.

    Returns:
        The substitute label, or ``None`` when ``component_type`` is not one
        of the five labels listed above.
    """
    substitutes = (
        ("fact", "value"),
        ("value", "policy"),
        ("policy", "value"),
        ("testimony", "fact"),
        ("reference", "policy"),
    )
    # First matching entry wins; no match falls through to None.
    return next(
        (substitute for label, substitute in substitutes if component_type == label),
        None,
    )


In [52]:
def harmonize_preds_acc(grounds, preds):
    """Return a copy of ``preds`` with exactly as many labels as ``grounds``.

    Args:
        grounds: Ground-truth labels of one sample.
        preds: Predicted labels of the same sample.

    Returns:
        A new list of ``len(grounds)`` labels. Surplus predictions are cut
        off. Missing predictions are filled with ``opposite_acc`` of the
        ground-truth label at that position, i.e. with a label that differs
        from the ground truth.
    """
    n_expected = len(grounds)
    n_predicted = len(preds)

    # Enough (or too many) predictions: keep only the first n_expected.
    if n_predicted >= n_expected:
        return preds[:n_expected]

    # Too few predictions: pad the tail with substitute labels.
    padding = [opposite_acc(label) for label in grounds[n_predicted:]]
    return preds + padding

In [53]:
# Bring every sample's prediction list to the length of its ground-truth list.
# Only positions present in both lists are visited; entries are replaced in place.
for sample_idx in range(min(len(grounds), len(preds))):
    gold_labels = grounds[sample_idx]
    predicted_labels = preds[sample_idx]
    if len(gold_labels) == len(predicted_labels):
        continue
    preds[sample_idx] = harmonize_preds_acc(gold_labels, predicted_labels)

In [54]:
# Flatten the per-sample label lists into one flat list per side.
task_preds = []
for sample_preds in preds:
    task_preds.extend(sample_preds)

task_grounds = []
for sample_grounds in grounds:
    task_grounds.extend(sample_grounds)

In [55]:
# Sanity check: the flattened predictions and ground truths must contain the
# same number of labels (this is expected to display True).
len(task_preds) == len(task_grounds)

True

## Results

In [56]:
# Text classification report over all flattened labels, printed with 3 decimals.
print(classification_report(task_grounds, task_preds, digits=3))

              precision    recall  f1-score   support

        fact      0.591     0.689     0.636       132
      policy      0.842     0.908     0.874       153
   reference      1.000     1.000     1.000         1
   testimony      0.924     0.898     0.911       244
       value      0.864     0.817     0.839       496

    accuracy                          0.833      1026
   macro avg      0.844     0.862     0.852      1026
weighted avg      0.840     0.833     0.836      1026



In [57]:
# Persist the classification report (as a dict) in OUTPUT_DIR.
# The report is computed before the file is opened, so a failure while computing
# it cannot leave behind an empty or truncated pickle. The path is built with
# os.path.join, like the other paths in this notebook, instead of hard-coding "/".
report_path = os.path.join(OUTPUT_DIR, "classification_report.pickle")
report_dict = classification_report(task_grounds, task_preds, output_dict=True)
with open(report_path, "wb") as fh:
    pickle.dump(report_dict, fh)