In [None]:
pip install tqdm


Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from tqdm import tqdm

# Load the dataset with LDA topics
file_path = 'labeled_comments_with_lda_topics.csv'
comments_df = pd.read_csv(file_path)

# Load gemma-2b model and tokenizer for comment classification (intent).
# NOTE(review): 'google/gemma-2b' is a base checkpoint. When it is loaded into
# a sequence-classification model, transformers reports that the
# classification head ('score.weight') is newly initialized, so the predicted
# class ids are not meaningful until the model is fine-tuned on intent labels.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = AutoTokenizer.from_pretrained('google/gemma-2b')
model = AutoModelForSequenceClassification.from_pretrained('google/gemma-2b').to(device)
model.eval()  # this script only runs inference; make eval mode explicit

# Sentiment analysis pipeline. The model is pinned explicitly rather than
# relying on the pipeline's task default, which transformers warns against and
# which may change between library versions. This is the checkpoint the
# default resolved to when this notebook was run.
SENTIMENT_MODEL_NAME = 'distilbert/distilbert-base-uncased-finetuned-sst-2-english'
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model=SENTIMENT_MODEL_NAME,
    device=0 if device == 'cuda' else -1,
)

# LDA topics, indexed by topic id (0-4): (human-readable label, priority weight).
_LDA_TOPIC_TABLE = (
    ("Project Management & Meetings", 5),
    ("Logistics, Bidding & Information Updates", 2),
    ("Scientific Studies & Environmental Data", 4),
    ("Data Presentation & Health Documentation", 3),
    ("Work Progress & Task Completion", 1),
)

# topic id -> weight added to the priority score
LDA_TOPIC_WEIGHTS = {
    topic_id: weight for topic_id, (_, weight) in enumerate(_LDA_TOPIC_TABLE)
}

# topic id -> display label
LDA_TOPIC_LABELS = {
    topic_id: label for topic_id, (label, _) in enumerate(_LDA_TOPIC_TABLE)
}

# Function to Classify Comment Intent Using gemma-2b Model
def classify_comment_intent(comment_text):
    """Return the index of the highest-scoring class for ``comment_text``.

    The text is tokenized with the module-level ``tokenizer`` (truncated to
    512 tokens), run through the module-level ``model`` on ``device``, and the
    argmax over the logits is returned as a plain Python int.

    Args:
        comment_text: The comment to classify.

    Returns:
        int: The predicted class index. The caller maps this index to an
        urgency label.
    """
    inputs = tokenizer.encode(
        comment_text, return_tensors="pt", max_length=512, truncation=True
    ).to(device)
    # Inference only: without no_grad() every call builds an autograd graph,
    # which wastes memory when this is applied row by row to a whole dataset.
    with torch.no_grad():
        outputs = model(inputs)
    return torch.argmax(outputs.logits, dim=1).item()

# Function to Get Sentiment Polarity of Comment
def get_sentiment(comment_text):
    """Return the sentiment label predicted by ``sentiment_analyzer``.

    Args:
        comment_text: The comment to analyze.

    Returns:
        str: The ``label`` field of the pipeline's first prediction.
    """
    # Let the pipeline truncate with its *own* tokenizer. Truncating with the
    # Gemma tokenizer and decoding back to text does not bound the length seen
    # by the sentiment model (different vocabularies give different token
    # counts) and also injects Gemma's special tokens into the text.
    sentiment = sentiment_analyzer(comment_text, truncation=True, max_length=512)
    return sentiment[0]['label']

# Rule-Based Logic for Labels and Scores with Hierarchical Influence
def assign_labels_and_scores_with_hierarchy(row):
    """Derive triage labels, per-dimension scores and a total priority score.

    Args:
        row: Mapping-like record (e.g. a pandas Series) read via ``row.get``.
            Keys used: ``level_0`` .. ``level_3``, ``next_action``,
            ``comment_full_text`` and ``dominant_topic``.

    Returns:
        tuple: ``(urgency_label, urgency_score, importance_label,
        importance_score, sentiment_label, sentiment_score,
        actionability_label, actionability_score, resolution_label,
        resolution_score, lda_label, lda_score, priority_score)`` where
        ``priority_score`` is the sum of the six individual scores.

    Model failures never propagate: urgency falls back to "Anytime" and
    sentiment to "Neutral", and the failure is logged as a warning.
    """
    import logging

    logger = logging.getLogger(__name__)

    # Extract values from the row
    level_0 = row.get('level_0', "")
    level_1 = row.get('level_1', "")
    level_2 = row.get('level_2', "")
    level_3 = row.get('level_3', "")
    next_action = row.get('next_action', "")
    dominant_topic = row.get('dominant_topic', 0)

    # Missing comments arrive from pandas as NaN (a float). Normalise anything
    # that is not a string to "" so the keyword checks below cannot raise.
    raw_text = row.get('comment_full_text', "")
    comment_text = raw_text if isinstance(raw_text, str) else ""
    has_text = bool(comment_text.strip())

    # 1. Urgency Label and Score using Intent Model
    urgency_label, urgency_score = "Anytime", 2
    if has_text:  # nothing to classify in an empty comment; keep the default
        try:
            comment_intent = classify_comment_intent(comment_text)
        except Exception:
            # Best-effort: keep the "Anytime" default but make the failure visible.
            logger.warning(
                "Intent classification failed; defaulting urgency to 'Anytime'",
                exc_info=True,
            )
        else:
            urgency_by_intent = {
                0: ("Immediate", 5),
                1: ("Soon", 4),
                2: ("Later", 3),
            }
            urgency_label, urgency_score = urgency_by_intent.get(
                comment_intent, ("Anytime", 2)
            )

    # 2. Importance Label and Score with Hierarchical Influence
    base_importance = {
        'MODIFICATION': 6,
        'REQUESTED': 5,
        'DISCUSSION': 4,
        'INFORMATION_EXCHANGE': 2,
    }
    importance_score = base_importance.get(level_0, 1)

    # Increase importance based on deeper hierarchical levels
    if level_1 in ['PROMISE', 'REQUESTED_CONFIRMATION']:
        importance_score += 1
    if level_2 in ['EXPLICIT', 'CONTENT']:
        importance_score += 1
    if level_3 in ['CHANGE', 'REFERENCE']:
        importance_score += 1

    # Map the final score to a label using ranges. Exact-value matching left
    # scores 3 and 5 (reachable via the +1 adjustments, or a 'REQUESTED'
    # base) labelled "Other" even though they outrank "Low"; ranges keep the
    # existing 1/2/4/6/>6 mappings and fill those gaps.
    if importance_score > 6:
        importance_label = "Critical"
    elif importance_score >= 5:
        importance_label = "Important"
    elif importance_score >= 3:
        importance_label = "Moderate"
    elif importance_score == 2:
        importance_label = "Low"
    else:
        importance_label = "Optional"

    # 3. Actionability Label and Score (case-insensitive keyword match)
    lowered_text = comment_text.lower()
    if 'must' in lowered_text or 'need' in lowered_text or next_action == 'KEEP':
        actionability_label, actionability_score = "Actionable", 5
    else:
        actionability_label, actionability_score = "Non-actionable", 1

    # 4. Resolution Status Label and Score
    # Any unrecognised next_action keeps the "Resolved"/0 default.
    resolution_by_action = {
        'Pending': ("Pending", 5),
        'In Progress': ("In Progress", 3),
        'Resolved': ("Resolved", 0),
    }
    resolution_label, resolution_score = resolution_by_action.get(
        next_action, ("Resolved", 0)
    )

    # 5. Sentiment Polarity Label and Score
    sentiment_label, sentiment_score = "Neutral", 3
    if has_text:
        try:
            sentiment = get_sentiment(comment_text).upper()
        except Exception:
            # Best-effort: keep the "Neutral" default but make the failure visible.
            logger.warning(
                "Sentiment analysis failed; defaulting sentiment to 'Neutral'",
                exc_info=True,
            )
        else:
            if sentiment == "NEGATIVE":
                sentiment_label, sentiment_score = "Negative", 5
            elif sentiment == "NEUTRAL":
                sentiment_label, sentiment_score = "Neutral", 3
            else:
                sentiment_label, sentiment_score = "Positive", 1

    # 6. LDA Topic Label and Score
    lda_label = LDA_TOPIC_LABELS.get(dominant_topic, "Other")
    lda_score = LDA_TOPIC_WEIGHTS.get(dominant_topic, 0)

    # Calculate the final priority score, incorporating hierarchical influence
    priority_score = (
        urgency_score +
        importance_score +
        sentiment_score +
        actionability_score +
        resolution_score +
        lda_score
    )

    return (urgency_label, urgency_score,
            importance_label, importance_score,
            sentiment_label, sentiment_score,
            actionability_label, actionability_score,
            resolution_label, resolution_score,
            lda_label, lda_score,
            priority_score)

# Score every comment row, showing a tqdm progress bar while pandas applies the function
tqdm.pandas()  # registers DataFrame.progress_apply
score_columns = [
    'urgency_label', 'urgency_score',
    'importance_label', 'importance_score',
    'sentiment_label', 'sentiment_score',
    'actionability_label', 'actionability_score',
    'resolution_label', 'resolution_score',
    'lda_label', 'lda_score',
    'priority_score',
]
comments_df[score_columns] = comments_df.progress_apply(
    assign_labels_and_scores_with_hierarchy,
    axis=1,
    result_type='expand',
)

# Final Triage Assignment Based on Priority Score
def assign_final_triage_level(row):
    """Map ``row['priority_score']`` to a triage level.

    Scores of 20 or more are 'High', 15-19 'Medium', 10-14 'Low', and
    anything below 10 is 'Informational'.
    """
    score = row['priority_score']
    # Thresholds are checked from highest to lowest; the first floor met wins.
    for floor, level in ((20, 'High'), (15, 'Medium'), (10, 'Low')):
        if score >= floor:
            return level
    return 'Informational'

# Derive the triage level of every row from its priority score
triage_levels = comments_df.apply(assign_final_triage_level, axis=1)
comments_df['triage_level'] = triage_levels

# Write the scored and triaged comments to CSV (without the index column)
output_file_path = 'triaged_comments_with_priority_and_labels_hierarchy.csv'
comments_df.to_csv(output_file_path, index=False)

print("Triage results with priority scores, labels, and hierarchical influence saved to " + output_file_path)


`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of GemmaForSequenceClassification were not initialized from the model checkpoint at google/gemma-2b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
  0%|          | 9/4991 [00:00<02:25, 34.36it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
 84%|████████▍ | 4182/4991 [01:07<00:12, 63.78it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (525 > 512). Running this sequence through the model will result in indexing errors
100%|██████████| 4991/4991 [01:21<00:00, 61.2

Triage results with priority scores, labels, and hierarchical influence saved to triaged_comments_with_priority_and_labels_hierarchy.csv


In [None]:
import pandas as pd

# Define all test cases
# Each entry is one hand-written comment record carrying the same keys the
# scoring function reads from a row: the comment text, the four hierarchy
# levels, the next action and the dominant LDA topic id.
test_cases = [
    {
        "comment_full_text": "The financial projections in section 3 are incorrect and need urgent revision. This must be addressed before the deadline.",
        "level_0": "MODIFICATION",
        "level_1": "REQUESTED",
        "level_2": "CONTENT",
        "level_3": "CHANGE",
        "next_action": "Pending",
        "dominant_topic": 0  # Project Management & Meetings
    },
    {
        "comment_full_text": "Please consider updating the figures in the report soon. This change can improve the clarity of the document, but it's not mandatory.",
        "level_0": "DISCUSSION",
        "level_1": "REQUESTED_CONFIRMATION",
        "level_2": "EXPLICIT",
        "level_3": "REFERENCE",
        "next_action": "In Progress",
        "dominant_topic": 3  # Data Presentation & Health Documentation
    },
    {
        "comment_full_text": "Could we add a minor glossary at the end for technical terms if time allows?",
        "level_0": "INFORMATION_EXCHANGE",
        "level_1": "PROMISE",
        "level_2": "CONTENT",
        "level_3": "REFERENCE",
        "next_action": "Pending",
        "dominant_topic": 4  # Work Progress & Task Completion
    },
    {
        "comment_full_text": "Nice work on the introduction. It reads really well and sets the tone for the rest of the document.",
        "level_0": "INFORMATION_EXCHANGE",
        "level_1": "FEEDBACK",
        "level_2": "EXPLICIT",
        "level_3": "REFERENCE",
        "next_action": "Resolved",
        "dominant_topic": 2  # Scientific Studies & Environmental Data
    },
    {
        "comment_full_text": "The data in table 5 is incorrect. This issue is critical and must be fixed immediately to avoid inaccurate conclusions.",
        "level_0": "MODIFICATION",
        "level_1": "REQUESTED",
        "level_2": "CONTENT",
        "level_3": "CHANGE",
        "next_action": "Pending",
        "dominant_topic": 3  # Data Presentation & Health Documentation
    },
    {
        "comment_full_text": "Great job on the figures! If possible, we should add a short explanation soon to clarify the data.",
        "level_0": "DISCUSSION",
        "level_1": "REQUESTED_CONFIRMATION",
        "level_2": "EXPLICIT",
        "level_3": "REFERENCE",
        "next_action": "Pending",
        "dominant_topic": 3  # Data Presentation & Health Documentation
    },
    {
        "comment_full_text": "Consider adding more visuals in the final section, but this is not urgent.",
        "level_0": "INFORMATION_EXCHANGE",
        "level_1": "FEEDBACK",
        "level_2": "CONTENT",
        "level_3": "REFERENCE",
        "next_action": "Resolved",
        "dominant_topic": 4  # Work Progress & Task Completion
    },
    {
        "comment_full_text": "I am not happy with the way section 2 is structured. It could be made clearer, though it's not critical at this stage.",
        "level_0": "DISCUSSION",
        "level_1": "PROMISE",
        "level_2": "CONTENT",
        "level_3": "REFERENCE",
        "next_action": "Pending",
        "dominant_topic": 0  # Project Management & Meetings
    }
]

# Build a DataFrame with one row per hand-written test case
test_df = pd.DataFrame(test_cases)

# Score each test row; the returned tuple is expanded into one column per element
result_columns = [
    'urgency_label', 'urgency_score',
    'importance_label', 'importance_score',
    'sentiment_label', 'sentiment_score',
    'actionability_label', 'actionability_score',
    'resolution_label', 'resolution_score',
    'lda_label', 'lda_score',
    'priority_score',
]
test_df[result_columns] = test_df.apply(
    assign_labels_and_scores_with_hierarchy,
    axis=1,
    result_type='expand',
)

# Turn the priority score into a triage level
test_df['triage_level'] = test_df.apply(assign_final_triage_level, axis=1)

# Show the comment next to its labels, total score and triage level
display_columns = [
    'comment_full_text',
    'urgency_label',
    'importance_label',
    'sentiment_label',
    'actionability_label',
    'resolution_label',
    'lda_label',
    'priority_score',
    'triage_level',
]
print(test_df[display_columns])



                                   comment_full_text urgency_label  \
0  The financial projections in section 3 are inc...          Soon   
1  Please consider updating the figures in the re...          Soon   
2  Could we add a minor glossary at the end for t...          Soon   
3  Nice work on the introduction. It reads really...          Soon   
4  The data in table 5 is incorrect. This issue i...          Soon   
5  Great job on the figures! If possible, we shou...          Soon   
6  Consider adding more visuals in the final sect...          Soon   
7  I am not happy with the way section 2 is struc...          Soon   

  importance_label sentiment_label actionability_label resolution_label  \
0         Critical        Negative          Actionable          Pending   
1         Critical        Positive      Non-actionable      In Progress   
2            Other        Negative      Non-actionable          Pending   
3         Moderate        Positive      Non-actionable         Resolv

In [None]:
import pandas as pd

# Define all test cases with expected outputs
# Each entry holds the input fields read by the scoring function plus
# hand-written ``expected_*`` values that are compared against the computed
# columns further down.
test_cases_with_expected = [
    {
        "comment_full_text": "The financial projections in section 3 are incorrect and need urgent revision. This must be addressed before the deadline.",
        "level_0": "MODIFICATION",
        "level_1": "REQUESTED",
        "level_2": "CONTENT",
        "level_3": "CHANGE",
        "next_action": "Pending",
        "dominant_topic": 0,  # Project Management & Meetings
        "expected_urgency_label": "Immediate",
        "expected_importance_label": "Critical",
        "expected_sentiment_label": "Negative",
        "expected_actionability_label": "Actionable",
        "expected_resolution_label": "Pending",
        "expected_lda_label": "Project Management & Meetings",
        "expected_priority_score": 30,  # Adjust based on actual triage logic
        "expected_triage_level": "High"
    },
    {
        "comment_full_text": "Please consider updating the figures in the report soon. This change can improve the clarity of the document, but it's not mandatory.",
        "level_0": "DISCUSSION",
        "level_1": "REQUESTED_CONFIRMATION",
        "level_2": "EXPLICIT",
        "level_3": "REFERENCE",
        "next_action": "In Progress",
        "dominant_topic": 3,  # Data Presentation & Health Documentation
        "expected_urgency_label": "Soon",
        "expected_importance_label": "Important",
        "expected_sentiment_label": "Positive",
        "expected_actionability_label": "Non-actionable",
        "expected_resolution_label": "In Progress",
        "expected_lda_label": "Data Presentation & Health Documentation",
        "expected_priority_score": 19,
        "expected_triage_level": "Medium"
    },
    {
        "comment_full_text": "Could we add a minor glossary at the end for technical terms if time allows?",
        "level_0": "INFORMATION_EXCHANGE",
        "level_1": "PROMISE",
        "level_2": "CONTENT",
        "level_3": "REFERENCE",
        "next_action": "Pending",
        "dominant_topic": 4,  # Work Progress & Task Completion
        "expected_urgency_label": "Later",
        "expected_importance_label": "Optional",
        "expected_sentiment_label": "Neutral",
        "expected_actionability_label": "Non-actionable",
        "expected_resolution_label": "Pending",
        "expected_lda_label": "Work Progress & Task Completion",
        "expected_priority_score": 14,
        "expected_triage_level": "Low"
    },
    # Define additional test cases similarly...
]

# Convert the test cases into a DataFrame
# (one row per case; the expected_* keys become columns alongside the inputs)
test_df = pd.DataFrame(test_cases_with_expected)

# Define the logic function that assigns labels and scores based on the triage framework
def assign_labels_and_scores_with_hierarchy(row):
    """Keyword-based placeholder that assigns triage labels and scores to one row.

    NOTE: this definition shares its name with the model-backed scoring
    function defined earlier in the notebook. Running this cell rebinds the
    name, so every later call uses this placeholder logic instead.

    Args:
        row: Mapping-like record (dict or pandas Series) with the keys
            ``comment_full_text``, ``level_0``, ``next_action`` and
            ``dominant_topic``.

    Returns:
        tuple: ``(urgency_label, urgency_score, importance_label,
        importance_score, sentiment_label, sentiment_score,
        actionability_label, actionability_score, resolution_label,
        resolution_score, lda_label, lda_score, priority_score)`` where
        ``priority_score`` is the sum of the six individual scores.
    """
    # Match keywords case-insensitively so "URGENT" / "Must" are not missed.
    # str() keeps a missing (NaN) comment from raising in the `in` checks.
    text = str(row['comment_full_text']).lower()

    # LDA topic id -> (label, weight); unknown topics fall back to "Other"/0
    lda_by_topic = {
        0: ("Project Management & Meetings", 5),
        1: ("Logistics, Bidding & Information Updates", 2),
        2: ("Scientific Studies & Environmental Data", 4),
        3: ("Data Presentation & Health Documentation", 3),
        4: ("Work Progress & Task Completion", 1),
    }
    lda_label, lda_score = lda_by_topic.get(row['dominant_topic'], ("Other", 0))

    # Placeholder logic for urgency (to be replaced by the actual model output)
    if "urgent" in text:
        urgency_label, urgency_score = "Immediate", 5
    elif "soon" in text:
        urgency_label, urgency_score = "Soon", 4
    else:
        urgency_label, urgency_score = "Anytime", 2

    # Placeholder for importance: only level_0 is considered here
    if row['level_0'] == "MODIFICATION":
        importance_label, importance_score = "Critical", 6
    elif row['level_0'] == "DISCUSSION":
        importance_label, importance_score = "Moderate", 4
    else:
        importance_label, importance_score = "Low", 2

    # Placeholder for sentiment (sentiment analysis is used in the actual framework)
    if "not happy" in text:
        sentiment_label, sentiment_score = "Negative", 5
    else:
        sentiment_label, sentiment_score = "Neutral", 3

    # Actionability example
    if "must" in text:
        actionability_label, actionability_score = "Actionable", 5
    else:
        actionability_label, actionability_score = "Non-actionable", 1

    # Resolution status; anything other than Pending / In Progress counts as Resolved
    if row['next_action'] == "Pending":
        resolution_label, resolution_score = "Pending", 5
    elif row['next_action'] == "In Progress":
        resolution_label, resolution_score = "In Progress", 3
    else:
        resolution_label, resolution_score = "Resolved", 0

    priority_score = (urgency_score + importance_score + sentiment_score +
                      actionability_score + resolution_score + lda_score)

    return (urgency_label, urgency_score,
            importance_label, importance_score,
            sentiment_label, sentiment_score,
            actionability_label, actionability_score,
            resolution_label, resolution_score,
            lda_label, lda_score,
            priority_score)

# Score each test row; the returned tuple is expanded into one column per element
scored_columns = [
    'urgency_label', 'urgency_score',
    'importance_label', 'importance_score',
    'sentiment_label', 'sentiment_score',
    'actionability_label', 'actionability_score',
    'resolution_label', 'resolution_score',
    'lda_label', 'lda_score',
    'priority_score',
]
test_df[scored_columns] = test_df.apply(
    assign_labels_and_scores_with_hierarchy,
    axis=1,
    result_type='expand',
)

# Assign triage levels based on the priority score
def assign_final_triage_level(row):
    """Return the triage level for ``row['priority_score']``.

    >= 20 -> 'High', >= 15 -> 'Medium', >= 10 -> 'Low', otherwise
    'Informational'.
    """
    score = row['priority_score']
    if score >= 20:
        return 'High'
    if score >= 15:
        return 'Medium'
    if score >= 10:
        return 'Low'
    return 'Informational'

test_df['triage_level'] = test_df.apply(assign_final_triage_level, axis=1)

# Compare the expected and predicted outputs.
# For every compared field X this adds a boolean column "X_match" that is True
# where "expected_X" equals the computed "X".
comparison_df = test_df.copy()
compared_fields = [
    'urgency_label',
    'importance_label',
    'sentiment_label',
    'actionability_label',
    'resolution_label',
    'lda_label',
    'priority_score',
    'triage_level',
]
for field in compared_fields:
    comparison_df[field + '_match'] = comparison_df['expected_' + field] == comparison_df[field]

# View the comparison results: comment text, then expected / actual / match per field
report_columns = ['comment_full_text']
for field in compared_fields:
    report_columns.extend(['expected_' + field, field, field + '_match'])
print(comparison_df[report_columns])


                                   comment_full_text expected_urgency_label  \
0  The financial projections in section 3 are inc...              Immediate   
1  Please consider updating the figures in the re...                   Soon   
2  Could we add a minor glossary at the end for t...                  Later   

  urgency_label  urgency_label_match expected_importance_label  \
0     Immediate                 True                  Critical   
1          Soon                 True                 Important   
2       Anytime                False                  Optional   

  importance_label  importance_label_match expected_sentiment_label  \
0         Critical                    True                 Negative   
1         Moderate                   False                 Positive   
2              Low                   False                  Neutral   

  sentiment_label  sentiment_label_match  ... resolution_label_match  \
0         Neutral                  False  ...                

In [None]:
import pandas as pd

# Define 11 test cases with expected outputs.
# Each entry holds the input fields read by the scoring function plus
# hand-written ``expected_*`` values. The expected labels are compared with
# exact (case-sensitive) string equality, so they must use the same spelling
# and capitalisation as the labels the scoring function produces.
test_cases_with_expected = [
    {
        "comment_full_text": "The financial projections in section 3 are incorrect and need urgent revision. This must be addressed before the deadline.",#1
        "level_0": "MODIFICATION",
        "level_1": "REQUESTED",
        "level_2": "CONTENT",
        "level_3": "CHANGE",
        "next_action": "Pending",
        "dominant_topic": 0,  # Project Management & Meetings
        "expected_urgency_label": "Immediate",
        "expected_importance_label": "Critical",
        "expected_sentiment_label": "Negative",
        "expected_actionability_label": "Actionable",
        "expected_resolution_label": "Pending",
        "expected_lda_label": "Project Management & Meetings",
        "expected_priority_score": 29,
        "expected_triage_level": "High"
    },
    {
        "comment_full_text": "Please consider updating the figures in the report soon. This change can improve the clarity of the document, but it's not mandatory.",#2
        "level_0": "DISCUSSION",
        "level_1": "REQUESTED_CONFIRMATION",
        "level_2": "EXPLICIT",
        "level_3": "REFERENCE",
        "next_action": "In Progress",
        "dominant_topic": 3,  # Data Presentation & Health Documentation
        "expected_urgency_label": "Soon",
        "expected_importance_label": "Moderate",
        "expected_sentiment_label": "Neutral",
        "expected_actionability_label": "Non-actionable",
        "expected_resolution_label": "In Progress",
        "expected_lda_label": "Data Presentation & Health Documentation",
        "expected_priority_score": 18,
        "expected_triage_level": "Medium"
    },
    {
        "comment_full_text": "Could we add a minor glossary at the end for technical terms whenever possible?",#3
        "level_0": "INFORMATION_EXCHANGE",
        "level_1": "PROMISE",
        "level_2": "CONTENT",
        "level_3": "REFERENCE",
        "next_action": "Pending",
        "dominant_topic": 4,  # Work Progress & Task Completion
        "expected_urgency_label": "Anytime",
        "expected_importance_label": "Low",
        "expected_sentiment_label": "Neutral",
        "expected_actionability_label": "Non-actionable",
        "expected_resolution_label": "Pending",
        "expected_lda_label": "Work Progress & Task Completion",
        "expected_priority_score": 14,
        "expected_triage_level": "Low"
    },
    {
        "comment_full_text": "Nice work on the introduction. It reads really well and sets the tone for the rest of the document.",#4
        "level_0": "INFORMATION_EXCHANGE",
        "level_1": "FEEDBACK",
        "level_2": "EXPLICIT",
        "level_3": "REFERENCE",
        "next_action": "Resolved",
        "dominant_topic": 2,  # Scientific Studies & Environmental Data
        "expected_urgency_label": "Anytime",
        "expected_importance_label": "Low",
        "expected_sentiment_label": "Positive",
        "expected_actionability_label": "Non-actionable",
        "expected_resolution_label": "Resolved",
        "expected_lda_label": "Scientific Studies & Environmental Data",
        "expected_priority_score": 12,
        "expected_triage_level": "Low"
    },
    {
        "comment_full_text": "The data in table 5 is incorrect. This issue is critical and must be fixed immediately to avoid inaccurate conclusions.",#5
        "level_0": "MODIFICATION",
        "level_1": "REQUESTED",
        "level_2": "CONTENT",
        "level_3": "CHANGE",
        "next_action": "Pending",
        "dominant_topic": 3,  # Data Presentation & Health Documentation
        "expected_urgency_label": "Immediate",
        "expected_importance_label": "Critical",
        "expected_sentiment_label": "Negative",
        "expected_actionability_label": "Actionable",
        "expected_resolution_label": "Pending",
        "expected_lda_label": "Data Presentation & Health Documentation",
        "expected_priority_score": 24,
        "expected_triage_level": "High"
    },
    {
        "comment_full_text": "Great job on the figures! If possible, we should add a short explanation soon to clarify the data.",#6
        "level_0": "DISCUSSION",
        "level_1": "REQUESTED_CONFIRMATION",
        "level_2": "EXPLICIT",
        "level_3": "REFERENCE",
        "next_action": "Pending",
        "dominant_topic": 3,  # Data Presentation & Health Documentation
        "expected_urgency_label": "Soon",
        "expected_importance_label": "Moderate",
        "expected_sentiment_label": "Neutral",
        "expected_actionability_label": "Non-actionable",
        "expected_resolution_label": "Pending",
        "expected_lda_label": "Data Presentation & Health Documentation",
        "expected_priority_score": 20,
        "expected_triage_level": "High"
    },
    {
        "comment_full_text": "Consider adding more visuals in the final section, this is urgent.",#7
        "level_0": "INFORMATION_EXCHANGE",
        "level_1": "FEEDBACK",
        "level_2": "CONTENT",
        "level_3": "REFERENCE",
        "next_action": "Resolved",
        "dominant_topic": 4,  # Work Progress & Task Completion
        "expected_urgency_label": "Immediate",
        "expected_importance_label": "Critical",  # was "critical"; lowercase can never equal the produced "Critical"
        "expected_sentiment_label": "Negative",
        "expected_actionability_label": "Non-actionable",
        "expected_resolution_label": "Resolved",
        "expected_lda_label": "Work Progress & Task Completion",
        "expected_priority_score": 12,
        # NOTE(review): assign_final_triage_level maps a score of 12 to 'Low'
        # (Medium needs >= 15), so this pair of expectations is inconsistent — confirm.
        "expected_triage_level": "Medium"
    },
    {
        "comment_full_text": "I am not happy with the way section 2 is structured. It could be made clearer.",#8
        "level_0": "DISCUSSION",
        "level_1": "PROMISE",
        "level_2": "CONTENT",
        "level_3": "REFERENCE",
        "next_action": "Pending",
        "dominant_topic": 0,  # Project Management & Meetings
        "expected_urgency_label": "Immediate",
        "expected_importance_label": "Critical",
        "expected_sentiment_label": "Negative",
        "expected_actionability_label": "Non-actionable",
        "expected_resolution_label": "Pending",
        "expected_lda_label": "Project Management & Meetings",
        "expected_priority_score": 22,
        "expected_triage_level": "High"
    },
    {
        "comment_full_text": "The legal compliance section needs to be reviewed. This is a major issue that must be addressed immediately.",#9
        "level_0": "MODIFICATION",
        "level_1": "REQUESTED",
        "level_2": "EXPLICIT",
        "level_3": "CHANGE",
        "next_action": "Pending",
        "dominant_topic": 2,  # Scientific Studies & Environmental Data
        "expected_urgency_label": "Immediate",
        "expected_importance_label": "Critical",
        "expected_sentiment_label": "Negative",
        "expected_actionability_label": "Actionable",
        "expected_resolution_label": "Pending",
        "expected_lda_label": "Scientific Studies & Environmental Data",
        "expected_priority_score": 25,
        "expected_triage_level": "High"
    },
    {
        "comment_full_text": "We should consider changing the layout of section 6,  it's urgent.",#10
        "level_0": "DISCUSSION",
        "level_1": "REQUESTED_CONFIRMATION",
        "level_2": "CONTENT",
        "level_3": "REFERENCE",
        "next_action": "In Progress",
        "dominant_topic": 1,  # Logistics, Bidding & Information Updates
        "expected_urgency_label": "Immediate",
        "expected_importance_label": "Critical",
        "expected_sentiment_label": "Negative",
        "expected_actionability_label": "Non-actionable",
        "expected_resolution_label": "In Progress",
        "expected_lda_label": "Logistics, Bidding & Information Updates",
        "expected_priority_score": 18,
        "expected_triage_level": "Medium"
    },
    {
        "comment_full_text": "The document looks great! No changes are required at this stage.",#11
        "level_0": "INFORMATION_EXCHANGE",
        "level_1": "FEEDBACK",
        "level_2": "EXPLICIT",
        "level_3": "REFERENCE",
        "next_action": "Resolved",
        "dominant_topic": 2,  # Scientific Studies & Environmental Data
        "expected_urgency_label": "Anytime",
        "expected_importance_label": "Low",
        "expected_sentiment_label": "Positive",
        "expected_actionability_label": "Non-actionable",
        "expected_resolution_label": "Resolved",
        "expected_lda_label": "Scientific Studies & Environmental Data",
        "expected_priority_score": 10,
        # NOTE(review): assign_final_triage_level maps a score of 10 to 'Low'
        # ('Informational' needs < 10), so this pair of expectations is inconsistent — confirm.
        "expected_triage_level": "Informational"
    }
]

# Build a DataFrame with one row per test case (inputs plus expected_* columns)
test_df = pd.DataFrame(test_cases_with_expected)

# Score each test row with whichever assign_labels_and_scores_with_hierarchy
# definition is currently bound in the session; the returned tuple is expanded
# into one column per element.
scored_columns = [
    'urgency_label', 'urgency_score',
    'importance_label', 'importance_score',
    'sentiment_label', 'sentiment_score',
    'actionability_label', 'actionability_score',
    'resolution_label', 'resolution_score',
    'lda_label', 'lda_score',
    'priority_score',
]
test_df[scored_columns] = test_df.apply(
    assign_labels_and_scores_with_hierarchy,
    axis=1,
    result_type='expand',
)

# Turn the priority score into a triage level
test_df['triage_level'] = test_df.apply(assign_final_triage_level, axis=1)

# Compare the expected and predicted outputs.
# For every compared field X this adds a boolean column "X_match" that is True
# where "expected_X" equals the computed "X".
comparison_df = test_df.copy()
compared_fields = [
    'urgency_label',
    'importance_label',
    'sentiment_label',
    'actionability_label',
    'resolution_label',
    'lda_label',
    'priority_score',
    'triage_level',
]
for field in compared_fields:
    comparison_df[field + '_match'] = comparison_df['expected_' + field] == comparison_df[field]

# View the comparison results: comment text, then expected / actual / match per field
report_columns = ['comment_full_text']
for field in compared_fields:
    report_columns.extend(['expected_' + field, field, field + '_match'])
print(comparison_df[report_columns])


                                    comment_full_text expected_urgency_label  \
0   The financial projections in section 3 are inc...              Immediate   
1   Please consider updating the figures in the re...                   Soon   
2   Could we add a minor glossary at the end for t...                Anytime   
3   Nice work on the introduction. It reads really...                Anytime   
4   The data in table 5 is incorrect. This issue i...              Immediate   
5   Great job on the figures! If possible, we shou...                   Soon   
6   Consider adding more visuals in the final sect...              Immediate   
7   I am not happy with the way section 2 is struc...              Immediate   
8   The legal compliance section needs to be revie...              Immediate   
9   We should consider changing the layout of sect...              Immediate   
10  The document looks great! No changes are requi...                Anytime   

   urgency_label  urgency_label_match e