In [None]:
# Cell 1: Setup and Initialization
import json
import os
import pickle

import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from google.colab import drive
from sklearn.model_selection import StratifiedKFold
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

# --- KEY SETTINGS ---
# SEED feeds the K-Fold splitter in a later cell; GDRIVE_PATH is the project
# folder on the mounted Drive that later cells read their inputs from.
SEED = 42
GDRIVE_PATH = '/content/drive/MyDrive/eecsi_revise/'

# Attach Google Drive to the Colab filesystem so GDRIVE_PATH becomes readable
drive.mount('/content/drive')

print(f"✅ Setup complete. Working inside folder: {GDRIVE_PATH}")

Mounted at /content/drive
✅ Setup complete. Working inside folder: /content/drive/MyDrive/eecsi_revise/


In [None]:
# Cell 2: Load Data and ACD Predictions

print("--- Loading base data and pre-generated ACD predictions ---")


def _read_pickle(file_name):
    """Unpickle `file_name` from GDRIVE_PATH and return the stored object.

    NOTE(review): pickle can execute arbitrary code while loading; only use this
    on files produced by this project's own notebooks.
    """
    with open(os.path.join(GDRIVE_PATH, file_name), 'rb') as handle:
        return pickle.load(handle)


try:
    # Main annotated dataframe
    df = pd.read_csv(os.path.join(GDRIVE_PATH, 'final_golden_dataset_eecsi.csv'))
    # 5-fold split definitions
    kfold_splits = _read_pickle('kfold_splits.pkl')
    # Raw predictions file which contains results from the ACD stage
    all_predictions = _read_pickle('all_model_predictions.pkl')
except FileNotFoundError as missing:
    print(f"❌ ERROR: A required file was not found. Please ensure all previous notebooks ran successfully. Details: {missing}")
    raise
else:
    print("✅ All necessary files loaded successfully.")

--- Loading base data and pre-generated ACD predictions ---
✅ All necessary files loaded successfully.


In [None]:
# Cell 3: Generate ASC Predictions for the Best Pipeline

# --- Redefine Helper Class and Functions ---
class SentimentDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with optional labels.

    Args:
        encodings: Mapping from feature name to a per-example sequence. It must
            contain 'input_ids', which determines the dataset length.
        labels: Optional per-example labels (e.g. a list or NumPy array),
            indexable by integer position. Leave as None for inference-only use.
    """

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return a dict of tensors for example `idx`, adding 'labels' when labels exist."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # Explicit None/length check: a bare truthiness test (`if self.labels:`)
        # raises ValueError for multi-element NumPy arrays and pandas Series.
        # An empty label container is still treated as "no labels".
        if self.labels is not None and len(self.labels) > 0:
            item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of examples, taken from the 'input_ids' encoding."""
        return len(self.encodings['input_ids'])

# --- Setup for ASC Prediction ---
print("\n--- Generating predictions for the ASC stage... ---")
ASC_MODEL_NAME = "indolem/indobertweet-base-uncased" # This is IndoBERTweet
ASC_RESULTS_PATH = os.path.join(GDRIVE_PATH, 'indobertweet_asc_results/')
TEXT_COLUMN = 'full_text' if 'full_text' in df.columns else 'cleaned_text'
N_SPLITS = 5  # number of CV folds; one `fold_<k>` directory is read per fold


def _resolve_best_checkpoint(fold_dir):
    """Return the checkpoint directory to load for one fold.

    Resolution order:
      1. `best_model_checkpoint` recorded in `<fold_dir>/trainer_state.json`,
         if that directory exists;
      2. a directory with the same name located inside `fold_dir` (covers a
         recorded path that no longer exists in this environment);
      3. the highest-numbered `checkpoint-<step>` directory in `fold_dir`.

    Raises:
        FileNotFoundError: if `fold_dir` is missing or holds no usable checkpoint.
    """
    state_path = os.path.join(fold_dir, 'trainer_state.json')
    try:
        with open(state_path, 'r') as f:
            recorded = json.load(f)['best_model_checkpoint']
    except (FileNotFoundError, KeyError):
        recorded = None

    # `best_model_checkpoint` can be null in the JSON; never hand None (or a
    # dangling path) to from_pretrained.
    if recorded:
        same_name_in_fold = os.path.join(fold_dir, os.path.basename(os.path.normpath(recorded)))
        for candidate in (recorded, same_name_in_fold):
            if os.path.isdir(candidate):
                return candidate

    step_dirs = [
        d for d in os.listdir(fold_dir)
        if d.startswith('checkpoint-') and d.split('-')[-1].isdigit()
    ]
    if not step_dirs:
        raise FileNotFoundError(f"No usable 'checkpoint-<step>' directory found in {fold_dir}")
    latest = os.path.join(fold_dir, max(step_dirs, key=lambda d: int(d.split('-')[-1])))
    print(f"    ⚠️ No usable best_model_checkpoint for {fold_dir}; falling back to latest checkpoint: {latest}")
    return latest


# Filter for relevant data where sentiment analysis is applicable
relevant_df = df[df['aspect'] != 'Irrelevant'].copy()
y_sentiment = relevant_df['sentiment']
# NOTE(review): assumes the sorted label order reproduces the label<->id mapping
# used when the ASC checkpoints were trained — confirm against the training notebook.
sentiment_labels_list = sorted(y_sentiment.unique())
s_label2id = {l: i for i, l in enumerate(sentiment_labels_list)}
s_id2label = {i: l for i, l in enumerate(sentiment_labels_list)}

tokenizer_asc = AutoTokenizer.from_pretrained(ASC_MODEL_NAME)
asc_predictions = pd.Series([None] * len(relevant_df), index=relevant_df.index)

# Recreate the K-Fold splits for the relevant data
# NOTE(review): each fold's checkpoint is only evaluated on held-out rows if this
# split (same rows, order, stratification column and SEED) matches the one used
# for training — TODO confirm; the loaded `kfold_splits` is not consulted here.
skf_asc = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

# Inference-only arguments. report_to="none" disables logging integrations: with
# the default Trainer(model=model), prediction prompted for a W&B API key.
# output_dir matches the directory Trainer falls back to when no args are given.
inference_args = TrainingArguments(output_dir='tmp_trainer', report_to='none')

for i, (_, test_index) in enumerate(skf_asc.split(relevant_df[TEXT_COLUMN], relevant_df['sentiment'])):
    fold_num = i + 1
    print(f"  Processing ASC Fold {fold_num}/{N_SPLITS}...")

    # Find the best checkpoint for this fold
    fold_dir = os.path.join(ASC_RESULTS_PATH, f'fold_{fold_num}')
    best_checkpoint_path = _resolve_best_checkpoint(fold_dir)

    # Load model and predict
    model = AutoModelForSequenceClassification.from_pretrained(best_checkpoint_path)
    trainer = Trainer(model=model, args=inference_args)
    test_data = relevant_df.iloc[test_index]
    test_encodings = tokenizer_asc(list(test_data[TEXT_COLUMN]), truncation=True, padding=True, max_length=128)
    prediction_dataset = SentimentDataset(test_encodings)

    predictions = trainer.predict(prediction_dataset)
    predicted_labels_int = np.argmax(predictions.predictions, axis=1)

    # test_index is positional, so write back positionally; the Series keeps
    # relevant_df's index for later index-based alignment.
    asc_predictions.iloc[test_index] = predicted_labels_int

# Every relevant row should have been in exactly one test fold.
n_unfilled = int(asc_predictions.isna().sum())
if n_unfilled:
    raise RuntimeError(f"{n_unfilled} relevant rows received no ASC prediction.")

# Convert integer predictions back to string labels
final_asc_preds = asc_predictions.map(s_id2label)
print("\n✅ ASC prediction generation complete.")


--- Generating predictions for the ASC stage... ---
  Processing ASC Fold 1/5...


  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mitaeyeong2532[0m ([33mitaeyeong2532-telkom-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  Processing ASC Fold 2/5...


  Processing ASC Fold 3/5...


  Processing ASC Fold 4/5...


  Processing ASC Fold 5/5...



✅ ASC prediction generation complete.


In [None]:
# Cell 4: Combine All Data and Analyze Error Propagation

# 1. Create a final analysis DataFrame
df_analysis = pd.DataFrame({
    'true_aspect': all_predictions['y_true'],
    'pred_aspect_pipeline': all_predictions['indobertweet_pipeline_acd']
})

# The sentiment columns below are joined purely by index label, so the ACD
# predictions must be row-aligned with `df`. Compare the stored true aspects with
# df['aspect'] to surface a misordered or differently sized predictions file.
reference_aspect = df['aspect'].reindex(df_analysis.index)
n_aspect_mismatch = int(df_analysis['true_aspect'].ne(reference_aspect).sum())
if len(df_analysis) != len(df) or n_aspect_mismatch:
    print(f"⚠️ WARNING: ACD predictions do not line up with df ({len(df_analysis)} vs {len(df)} rows, "
          f"{n_aspect_mismatch} aspect label mismatches). The analysis below may be misaligned.")

# Add the ASC predictions, aligning them by index
df_analysis['true_sentiment'] = relevant_df['sentiment']
df_analysis['pred_sentiment_pipeline'] = final_asc_preds

# 2. Determine if each prediction was correct
df_analysis['acd_correct'] = df_analysis['true_aspect'] == df_analysis['pred_aspect_pipeline']
df_analysis['asc_correct'] = df_analysis['true_sentiment'] == df_analysis['pred_sentiment_pipeline']

# 3. Filter out irrelevant tweets for the main analysis, as ASC is not performed on them
df_relevant_analysis = df_analysis[df_analysis['true_aspect'] != 'Irrelevant'].copy()

# Rows with no ASC prediction compare unequal and would silently count as "ASC Wrong".
n_missing_asc = int(df_relevant_analysis['pred_sentiment_pipeline'].isna().sum())
if n_missing_asc:
    print(f"⚠️ WARNING: {n_missing_asc} relevant rows have no ASC prediction and are counted as 'ASC Wrong'.")

# 4. Group by the ACD result and count the ASC outcomes
asc_outcome_by_acd = df_relevant_analysis.groupby('acd_correct')['asc_correct']
# Force both outcome columns to exist so the lookups in step 5 cannot raise
# KeyError when every ASC prediction is correct (or every one is wrong).
error_propagation_summary = (
    asc_outcome_by_acd.value_counts(normalize=True)
    .unstack(fill_value=0)
    .reindex(columns=[False, True], fill_value=0)
)
error_propagation_counts = (
    asc_outcome_by_acd.value_counts()
    .unstack(fill_value=0)
    .reindex(columns=[False, True], fill_value=0)
)

print("\n--- Error Propagation Analysis Summary ---")
print("This table shows the percentage of sentiment predictions (ASC) being correct or wrong,")
print("grouped by whether the initial aspect prediction (ACD) was correct or wrong.")

asc_column_names = {False: 'ASC Wrong', True: 'ASC Correct'}
acd_row_names = {False: 'ACD Wrong', True: 'ACD Correct'}

# Display percentages
print("\n--- Proportions ---")
display(error_propagation_summary.rename(columns=asc_column_names, index=acd_row_names))

# Display raw counts
print("\n--- Raw Counts ---")
display(error_propagation_counts.rename(columns=asc_column_names, index=acd_row_names))

# 5. Final Conclusion based on the numbers
if not error_propagation_summary.empty:
    if {True, False} <= set(error_propagation_summary.index):
        accuracy_when_acd_correct = error_propagation_summary.loc[True, True]
        accuracy_when_acd_wrong = error_propagation_summary.loc[False, True]

        print(f"\nSentiment Accuracy when Aspect was Correct: {accuracy_when_acd_correct:.2%}")
        print(f"Sentiment Accuracy when Aspect was Wrong:   {accuracy_when_acd_wrong:.2%}")

        # NOTE(review): this is a raw comparison of two proportions; no statistical
        # significance test is run, so read "significant" below informally.
        if accuracy_when_acd_correct > accuracy_when_acd_wrong:
            print("\nConclusion: Error propagation is observed. Incorrect aspect predictions negatively impact sentiment classification accuracy.")
        else:
            print("\nConclusion: No significant error propagation observed.")
    else:
        # Only one ACD outcome occurred, so there is nothing to compare against.
        print("\nConclusion: Cannot compare — ACD predictions were all correct or all wrong for the relevant tweets.")


--- Error Propagation Analysis Summary ---
This table shows the percentage of sentiment predictions (ASC) being correct or wrong,
grouped by whether the initial aspect prediction (ACD) was correct or wrong.

--- Proportions ---


asc_correct,ASC Wrong,ASC Correct
acd_correct,Unnamed: 1_level_1,Unnamed: 2_level_1
ACD Wrong,0.159915,0.840085
ACD Correct,0.167092,0.832908



--- Raw Counts ---


asc_correct,ASC Wrong,ASC Correct
acd_correct,Unnamed: 1_level_1,Unnamed: 2_level_1
ACD Wrong,75,394
ACD Correct,262,1306



Sentiment Accuracy when Aspect was Correct: 83.29%
Sentiment Accuracy when Aspect was Wrong:   84.01%

Conclusion: No significant error propagation observed.
