In [3]:
import numpy as np
import re
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from xgboost import XGBClassifier
from gensim.models import Word2Vec, FastText, KeyedVectors
# ---------------------------
# Step 1. Load Dataset
# ---------------------------
dataset_path = r"Processed_Causality_Dataset.csv"

print("=== Loading Dataset ===")
df = pd.read_csv(dataset_path)

# Extract raw features + labels
X_raw = df["Sentence"].astype(str)
y_raw = df["Causality_Label"]

# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y_raw)

# Train/test split (just for check; CV will use full training set)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, stratify=y, test_size=0.2, random_state=42
)

print(f"Dataset File: {dataset_path}")
print(f"Total Samples: {df.shape[0]}, Columns: {df.shape[1]}")
print(f"Train Split: {len(X_train_raw)} | Test Split: {len(X_test_raw)}")
print(f"Label Classes: {list(label_encoder.classes_)}\n")


# ---------------------------
# 3. Preprocessing
# ---------------------------
_stop = set(stopwords.words("english"))
_lem  = WordNetLemmatizer()

def preprocess_text(text: str) -> str:
    text = text.lower()
    text = re.sub(r"[^\w\s]", " ", text)
    tokens = word_tokenize(text)
    tokens = [_lem.lemmatize(w) for w in tokens if w not in _stop and w.strip()]
    return " ".join(tokens)

X_proc = X_text_raw.apply(preprocess_text).tolist()


# ---------------------------
# 4. Tokenization
# ---------------------------
MAX_NUM_WORDS = 30000
MAX_SEQ_LEN   = 200
EMBEDDING_DIM = 100

tokenizer = Tokenizer(num_words=MAX_NUM_WORDS, oov_token="<OOV>")
tokenizer.fit_on_texts(X_proc)
X_seq = tokenizer.texts_to_sequences(X_proc)
X_pad = pad_sequences(X_seq, maxlen=MAX_SEQ_LEN, padding='post', truncating='post')
VOCAB_SIZE = min(MAX_NUM_WORDS, len(tokenizer.word_index) + 1)


# ---------------------------
# 5. Deep feature extractors
# ---------------------------
FEATURE_DIM = 128
LR = 1e-3

def compile_model(inp, feat):
    out = Dense(NUM_CLASSES, activation='softmax')(feat)
    model = Model(inputs=inp, outputs=out)
    model.compile(loss='categorical_crossentropy', optimizer=Adam(LR), metrics=['accuracy'])
    feat_extractor = Model(inputs=inp, outputs=feat)
    return model, feat_extractor

def build_cnn(max_len, vocab_size, emb_dim, feature_dim=FEATURE_DIM):
    inp = Input(shape=(max_len,))
    x = Embedding(vocab_size, emb_dim)(inp)
    x = Conv1D(128, 5, activation='relu', padding='same')(x)
    x = GlobalMaxPooling1D()(x)
    feat = Dense(feature_dim, activation='relu', name='feat')(x)
    return compile_model(inp, feat)

def build_lstm(max_len, vocab_size, emb_dim, feature_dim=FEATURE_DIM):
    inp = Input(shape=(max_len,))
    x = Embedding(vocab_size, emb_dim)(inp)
    x = LSTM(128)(x)
    feat = Dense(feature_dim, activation='relu', name='feat')(x)
    return compile_model(inp, feat)

def build_bilstm(max_len, vocab_size, emb_dim, feature_dim=FEATURE_DIM):
    inp = Input(shape=(max_len,))
    x = Embedding(vocab_size, emb_dim)(inp)
    x = Bidirectional(LSTM(128))(x)
    feat = Dense(feature_dim, activation='relu', name='feat')(x)
    return compile_model(inp, feat)

def build_gru(max_len, vocab_size, emb_dim, feature_dim=FEATURE_DIM):
    inp = Input(shape=(max_len,))
    x = Embedding(vocab_size, emb_dim)(inp)
    x = GRU(128)(x)
    feat = Dense(feature_dim, activation='relu', name='feat')(x)
    return compile_model(inp, feat)

def build_cnn_gru(max_len, vocab_size, emb_dim, feature_dim=FEATURE_DIM):
    inp = Input(shape=(max_len,))
    x = Embedding(vocab_size, emb_dim)(inp)
    x = Conv1D(128, 5, activation='relu', padding='same')(x)
    x = GRU(128)(x)
    feat = Dense(feature_dim, activation='relu', name='feat')(x)
    return compile_model(inp, feat)

def build_cnn_lstm(max_len, vocab_size, emb_dim, feature_dim=FEATURE_DIM):
    inp = Input(shape=(max_len,))
    x = Embedding(vocab_size, emb_dim)(inp)
    x = Conv1D(128, 5, activation='relu', padding='same')(x)
    x = LSTM(128)(x)
    feat = Dense(feature_dim, activation='relu', name='feat')(x)
    return compile_model(inp, feat)

BACKBONES = {
    "CNN": build_cnn,
    "LSTM": build_lstm,
    "BiLSTM": build_bilstm,
    "GRU": build_gru,
    "CNN-GRU": build_cnn_gru,
    "CNN-LSTM": build_cnn_lstm
}


# ---------------------------
# 6. Classical classifiers (all with predict_proba for SOFT voting)
# ---------------------------
def build_classifiers(random_state=RANDOM_SEED):
    clfs = {
        "SVM-RBF": SVC(kernel="rbf", probability=True, random_state=random_state)
    }
    return clfs

# Parameter grid for hyperparameter tuning
param_grids = {
    "SVM-RBF": {
        "C": [0.1, 1, 10],
        "gamma": ["scale", "auto", 0.001, 0.0001]
    }
}
# ---------------------------
# 7. 10-Fold CV + Soft Voting
# ---------------------------
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=RANDOM_SEED)
fold_results = []

# Logging / output
os.makedirs("outputs", exist_ok=True)
per_fold_csv = "outputs/soft_voting_10fold_results.csv"

for fold_idx, (tr_idx, val_idx) in enumerate(skf.split(X_pad, y), start=1):
    print(f"\n========== Fold {fold_idx}/10 ==========")
    X_tr, X_val = X_pad[tr_idx], X_pad[val_idx]
    y_tr, y_val = y[tr_idx], y[val_idx]
    y_tr_cat, y_val_cat = to_categorical(y_tr, num_classes=NUM_CLASSES), to_categorical(y_val, num_classes=NUM_CLASSES)

    # --- Deep feature extraction per backbone ---
    feats_tr_list, feats_val_list = [], []
    for name, builder in BACKBONES.items():
        print(f"Training backbone: {name}")
        model, feat_extractor = builder(MAX_SEQ_LEN, VOCAB_SIZE, EMBEDDING_DIM)
        es = EarlyStopping(monitor="val_loss", patience=2, restore_best_weights=True, verbose=0)
        model.fit(
            X_tr, y_tr_cat,
            validation_split=0.1,
            epochs=8,
            batch_size=64,
            callbacks=[es],
            verbose=0
        )
        feat_tr = feat_extractor.predict(X_tr, batch_size=128, verbose=0)
        feat_val = feat_extractor.predict(X_val, batch_size=128, verbose=0)
        feats_tr_list.append(feat_tr)
        feats_val_list.append(feat_val)

        # Free memory between models
        del model, feat_extractor
        K.clear_session()

    # Concatenate features from all backbones
    X_tr_feat = np.concatenate(feats_tr_list, axis=1)
    X_val_feat = np.concatenate(feats_val_list, axis=1)

    # Scale features (helps SVM/LR a lot)
    scaler = StandardScaler(with_mean=True, with_std=True)
    X_tr_feat = scaler.fit_transform(X_tr_feat)
    X_val_feat = scaler.transform(X_val_feat)

    # --- Classical classifiers + soft voting ---
    clfs = build_classifiers()
    voting_clf = VotingClassifier(estimators=clfs, voting='soft', n_jobs=-1, flatten_transform=True)
    voting_clf.fit(X_tr_feat, y_tr)
    y_val_pred = voting_clf.predict(X_val_feat)

    # --- Evaluate fold ---
    acc = accuracy_score(y_val, y_val_pred)
    precision = precision_score(y_val, y_val_pred, average="weighted", zero_division=0)
    recall = recall_score(y_val, y_val_pred, average="weighted", zero_division=0)
    f1 = f1_score(y_val, y_val_pred, average="weighted", zero_division=0)

    print(f"Fold {fold_idx} - Acc: {acc:.4f} | Prec: {precision:.4f} | Rec: {recall:.4f} | F1: {f1:.4f}")

    fold_results.append({
        "Fold": fold_idx,
        "Accuracy": acc,
        "Precision": precision,
        "Recall": recall,
        "F1-score": f1
    })

    # Save incremental results
    pd.DataFrame(fold_results).to_csv(per_fold_csv, index=False)

# ---------------------------
# 8. Summary
# ---------------------------
        print(f"--- Fold {fold} Final ---")
        print(f"Accuracy: {acc_curve[end_epoch-1]:.2f}% | Precision: {prec_curve[end_epoch-1]:.2f}% "
              f"| Recall: {rec_curve[end_epoch-1]:.2f}% | F1: {f1_curve[end_epoch-1]:.2f}%")

    # Final CV Results
    print(f"\n>>> {model_name} Final CV Results ({FOLDS} folds)")
    print(f"Accuracy: {acc_final:.2f}")
    print(f"Precision: {prec_final:.2f}")
    print(f"Recall: {rec_final:.2f}")
    print(f"F1: {f1_final:.2f}")
    print("="*70)



=== Loading Dataset ===
Dataset File: Processed_Causality_Dataset.csv
Total Samples: 376, Columns: 2
Train Split: 300 | Test Split: 76
Label Classes: [0, 1]



Epoch   1/100 - acc: 68.00% - prec: 70.22% - rec: 70.06% - f1: 70.19%
Epoch   2/100 - acc: 68.00% - prec: 70.09% - rec: 70.14% - f1: 70.25%
Epoch   3/100 - acc: 68.46% - prec: 70.47% - rec: 70.48% - f1: 70.69%
Epoch   4/100 - acc: 68.05% - prec: 70.68% - rec: 71.07% - f1: 70.87%
Epoch   5/100 - acc: 68.00% - prec: 71.02% - rec: 70.88% - f1: 71.12%
Epoch   6/100 - acc: 68.25% - prec: 70.99% - rec: 71.06% - f1: 71.25%
Epoch   7/100 - acc: 68.38% - prec: 71.34% - rec: 71.59% - f1: 71.59%
Epoch   8/100 - acc: 68.06% - prec: 71.70% - rec: 71.72% - f1: 71.90%
Epoch   9/100 - acc: 68.00% - prec: 71.93% - rec: 71.93% - f1: 72.07%
Epoch  10/100 - acc: 68.00% - prec: 72.10% - rec: 72.51% - f1: 72.13%
--- Fold 1 Final ---
Accuracy: 68.00% | Precision: 72.10% | Recall: 72.51% | F1: 72.13%

Epoch  11/100 - acc: 68.00% - prec: 72.40% - rec: 7