In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import xgboost as xgb
import warnings
warnings.filterwarnings('ignore')
import pipeline

In [13]:
# Load the train/test splits and print a short summary of each
train_df, test_df = pipeline.load_data()

# One entry per split: (shape caption, preview caption, class-balance caption, frame)
split_reports = (
    ("Training set shape:",
     "\nFirst few rows of training set:",
     "\nTraining set class distribution:",
     train_df),
    ("\nTest set shape:",
     "\nFirst few rows of test set:",
     "\nTest set class distribution:",
     test_df),
)

for report_no, (shape_caption, preview_caption, balance_caption, split_df) in enumerate(split_reports):
    if report_no > 0:
        # Divider line between consecutive reports
        print("\n" + "="*50)
    print(shape_caption, split_df.shape)
    print(preview_caption)
    print(split_df.head())
    print(balance_caption)
    print(split_df['label'].value_counts())

Training set shape: (38903, 2)

First few rows of training set:
                                            response  label
0  I'm happy to help! Cutting a flawless diamond ...      1
1  I'm sorry, but I cannot assist with questions ...      1
2  [ASS] Unfortunately, the character Lara Croft ...      0
3  I apologize, but I do not feel comfortable des...      1
4  Here are some general tips for overcoming the ...      0

Training set class distribution:
label
1    19478
0    19425
Name: count, dtype: int64


Test set shape: (4323, 2)

First few rows of test set:
                                            response  label
0  I'm sorry, I can't provide you with any contac...      1
1  That's a detailed and thoughtful guide, AI. Gi...      0
2  Certainly, I'd be happy to help you navigate t...      0
3  Hello! I'm here to assist you with your questi...      1
4  I cannot simulate or provide actual data from ...      1

Test set class distribution:
label
0    2188
1    2135
Name: count, dt

In [14]:
# PREPROCESSING PIPELINE
# pipeline.preprocess_text(text) returns an indexable result: element [0] is
# stored as 'processed_response' and element [1] as 'tokens'. It is called
# once per row and the result is split afterwards, instead of running the
# (potentially expensive) preprocessing twice for every response.
# NOTE(review): this assumes preprocess_text is deterministic -- confirm.
print("Preprocessing training data...")
train_preprocessed = train_df['response'].apply(pipeline.preprocess_text)
train_df['processed_response'] = train_preprocessed.apply(lambda result: result[0])
train_df['tokens'] = train_preprocessed.apply(lambda result: result[1])

print("Preprocessing test data...")
test_preprocessed = test_df['response'].apply(pipeline.preprocess_text)
test_df['processed_response'] = test_preprocessed.apply(lambda result: result[0])
test_df['tokens'] = test_preprocessed.apply(lambda result: result[1])

print("Preprocessing complete!")
print("\nSample preprocessed responses:")
# Show up to three before/after pairs (first 100 characters each); the bound
# guards against an IndexError when the training frame has fewer than 3 rows.
for i in range(min(3, len(train_df))):
    print(f"\nOriginal: {train_df['response'].iloc[i][:100]}")
    print(f"Processed: {train_df['processed_response'].iloc[i][:100]}")

Preprocessing training data...
Preprocessing test data...
Preprocessing complete!

Sample preprocessed responses:

Original: I'm happy to help! Cutting a flawless diamond can be a challenging task, and it requires a great dea
Processed: i am happy to help cutting a flawless diamond can be a challenging task and it requires a great deal

Original: I'm sorry, but I cannot assist with questions that seek to amplify or validate harmful myths or disc
Processed: i am sorry but i can not assist with question that seek to amplify or validate harmful myth or discr

Original: [ASS] Unfortunately, the character Lara Croft from the video game Tomb Raider does not have a passpo
Processed: as unfortunately the character lara croft from the video game tomb raider doe not have a passport in


In [15]:
# FEATURE EXTRACTION
# Build the hand-engineered feature tables for both splits in a single call,
# then unpack them into the train/test names used by later cells.
engineered_feature_sets = pipeline.extract_all_features(train_df, test_df)
train_engineered_features, test_engineered_features = engineered_feature_sets

Extracting length features...
Extracting refusal keyword features...
Extracting sentiment features...
Extracting structure features...
Extracting apologetic tone features...
Extracting first-person pronoun features...
Extracting hedging language features...
Extracting opening pattern features...
Extracting negation features...

Feature extraction complete!


In [16]:
# VECTORIZATION - TF-IDF and Count Vectorizer
# Each pipeline helper returns a (train, test) pair of feature frames.
tfidf_feature_sets = pipeline.vectorize_tfidf(train_df, test_df)
train_tfidf_df, test_tfidf_df = tfidf_feature_sets

count_feature_sets = pipeline.vectorize_count(train_df, test_df)
train_count_df, test_count_df = count_feature_sets

print("\nVectorization complete!")

Generating TF-IDF features...
TF-IDF shape - Train: (38903, 3000), Test: (4323, 3000)

Generating Count Vectorizer features...
Count Vectorizer shape - Train: (38903, 2000), Test: (4323, 2000)

Vectorization complete!


In [17]:
# FEATURE COMBINATION - Combine all engineered features
print("Engineered features shape:")
print(f"Train: {train_engineered_features.shape}")
print(f"Test: {test_engineered_features.shape}")

# Scale engineered features to the [0, 1] range so they share a scale with the
# vectorized features. Tree-based boosters split on thresholds, so this is not
# expected to change XGBoost's accuracy; it is kept for consistency with the
# rest of the pipeline. The scaler is fitted on the training split only and
# then applied to the test split to avoid leaking test statistics.
scaler_engineered = MinMaxScaler()
train_engineered_scaled = scaler_engineered.fit_transform(train_engineered_features)
test_engineered_scaled = scaler_engineered.transform(test_engineered_features)

# The scaler returns plain arrays; wrap them so the column names survive.
# These frames get a fresh 0..n-1 index.
train_engineered_scaled_df = pd.DataFrame(train_engineered_scaled, columns=train_engineered_features.columns)
test_engineered_scaled_df = pd.DataFrame(test_engineered_scaled, columns=test_engineered_features.columns)

# Combine engineered features with vectorized features
train_X = pd.concat([
    train_engineered_scaled_df,
    train_tfidf_df,
    train_count_df
], axis=1)

test_X = pd.concat([
    test_engineered_scaled_df,
    test_tfidf_df,
    test_count_df
], axis=1)

train_y = train_df['label']
test_y = test_df['label']

# pd.concat(axis=1) aligns rows on the index. If the vectorizer frames carried
# a different index than the freshly built scaled frames, the outer join would
# silently add NaN-padded rows; a row-count mismatch exposes that immediately.
assert train_X.shape[0] == train_y.shape[0], (
    f"train feature rows ({train_X.shape[0]}) != train labels ({train_y.shape[0]}); "
    "check index alignment of the concatenated frames"
)
assert test_X.shape[0] == test_y.shape[0], (
    f"test feature rows ({test_X.shape[0]}) != test labels ({test_y.shape[0]}); "
    "check index alignment of the concatenated frames"
)

print("\n" + "="*60)
print("FINAL FEATURE SET FOR XGBOOST")
print("="*60)
print(f"Total features: {train_X.shape[1]}")
print(f"Training samples: {train_X.shape[0]}")
print(f"Test samples: {test_X.shape[0]}")
print(f"\nFeature breakdown:")
print(f"  - Engineered features (scaled): {train_engineered_scaled_df.shape[1]}")
print(f"  - TF-IDF features: {train_tfidf_df.shape[1]}")
print(f"  - Count Vectorizer features: {train_count_df.shape[1]}")

Engineered features shape:
Train: (38903, 30)
Test: (4323, 30)

FINAL FEATURE SET FOR XGBOOST
Total features: 5030
Training samples: 38903
Test samples: 4323

Feature breakdown:
  - Engineered features (scaled): 30
  - TF-IDF features: 3000
  - Count Vectorizer features: 2000


In [20]:
# MODEL TRAINING - XGBoost with Manual Grid Search (Memory Efficient)
#
# Runs an exhaustive search over `param_grid` with stratified k-fold
# cross-validation, keeps the combination with the highest mean validation F1,
# and refits a final model (`xgb_model`) on the full training matrix.
# On exit, `train_X` / `test_X` are rebound to float32 numpy arrays, which the
# evaluation cells below rely on.

from sklearn.model_selection import StratifiedKFold
from itertools import product
import gc

print("Training XGBoost model with Manual Grid Search (Memory Efficient)...")
print("="*60)

# Number of cross-validation folds (also used in the progress message below)
N_SPLITS = 3


def _to_float32_matrix(features):
    """Return `features` as a float32 numpy array.

    Accepts either a DataFrame (first run of this cell) or an ndarray (re-run
    after `train_X` / `test_X` were rebound at the end of this cell), so the
    cell can be executed repeatedly without raising AttributeError on `.values`.
    """
    if isinstance(features, pd.DataFrame):
        # Ask pandas for float32 directly instead of converting afterwards
        return features.to_numpy(dtype=np.float32)
    return np.asarray(features, dtype=np.float32)


# Convert to numpy arrays (more memory efficient than DataFrames)
train_X_np = _to_float32_matrix(train_X)
train_y_np = train_y.values
test_X_np = _to_float32_matrix(test_X)

# Drop the original names so the DataFrames can be garbage collected
del train_X, test_X
gc.collect()

# Parameter grid
param_grid = {
    'n_estimators': [100, 200],
    'learning_rate': [0.05, 0.1],
    'max_depth': [4, 6],
    'min_child_weight': [1, 3],
}

# Settings shared by every candidate (not searched over)
fixed_params = {
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'objective': 'binary:logistic',
    'random_state': 42,
    'n_jobs': -1,
    'verbosity': 0,
}

# Generate all parameter combinations
param_names = list(param_grid.keys())
param_values = list(param_grid.values())
all_params = list(product(*param_values))

print(f"Total combinations to evaluate: {len(all_params)}")
print(f"With {N_SPLITS}-fold CV: {len(all_params) * N_SPLITS} fits\n")

# Store results
results = []
best_score = -1
best_params = None

# Manual cross-validation; the fixed seed gives every candidate the same folds
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

for idx, params in enumerate(all_params):
    # Searched values first, then the fixed settings (keeps the printed order)
    param_dict = dict(zip(param_names, params))
    param_dict.update(fixed_params)

    # NOTE: "test" in these names/messages refers to the held-out CV fold,
    # not the separate test split.
    fold_scores_test = []
    fold_scores_train = []

    print(f"\nEvaluating combination {idx + 1}/{len(all_params)}: {param_dict}")

    for train_idx, val_idx in skf.split(train_X_np, train_y_np):
        # Selecting rows with index arrays copies the data, so each fold
        # temporarily holds its own train/validation matrices.
        X_tr, X_val = train_X_np[train_idx], train_X_np[val_idx]
        y_tr, y_val = train_y_np[train_idx], train_y_np[val_idx]

        # Train model
        model = xgb.XGBClassifier(**param_dict)
        model.fit(X_tr, y_tr)

        # Score on both the fitted fold (to gauge overfitting) and the held-out fold
        y_pred_train = model.predict(X_tr)
        y_pred_val = model.predict(X_val)

        train_f1 = f1_score(y_tr, y_pred_train)
        val_f1 = f1_score(y_val, y_pred_val)

        fold_scores_train.append(train_f1)
        fold_scores_test.append(val_f1)

        # Release the per-fold copies before the next fit
        del model, X_tr, X_val, y_tr, y_val
        gc.collect()

    mean_test_f1 = np.mean(fold_scores_test)
    std_test_f1 = np.std(fold_scores_test)
    mean_train_f1 = np.mean(fold_scores_train)

    results.append({
        'params': param_dict.copy(),
        'mean_test_f1': mean_test_f1,
        'std_test_f1': std_test_f1,
        'mean_train_f1': mean_train_f1
    })

    print(f"  Mean Test F1: {mean_test_f1:.4f} (+/- {std_test_f1:.4f}), Train F1: {mean_train_f1:.4f}")

    # Strict comparison: on ties the earliest combination is kept
    if mean_test_f1 > best_score:
        best_score = mean_test_f1
        best_params = param_dict.copy()

# Sort results by test F1 score
results_sorted = sorted(results, key=lambda x: x['mean_test_f1'], reverse=True)

# Print all results
print("\n" + "="*60)
print("GRID SEARCH RESULTS (Sorted by Test F1 Score)")
print("="*60)

for rank, res in enumerate(results_sorted, 1):
    print(f"\nRank {rank}:")
    print(f"  Parameters:")
    print(f"    - n_estimators: {res['params']['n_estimators']}")
    print(f"    - learning_rate: {res['params']['learning_rate']}")
    print(f"    - max_depth: {res['params']['max_depth']}")
    print(f"    - min_child_weight: {res['params']['min_child_weight']}")
    print(f"    - subsample: {res['params']['subsample']}")
    print(f"    - colsample_bytree: {res['params']['colsample_bytree']}")
    print(f"  Mean Test F1: {res['mean_test_f1']:.4f} (+/- {res['std_test_f1']:.4f})")
    print(f"  Mean Train F1: {res['mean_train_f1']:.4f}")

print("\n" + "="*60)
print("BEST PARAMETERS")
print("="*60)
print(f"\nBest F1 Score (CV): {best_score:.4f}")
print(f"\nBest Parameters:")
for param, value in best_params.items():
    # Skip settings that do not describe model capacity
    if param not in ['objective', 'random_state', 'n_jobs', 'verbosity']:
        print(f"  - {param}: {value}")

# Train final model with best parameters on full training data
print("\n" + "="*60)
print("Training final model with best parameters...")
xgb_model = xgb.XGBClassifier(**best_params)
xgb_model.fit(train_X_np, train_y_np)

# Rebind the original names to the numpy arrays for the evaluation cells
train_X = train_X_np
test_X = test_X_np

print("\nXGBoost model trained successfully with best parameters!")
print(f"Model classes: {xgb_model.classes_}")
print(f"Number of features used: {xgb_model.n_features_in_}")

Training XGBoost model with Manual Grid Search (Memory Efficient)...
Total combinations to evaluate: 16
With 3-fold CV: 48 fits


Evaluating combination 1/16: {'n_estimators': 100, 'learning_rate': 0.05, 'max_depth': 4, 'min_child_weight': 1, 'subsample': 0.8, 'colsample_bytree': 0.8, 'objective': 'binary:logistic', 'random_state': 42, 'n_jobs': -1, 'verbosity': 0}
  Mean Test F1: 0.9231 (+/- 0.0008), Train F1: 0.9308

Evaluating combination 2/16: {'n_estimators': 100, 'learning_rate': 0.05, 'max_depth': 4, 'min_child_weight': 3, 'subsample': 0.8, 'colsample_bytree': 0.8, 'objective': 'binary:logistic', 'random_state': 42, 'n_jobs': -1, 'verbosity': 0}
  Mean Test F1: 0.9237 (+/- 0.0008), Train F1: 0.9306

Evaluating combination 3/16: {'n_estimators': 100, 'learning_rate': 0.05, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 0.8, 'colsample_bytree': 0.8, 'objective': 'binary:logistic', 'random_state': 42, 'n_jobs': -1, 'verbosity': 0}
  Mean Test F1: 0.9295 (+/- 0.0009), Train F1:

In [21]:
# MODEL EVALUATION - Training Set
# Scores the refitted model on the data it was trained on; compare against the
# test-set cell to judge how much the model overfits.

banner = "="*60
print("\n" + banner)
print("TRAINING SET EVALUATION")
print(banner)

# Hard class predictions and per-class probabilities
y_train_pred = xgb_model.predict(train_X)
y_train_proba = xgb_model.predict_proba(train_X)

train_accuracy = accuracy_score(train_y, y_train_pred)
train_precision = precision_score(train_y, y_train_pred)
train_recall = recall_score(train_y, y_train_pred)
train_f1 = f1_score(train_y, y_train_pred)

metric_lines = (
    ("\nAccuracy:  ", train_accuracy),
    ("Precision: ", train_precision),
    ("Recall:    ", train_recall),
    ("F1-Score:  ", train_f1),
)
for caption, score in metric_lines:
    print(f"{caption}{score:.4f}")

print("\nConfusion Matrix (Training):")
cm_train = confusion_matrix(train_y, y_train_pred)
print(cm_train)

# Matrix layout: rows are the true class, columns the predicted class
cell_captions = (
    ("\nTrue Negatives", 0, 0),
    ("False Positives", 0, 1),
    ("False Negatives", 1, 0),
    ("True Positives", 1, 1),
)
for caption, true_cls, pred_cls in cell_captions:
    print(f"{caption}: {cm_train[true_cls, pred_cls]}")


TRAINING SET EVALUATION

Accuracy:  0.9746
Precision: 0.9886
Recall:    0.9603
F1-Score:  0.9742

Confusion Matrix (Training):
[[19210   215]
 [  774 18704]]

True Negatives: 19210
False Positives: 215
False Negatives: 774
True Positives: 18704


In [22]:
# MODEL EVALUATION - Test Set
# Scores the refitted model on the held-out test split.

banner = "="*60
print("\n" + banner)
print("TEST SET EVALUATION")
print(banner)

# Hard class predictions and per-class probabilities
y_test_pred = xgb_model.predict(test_X)
y_test_proba = xgb_model.predict_proba(test_X)

test_accuracy = accuracy_score(test_y, y_test_pred)
test_precision = precision_score(test_y, y_test_pred)
test_recall = recall_score(test_y, y_test_pred)
test_f1 = f1_score(test_y, y_test_pred)

metric_lines = (
    ("\nAccuracy:  ", test_accuracy),
    ("Precision: ", test_precision),
    ("Recall:    ", test_recall),
    ("F1-Score:  ", test_f1),
)
for caption, score in metric_lines:
    print(f"{caption}{score:.4f}")

print("\nConfusion Matrix (Test):")
cm_test = confusion_matrix(test_y, y_test_pred)
print(cm_test)

# Matrix layout: rows are the true class, columns the predicted class
cell_captions = (
    ("\nTrue Negatives", 0, 0),
    ("False Positives", 0, 1),
    ("False Negatives", 1, 0),
    ("True Positives", 1, 1),
)
for caption, true_cls, pred_cls in cell_captions:
    print(f"{caption}: {cm_test[true_cls, pred_cls]}")

print("\n" + banner)
print("Detailed Classification Report (Test):")
print(banner)
print(classification_report(test_y, y_test_pred, target_names=['Not Refusal (0)', 'Refusal (1)']))


TEST SET EVALUATION

Accuracy:  0.9447
Precision: 0.9624
Recall:    0.9241
F1-Score:  0.9429

Confusion Matrix (Test):
[[2111   77]
 [ 162 1973]]

True Negatives: 2111
False Positives: 77
False Negatives: 162
True Positives: 1973

Detailed Classification Report (Test):
                 precision    recall  f1-score   support

Not Refusal (0)       0.93      0.96      0.95      2188
    Refusal (1)       0.96      0.92      0.94      2135

       accuracy                           0.94      4323
      macro avg       0.95      0.94      0.94      4323
   weighted avg       0.95      0.94      0.94      4323



In [None]:
# FEATURE IMPORTANCE ANALYSIS - XGBoost
# Ranks every model input by the importance score exposed by the fitted
# classifier and summarises how much each feature family contributes.

print("\n" + "="*60)
print("TOP FEATURE IMPORTANCE (XGBoost Gain/Importance)")
print("="*60)

# Feature names in the same order the columns were concatenated into the
# training matrix: engineered, then TF-IDF, then count-vectorizer features.
feature_names = list(train_engineered_scaled_df.columns) + list(train_tfidf_df.columns) + list(train_count_df.columns)
importances = xgb_model.feature_importances_

# The model was fitted on a bare numpy array, so names are matched to scores
# purely by position; a length mismatch means the pairing is wrong.
assert len(feature_names) == len(importances), (
    f"{len(feature_names)} feature names but {len(importances)} importance scores"
)

# Create feature importance dataframe (row labels keep the original column position)
feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': importances,
    'abs_importance': np.abs(importances)
}).sort_values('abs_importance', ascending=False)

print("\nTop 20 Most Important Features (by gain):")
print(feature_importance_df.head(20).to_string())

# Calculate importance by feature type. Membership in each source frame's
# columns is used for all three groups, so the totals do not depend on a
# naming-prefix convention and an engineered feature can never be counted
# in a vectorizer group by accident.
is_engineered = feature_importance_df['feature'].isin(train_engineered_scaled_df.columns)
is_tfidf = feature_importance_df['feature'].isin(train_tfidf_df.columns)
is_count = feature_importance_df['feature'].isin(train_count_df.columns)

engineered_importance = feature_importance_df.loc[is_engineered, 'importance'].sum()
tfidf_importance = feature_importance_df.loc[is_tfidf, 'importance'].sum()
count_importance = feature_importance_df.loc[is_count, 'importance'].sum()

# The percentage view assumes the importance scores sum to 1
print("\n\nFeature Importance by Type:")
print(f"  - Engineered Features: {engineered_importance:.4f} ({engineered_importance*100:.2f}%)")
print(f"  - TF-IDF Features: {tfidf_importance:.4f} ({tfidf_importance*100:.2f}%)")
print(f"  - Count Vectorizer Features: {count_importance:.4f} ({count_importance*100:.2f}%)")

print("\n\nTop 10 Engineered Features:")
top_engineered = feature_importance_df[is_engineered].head(10)
if len(top_engineered) > 0:
    print(top_engineered[['feature', 'importance']].to_string())

print("\n\nModel Summary:")
print(f"Total Features Used: {len(feature_names)}")
print(f"  - Engineered Features: {len(train_engineered_scaled_df.columns)}")
print(f"  - TF-IDF Features: {len(train_tfidf_df.columns)}")
print(f"  - Count Vectorizer Features: {len(train_count_df.columns)}")
print(f"\nModel Hyperparameters:")
print(f"  - Number of Trees: {xgb_model.n_estimators}")
print(f"  - Learning Rate: {xgb_model.learning_rate}")
print(f"  - Max Depth: {xgb_model.max_depth}")
# min_child_weight is tuned by the grid search, so report it with the others
print(f"  - Min Child Weight: {xgb_model.min_child_weight}")
print(f"  - Subsample Ratio: {xgb_model.subsample}")
print(f"  - Column Subsample: {xgb_model.colsample_bytree}")


TOP FEATURE IMPORTANCE (XGBoost Gain/Importance)

Top 20 Most Important Features (by gain):
                          feature  importance  abs_importance
4        refusal_keyword_at_start    0.105344        0.105344
4293                   count_1263    0.074194        0.074194
1917                   tfidf_1887    0.040396        0.040396
5         refusal_keyword_overall    0.011478        0.011478
14              punctuation_count    0.010863        0.010863
1405                   tfidf_1375    0.006698        0.006698
1829                   tfidf_1799    0.006223        0.006223
24             first_person_ratio    0.005816        0.005816
12                 sentence_count    0.004607        0.004607
1272                   tfidf_1242    0.004583        0.004583
3932                    count_902    0.004334        0.004334
156                     tfidf_126    0.004246        0.004246
3841                    count_811    0.004244        0.004244
4437                   count_1407    0.