In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore')
import pipeline

In [2]:
# Fetch both splits; pipeline.load_data() is unpacked as a (train, test) pair
train_df, test_df = pipeline.load_data()

# Training split overview: dimensions, leading rows, and per-label row counts
print("Training set shape:", train_df.shape)
print("\nFirst few rows of training set:", train_df.head(), sep="\n")
print("\nTraining set class distribution:", train_df['label'].value_counts(), sep="\n")

# Divider line between the two reports
print("\n" + "="*50)

# Same overview for the test split
print("\nTest set shape:", test_df.shape)
print("\nFirst few rows of test set:", test_df.head(), sep="\n")
print("\nTest set class distribution:", test_df['label'].value_counts(), sep="\n")

Training set shape: (38903, 2)

First few rows of training set:
                                            response  label
0  I'm happy to help! Cutting a flawless diamond ...      1
1  I'm sorry, but I cannot assist with questions ...      1
2  [ASS] Unfortunately, the character Lara Croft ...      0
3  I apologize, but I do not feel comfortable des...      1
4  Here are some general tips for overcoming the ...      0

Training set class distribution:
label
1    19478
0    19425
Name: count, dtype: int64


Test set shape: (4323, 2)

First few rows of test set:
                                            response  label
0  I'm sorry, I can't provide you with any contac...      1
1  That's a detailed and thoughtful guide, AI. Gi...      0
2  Certainly, I'd be happy to help you navigate t...      0
3  Hello! I'm here to assist you with your questi...      1
4  I cannot simulate or provide actual data from ...      1

Test set class distribution:
label
0    2188
1    2135
Name: count, dt

In [3]:
# PREPROCESSING PIPELINE
# Adds two columns to each split, both derived from 'response':
#   - 'processed_response': element [0] of pipeline.preprocess_text(response)
#   - 'tokens':             element [1] of pipeline.preprocess_text(response)
def _add_preprocessed_columns(df):
    """Run pipeline.preprocess_text once per row and store both of its outputs.

    Previously the function was invoked twice per response (once to read
    element [0], once to read element [1]), doubling the preprocessing time.
    Calling it once and splitting the result yields the same columns,
    assuming preprocess_text is deterministic (TODO confirm in pipeline.py).

    The frame is modified in place; nothing is returned.
    """
    preprocessed = df['response'].apply(pipeline.preprocess_text)
    df['processed_response'] = preprocessed.apply(lambda result: result[0])
    df['tokens'] = preprocessed.apply(lambda result: result[1])


print("Preprocessing training data...")
_add_preprocessed_columns(train_df)

print("Preprocessing test data...")
_add_preprocessed_columns(test_df)

print("Preprocessing complete!")
print("\nSample preprocessed responses:")
# Show the first 100 characters of a few responses before and after preprocessing
for i in range(3):
    print(f"\nOriginal: {train_df['response'].iloc[i][:100]}")
    print(f"Processed: {train_df['processed_response'].iloc[i][:100]}")

Preprocessing training data...
Preprocessing test data...
Preprocessing complete!

Sample preprocessed responses:

Original: I'm happy to help! Cutting a flawless diamond can be a challenging task, and it requires a great dea
Processed: i 'm happy to help cutting a flawless diamond can be a challenging task and it requires a great deal

Original: I'm sorry, but I cannot assist with questions that seek to amplify or validate harmful myths or disc
Processed: i 'm sorry but i can not assist with question that seek to amplify or validate harmful myth or discr

Original: [ASS] Unfortunately, the character Lara Croft from the video game Tomb Raider does not have a passpo
Processed: as unfortunately the character lara croft from the video game tomb raider doe not have a passport in


In [4]:
# FEATURE EXTRACTION
# pipeline.extract_all_features takes both splits and its result is unpacked
# into one engineered-feature table per split (train first, then test).
(train_engineered_features,
 test_engineered_features) = pipeline.extract_all_features(train_df, test_df)

Extracting length features...
Extracting refusal keyword features...
Extracting sentiment features...
Extracting structure features...
Extracting apologetic tone features...

Feature extraction complete!


In [5]:
# VECTORIZATION - TF-IDF and Count Vectorizer
# Each pipeline helper receives both splits and its result is unpacked into a
# (train, test) pair of feature tables.
# NOTE(review): presumably the vectorizers are fit on the training split only;
# confirm in pipeline.py.
(train_tfidf_df,
 test_tfidf_df) = pipeline.vectorize_tfidf(train_df, test_df)
(train_count_df,
 test_count_df) = pipeline.vectorize_count(train_df, test_df)
print("\nVectorization complete!")

Generating TF-IDF features...
TF-IDF shape - Train: (38903, 3000), Test: (4323, 3000)

Generating Count Vectorizer features...
Count Vectorizer shape - Train: (38903, 2000), Test: (4323, 2000)

Vectorization complete!


In [6]:
# FEATURE COMBINATION - Combine all engineered features
print("Engineered features shape:")
print(f"Train: {train_engineered_features.shape}")
print(f"Test: {test_engineered_features.shape}")

# Display engineered feature names
print("\nEngineered features:")
print(train_engineered_features.columns.tolist())

# Min-max scale the engineered features. The scaler is fit on the training
# split only and merely applied to the test split, so test values can fall
# slightly outside [0, 1]. Tree ensembles split on thresholds and are not
# affected by this kind of monotonic rescaling, so the step is not required
# for Random Forest (see the note printed at the end of this cell); it is kept
# so the resulting feature matrix is unchanged.
scaler_engineered = MinMaxScaler()
train_engineered_scaled = scaler_engineered.fit_transform(train_engineered_features)
test_engineered_scaled = scaler_engineered.transform(test_engineered_features)

# The scaler returns plain arrays; wrap them back into frames with the original
# column names. These frames get a fresh 0..n-1 row index.
train_engineered_scaled_df = pd.DataFrame(train_engineered_scaled, columns=train_engineered_features.columns)
test_engineered_scaled_df = pd.DataFrame(test_engineered_scaled, columns=test_engineered_features.columns)

# Final design matrices: scaled engineered features + TF-IDF + raw counts,
# joined column-wise. Column order here defines the feature order seen by the model.
train_X = pd.concat([
    train_engineered_scaled_df,
    train_tfidf_df,
    train_count_df
], axis=1)

test_X = pd.concat([
    test_engineered_scaled_df,
    test_tfidf_df,
    test_count_df
], axis=1)

train_y = train_df['label']
test_y = test_df['label']

# pd.concat(axis=1) aligns rows on the index. If the vectorizer frames carried
# a different index than the freshly built scaled frames, the outer join would
# silently add NaN-padded rows. Fail loudly instead of training on a
# misaligned matrix.
assert len(train_X) == len(train_y), (
    f"train_X has {len(train_X)} rows but train_y has {len(train_y)}; "
    "feature frames are not index-aligned"
)
assert len(test_X) == len(test_y), (
    f"test_X has {len(test_X)} rows but test_y has {len(test_y)}; "
    "feature frames are not index-aligned"
)

print("\n" + "="*60)
print("FINAL FEATURE SET FOR RANDOM FOREST")
print("="*60)
print(f"Total features: {train_X.shape[1]}")
print(f"Training samples: {train_X.shape[0]}")
print(f"Test samples: {test_X.shape[0]}")
print("\nFeature breakdown:")
print(f"  - Engineered features (scaled): {train_engineered_scaled_df.shape[1]}")
print(f"  - TF-IDF features: {train_tfidf_df.shape[1]}")
print(f"  - Count Vectorizer features: {train_count_df.shape[1]}")
print("\nNote: Random Forest does not require feature normalization/scaling")

Engineered features shape:
Train: (38903, 23)
Test: (4323, 23)

Engineered features:
['response_length', 'word_count', 'avg_word_length', 'char_per_word', 'refusal_keyword_at_start', 'refusal_keyword_overall', 'has_any_refusal_keyword', 'sentiment_polarity', 'sentiment_subjectivity', 'is_negative_sentiment', 'is_neutral_sentiment', 'is_positive_sentiment', 'sentence_count', 'avg_sentence_length', 'punctuation_count', 'question_mark_count', 'exclamation_count', 'uppercase_ratio', 'has_multiple_sentences', 'apology_word_count', 'formal_word_count', 'is_apologetic', 'is_formal']

FINAL FEATURE SET FOR RANDOM FOREST
Total features: 5023
Training samples: 38903
Test samples: 4323

Feature breakdown:
  - Engineered features (scaled): 23
  - TF-IDF features: 3000
  - Count Vectorizer features: 2000

Note: Random Forest does not require feature normalization/scaling


In [7]:
# MODEL TRAINING - Random Forest Classifier
# Hyperparameters are fixed by hand in this cell; no search is run here.

print("Training Random Forest Classifier...")
print("Using Random Forest with optimized hyperparameters for text classification")

random_forest_model = RandomForestClassifier(
    # Ensemble size and reproducibility
    n_estimators=100,
    random_state=42,
    # Tree-growth limits: cap depth and require a minimum number of samples
    # both to split a node and to form a leaf
    max_depth=30,
    min_samples_split=10,
    min_samples_leaf=5,
    # Candidate features examined at each split
    max_features='sqrt',
    # Execution settings: all CPU cores, no progress output
    n_jobs=-1,
    verbose=0,
)

# Fit on the combined feature matrix built in the previous cell
random_forest_model.fit(train_X, train_y)

# Report what the fitted estimator recorded about the training run
print("Random Forest model trained successfully!")
print(f"Model classes: {random_forest_model.classes_}")
print(f"Number of features used: {random_forest_model.n_features_in_}")
print(f"Number of trees: {len(random_forest_model.estimators_)}")

Training Random Forest Classifier...
Using Random Forest with optimized hyperparameters for text classification
Random Forest model trained successfully!
Model classes: [0 1]
Number of features used: 5023
Number of trees: 100


In [8]:
# MODEL EVALUATION - Training Set
# Scores the fitted forest on the same rows it was trained on, so these numbers
# describe fit quality rather than generalization.

print("\n" + "="*60)
print("TRAINING SET EVALUATION")
print("="*60)

# Hard labels drive the metrics below; class probabilities are kept alongside
y_train_pred = random_forest_model.predict(train_X)
y_train_proba = random_forest_model.predict_proba(train_X)

# Evaluate the four headline metrics in one pass, in the order they are printed
train_accuracy, train_precision, train_recall, train_f1 = (
    metric(train_y, y_train_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"\nAccuracy:  {train_accuracy:.4f}")
print(f"Precision: {train_precision:.4f}")
print(f"Recall:    {train_recall:.4f}")
print(f"F1-Score:  {train_f1:.4f}")

# Rows are true labels, columns are predicted labels (0 first, then 1), which
# is how the four cells are named in the printout below
print("\nConfusion Matrix (Training):")
cm_train = confusion_matrix(train_y, y_train_pred)
print(cm_train)
print(f"\nTrue Negatives: {cm_train[0,0]}")
print(f"False Positives: {cm_train[0,1]}")
print(f"False Negatives: {cm_train[1,0]}")
print(f"True Positives: {cm_train[1,1]}")


TRAINING SET EVALUATION

Accuracy:  0.9551
Precision: 0.9843
Recall:    0.9251
F1-Score:  0.9538

Confusion Matrix (Training):
[[19138   287]
 [ 1458 18020]]

True Negatives: 19138
False Positives: 287
False Negatives: 1458
True Positives: 18020


In [9]:
# MODEL EVALUATION - Test Set
# Scores the fitted forest on the held-out test matrix.

print("\n" + "="*60)
print("TEST SET EVALUATION")
print("="*60)

# Hard labels drive the metrics below; class probabilities are kept alongside
y_test_pred = random_forest_model.predict(test_X)
y_test_proba = random_forest_model.predict_proba(test_X)

# Evaluate the four headline metrics in one pass, in the order they are printed
test_accuracy, test_precision, test_recall, test_f1 = (
    metric(test_y, y_test_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"\nAccuracy:  {test_accuracy:.4f}")
print(f"Precision: {test_precision:.4f}")
print(f"Recall:    {test_recall:.4f}")
print(f"F1-Score:  {test_f1:.4f}")

# Rows are true labels, columns are predicted labels (0 first, then 1), which
# is how the four cells are named in the printout below
print("\nConfusion Matrix (Test):")
cm_test = confusion_matrix(test_y, y_test_pred)
print(cm_test)
print(f"\nTrue Negatives: {cm_test[0,0]}")
print(f"False Positives: {cm_test[0,1]}")
print(f"False Negatives: {cm_test[1,0]}")
print(f"True Positives: {cm_test[1,1]}")

# Per-class precision / recall / F1 with human-readable class names
print("\n" + "="*60)
print("Detailed Classification Report (Test):")
print("="*60)
class_labels = ['Not Refusal (0)', 'Refusal (1)']
print(classification_report(test_y, y_test_pred, target_names=class_labels))


TEST SET EVALUATION

Accuracy:  0.9297
Precision: 0.9645
Recall:    0.8904
F1-Score:  0.9260

Confusion Matrix (Test):
[[2118   70]
 [ 234 1901]]

True Negatives: 2118
False Positives: 70
False Negatives: 234
True Positives: 1901

Detailed Classification Report (Test):
                 precision    recall  f1-score   support

Not Refusal (0)       0.90      0.97      0.93      2188
    Refusal (1)       0.96      0.89      0.93      2135

       accuracy                           0.93      4323
      macro avg       0.93      0.93      0.93      4323
   weighted avg       0.93      0.93      0.93      4323



In [10]:
# FEATURE IMPORTANCE ANALYSIS - Random Forest Feature Importances
# Ranks every model input by the forest's feature_importances_ and reports how
# the total importance is divided between the three feature groups.

print("\n" + "="*60)
print("TOP FEATURE IMPORTANCE (Random Forest Feature Importances)")
print("="*60)

# Feature names in the same order the three frames were concatenated into train_X
feature_names = list(train_engineered_scaled_df.columns) + list(train_tfidf_df.columns) + list(train_count_df.columns)
importances = random_forest_model.feature_importances_

# Create feature importance dataframe, most important first
feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': importances
}).sort_values('importance', ascending=False)

print("\nTop 30 Most Important Features:")
print(feature_importance_df.head(30).to_string())

# Assign each feature to its group by membership in the source frame's columns.
# The earlier prefix regex ('^(response_|refusal_|...)') did not match
# 'word_count', 'avg_word_length', 'char_per_word' or 'avg_sentence_length',
# so those engineered features were left out of the engineered total and of
# the engineered ranking. Membership in the real column lists cannot drift
# from the feature set.
is_engineered = feature_importance_df['feature'].isin(train_engineered_scaled_df.columns)
is_tfidf = feature_importance_df['feature'].isin(train_tfidf_df.columns)
is_count = feature_importance_df['feature'].isin(train_count_df.columns)

engineered_importance = feature_importance_df.loc[is_engineered, 'importance'].sum()

tfidf_importance = feature_importance_df.loc[is_tfidf, 'importance'].sum()

count_importance = feature_importance_df.loc[is_count, 'importance'].sum()

print("\n" + "="*60)
print("Feature Importance by Category:")
print("="*60)
print(f"Engineered Features:     {engineered_importance:.4f} ({engineered_importance*100:.2f}%)")
print(f"TF-IDF Features:         {tfidf_importance:.4f} ({tfidf_importance*100:.2f}%)")
print(f"Count Vectorizer Features: {count_importance:.4f} ({count_importance*100:.2f}%)")

print("\n" + "="*60)
print("Top Engineered Features:")
print("="*60)
# The frame is already sorted by importance, so head(15) gives the top 15
top_engineered = feature_importance_df[is_engineered].head(15)
print(top_engineered.to_string())

print("\n" + "="*60)
print("Model Summary:")
print("="*60)
print(f"Total Features Used: {len(feature_names)}")
print(f"  - Engineered Features: {len(train_engineered_scaled_df.columns)}")
print(f"  - TF-IDF Features: {len(train_tfidf_df.columns)}")
print(f"  - Count Vectorizer Features: {len(train_count_df.columns)}")
print("\nModel Hyperparameters:")
print(f"  - Number of Trees: {random_forest_model.n_estimators}")
print(f"  - Max Depth: {random_forest_model.max_depth}")
print(f"  - Min Samples Split: {random_forest_model.min_samples_split}")
print(f"  - Min Samples Leaf: {random_forest_model.min_samples_leaf}")
print(f"  - Max Features: {random_forest_model.max_features}")


TOP FEATURE IMPORTANCE (Random Forest Feature Importances)

Top 30 Most Important Features:
                       feature  importance
5      refusal_keyword_overall    0.040656
4     refusal_keyword_at_start    0.031940
14           punctuation_count    0.027911
12              sentence_count    0.026360
1                   word_count    0.025414
6      has_any_refusal_keyword    0.024025
442                  tfidf_419    0.023122
0              response_length    0.020876
1702                tfidf_1679    0.019113
3298                 count_275    0.016108
3843                 count_820    0.014585
1995                tfidf_1972    0.013970
3832                 count_809    0.013935
20           formal_word_count    0.013112
3668                 count_645    0.013009
4745                count_1722    0.011629
1261                tfidf_1238    0.010926
1260                tfidf_1237    0.010387
431                  tfidf_408    0.009935
22                   is_formal    0.009713
3829