In [4]:
import pipeline
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

In [5]:
# Load both splits through the shared pipeline helper, then print the same
# three-part overview (shape, first rows, label counts) for each of them.
def _describe_split(split_df, shape_header, head_header, distribution_header):
    """Print shape, head() and 'label' value counts of one split under the given headers."""
    print(shape_header, split_df.shape)
    print(head_header)
    print(split_df.head())
    print(distribution_header)
    print(split_df['label'].value_counts())


train_df, test_df = pipeline.load_data()

_describe_split(
    train_df,
    "Training set shape:",
    "\nFirst few rows of training set:",
    "\nTraining set class distribution:",
)

# Visual separator between the two summaries
print("\n" + "="*50)

_describe_split(
    test_df,
    "\nTest set shape:",
    "\nFirst few rows of test set:",
    "\nTest set class distribution:",
)

Training set shape: (38903, 2)

First few rows of training set:
                                            response  label
0  I'm happy to help! Cutting a flawless diamond ...      1
1  I'm sorry, but I cannot assist with questions ...      1
2  [ASS] Unfortunately, the character Lara Croft ...      0
3  I apologize, but I do not feel comfortable des...      1
4  Here are some general tips for overcoming the ...      0

Training set class distribution:
label
1    19478
0    19425
Name: count, dtype: int64


Test set shape: (4323, 2)

First few rows of test set:
                                            response  label
0  I'm sorry, I can't provide you with any contac...      1
1  That's a detailed and thoughtful guide, AI. Gi...      0
2  Certainly, I'd be happy to help you navigate t...      0
3  Hello! I'm here to assist you with your questi...      1
4  I cannot simulate or provide actual data from ...      1

Test set class distribution:
label
0    2188
1    2135
Name: count, dtype: int64

In [6]:
def _add_preprocessed_columns(df):
    """Add 'processed_response' and 'tokens' columns to ``df`` in place.

    ``pipeline.preprocess_text`` is called exactly once per response. Element
    [0] of its result is stored in 'processed_response' and element [1] in
    'tokens'. (Previously the function was invoked twice per row, once for
    each element, doubling the preprocessing cost.)
    """
    results = [pipeline.preprocess_text(text) for text in df['response']]
    # Build each column on df's own index so the assignment stays row-aligned
    # even when the frame does not carry a default RangeIndex.
    df['processed_response'] = pd.Series([r[0] for r in results], index=df.index)
    df['tokens'] = pd.Series([r[1] for r in results], index=df.index)


print("Preprocessing training data...")
_add_preprocessed_columns(train_df)

print("Preprocessing test data...")
_add_preprocessed_columns(test_df)

print("Preprocessing complete!")
print("\nSample preprocessed responses:")
# Show at most three examples; bounded by the frame length so a tiny
# (e.g. subsampled) training set does not raise IndexError.
for i in range(min(3, len(train_df))):
    print(f"\nOriginal: {train_df['response'].iloc[i][:100]}")
    print(f"Processed: {train_df['processed_response'].iloc[i][:100]}")

Preprocessing training data...
Preprocessing test data...
Preprocessing complete!

Sample preprocessed responses:

Original: I'm happy to help! Cutting a flawless diamond can be a challenging task, and it requires a great dea
Processed: i 'm happy to help cutting a flawless diamond can be a challenging task and it requires a great deal

Original: I'm sorry, but I cannot assist with questions that seek to amplify or validate harmful myths or disc
Processed: i 'm sorry but i can not assist with question that seek to amplify or validate harmful myth or discr

Original: [ASS] Unfortunately, the character Lara Croft from the video game Tomb Raider does not have a passpo
Processed: as unfortunately the character lara croft from the video game tomb raider doe not have a passport in


In [7]:
# FEATURE EXTRACTION
# One call into the shared pipeline builds the hand-engineered feature tables
# for both splits; the result is unpacked as (train, test).
engineered_feature_frames = pipeline.extract_all_features(train_df, test_df)
train_engineered_features, test_engineered_features = engineered_feature_frames

Extracting length features...
Extracting refusal keyword features...
Extracting sentiment features...
Extracting structure features...
Extracting apologetic tone features...

Feature extraction complete!


In [8]:
# VECTORIZATION - TF-IDF
# Build the TF-IDF feature frames for both splits via the shared pipeline
# helper; the result is unpacked as (train, test).

tfidf_frames = pipeline.vectorize_tfidf(train_df, test_df)
train_tfidf_df, test_tfidf_df = tfidf_frames

print("\nVectorization complete!")

Generating TF-IDF features...
TF-IDF shape - Train: (38903, 3000), Test: (4323, 3000)

Vectorization complete!


In [9]:
# FEATURE COMBINATION - Combine all engineered features
#
# Steps in this cell:
#   1. Report the engineered (non-vectorized) feature tables.
#   2. Min-max scale them (fit on train only, applied to both splits).
#   3. Concatenate scaled engineered features with the TF-IDF features.
#   4. Fit a MultinomialNB classifier on the combined training matrix.

# Combine all engineered features (non-vectorized)
print("Engineered features shape:")
print(f"Train: {train_engineered_features.shape}")
print(f"Test: {test_engineered_features.shape}")

# Display engineered feature names
print("\nEngineered features:")
print(train_engineered_features.columns.tolist())

# Scale engineered features to [0, 1] range for MultinomialNB compatibility
# MultinomialNB requires non-negative values.
# clip=True matters for the test split: the scaler is fitted on train only, so
# a test value below the training minimum (or above the maximum) would
# otherwise be mapped outside [0, 1], i.e. possibly to a negative number.
scaler = MinMaxScaler(clip=True)
train_engineered_scaled = scaler.fit_transform(train_engineered_features)
test_engineered_scaled = scaler.transform(test_engineered_features)

# The scaler returns plain arrays; wrap them back into frames with the
# original column names so they can be concatenated with the TF-IDF frames.
train_engineered_scaled_df = pd.DataFrame(train_engineered_scaled, columns=train_engineered_features.columns)
test_engineered_scaled_df = pd.DataFrame(test_engineered_scaled, columns=test_engineered_features.columns)

# For Naive Bayes, we'll use TF-IDF with engineered features
# Note: Count vectorizer features are redundant with TF-IDF for Naive Bayes
train_X = pd.concat([
    train_engineered_scaled_df,
    train_tfidf_df
], axis=1)

test_X = pd.concat([
    test_engineered_scaled_df,
    test_tfidf_df
], axis=1)

train_y = train_df['label']
test_y = test_df['label']

# pd.concat(axis=1) aligns on the index; if the two inputs carried different
# indexes the result would silently gain extra, NaN-padded rows. Fail loudly
# here instead of deep inside the model fit.
assert train_X.shape[0] == len(train_y), "train feature/label row count mismatch after concat"
assert test_X.shape[0] == len(test_y), "test feature/label row count mismatch after concat"

print("\n" + "="*50)
print("FINAL FEATURE SET FOR NAIVE BAYES")
print("="*50)
print(f"Total features: {train_X.shape[1]}")
print(f"Training samples: {train_X.shape[0]}")
print(f"Test samples: {test_X.shape[0]}")
print(f"\nFeature breakdown:")
print(f"  - Engineered features (scaled): {train_engineered_scaled_df.shape[1]}")
print(f"  - TF-IDF features: {train_tfidf_df.shape[1]}")

# Train the Naive Bayes model (alpha=1.0 is the additive smoothing strength)
naive_bayes_model = MultinomialNB(alpha=1.0)
naive_bayes_model.fit(train_X, train_y)

print("\nNaive Bayes model trained successfully!")
print(f"Model classes: {naive_bayes_model.classes_}")

Engineered features shape:
Train: (38903, 23)
Test: (4323, 23)

Engineered features:
['response_length', 'word_count', 'avg_word_length', 'char_per_word', 'refusal_keyword_at_start', 'refusal_keyword_overall', 'has_any_refusal_keyword', 'sentiment_polarity', 'sentiment_subjectivity', 'is_negative_sentiment', 'is_neutral_sentiment', 'is_positive_sentiment', 'sentence_count', 'avg_sentence_length', 'punctuation_count', 'question_mark_count', 'exclamation_count', 'uppercase_ratio', 'has_multiple_sentences', 'apology_word_count', 'formal_word_count', 'is_apologetic', 'is_formal']

FINAL FEATURE SET FOR NAIVE BAYES
Total features: 3023
Training samples: 38903
Test samples: 4323

Feature breakdown:
  - Engineered features (scaled): 23
  - TF-IDF features: 3000

Naive Bayes model trained successfully!
Model classes: [0 1]


In [10]:
# Score the fitted model on the data it was trained on; comparing these
# numbers with the held-out results shows how much the model overfits.
y_train_pred = naive_bayes_model.predict(train_X)

# Same four metrics, evaluated in the same order, unpacked into the
# notebook-level names that later cells may rely on.
train_accuracy, train_precision, train_recall, train_f1 = (
    metric(train_y, y_train_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Assemble the report first, then emit it in one go.
train_report_lines = [
    "="*50,
    "TRAINING SET PERFORMANCE",
    "="*50,
    f"Accuracy:  {train_accuracy:.4f}",
    f"Precision: {train_precision:.4f}",
    f"Recall:    {train_recall:.4f}",
    f"F1-Score:  {train_f1:.4f}",
    "\nConfusion Matrix (Training):",
]
print("\n".join(train_report_lines))
print(confusion_matrix(train_y, y_train_pred))

TRAINING SET PERFORMANCE
Accuracy:  0.9121
Precision: 0.9577
Recall:    0.8625
F1-Score:  0.9076

Confusion Matrix (Training):
[[18683   742]
 [ 2678 16800]]


In [11]:
# Score the fitted model on the held-out test split.
y_test_pred = naive_bayes_model.predict(test_X)

# Same four metrics, evaluated in the same order, unpacked into the
# notebook-level names that later cells may rely on.
test_accuracy, test_precision, test_recall, test_f1 = (
    metric(test_y, y_test_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Assemble the headline report first, then emit it in one go.
test_report_lines = [
    "="*50,
    "TEST SET PERFORMANCE",
    "="*50,
    f"Accuracy:  {test_accuracy:.4f}",
    f"Precision: {test_precision:.4f}",
    f"Recall:    {test_recall:.4f}",
    f"F1-Score:  {test_f1:.4f}",
    "\nConfusion Matrix (Test):",
]
print("\n".join(test_report_lines))
print(confusion_matrix(test_y, y_test_pred))

# Per-class breakdown; target_names are listed in label order 0, 1.
print("\nDetailed Classification Report (Test):")
class_display_names = ['Not Refusal (0)', 'Refusal (1)']
print(classification_report(test_y, y_test_pred, target_names=class_display_names))

TEST SET PERFORMANCE
Accuracy:  0.9100
Precision: 0.9556
Recall:    0.8576
F1-Score:  0.9040

Confusion Matrix (Test):
[[2103   85]
 [ 304 1831]]

Detailed Classification Report (Test):
                 precision    recall  f1-score   support

Not Refusal (0)       0.87      0.96      0.92      2188
    Refusal (1)       0.96      0.86      0.90      2135

       accuracy                           0.91      4323
      macro avg       0.91      0.91      0.91      4323
   weighted avg       0.91      0.91      0.91      4323

