In [None]:
# supress any warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# python imports
from collections import defaultdict

# third-party imports
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from xgboost import XGBClassifier

## Load Data

In [None]:
# Read the pre-split train and test sets from their CSV files under data/.
prompt_injection_train, prompt_injection_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/prompt_injection_train.csv',
        'data/prompt_injection_test.csv',
    )
)

In [None]:
# Separate each frame into its 'prompt' column (model input) and 'label' column (target).
X_train, y_train = prompt_injection_train['prompt'], prompt_injection_train['label']
X_test, y_test = prompt_injection_test['prompt'], prompt_injection_test['label']

## Creating vectors

In [None]:
# Turn the prompt text into TF-IDF features over unigrams and bigrams,
# lower-casing the text, dropping English stop words and capping the
# vocabulary at 10,000 terms.
tfidf = TfidfVectorizer(max_features=10000, ngram_range=(1,2), lowercase=True, stop_words='english')

# Vectorize straight from the source frames rather than from X_train / X_test:
# those names are rebound to sparse matrices right here, so reading them as
# input would make this cell fail when it is re-run on its own.
# The vectorizer is fitted on the training prompts only and then applied to
# the test prompts.
# NOTE(review): the vectorizer is fitted before GridSearchCV splits X_train
# into folds, so each validation fold has already influenced the vocabulary
# and IDF weights; putting the vectorizer and classifier in a Pipeline would
# avoid that.
X_train = tfidf.fit_transform(prompt_injection_train['prompt'])
X_test = tfidf.transform(prompt_injection_test['prompt'])

## Train a classification model

### Random Forest

In [None]:
# Exhaustive 5-fold grid search over random-forest hyper-parameters, using all
# cores and selecting the best candidate by accuracy.
# 'auto' is intentionally absent from max_features: for classifiers it was an
# alias of 'sqrt' (so it only duplicated work), and recent scikit-learn
# releases reject it, which would make every 'auto' candidate fail to fit.
# A fixed random_state makes the forests, and therefore the selected
# parameters and test metrics, reproducible across runs.
clf = GridSearchCV(RandomForestClassifier(random_state=42), param_grid={
    'n_estimators': [50, 100, 200, 500],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [4, 5, 6, 7, 8],
    'criterion': ['gini', 'entropy']
}, scoring='accuracy', cv=5, n_jobs=-1)

clf.fit(X_train, y_train)

In [None]:
# Predict on the held-out test set with the refitted best random forest
# and report the standard classification metrics.
y_pred = clf.predict(X_test)

for metric_label, metric_fn in (
    ('Accuracy:', accuracy_score),
    ('Precision:', precision_score),
    ('Recall:', recall_score),
    ('F1 score:', f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

In [None]:
# Confusion matrix normalized over the true labels (each row sums to 1).
# The title identifies the model, since every section of this notebook draws
# the same kind of plot; the trailing ';' hides the object repr in the output.
ConfusionMatrixDisplay.from_predictions(
    y_test, y_pred, cmap='Blues', normalize='true'
).ax_.set_title('Random Forest');

### Gradient Boosting

In [None]:
# Exhaustive 5-fold grid search over gradient-boosting hyper-parameters, using
# all cores and selecting the best candidate by accuracy.
# A fixed random_state seeds the underlying trees so that repeated runs select
# the same parameters and produce the same test metrics.
clf = GridSearchCV(GradientBoostingClassifier(random_state=42), param_grid={
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [2, 3, 5, 7],
    'n_estimators': [50, 100, 200]
}, scoring='accuracy', cv=5, n_jobs=-1)

clf.fit(X_train, y_train)

In [None]:
# Predict on the held-out test set with the refitted best gradient-boosting
# model and report the standard classification metrics.
y_pred = clf.predict(X_test)

for metric_label, metric_fn in (
    ('Accuracy:', accuracy_score),
    ('Precision:', precision_score),
    ('Recall:', recall_score),
    ('F1 score:', f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

In [None]:
# Confusion matrix normalized over the true labels (each row sums to 1).
# The title identifies the model, since every section of this notebook draws
# the same kind of plot; the trailing ';' hides the object repr in the output.
ConfusionMatrixDisplay.from_predictions(
    y_test, y_pred, cmap='Blues', normalize='true'
).ax_.set_title('Gradient Boosting');

### SVC

In [None]:
# Exhaustive 5-fold grid search over three SVC kernel families, selecting the
# best candidate by accuracy. C and gamma are swept on power-of-two grids:
# C in 2^-5 .. 2^15 and gamma in 2^-15 .. 2^3, both in steps of 2^2.
# probability=True is deliberately not set: it makes every fit run an extra
# internal 5-fold cross-validation to calibrate probabilities, and this
# notebook only ever calls predict(), which does not use them.
clf = GridSearchCV(SVC(), [
    {'kernel': ['rbf'], 'gamma': 2 ** np.arange(-15.0, 4.0, 2),
        'C': 2 ** np.arange(-5.0, 16.0, 2)},
    {'kernel': ['linear'], 'C': 2 ** np.arange(-5.0, 16.0, 2)},
    {'kernel': ['poly'], 'C': 2 ** np.arange(-5.0, 16.0, 2),
        'degree': [2, 3, 4, 5]}
], scoring='accuracy', cv=5, n_jobs=-1)

clf.fit(X_train, y_train)

In [None]:
# Predict on the held-out test set with the refitted best SVC
# and report the standard classification metrics.
y_pred = clf.predict(X_test)

for metric_label, metric_fn in (
    ('Accuracy:', accuracy_score),
    ('Precision:', precision_score),
    ('Recall:', recall_score),
    ('F1 score:', f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

In [None]:
# Confusion matrix normalized over the true labels (each row sums to 1).
# The title identifies the model, since every section of this notebook draws
# the same kind of plot; the trailing ';' hides the object repr in the output.
ConfusionMatrixDisplay.from_predictions(
    y_test, y_pred, cmap='Blues', normalize='true'
).ax_.set_title('SVC');

### XGBoost

In [None]:
# Exhaustive 5-fold grid search over XGBoost hyper-parameters, selecting the
# best candidate by accuracy.
# The search itself is parallelized across all cores (n_jobs=-1 below), so the
# estimator is limited to a single thread; letting both layers spawn threads
# creates contention that slows the search down.
# random_state pins the seed explicitly, since the grid includes row and
# column subsampling (subsample / colsample_bytree below 1.0).
clf = GridSearchCV(XGBClassifier(n_jobs=1, random_state=42), param_grid={
    'min_child_weight': [1, 5, 10],
    'gamma': [0.5, 1.0, 1.5, 2.0, 5.0],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'max_depth': [3, 4, 5]
}, scoring='accuracy', cv=5, n_jobs=-1)

clf.fit(X_train, y_train)

In [None]:
# Predict on the held-out test set with the refitted best XGBoost model
# and report the standard classification metrics.
y_pred = clf.predict(X_test)

for metric_label, metric_fn in (
    ('Accuracy:', accuracy_score),
    ('Precision:', precision_score),
    ('Recall:', recall_score),
    ('F1 score:', f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

In [None]:
# Confusion matrix normalized over the true labels (each row sums to 1).
# The title identifies the model, since every section of this notebook draws
# the same kind of plot; the trailing ';' hides the object repr in the output.
ConfusionMatrixDisplay.from_predictions(
    y_test, y_pred, cmap='Blues', normalize='true'
).ax_.set_title('XGBoost');

### Naive Bayes

In [None]:
# Multinomial Naive Bayes with its default settings, fitted directly on the
# TF-IDF features (no hyper-parameter search for this model).
clf = MultinomialNB()
clf.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out test set with the fitted Naive Bayes model
# and report the standard classification metrics.
y_pred = clf.predict(X_test)

for metric_label, metric_fn in (
    ('Accuracy:', accuracy_score),
    ('Precision:', precision_score),
    ('Recall:', recall_score),
    ('F1 score:', f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

In [None]:
# Confusion matrix normalized over the true labels (each row sums to 1).
# The title identifies the model, since every section of this notebook draws
# the same kind of plot; the trailing ';' hides the object repr in the output.
ConfusionMatrixDisplay.from_predictions(
    y_test, y_pred, cmap='Blues', normalize='true'
).ax_.set_title('Naive Bayes');