In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load your dataset
data = pd.read_csv("interaction_data.csv")

# Feature columns
features = ['click_rate', 'mouse_speed', 'scroll_depth', 'time_on_page', 'form_fills']

# Label column (1 for human, 0 for bot)
labels = data['is_human']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(data[features], labels, test_size=0.2, random_state=42)

# Initialize the model
model = RandomForestClassifier(n_estimators=1000, random_state=500)

# Train the model
model.fit(X_train, y_train)

# Make predictions
predictions = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, predictions))
print("Classification Report:\n", classification_report(y_test, predictions))


Accuracy: 0.9975
Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       201
         1.0       0.99      1.00      1.00       199

    accuracy                           1.00       400
   macro avg       1.00      1.00      1.00       400
weighted avg       1.00      1.00      1.00       400



In [16]:
import numpy as np
new_data = [0, 0, 0, 1.5, 25]
user_interaction_array = np.array(new_data).reshape(1, -1)
prediction = model.predict(user_interaction_array)

model.fit(user_interaction_array,prediction)



In [1]:
import xgboost as xgb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load your dataset
data = pd.read_csv("interaction_data.csv")

# Feature columns
features = ['click_rate', 'mouse_speed', 'scroll_depth', 'time_on_page', 'form_fills']

# Label column (1 for human, 0 for bot)
labels = data['is_human']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(data[features], labels, test_size=0.2, random_state=42)

# Initialize the XGBoost model
xgb_model = xgb.XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)

# Train the model
xgb_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = xgb_model.predict(X_test)
y_prob = xgb_model.predict_proba(X_test)[:, 1]  # Probabilities for human class (1)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.995
Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       201
         1.0       0.99      0.99      0.99       199

    accuracy                           0.99       400
   macro avg       0.99      0.99      0.99       400
weighted avg       0.99      0.99      0.99       400



In [19]:
import numpy as np

# Define thresholds
human_threshold = 0.9
bot_threshold = 0.1

# Function to classify user interaction
def classify_interaction(user_interaction):
    # Convert the interaction to the proper format (2D array)
    user_interaction_array = np.array(user_interaction).reshape(1, -1)
    
    # Get probability prediction from the model
    prob_human = xgb_model.predict_proba(user_interaction_array)[0][1]  # Probability for human class (1)
    
    print(prob_human)
    # Decision logic based on thresholds
    if prob_human >= human_threshold:
        return "Human"
    elif prob_human <= bot_threshold:
        return "Bot"
    else:
        return "Trigger CAPTCHA"

# Example usage
user_interaction_values = [1.36, 450.65, 0, 0, 255]  # Example interaction data
result = classify_interaction(user_interaction_values)
print(f"Classification Result: {result}")


0.04382743
Classification Result: Bot


In [8]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}

grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

print("Best parameters found: ", grid_search.best_params_)
best_model = grid_search.best_estimator_


Fitting 3 folds for each of 27 candidates, totalling 81 fits
Best parameters found:  {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100}


In [5]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LeakyReLU, BatchNormalization
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Load your dataset
data = pd.read_csv("interaction_data.csv")

# Feature columns
features = ['click_rate', 'mouse_speed', 'scroll_depth', 'time_on_page', 'form_fills']

# Label column (1 for human, 0 for bot)
labels = data['is_human']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(data[features], labels, test_size=0.2, random_state=42)

# Normalize the data
X_train = (X_train - X_train.min()) / (X_train.max() - X_train.min())
X_test = (X_test - X_test.min()) / (X_test.max() - X_test.min())

# Set dimensions for the input and noise
input_dim = X_train.shape[1]
noise_dim = 100

# Build the generator model
generator = Sequential([
    Dense(64, input_dim=noise_dim),
    LeakyReLU(alpha=0.2),
    BatchNormalization(momentum=0.8),
    Dense(128),
    LeakyReLU(alpha=0.2),
    BatchNormalization(momentum=0.8),
    Dense(256),
    LeakyReLU(alpha=0.2),
    BatchNormalization(momentum=0.8),
    Dense(input_dim, activation='tanh')
])

# Build the discriminator model
discriminator = Sequential([
    Dense(256, input_dim=input_dim),
    LeakyReLU(alpha=0.2),
    Dense(128),
    LeakyReLU(alpha=0.2),
    Dense(64),
    LeakyReLU(alpha=0.2),
    Dense(1, activation='sigmoid')
])

# Compile the discriminator
discriminator.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5), metrics=['accuracy'])

# Combine the generator and discriminator into the GAN model
discriminator.trainable = False
gan = Sequential([generator, discriminator])

# Compile the GAN model
gan.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))

# Training parameters
epochs = 10000
batch_size = 32

# Create arrays for the labels of real and fake data
real_labels = np.ones((batch_size, 1))
fake_labels = np.zeros((batch_size, 1))

# Training loop
for epoch in range(epochs):
    # Generate fake bot data
    noise = np.random.normal(0, 1, (batch_size, noise_dim))
    generated_data = generator.predict(noise)

    # Sample real human and bot data
    idx = np.random.randint(0, X_train.shape[0], batch_size)
    real_data = X_train.iloc[idx].values

    # Train the discriminator on real and fake data
    d_loss_real = discriminator.train_on_batch(real_data, real_labels)
    d_loss_fake = discriminator.train_on_batch(generated_data, fake_labels)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator (the generator's goal is to fool the discriminator)
    g_loss = gan.train_on_batch(noise, real_labels)

    # Print progress every 1000 epochs
    if epoch % 1000 == 0:
        print(f"Epoch {epoch} / {epochs} - Discriminator Loss: {d_loss[0]}, Accuracy: {100 * d_loss[1]}% - Generator Loss: {g_loss}")

# Fine-tune the discriminator for classification
discriminator.trainable = True
discriminator.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5), metrics=['accuracy'])

# Evaluate the discriminator as a classifier on test data
loss, accuracy = discriminator.evaluate(X_test, y_test)
print(f"Classifier Accuracy: {accuracy * 100:.2f}%")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step




Epoch 0 / 10000 - Discriminator Loss: 0.7039839625358582, Accuracy: 20.3125% - Generator Loss: [array(0.6966293, dtype=float32), array(0.6966293, dtype=float32), array(0.40625, dtype=float32)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [3

KeyboardInterrupt: 

In [10]:

import numpy as np
import keras 
from sklearn.preprocessing import StandardScaler
external_data = [0.77, 257.61, 409.48, 35.86, 2]
external_data_array = np.array(external_data).reshape(1, -1)
model = keras.models.load_model('your_model.h5')  # Replace with your model's filename
# Normalize the data if you normalized the training data
scaler = StandardScaler()  # Use the same scaler that was used for training data
external_data_scaled = scaler.fit_transform(external_data_array) 

# 3. Make Predictions
predictions = model.predict(external_data_scaled)

# 4. Apply Threshold Logic
human_threshold = 0.9
bot_threshold = 0.1

# Assuming predictions are probabilities (e.g., using a softmax or sigmoid output layer)
prob_human = predictions[0][0]  # Probability of being human, assuming 0 is the human class

# Classification Logic
if prob_human >= human_threshold:
    print("The interaction is classified as human.")
elif prob_human <= bot_threshold:
    print("The interaction is classified as a bot.")
else:
    print("Uncertain prediction. Triggering CAPTCHA for verification.")

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'your_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)