In [54]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pickle

# Load the dataset
df = pd.read_csv("behavioral_dataset.csv")

# Handle missing values
df.fillna(df.mean(), inplace=True)

# Define feature columns and target (risk score)
FEATURE_NAMES = ["mean_dwell", "std_dwell", "mean_flight", "std_flight", 
                 "avg_mouse_speed", "tab_switch_count", "copy_events", 
                 "paste_events", "inactivity_time"]

X = df[FEATURE_NAMES]
y = df['label'] * 100  # Convert 0/1 labels to risk scores (0-100)

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Train an SVR model
svr = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=5)
svr.fit(X_train, y_train)

# Evaluate the model
y_pred = svr.predict(X_test)
print(f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}")
print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}")

# Save the model and scaler
with open("svr_model.pkl", "wb") as f:
    pickle.dump(svr, f)

with open("scaler.pkl", "wb") as f:
    pickle.dump({"scaler": scaler, "feature_names": FEATURE_NAMES}, f)


Mean Absolute Error: 1998.9056363267816
Mean Squared Error: 7405931.618823596


In [55]:
raw_score = svr.predict([[19,7,95,90,0,3,2,1,70]])
min_risk=0
max_risk=100

In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import pickle

# Load the dataset
df = pd.read_csv("behavioral_dataset.csv")
df.fillna(df.mean(), inplace=True)

FEATURE_NAMES = ["mean_dwell", "std_dwell", "mean_flight", "std_flight", 
                 "avg_mouse_speed", "tab_switch_count", "copy_events", 
                 "paste_events", "inactivity_time"]

X = df[FEATURE_NAMES]
y = df['label']  # Keep as binary (0, 1)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Train an SVM classifier with probability estimates
svm = SVC(kernel='rbf')
svm.fit(X_train, y_train)

y_pred = svm.predict(X_test)
print(classification_report(y_test, y_pred))

# Save the model and scaler
with open("svm_model.pkl", "wb") as f:
    pickle.dump(svm, f)

with open("scaler.pkl", "wb") as f:
    pickle.dump({"scaler": scaler, "feature_names": FEATURE_NAMES}, f)


              precision    recall  f1-score   support

          12       0.00      0.00      0.00       0.0
          15       0.00      0.00      0.00       1.0
          25       0.00      0.00      0.00       0.0
          30       0.00      0.00      0.00       1.0
          75       0.00      0.00      0.00       1.0
          80       0.00      0.00      0.00       0.0

    accuracy                           0.00       3.0
   macro avg       0.00      0.00      0.00       3.0
weighted avg       0.00      0.00      0.00       3.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [39]:
import numpy as np
arr=np.array([19,7,25,10,30,3,2,1,70,0])
svr.predict([[20,89,100,0,39,9,2,1,70]])

array([57.3868302])

In [73]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle

# Load the dataset
df = pd.read_csv("behavioral_dataset.csv")

# Handle missing values
df.fillna(df.mean(), inplace=True)

# Define the feature columns
FEATURE_NAMES = [
    "mean_dwell", "std_dwell", "mean_flight", "std_flight", 
    "avg_mouse_speed", "tab_switch_count", "copy_events", 
    "paste_events", "inactivity_time"
]

# Use continuous risk scores as targets. 
# Ensure your dataset has a continuous 'risk_score' column in the range 0-100.
# If not, you'll need to create such labels.
X = df[FEATURE_NAMES]
y = df["label"]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build a simple feedforward neural network
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='linear'))  # Linear output for regression

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train the model
model.fit(X_train_scaled, y_train, epochs=100, batch_size=8, validation_data=(X_test_scaled, y_test))

# Evaluate the model
loss, mae = model.evaluate(X_test_scaled, y_test)
print(f"Test MAE: {mae}")
# Save the neural network model and scaler
model.save("nn_model.h5")
with open("nn_scaler.pkl", "wb") as f:
    pickle.dump({"scaler": scaler, "feature_names": FEATURE_NAMES}, f)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 2932.1956 - mae: 43.6184 - val_loss: 2224.6887 - val_mae: 39.7156
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step - loss: 2925.1953 - mae: 43.5450 - val_loss: 2219.0796 - val_mae: 39.6508
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step - loss: 2918.3870 - mae: 43.4727 - val_loss: 2213.2522 - val_mae: 39.5830
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step - loss: 2911.2043 - mae: 43.3987 - val_loss: 2206.7405 - val_mae: 39.5096
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 258ms/step - loss: 2904.0603 - mae: 43.3241 - val_loss: 2199.9368 - val_mae: 39.4317
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 164ms/step - loss: 2896.6155 - mae: 43.2453 - val_loss: 2192.5918 - val_mae: 39.3463
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━



Test MAE: 12.52405071258545


In [72]:
import numpy as np

predicted_array = model.predict(np.array([[1,7,9,7,0,3,0,0,0]]))
predicted_value = predicted_array[0][0]  # Extract the scalar value

risk_score = max(0, min(100, round(predicted_value, 2)))
print("Risk Score:", risk_score)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
Risk Score: 94.16


In [29]:
y

0    10
1    15
2    20
3    70
4    80
5    30
6    90
7    12
8    75
9    25
Name: label, dtype: int64