In [1]:
pip install modelbit

Collecting modelbit
  Obtaining dependency information for modelbit from https://files.pythonhosted.org/packages/57/76/bceff92b2aa6efae85d7428ef51fd43048e9abb4561b9fd8582c024862f1/modelbit-0.37.5-py3-none-any.whl.metadata
  Downloading modelbit-0.37.5-py3-none-any.whl.metadata (2.0 kB)
Collecting pycryptodomex (from modelbit)
  Obtaining dependency information for pycryptodomex from https://files.pythonhosted.org/packages/b2/e8/1b92184ab7e5595bf38000587e6f8cf9556ebd1bf0a583619bee2057afbd/pycryptodomex-3.20.0-cp35-abi3-win_amd64.whl.metadata
  Downloading pycryptodomex-3.20.0-cp35-abi3-win_amd64.whl.metadata (3.4 kB)
Collecting types-requests (from modelbit)
  Obtaining dependency information for types-requests from https://files.pythonhosted.org/packages/8b/ea/91b718b8c0b88e4f61cdd61357cc4a1f8767b32be691fb388299003a3ae3/types_requests-2.31.0.20240406-py3-none-any.whl.metadata
  Downloading types_requests-2.31.0.20240406-py3-none-any.whl.metadata (1.8 kB)
Collecting types-PyYAML (from m


[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
import joblib
import modelbit

# Locations of the pre-split training and testing CSVs (relative to the working directory)
train_file = "UNSW_NB15_training-set.csv"
test_file = "UNSW_NB15_testing-set.csv"

# Read both splits, training first, into one DataFrame each
df_train, df_test = (pd.read_csv(csv_path) for csv_path in (train_file, test_file))

# Columns holding string categories that need one-hot encoding
categorical_features = ['proto', 'service', 'state']

# Fit the encoder on the TRAINING split only, so nothing about the test split
# leaks into preprocessing. Categories first seen at test/prediction time are
# encoded as all-zero rows because of handle_unknown='ignore'.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoder.fit(df_train[categorical_features])
encoded_columns = encoder.get_feature_names_out(categorical_features)

X_train_encoded = encoder.transform(df_train[categorical_features])
X_test_encoded = encoder.transform(df_test[categorical_features])

# Drop original categorical columns from the datasets
df_train = df_train.drop(columns=categorical_features)
df_test = df_test.drop(columns=categorical_features)

# Columns that describe the outcome and must never be fed to the model
non_feature_columns = ['attack_cat', 'label']

# Join the remaining columns with the one-hot columns. The encoded frames reuse
# the source index so the column-wise concat aligns row-for-row even if the
# frames do not carry a default 0..n-1 index.
# NOTE(review): every other column is used as a feature; if the CSVs contain a
# row-identifier column it should be excluded as well — confirm against the data.
X_train = pd.concat(
    [
        df_train.drop(columns=non_feature_columns),
        pd.DataFrame(X_train_encoded, columns=encoded_columns, index=df_train.index),
    ],
    axis=1,
)
X_test = pd.concat(
    [
        df_test.drop(columns=non_feature_columns),
        pd.DataFrame(X_test_encoded, columns=encoded_columns, index=df_test.index),
    ],
    axis=1,
)

# Define target variable
y_train = df_train['label']
y_test = df_test['label']

# Two-stage estimator: standardise the features, then fit a seeded random forest
pipeline_steps = [
    ('scaler', StandardScaler()),
    ('clf', RandomForestClassifier(random_state=42)),
]
pipeline = Pipeline(pipeline_steps)

# Deliberately small search space (2 x 2 x 2 = 8 candidates) to keep the run short
param_grid = {
    'clf__n_estimators': [100, 150],
    'clf__max_depth': [None, 10],
    'clf__min_samples_split': [2, 5],
}

# Exhaustive search over the grid with 3-fold cross-validation on all available cores
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=3, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Persist the fitted search object (not only the best estimator) for later loading
joblib.dump(grid_search, 'random_forest_model.joblib')

# Hard class predictions on the testing set, used by the threshold-based metrics
y_pred = grid_search.predict(X_test)

# ROC AUC measures ranking quality, so it needs a continuous score rather than
# 0/1 predictions (hard labels reduce the ROC curve to a single threshold).
# Use the predicted probability of the positive class (label 1, the same
# positive label the precision/recall/F1 defaults assume).
positive_class_column = list(grid_search.classes_).index(1)
y_score = grid_search.predict_proba(X_test)[:, positive_class_column]

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_score)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("ROC AUC score:", roc_auc)


In [None]:
# NOTE(review): `modelbit` is also imported in the training cell; the repeat is
# harmless and lets this cell run on its own.
import modelbit
# `mb` is the handle the deployment cell calls `mb.deploy(...)` on.
# Presumably this call authenticates the kernel with Modelbit — confirm in the Modelbit docs.
mb = modelbit.login()

In [None]:
# Define a prediction function for the deployed model
def random_forest_predict(data: pd.DataFrame) -> pd.Series:
    """Predict labels for raw rows using the saved random-forest grid search.

    The function reproduces the training-time preprocessing: the categorical
    columns are one-hot encoded with the module-level ``encoder`` and joined
    with the remaining columns before being passed to the model.

    Args:
        data: Rows to score. Must contain the 'proto', 'service' and 'state'
            columns plus the other feature columns used at training time.
            'attack_cat' and 'label' are ignored if present.

    Returns:
        Whatever ``model.predict`` returns for the assembled features, one
        prediction per input row. NOTE(review): scikit-learn returns a NumPy
        array here rather than the annotated ``pd.Series``; the return value is
        left unchanged for callers.

    Raises:
        KeyError: If any of the categorical columns is missing from ``data``.
    """
    # NOTE: joblib.load unpickles the file — only load a model file you trust.
    # The model is re-read from disk on every call.
    model = joblib.load('random_forest_model.joblib')

    # Apply the same transformations as during training
    categorical_features = ['proto', 'service', 'state']
    # Reuse the caller's index: with a fresh 0..n-1 index the column-wise concat
    # below would misalign rows (and introduce NaNs) for any input whose index
    # is not the default RangeIndex, e.g. a filtered or sliced frame.
    encoded = pd.DataFrame(
        encoder.transform(data[categorical_features]),
        columns=encoder.get_feature_names_out(categorical_features),
        index=data.index,
    )

    # Remove the raw categorical columns and, if the caller passed them along,
    # the target columns that were excluded from the training features.
    remaining = data.drop(columns=categorical_features + ['attack_cat', 'label'], errors='ignore')

    X = pd.concat([remaining, encoded], axis=1)
    return model.predict(X)

# Deploy the model to Modelbit.
# random_forest_predict reads 'random_forest_model.joblib' from disk at call
# time, so the file has to be shipped with the deployment via extra_files;
# otherwise the load fails in the deployed environment.
mb.deploy(
    random_forest_predict,
    name="RandomForestModel",
    extra_files=["random_forest_model.joblib"],
)

print("Model successfully deployed on Modelbit")