In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import joblib

# --- Step 1: Create a self-contained, dummy dataset ---
# In a real-world scenario, you would replace this section with a
# call to load your actual dataset from a file (e.g., a CSV).
def create_dummy_dataset(num_samples=1000):
    """
    Generates a dummy dataset for a simplified Network Intrusion Detection System.
    The 'label' is determined by a simple rule on two 'important' features.
    """
    np.random.seed(42)

    # Define features and their relationships
    features = {
        'duration': np.random.rand(num_samples) * 10,
        'src_bytes': np.random.randint(50, 5000, num_samples),
        'dst_bytes': np.random.randint(50, 5000, num_samples),
        'important_feature_1': np.random.rand(num_samples) * 5,
        'important_feature_2': np.random.rand(num_samples) * 3,
        'protocol_type': np.random.choice(['tcp', 'udp', 'icmp'], num_samples),
        'service': np.random.choice(['http', 'smtp', 'ftp', 'dns'], num_samples),
    }
    df = pd.DataFrame(features)

    # Create a simplified 'label' for intrusion detection (1 = attack, 0 = normal)
    # The label is based on a simple rule to ensure the model can learn something.
    df['label'] = ((df['important_feature_1'] > 4) | (df['important_feature_2'] > 2.5)).astype(int)

    return df

df = create_dummy_dataset()
print("Dummy dataset created successfully.")

# --- Step 2: Data Preprocessing ---
# Separate features (X) and target (y)
X = df.drop('label', axis=1)
y = df['label']

# Identify and encode categorical features using LabelEncoder
categorical_cols = X.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le
    print(f"Encoded '{col}' feature.")

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print("Data split into training and testing sets.")

# --- Step 3: Train the Random Forest model ---
model = RandomForestClassifier(n_estimators=100, random_state=42)
print("\nTraining the Random Forest model...")
model.fit(X_train, y_train)
print("Model training complete.")

# --- Step 4: Evaluate the model's performance ---
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("\n--- Model Evaluation ---")
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(report)

# --- Step 5: Extract and visualize feature importances ---
importances = model.feature_importances_
feature_names = X.columns
feature_importance_df = pd.DataFrame({'feature': feature_names, 'importance': importances})
feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)

# Plot the feature importances
plt.figure(figsize=(12, 8))
plt.barh(feature_importance_df['feature'], feature_importance_df['importance'], color='teal')
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title('Feature Importances from Random Forest Classifier')
plt.gca().invert_yaxis()
plt.tight_layout()
plt.savefig('feature_importance_plot.png')
plt.clf()
print("Plot of feature importances saved as 'feature_importance_plot.png'")

# --- Step 6: Save the trained model and encoders for future use ---
joblib.dump(model, 'nids_model.joblib')
joblib.dump(label_encoders, 'label_encoders.joblib')
print("Trained model and label encoders saved.")


Dummy dataset created successfully.
Encoded 'protocol_type' feature.
Encoded 'service' feature.
Data split into training and testing sets.

Training the Random Forest model...
Model training complete.

--- Model Evaluation ---
Accuracy: 0.9967

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       208
           1       1.00      0.99      0.99        92

    accuracy                           1.00       300
   macro avg       1.00      0.99      1.00       300
weighted avg       1.00      1.00      1.00       300

Plot of feature importances saved as 'feature_importance_plot.png'
Trained model and label encoders saved.


<Figure size 1200x800 with 0 Axes>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import joblib

# --- Step 1: Create a self-contained, dummy dataset ---
def create_dummy_dataset(num_samples=1000):
    """
    Generates a dummy dataset for a simplified Network Intrusion Detection System.
    """
    np.random.seed(42)

    features = {
        'duration': np.random.rand(num_samples) * 10,
        'src_bytes': np.random.randint(50, 5000, num_samples),
        'dst_bytes': np.random.randint(50, 5000, num_samples),
        'important_feature_1': np.random.rand(num_samples) * 5,
        'important_feature_2': np.random.rand(num_samples) * 3,
        'protocol_type': np.random.choice(['tcp', 'udp', 'icmp'], num_samples),
        'service': np.random.choice(['http', 'smtp', 'ftp', 'dns'], num_samples),
    }
    df = pd.DataFrame(features)

    df['label'] = ((df['important_feature_1'] > 4) | (df['important_feature_2'] > 2.5)).astype(int)

    return df

df = create_dummy_dataset()
print("Dummy dataset created successfully.")

# --- Step 2: Data Preprocessing and Training ---
X = df.drop('label', axis=1)
y = df['label']

categorical_cols = X.select_dtypes(include=['object']).columns
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
print("\nTraining the Random Forest model...")
model.fit(X_train, y_train)
print("Model training complete.")

# --- Step 3: Evaluate and Visualize ---
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("\n--- Model Evaluation ---")
print(f"Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(report)

importances = model.feature_importances_
feature_names = X.columns
feature_importance_df = pd.DataFrame({'feature': feature_names, 'importance': importances})
feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)
plt.figure(figsize=(12, 8))
plt.barh(feature_importance_df['feature'], feature_importance_df['importance'], color='teal')
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.title('Feature Importances from Random Forest Classifier')
plt.gca().invert_yaxis()
plt.tight_layout()
plt.savefig('feature_importance_plot.png')
plt.clf()
print("Plot of feature importances saved as 'feature_importance_plot.png'")

# --- Step 4: Corrected live detection example ---
# The key here is to make the new data match the training data's structure.
# First, you must save the trained model and the encoders to disk.
joblib.dump(model, 'nids_model.joblib')
joblib.dump(label_encoders, 'label_encoders.joblib')
print("\nTrained model and label encoders saved for future use.")

# --- Corrected section for using the model on new data ---
print("\n--- Corrected Live Detection Example ---")
# This part simulates loading the model and encoders in a new session
loaded_model = joblib.load('nids_model.joblib')
loaded_encoders = joblib.load('label_encoders.joblib')

# Define a new data point to be classified.
# The keys of this dictionary must match the original feature names.
new_raw_data = {
    'duration': [1.5],
    'src_bytes': [2500],
    'dst_bytes': [150],
    'important_feature_1': [4.5], # This value should trigger an "attack" label
    'important_feature_2': [0.5],
    'protocol_type': ['tcp'],
    'service': ['http']
}

# Convert the new data into a DataFrame
new_df = pd.DataFrame(new_raw_data)

# Apply the same LabelEncoders as were used during training
for col, le in loaded_encoders.items():
    if col in new_df.columns:
        new_df[col] = le.transform(new_df[col])

# Now the feature names and values match the training data.
# Make the prediction.
prediction = loaded_model.predict(new_df)

# Interpret the prediction
if prediction[0] == 0:
    print(f"Prediction: Normal traffic (Label: {prediction[0]})")
else:
    print(f"Prediction: **Intrusion Detected!** (Label: {prediction[0]})")


Dummy dataset created successfully.

Training the Random Forest model...
Model training complete.

--- Model Evaluation ---
Accuracy: 0.9967

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       208
           1       1.00      0.99      0.99        92

    accuracy                           1.00       300
   macro avg       1.00      0.99      1.00       300
weighted avg       1.00      1.00      1.00       300

Plot of feature importances saved as 'feature_importance_plot.png'

Trained model and label encoders saved for future use.

--- Corrected Live Detection Example ---
Prediction: **Intrusion Detected!** (Label: 1)


<Figure size 1200x800 with 0 Axes>

In [None]:
import pandas as pd
import joblib
from flask import Flask, render_template, request

app = Flask(__name__)

# --- Model and Encoder Loading ---
# IMPORTANT: These files must be in the same directory as this app.py file.
try:
    model = joblib.load('nids_model.joblib')
    label_encoders = joblib.load('label_encoders.joblib')
    print("Model and encoders loaded successfully.")
except FileNotFoundError:
    print("Error: Required model files ('nids_model.joblib', 'label_encoders.joblib') not found.")
    print("Please ensure they are in the same directory as this script.")
    model = None
    label_encoders = None

# Define the order of features as they were during training
FEATURE_ORDER = ['duration', 'src_bytes', 'dst_bytes', 'important_feature_1',
                 'important_feature_2', 'protocol_type', 'service']

@app.route('/', methods=['GET', 'POST'])
def home():
    """
    Handles both GET (displaying the form) and POST (processing form data) requests.
    """
    prediction_result = None

    # Check if the model files were loaded successfully
    if not model or not label_encoders:
        return render_template('index.html', error_message="Model files not found. Please check your directory.")

    # Handle POST request when the form is submitted
    if request.method == 'POST':
        try:
            # 1. Get raw data from the form
            raw_data = {
                'duration': [float(request.form['duration'])],
                'src_bytes': [int(request.form['src_bytes'])],
                'dst_bytes': [int(request.form['dst_bytes'])],
                'important_feature_1': [float(request.form['important_feature_1'])],
                'important_feature_2': [float(request.form['important_feature_2'])],
                'protocol_type': [request.form['protocol_type']],
                'service': [request.form['service']]
            }

            # 2. Create a DataFrame with the correct feature order
            new_df = pd.DataFrame(raw_data, columns=FEATURE_ORDER)

            # 3. Apply the same LabelEncoders as were used during training
            for col, le in label_encoders.items():
                if col in new_df.columns:
                    new_df[col] = le.transform(new_df[col])

            # 4. Make the prediction
            prediction = model.predict(new_df)

            # 5. Interpret and store the result
            if prediction[0] == 1:
                prediction_result = "Intrusion Detected!"
            else:
                prediction_result = "Normal Traffic"

        except Exception as e:
            # Catch any errors during prediction and display a user-friendly message
            print(f"Prediction error: {e}")
            prediction_result = f"An error occurred during prediction: {e}"

    # Render the HTML page, passing the prediction result
    return render_template('index.html', result=prediction_result)

if __name__ == '__main__':
    # Run the Flask app in debug mode
    app.run(debug=True)


Model and encoders loaded successfully.
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with watchdog (inotify)


In [None]:
import joblib

# Use 'model' because that's what you named it in Step 3 of your code
joblib.dump(model, 'nids_model.joblib')

# Also save the encoders so the Pi knows how to handle 'tcp' and 'http'
joblib.dump(label_encoders, 'label_encoders.joblib')

print("Success! Both files are ready for download.")

NameError: name 'model' is not defined