# Model Prediction
>This process involves loading a pre-trained model and applying it to a new dataset. The relevant features are extracted from the data, predictions are generated, and the results are appended to the original dataset. The updated dataset, including the predictions, is then saved to a new CSV file, with the final 500 rows displayed for validation.


In [1]:
import pandas as pd
from joblib import load

# Paths to the serialized model and to the CSV file holding the rows to score.
# NOTE(review): the names below say "mlp", but the file name refers to an
# AdaBoost sub-model — confirm which estimator this artifact actually contains.
mlp_model_filename = 'adaboost_sub_model.joblib'
csv_file_path = 'part1.csv'  # Update this to the path of your new data CSV file

# Deserialize the trained model from disk.
# joblib.load unpickles the file, which can execute arbitrary code, so only
# load model files that come from a trusted source.
mlp_model = load(mlp_model_filename)

# Reader for the CSV file that holds the rows to score
def load_new_data(file_path):
    """Read the CSV at ``file_path`` and return its contents as a DataFrame.

    The argument is passed straight to ``pd.read_csv`` with default options.
    """
    frame = pd.read_csv(file_path)
    return frame

# Feature columns that are selected from the CSV and handed to the model.
# Nothing here validates the CSV: a column missing from the file only surfaces
# as a KeyError when the DataFrame is indexed with this list.
# Presumably the names and their order must match what the model was trained
# on — TODO confirm against the training code.
# NOTE(review): the printed output shows a 'Protocol Type' column in the CSV
# that is not listed here — confirm the model was trained without it.
predictor_names = [
    # Flow duration, header and rate columns
    'flow_duration', 'Header_Length', 'Duration',
    'Rate', 'Srate', 'Drate',
    # Columns named *_flag_number
    'fin_flag_number', 'syn_flag_number', 'rst_flag_number',
    'psh_flag_number', 'ack_flag_number', 'ece_flag_number',
    'cwr_flag_number',
    # Columns named *_count
    'ack_count', 'syn_count', 'fin_count', 'urg_count', 'rst_count',
    # Protocol-named columns
    'HTTP', 'HTTPS', 'DNS', 'Telnet', 'SMTP', 'SSH', 'IRC',
    'TCP', 'UDP', 'DHCP', 'ARP', 'ICMP', 'IPv', 'LLC',
    # Remaining aggregate/statistics columns. Spellings (e.g. 'Magnitue') are
    # kept exactly as written because they are used as CSV column labels.
    'Tot sum', 'Min', 'Max', 'AVG', 'Std', 'Tot size',
    'IAT', 'Number', 'Magnitue', 'Radius',
    'Covariance', 'Variance', 'Weight',
]

# Load the rows to score from the configured CSV path
new_data = load_new_data(csv_file_path)
print(f"CSV file loaded from: {csv_file_path}")

# Select only the predictor columns, in the order given by predictor_names.
# Indexing with a list of labels raises KeyError if any column is absent.
predictors = new_data[predictor_names]

# Run the loaded model on the selected columns.
# NOTE(review): names say "mlp" but the model file name suggests AdaBoost —
# confirm which estimator produced these labels.
predictions_mlp = mlp_model.predict(predictors)

# Append the predictions to new_data as an extra column
new_data['Predicted_Labels_MLP'] = predictions_mlp

# Write the full DataFrame (original columns plus predictions) to a new CSV
# file, omitting the row index
predictions_filename = 'mlp_predictions.csv'
new_data.to_csv(predictions_filename, index=False)

print(f"Predictions saved to: {predictions_filename}")

# Display the last 500 rows, predictions included (pandas abbreviates the
# printed frame, so not every row/column appears in the output)
print(new_data.tail(500))


CSV file loaded from: part1.csv
Predictions saved to: mlp_predictions.csv
        flow_duration  Header_Length  Protocol Type  Duration         Rate  \
230523       0.000000          54.00           6.00     64.00     6.993630   
230524       0.000000          54.00           6.00     64.00     1.771019   
230525       0.199567       38275.00          17.00     64.00  3835.028666   
230526      55.315331      452613.40          12.60    116.20    18.965288   
230527       0.000000           0.00           1.00     64.00  4908.489175   
...               ...            ...            ...       ...          ...   
231018       0.111664         169.80          17.00     64.00     2.709472   
231019       0.001510          67.60           6.11     65.91    14.599723   
231020       0.000000          53.46           5.94     63.36     3.865004   
231021       0.000000          81.00           6.00     64.00     7.341495   
231022       0.000000          53.46           5.94     63.36    69.