In [None]:
# Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import ydata_profiling

In [None]:
# Functions

def plot_correlation(df):
    """Draw a lower-triangle heatmap of the pairwise correlations in ``df``.

    Only numeric columns take part in the correlation; any other column is
    skipped rather than making ``DataFrame.corr`` raise.

    Parameters
    ----------
    df : pandas.DataFrame
        Data whose column-to-column correlations are plotted.

    Returns
    -------
    None
        The heatmap is drawn on a newly created 10x10 inch matplotlib figure.
    """
    # Correlation between the numeric variables. Without numeric_only=True,
    # pandas >= 2.0 raises as soon as the frame holds a non-numeric column.
    corr = df.corr(numeric_only=True)
    # Set up the matplotlib figure; the heatmap is bound to this axes explicitly
    # instead of relying on whichever axes happens to be current.
    _, ax = plt.subplots(figsize=(10, 10))
    # Generate a mask for the upper triangle (diagonal included): those cells
    # only repeat the lower triangle or hold the trivial self-correlation.
    mask = np.triu(np.ones_like(corr, dtype=bool))
    # Configure a custom diverging colormap
    cmap = sns.diverging_palette(230, 20, as_cmap=True)
    # Draw the heatmap with the coefficient printed in every visible cell
    sns.heatmap(corr, annot=True, mask=mask, cmap=cmap, ax=ax)

In [None]:
# Main

# Dataset path
# NOTE(review): machine-specific absolute path; point it at the local copy of
# WSN-DS before running on another machine.
dataset_path = '/home/leandro/remy/centralized/datasets/WSN-DS/'

# Attack label -> numeric class code. 'TDMA' is deliberately absent: those
# rows are removed before the labels are encoded.
class_codes = {
    "Normal": 0,
    "Grayhole": 1,
    "Blackhole": 2,
    "Flooding": 3,
}

# Load and clean the dataset in a single chain (no in-place mutation, so the
# cell yields the same frame every time it is re-run).
wsn_df = (
    pd.read_csv(f'{dataset_path}data.csv')
    # Sort by the ' Time' column (the name is referenced with a leading space)
    .sort_values(by=[' Time'])
    # Drop the ' id' column
    .drop(columns=' id')
    # Rename the class column
    .rename(columns={"Attack type": "class"})
    # Remove TDMA schedule attack
    .loc[lambda frame: frame['class'] != 'TDMA']
    .reset_index(drop=True)
)

# Fail loudly on labels missing from the mapping; mapping them anyway would
# silently turn them into NaN and distort every downstream plot and report.
unknown_labels = set(wsn_df["class"].unique()) - set(class_codes)
if unknown_labels:
    raise ValueError(
        f"Unmapped class labels in dataset: {sorted(unknown_labels, key=str)}"
    )

# Convert classes to numeric
wsn_df["class"] = wsn_df["class"].map(class_codes)

In [None]:
# Sample count of each class, drawn on an explicit axes so the figure can be
# titled and labelled (the bars are otherwise identified only by numeric code)
fig, ax = plt.subplots()
wsn_df['class'].value_counts().plot(kind='bar', ax=ax)
ax.set(title='Samples per class', xlabel='class', ylabel='Number of samples');

In [None]:
# Heatmap of the pairwise feature correlations in the cleaned dataset
plot_correlation(df=wsn_df)

In [None]:
# Build the profiling report through the ProfileReport class directly
# (same title and full-width HTML style as the DataFrame accessor call)
report = ydata_profiling.ProfileReport(
    wsn_df,
    title="WSN-DS Dataset",
    html={"style": {"full_width": True}},
)
# Write the report out as an HTML file
report.to_file("profile_report.html")