<a href="https://colab.research.google.com/github/prksh830/Healthcare/blob/main/wsn_efficiency_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# ============================================
# 1. Load Dataset
# ============================================
import pandas as pd
import numpy as np

# Load your dataset
df = pd.read_csv("WSN-DS.csv")

print("Dataset shape:", df.shape)
print("Columns:", df.columns.tolist())
df.head()


Dataset shape: (374661, 19)
Columns: [' id', ' Time', ' Is_CH', ' who CH', ' Dist_To_CH', ' ADV_S', ' ADV_R', ' JOIN_S', ' JOIN_R', ' SCH_S', ' SCH_R', 'Rank', ' DATA_S', ' DATA_R', ' Data_Sent_To_BS', ' dist_CH_To_BS', ' send_code ', 'Energy Consumption', 'Attack type']


Unnamed: 0,id,Time,Is_CH,who CH,Dist_To_CH,ADV_S,ADV_R,JOIN_S,JOIN_R,SCH_S,SCH_R,Rank,DATA_S,DATA_R,Data_Sent_To_BS,dist_CH_To_BS,send_code,Energy Consumption,Attack type
0,101000,50,1,101000,0.0,1,0,0,25,1,0,0,0,1200,48,130.08535,0,2.4694,Normal
1,101001,50,0,101044,75.32345,0,4,1,0,0,1,2,38,0,0,0.0,4,0.06957,Normal
2,101002,50,0,101010,46.95453,0,4,1,0,0,1,19,41,0,0,0.0,3,0.06898,Normal
3,101003,50,0,101044,64.85231,0,4,1,0,0,1,16,38,0,0,0.0,4,0.06673,Normal
4,101004,50,0,101010,4.83341,0,4,1,0,0,1,25,41,0,0,0.0,3,0.06534,Normal


In [11]:
# ============================================
# 2. Select Required Parameters
# ============================================

# Keep only the parameters useful for Energy Efficiency & QoS
required_cols = [
    "Is_CH", "Dist_To_CH", "ADV_S", "ADV_R", "JOIN_S", "JOIN_R",
    "SCH_S", "SCH_R", "DATA_S", "DATA_R", "Data_Sent_To_BS",
    "dist_CH_To_BS", "Energy Consumption", "Attack type"
]

# Remove leading/trailing spaces from column names for accurate matching
df.columns = df.columns.str.strip()

df = df[required_cols]

print("Filtered shape:", df.shape)
df.head()

Filtered shape: (374661, 14)


Unnamed: 0,Is_CH,Dist_To_CH,ADV_S,ADV_R,JOIN_S,JOIN_R,SCH_S,SCH_R,DATA_S,DATA_R,Data_Sent_To_BS,dist_CH_To_BS,Energy Consumption,Attack type
0,1,0.0,1,0,0,25,1,0,0,1200,48,130.08535,2.4694,Normal
1,0,75.32345,0,4,1,0,0,1,38,0,0,0.0,0.06957,Normal
2,0,46.95453,0,4,1,0,0,1,41,0,0,0.0,0.06898,Normal
3,0,64.85231,0,4,1,0,0,1,38,0,0,0.0,0.06673,Normal
4,0,4.83341,0,4,1,0,0,1,41,0,0,0.0,0.06534,Normal


In [12]:
# ============================================
# 3. Compute Performance Metrics
# ============================================

# Energy Efficiency (%)
df["Energy_Efficiency"] = (1 - (df["Energy Consumption"] / df["Energy Consumption"].max())) * 100

# QoS (%)
df["QoS"] = (df["DATA_R"] / (df["DATA_S"] + 1e-6)) * 100

print(df[["Energy Consumption", "Energy_Efficiency", "DATA_S", "DATA_R", "QoS"]].head())

   Energy Consumption  Energy_Efficiency  DATA_S  DATA_R           QoS
0             2.46940          94.523876       0    1200  1.200000e+11
1             0.06957          99.845722      38       0  0.000000e+00
2             0.06898          99.847030      41       0  0.000000e+00
3             0.06673          99.852020      38       0  0.000000e+00
4             0.06534          99.855102      41       0  0.000000e+00


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Energy_Efficiency"] = (1 - (df["Energy Consumption"] / df["Energy Consumption"].max())) * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["QoS"] = (df["DATA_R"] / (df["DATA_S"] + 1e-6)) * 100


In [15]:
# ============================================
# 3. Compute Performance Metrics (Fixed)
# ============================================

# Energy Efficiency (%)
df.loc[:, "Energy_Efficiency"] = (1 - (df["Energy Consumption"] / df["Energy Consumption"].max())) * 100

# QoS (%), safe handling of division by zero
df.loc[:, "QoS"] = np.where(df["DATA_S"] > 0,
                            (df["DATA_R"] / df["DATA_S"]) * 100,
                            0)

# Show summary instead of just first 5 rows
print(df[["Energy Consumption", "Energy_Efficiency", "DATA_S", "DATA_R", "QoS"]].describe())


       Energy Consumption  Energy_Efficiency         DATA_S         DATA_R  \
count       374661.000000      374661.000000  374661.000000  374661.000000   
mean             0.305661          99.322169      44.857925      73.890045   
std              0.669462           1.484594      42.574464     230.246335   
min              0.000000           0.000000       0.000000       0.000000   
25%              0.056150          99.517097      13.000000       0.000000   
50%              0.097970          99.782742      35.000000       0.000000   
75%              0.217760          99.875482      62.000000       0.000000   
max             45.093940         100.000000     241.000000    1496.000000   

                 QoS  
count  374661.000000  
mean      411.310489  
std      1995.952833  
min         0.000000  
25%         0.000000  
50%         0.000000  
75%         0.000000  
max     74800.000000  


In [13]:
# ============================================
# 4. Preprocessing (One-Hot Encoding + Scaling)
# ============================================
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Define features and targets
X = df.drop(["Attack type"], axis=1)
y = df["Attack type"]  # classification target

# Identify categorical and numerical columns
categorical = ["Is_CH"]
numerical = [col for col in X.columns if col not in categorical]

# Preprocessing pipeline
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numerical),
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical)
])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print("Train shape:", X_train.shape, " Test shape:", X_test.shape)

Train shape: (299728, 15)  Test shape: (74933, 15)


In [None]:
# ============================================
# 5. Baseline Models
# ============================================
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

models = {
    "LogReg": LogisticRegression(max_iter=2000),
    "DecisionTree": DecisionTreeClassifier(),
    "RandomForest": RandomForestClassifier(),
    "GradientBoosting": GradientBoostingClassifier(),
    "SVM": SVC(),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="mlogloss")
}

results = {}

for name, model in models.items():
    pipe = Pipeline(steps=[("preprocess", preprocessor), ("clf", model)])
    pipe.fit(X_train, y_train)
    preds = pipe.predict(X_test)
    acc = accuracy_score(y_test, preds)
    results[name] = acc
    print(f"\n{name} Accuracy: {acc:.4f}")
    print(classification_report(y_test, preds))


LogReg Accuracy: 0.9750
              precision    recall  f1-score   support

   Blackhole       0.64      1.00      0.78      2010
    Flooding       0.90      0.87      0.89       662
    Grayhole       0.86      0.55      0.67      2919
      Normal       0.99      0.99      0.99     68014
        TDMA       0.97      0.91      0.94      1328

    accuracy                           0.98     74933
   macro avg       0.87      0.87      0.85     74933
weighted avg       0.98      0.98      0.97     74933


DecisionTree Accuracy: 0.9942
              precision    recall  f1-score   support

   Blackhole       0.98      0.99      0.98      2010
    Flooding       0.94      0.95      0.94       662
    Grayhole       0.98      0.98      0.98      2919
      Normal       1.00      1.00      1.00     68014
        TDMA       0.90      0.92      0.91      1328

    accuracy                           0.99     74933
   macro avg       0.96      0.97      0.96     74933
weighted avg       0.

In [16]:
# ============================================
# 6. Deep Learning - Transformer Encoder
# ============================================
import tensorflow as tf
from tensorflow.keras import layers, models

# Preprocess features for DL
X_all = preprocessor.fit_transform(X)
y_all = pd.get_dummies(y).values  # one-hot encode target

X_train_dl, X_test_dl, y_train_dl, y_test_dl = train_test_split(X_all, y_all, test_size=0.2, random_state=42)

input_shape = X_train_dl.shape[1]

# Transformer Encoder Block
inputs = layers.Input(shape=(input_shape,))
x = layers.Reshape((input_shape, 1))(inputs)
x = layers.MultiHeadAttention(num_heads=4, key_dim=8)(x, x)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dense(64, activation="relu")(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(y_train_dl.shape[1], activation="softmax")(x)

transformer_model = models.Model(inputs, outputs)
transformer_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

history = transformer_model.fit(X_train_dl, y_train_dl, validation_split=0.2, epochs=10, batch_size=32, verbose=1)

loss, acc = transformer_model.evaluate(X_test_dl, y_test_dl, verbose=0)
print(f"\nTransformer Encoder Test Accuracy: {acc:.4f}")


Epoch 1/10
[1m7494/7494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 7ms/step - accuracy: 0.9087 - loss: 0.3425 - val_accuracy: 0.9097 - val_loss: 0.2660
Epoch 2/10
[1m7494/7494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 5ms/step - accuracy: 0.9110 - loss: 0.2575 - val_accuracy: 0.9099 - val_loss: 0.2613
Epoch 3/10
[1m7494/7494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 5ms/step - accuracy: 0.9111 - loss: 0.2577 - val_accuracy: 0.9098 - val_loss: 0.2552
Epoch 4/10
[1m7494/7494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 5ms/step - accuracy: 0.9112 - loss: 0.2573 - val_accuracy: 0.9095 - val_loss: 0.2599
Epoch 5/10
[1m7494/7494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 6ms/step - accuracy: 0.9112 - loss: 0.2560 - val_accuracy: 0.9091 - val_loss: 0.2543
Epoch 6/10
[1m7494/7494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 5ms/step - accuracy: 0.9115 - loss: 0.2548 - val_accuracy: 0.9100 - val_loss: 0.2548
Epoch 7/10

In [17]:
# ============================================
# 7. Deep Learning - CNN-LSTM
# ============================================

inputs = layers.Input(shape=(input_shape, 1))
x = layers.Conv1D(64, kernel_size=3, activation="relu")(inputs)
x = layers.MaxPooling1D(pool_size=2)(x)
x = layers.LSTM(64)(x)
x = layers.Dense(64, activation="relu")(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(y_train_dl.shape[1], activation="softmax")(x)

cnn_lstm_model = models.Model(inputs, outputs)
cnn_lstm_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Reshape input for CNN-LSTM
X_train_dl_cnn = np.expand_dims(X_train_dl, axis=-1)
X_test_dl_cnn = np.expand_dims(X_test_dl, axis=-1)

history = cnn_lstm_model.fit(X_train_dl_cnn, y_train_dl, validation_split=0.2, epochs=10, batch_size=32, verbose=1)

loss, acc = cnn_lstm_model.evaluate(X_test_dl_cnn, y_test_dl, verbose=0)
print(f"\nCNN-LSTM Test Accuracy: {acc:.4f}")


Epoch 1/10
[1m7494/7494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 8ms/step - accuracy: 0.9544 - loss: 0.1415 - val_accuracy: 0.9776 - val_loss: 0.0465
Epoch 2/10
[1m7494/7494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 7ms/step - accuracy: 0.9800 - loss: 0.0450 - val_accuracy: 0.9914 - val_loss: 0.0432
Epoch 3/10
[1m7494/7494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 8ms/step - accuracy: 0.9856 - loss: 0.0389 - val_accuracy: 0.9903 - val_loss: 0.0322
Epoch 4/10
[1m7494/7494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 7ms/step - accuracy: 0.9904 - loss: 0.0307 - val_accuracy: 0.9934 - val_loss: 0.0269
Epoch 5/10
[1m7494/7494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 8ms/step - accuracy: 0.9920 - loss: 0.0282 - val_accuracy: 0.9925 - val_loss: 0.0295
Epoch 6/10
[1m7494/7494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 7ms/step - accuracy: 0.9926 - loss: 0.0257 - val_accuracy: 0.9923 - val_loss: 0.0257
Epoch 7/10