In [None]:
import csv
import math
import os
import random
import sys
import time
from statistics import mode

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from memory_profiler import profile
from sklearn.preprocessing import StandardScaler
from tensorflow import keras

In [None]:
# Ask TensorFlow which physical GPU devices it can see and report them
gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    # One line per detected GPU
    for gpu in gpus:
        print("GPU device name:", gpu.name)
else:
    print("No GPUs found. TensorFlow is using CPU.")

In [None]:
# Load the combined dataset used for DQN training; the first CSV column becomes
# the row index.
training_csv_path = 'DQN_Dataset_Sample/DQN_Training_Data_Sample/CombinedAttackData.csv'
sdata = pd.read_csv(training_csv_path, index_col=0)
# Display the loaded frame
sdata

In [None]:
# Show the column labels of the loaded training frame (quick schema check
# before the feature-engineering cells that reference columns by name)
sdata.columns

In [None]:
# Lower / upper bound of the average speed at each instant: mean -/+ one
# standard deviation.
speed_mean = sdata['TSmean']
speed_std = sdata['TSstd']
sdata['ASLB'] = speed_mean - speed_std
sdata['ASUB'] = speed_mean + speed_std
# Absolute difference between the magnitudes of the two calculated distances
sdata['DisDiff'] = (sdata['disCover'].abs() - sdata['SADis'].abs()).abs()

In [None]:
# Remove the columns that are not needed by the later cells, then renumber the
# rows 0..n-1.
unused_columns = [
    'rTotalSpeed', 'sxspeed', 'syspeed', 'syacc', 'sTotalAcc',
    'disCover', 'SADis', 'TSmean', 'TSstd',
]
sdata = sdata.drop(columns=unused_columns)
sdata = sdata.reset_index(drop=True)
sdata

In [None]:
# Standardize the selected feature columns of sdata and append the results as
# new '<column>_std' columns next to the untouched originals.
std_columns = ['sTotalSpeed','sxacc', 'beaconRate', 'rDensity', 'DisDiff', 'rAvgSpeed']
std_column_names = [f'{col}_std' for col in std_columns]

# Extract the columns to be standardized
data_to_standardize = sdata[std_columns]

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the scaler to the data and transform the data
scaled_data = scaler.fit_transform(data_to_standardize)

# Create a new DataFrame with the standardized data. Reusing sdata's index
# keeps every scaled row attached to the row it was computed from, whatever
# index sdata currently carries.
df_standardized = pd.DataFrame(scaled_data, columns=std_column_names, index=sdata.index)

# Concatenate the new standardized columns with the original DataFrame.
# Any '*_std' columns left from a previous run of this cell are dropped first,
# so re-running the cell does not create duplicate column labels.
sdata = pd.concat(
    [sdata.drop(columns=std_column_names, errors='ignore'), df_standardized],
    axis=1,
)

sdata

In [None]:
# Define a custom callback to measure training time and memory usage
class TimeMemoryCallback(tf.keras.callbacks.Callback):
    """Keras callback that records training duration and process memory.

    After a ``fit`` call, ``self.data`` holds four equally long lists:

    * ``'Epoch'``         - epoch indices passed to ``on_epoch_begin``
    * ``'Memory RSS'``    - process RSS at the start of each epoch
    * ``'Memory VMS'``    - process VMS at the start of each epoch
    * ``'Training Time'`` - total wall-clock seconds of the whole ``fit`` call,
      repeated once per recorded epoch

    Memory figures are the values reported by psutil divided by 1024**2.
    """

    def __init__(self):
        # Initialise the Keras base class so its own attributes are set up
        # before the training loop touches this callback.
        super().__init__()
        self.data = {'Epoch': [], 'Training Time': [], 'Memory RSS': [], 'Memory VMS': []}
        self.start_time = 0

    def on_train_begin(self, logs=None):
        """Remember when training started."""
        self.start_time = time.time()

    def on_epoch_begin(self, epoch, logs=None):
        """Sample the process memory at the beginning of each epoch."""
        memory_info = self.get_memory_usage()
        self.data['Epoch'].append(epoch)
        self.data['Memory RSS'].append(memory_info['rss'])
        self.data['Memory VMS'].append(memory_info['vms'])

    def on_train_end(self, logs=None):
        """Store the total elapsed time, one copy per recorded epoch."""
        end_time = time.time()
        elapsed_time = end_time - self.start_time
        # The same total is repeated so every list in self.data has equal length
        self.data['Training Time'] = [elapsed_time] * len(self.data['Epoch'])

    # NOTE(review): memory_profiler's @profile reports on every call of the
    # decorated function; this method runs once per epoch of every fit() call,
    # so confirm that the extra output and tracing overhead are wanted.
    @staticmethod
    @profile
    def get_memory_usage():
        """Return the current process memory as ``{'rss': ..., 'vms': ...}``.

        Both values are psutil's ``memory_info()`` fields divided by 1024**2.
        """
        # Imported here because psutil is only needed by this method
        import psutil

        process = psutil.Process()
        memory_info = process.memory_info()

        return {'rss': memory_info.rss / (1024 ** 2), 'vms': memory_info.vms / (1024 ** 2)}


In [None]:
# Create a DQN model
# The training loop builds each state from six standardized features
# (beaconRate, rDensity, sTotalSpeed, |sxacc|, DisDiff, rAvgSpeed), so the
# network input must be six wide; a value of 5 makes predict()/fit() reject
# the input shape.
state_size = 6
action_size = 2  # Accept or Reject
learning_rate = 0.001
epsilon = 1          # initial exploration probability
decoy_rate = 0.99    # multiplicative epsilon decay applied by the training loop
epsilon_min = 0.1    # lower bound for epsilon
gamma = 0.001        # weight of the bootstrapped Q term in the training target
epochs = 10
max_steps = 1000
# Initialize Q-table with zeros without heading: one row per row of sdata,
# one column per action
num_states = len(sdata)
Q = np.zeros((num_states, action_size))

# Create the Q-network: two hidden ReLU layers of 64 units and one linear
# output per action
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(state_size,)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(action_size, activation='linear'),
])
# 'accuracy' is kept as a metric because the training loop reads
# history.history['accuracy']
model.compile(optimizer=keras.optimizers.Adam(learning_rate), loss='mse', metrics=['accuracy'])

In [None]:

# DQN training loop.
#
# For every epoch and every receiver (one receiver = one episode), the rows of
# that receiver are visited in 'newTime' order. Each row yields a state, an
# epsilon-greedy action and a rule-based reward; the (state, action, reward)
# triple is stored in a per-receiver replay list, and from step index 5 onward
# the network is fitted on a random minibatch after every step.
#
# Results are collected in plain lists and converted to DataFrames once the
# loop has finished (growing a DataFrame row by row copies it on every append).

# Define the experience replay buffer
# NOTE(review): buffer_size is declared but the replay list below is never
# trimmed to it -- confirm whether a bounded buffer was intended.
buffer_size = 5000
per = 0  # number of epochs completed so far

# Running histories (one entry per training step) for loss, accuracy and rewards
episode_train_loss = []
episode_test_loss = []
episode_train_accuracy_model = []
episode_train_accuracy_Calculated = []
episode_detection_rate = []
each_reward = []
episode_reward = []

state_id = 0
# count of exploration steps
ecount = 0
nid = state_id
batch_size = 32  # maximum minibatch size; adjust as needed
min_step_for_training = 5  # first step index at which the network is fitted

rnum = len(sdata.receiver.unique())  # number of unique receivers

# Feature order of the state vector; the network input size must equal the
# length of this list.
state_columns = ['beaconRate_std', 'rDensity_std', 'sTotalSpeed_std', 'sxacc_std', 'DisDiff_std', 'rAvgSpeed_std']
result_columns = ['epoch', 'TP', 'TN', 'FP', 'FN', 'Accuracy', 'Precision', 'Recall', 'Fscore', 'FPR', 'MDR']
overhead_columns = ['Epoch', 'Episode', 'Step', 'Receiver', 'Memory_Usage', 'Time_Overhead']

# Row buffers that become Q_table, all_loss_df, all_results and overall_overhead
q_value_records = []
loss_records = []
result_records = []
overall_overhead_records = []

# model.save() below does not create missing folders
os.makedirs("Training_output", exist_ok=True)

# Work on a 0..n-1 indexed view so that a row's index label is also its row
# number in the Q array (which has len(sdata) rows).
training_rows = sdata.reset_index(drop=True)

# Use the first GPU when TensorFlow can see one, otherwise stay on the CPU
training_device = "/GPU:0" if tf.config.list_physical_devices('GPU') else "/CPU:0"

with tf.device(training_device):
    # Confusion-matrix counters; they accumulate over all receivers and epochs
    test_tp, test_fp, test_tn, test_fn = 0, 0, 0, 0
    acc1, pre1, recall1, fs1, fpr1, mdr1 = 0, 0, 0, 0, 0, 0
    acc, pre, recall, fs, fpr, mdr = 0, 0, 0, 0, 0, 0
    for epoch in range(epochs):

        rcount = 0
        epoch_overhead_records = []

        for ri in training_rows.receiver.unique():
            rcount += 1
            receiver_rows = training_rows[training_rows.receiver == ri].sort_values(by='newTime')
            # Row numbers in the full frame, aligned with rdata's new 0..m-1 index
            row_ids = receiver_rows.index.to_numpy()
            rdata = receiver_rows.reset_index(drop=True)

            experience_replay = []
            train_loss = 0
            train_accuracy = 0
            batch_count = 0
            reward_total = 0
            last_overhead = None  # overhead of this receiver's latest training step
            for si in range(len(rdata)):
                # The state id is the row number in the full frame, so every
                # state owns its own row of Q.
                nid = int(row_ids[si])

                # Extract state vector (the acceleration feature enters as a magnitude)
                state = [
                    abs(rdata.loc[si, col]) if col == 'sxacc_std' else rdata.loc[si, col]
                    for col in state_columns
                ]

                # Choose action through exploration or exploitation
                if np.random.rand() < epsilon:
                    action = random.choice([0, 1])
                    # Decay epsilon once per exploration step and never below epsilon_min
                    if epsilon > epsilon_min:
                        epsilon = max(epsilon_min, epsilon * decoy_rate)
                    ecount += 1
                else:
                    # verbose=0: a progress bar per single-row prediction would flood the output
                    q_values = model.predict(np.array([state]), verbose=0)
                    action = int(np.argmax(q_values))
                    Q[nid, :] = q_values[0]
                    q_value_records.append([nid, q_values[0][0], q_values[0][1]])

                # Compute Reward: rflag is 1 when the sender speed lies inside
                # (ASLB, ASUB] and the distance difference is exactly zero.
                # NOTE(review): AttackerType is compared directly with the 0/1
                # action below -- confirm it only takes the values 0 and 1.
                actual_action = rdata.loc[si, 'AttackerType']
                if rdata.loc[si, 'ASLB'] < rdata.loc[si, 'sTotalSpeed'] <= rdata.loc[si, 'ASUB'] and rdata.loc[si, 'DisDiff'] == 0:
                    rflag = 1
                else:
                    rflag = 0

                if rflag == 1:
                    reward = 1 if action == 0 else -1
                else:
                    reward = -1 if action == 0 else 1

                # Update the confusion matrix (action 1 is the positive class)
                if actual_action == action:
                    if action == 0:
                        test_tn += 1
                    else:
                        test_tp += 1
                else:
                    if action == 0:
                        test_fn += 1
                    else:
                        test_fp += 1

                each_reward.append(reward)
                reward_total += reward

                # Store to experience replay
                experience_replay.append((state, action, reward))

                # Batch Sampling and Q-Value Updates
                if si < min_step_for_training:
                    continue

                if len(experience_replay) >= batch_size:
                    current_batch_size = batch_size
                else:
                    current_batch_size = si - 1
                batch_count += 1
                # Use random.sample to get a sublist of experiences
                minibatch = random.sample(experience_replay, current_batch_size)
                states, actions, rewards = zip(*minibatch)

                # Convert the tuples to numpy arrays
                states = np.array(states)
                actions = np.array(actions)
                rewards = np.array(rewards)

                # Calculate the Q-values for the sampled states
                q_values = model.predict(states, verbose=0)

                # Targets start as the current predictions; only the entry of
                # the action actually taken is replaced.
                # NOTE(review): the bootstrapped term uses the Q-values of the
                # same state (no next state is stored) -- confirm this is intended.
                target_q_values = q_values.copy()
                for k in range(current_batch_size):
                    target_q_values[k][actions[k]] = rewards[k] + gamma * np.max(q_values[k])

                # Fit on the minibatch while measuring time and memory
                custom_callback = TimeMemoryCallback()
                history = model.fit(states, target_q_values, epochs=1, verbose=0, callbacks=[custom_callback])
                step_loss = history.history['loss'][0]
                step_accuracy = history.history['accuracy'][0]
                train_loss += step_loss
                train_accuracy += step_accuracy
                print("batch count=", batch_count)
                print("loss and accuracy=", step_loss, step_accuracy)
                loss_records.append([step_loss, step_accuracy])

                # Cumulative classification metrics. A metric whose denominator
                # is still zero keeps its previous value (initially 0).
                total_seen = test_tp + test_fp + test_tn + test_fn
                if total_seen != 0:
                    acc = (test_tp + test_tn) / total_seen
                if (test_tp + test_fp) != 0:
                    pre = test_tp / (test_tp + test_fp)
                if (test_tp + test_fn) != 0:
                    recall = test_tp / (test_tp + test_fn)
                    mdr = test_fn / (test_tp + test_fn)
                if (test_tn + test_fp) != 0:
                    fpr = test_fp / (test_tn + test_fp)
                if pre and recall:
                    fs = 2 / ((1 / pre) + (1 / recall))

                episode_train_loss.append(train_loss / batch_count)
                episode_train_accuracy_Calculated.append(acc)
                episode_train_accuracy_model.append(train_accuracy / batch_count)
                # Detection rate equals recall; reusing the guarded value
                # avoids dividing by zero before any positive has been seen
                episode_detection_rate.append(recall)
                episode_reward.append(reward_total / batch_count)

                result_records.append([epoch, test_tp, test_tn, test_fp, test_fn, acc, pre, recall, fs, fpr, mdr])

                # Overhead of this training step as scalars, in overhead_columns order
                last_overhead = [
                    epoch, rcount, si, ri,
                    custom_callback.data['Memory RSS'][-1],
                    custom_callback.data['Training Time'][-1],
                ]
                epoch_overhead_records.append(last_overhead)

            # Keep the final training step of this receiver; receivers too
            # short to trigger any training step contribute no row.
            if last_overhead is not None:
                overall_overhead_records.append(last_overhead)

        all_overhead = pd.DataFrame(epoch_overhead_records, columns=overhead_columns)

        model.save(f"Training_output/Trained_{epoch+1}e_103L_001DF_Model.h5")  # Save the model to a file

        print("Epoch done=", epoch)
        per = epoch + 1
        print(round(100 * per / epochs, 1), '% done')

# Q values recorded on every exploitation step
Q_table = pd.DataFrame(q_value_records, columns=['State_ID', 'Q0', 'Q1'])
# Loss and accuracy of every individual fit call
all_loss_df = pd.DataFrame(loss_records, columns=['Each_Loss', 'Each_Accuracy'])
# Cumulative confusion matrix and derived metrics after every training step
all_results = pd.DataFrame(result_records, columns=result_columns)
# Last measured overhead per receiver and epoch
overall_overhead = pd.DataFrame(overall_overhead_records, columns=overhead_columns)



In [None]:
# Write the overhead log, the recorded Q values and the per-step metrics to CSV
overall_overhead.to_csv('Training_output/Training_103L_001DF_Overhead2.csv')
Q_table.to_csv('Training_output/Training_103L_001DF_QValueEach2.csv')
all_results.to_csv('Training_output/Training_103L_001DF_Results2.csv')

# Combine the running training histories into one table and save it
result_data = {
    'Loss': episode_train_loss,
    'Model_accuracy': episode_train_accuracy_model,
    'Calculated_accuracy': episode_train_accuracy_Calculated,
    'Detection_rate': episode_detection_rate,
    'Reward': episode_reward,
}
df_result = pd.DataFrame(result_data)
df_result.to_csv('Training_output/Training_103L_001DF_TrainingResults2.csv')

# Save the Q table: one CSV line per row of Q, no header line
csv_file_path = 'Training_output/Training_103L_001DF_QTable2.csv'
with open(csv_file_path, 'w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerows(Q)


In [None]:
# Write the Q table a second time, into the STD_Training folder
# (one CSV line per row of Q, no header line).
csv_file_path = 'STD_Training/STD_AllAttacksTT_103L_001DF_QTable2.csv'

# open() does not create missing folders, so make sure the target exists
os.makedirs(os.path.dirname(csv_file_path), exist_ok=True)

with open(csv_file_path, 'w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerows(Q)
