# Feature Extraction

We use this script to extract features from the raw dataset; the features are then fed into a neural network.
Note that all of the data is taken into consideration.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the five labelled attack datasets, keeping only the columns of interest.
# The files have no header row, so the selected columns are named explicitly.
col_index = [1, 2, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16]
col_names = [
    're_time', 're_ID', 're_x', 're_y',
    'tr_time', 'tr_ID', 'tr_x', 'tr_y', 'tr_vx', 'tr_vy',
    'RSSI', 'Label',
]

# Rows with any missing value are discarded from every dataset.
data = pd.read_csv('dataset/attack1withlabels.csv', header=None, names=col_names, usecols=col_index)
data_atk1 = data.dropna(how='any')

data = pd.read_csv('dataset/attack2withlabels.csv', header=None, names=col_names, usecols=col_index)
data_atk2 = data.dropna(how='any')

data = pd.read_csv('dataset/attack4withlabels.csv', header=None, names=col_names, usecols=col_index)
data_atk4 = data.dropna(how='any')

data = pd.read_csv('dataset/attack8withlabels.csv', header=None, names=col_names, usecols=col_index)
data_atk8 = data.dropna(how='any')

data = pd.read_csv('dataset/attack16withlabels.csv', header=None, names=col_names, usecols=col_index)
data_atk16 = data.dropna(how='any')

## Analysis of Attack 1
For attack 1, the reported position of the sender is always \[5560, 5820\]; i.e. the position parameter is constant.

In [3]:
# Sorted unique receiver and sender IDs that appear in the attack-1 data.
recevier_ID = np.unique(data_atk1['re_ID'].values)
sender_ID = np.unique(data_atk1['tr_ID'].values)

In [4]:
# Show one attack session: the reported sender location never changes,
# although the reported velocity is non-zero.
t = data_atk1[data_atk1['Label'] == 1]
t_sender_ID = np.unique(t['tr_ID'].values)
tt = t[t['tr_ID'] == t_sender_ID[0]]
t_recevier_ID = np.unique(tt['re_ID'].values)
tt[tt['re_ID'] == t_recevier_ID[0]]

Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label
382279,21601,10,3584.4,5811.5,21601,49,5560.0,5820.0,-2.0983,-31.609,4e-09,1
382297,21602,10,3584.3,5843.0,21602,49,5560.0,5820.0,-2.0985,-31.612,5.15e-08,1
382313,21603,10,3584.3,5874.4,21603,49,5560.0,5820.0,-1.2382,-31.693,5.9e-09,1
382327,21604,10,3585.2,5905.8,21604,49,5560.0,5820.0,-1.2328,-31.556,3.59e-07,1
382344,21605,10,3586.8,5937.1,21605,49,5560.0,5820.0,-0.018429,-31.663,2.65e-08,1
382362,21607,10,3591.4,5999.5,21607,49,5560.0,5820.0,-0.018451,-31.701,2.12e-08,1
382373,21608,10,3594.4,6030.6,21608,49,5560.0,5820.0,-0.018436,-31.675,1.41e-09,1
382383,21609,10,3597.7,6061.6,21609,49,5560.0,5820.0,1.3579,-31.592,5.29e-09,1


## Analysis of Attack 2 
This attack is hard to detect because the attackers' messages look normal.

In [5]:
# Check, for every sender and for every sender/receiver session, whether the
# messages are all attack, all normal, or a mix of both.
def attack_check(data):
    """Report senders and sessions whose messages carry more than one label.

    The sender IDs are taken from ``data`` itself, so the function gives the
    right answer for whichever dataset is passed in and does not depend on
    any module-level variable.

    Parameters
    ----------
    data : pandas.DataFrame
        Messages with at least the columns ``tr_ID`` (sender), ``re_ID``
        (receiver) and ``Label``.

    Returns
    -------
    label : list of numpy.ndarray
        One entry per (sender, receiver) session, in sorted sender then
        sorted receiver order, holding the sorted unique labels of that
        session.
    label_sender : list of numpy.ndarray
        One entry per sender, in sorted order, holding the sorted unique
        labels of all messages broadcast by that sender.
    """
    count = 0
    label = []
    label_sender = []
    for sender in np.unique(data['tr_ID'].values):
        sent = data[data['tr_ID'] == sender]

        sender_labels = np.unique(sent['Label'].values)
        if len(sender_labels) > 1:
            print('Notice that the sender', sender, 'broadcast both attack and normal messages')
        label_sender.append(sender_labels)

        receivers = np.unique(sent['re_ID'].values)
        count += len(receivers)
        for receiver in receivers:
            session_labels = np.unique(sent.loc[sent['re_ID'] == receiver, 'Label'].values)
            if len(session_labels) > 1:
                print('Notice that the communication between the sender', sender, 'and the receiver',
                      receiver, 'has both attack and normal messages')
            label.append(session_labels)
    print('There are', count, 'sessions of communication between all sender and all receiver')
    return label, label_sender

In [6]:
# Show one attack session (Label == 2) between the first attacking sender
# and its first receiver.
t = data_atk2[data_atk2['Label'] == 2]
t_sender_ID = np.unique(t['tr_ID'].values)
tt = t[t['tr_ID'] == t_sender_ID[0]]
t_recevier_ID = np.unique(tt['re_ID'].values)
tt[tt['re_ID'] == t_recevier_ID[0]]

Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label
378811,21601,10,3584.4,5811.5,21601,49,3846.8,5836.8,-2.0983,-31.609,4.0025e-09,2
378829,21602,10,3584.3,5843.0,21602,49,3844.7,5805.2,-2.0985,-31.612,5.1538e-08,2
378845,21603,10,3584.3,5874.4,21603,49,3842.8,5773.5,-1.2382,-31.693,5.8982e-09,2
378859,21604,10,3585.2,5905.8,21604,49,3841.5,5741.8,-1.2328,-31.556,3.5854e-07,2
378876,21605,10,3586.8,5937.1,21605,49,3841.2,5710.1,-0.018429,-31.663,2.65e-08,2
378894,21607,10,3591.4,5999.5,21607,49,3841.2,5646.7,-0.018451,-31.701,2.1196e-08,2
378905,21608,10,3594.4,6030.6,21608,49,3841.2,5615.0,-0.018436,-31.675,1.4116e-09,2
378915,21609,10,3597.7,6061.6,21609,49,3841.7,5583.3,1.3579,-31.592,5.2855e-09,2


## Analysis of Attack 3 (`Label == 4`)

In [7]:
# Show one attack session (Label == 4) between the first attacking sender
# and its first receiver.
t = data_atk4[data_atk4['Label'] == 4]
t_sender_ID = np.unique(t['tr_ID'].values)
tt = t[t['tr_ID'] == t_sender_ID[0]]
t_recevier_ID = np.unique(tt['re_ID'].values)
tt[tt['re_ID'] == t_recevier_ID[0]]

Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label
372518,21601,10,3584.4,5811.5,21601,49,19610.0,15902.0,-2.0983,-31.609,4.7607e-09,4
372535,21602,10,3584.3,5843.0,21602,49,967.35,20525.0,-2.0985,-31.612,4.3863e-08,4
372564,21604,10,3585.2,5905.8,21604,49,1892.3,16564.0,-1.2328,-31.556,3.5402e-07,4
372581,21605,10,3586.8,5937.1,21605,49,9285.9,7930.7,-0.018429,-31.663,2.6421e-08,4
372600,21607,10,3591.4,5999.5,21607,49,2521.2,14296.0,-0.018451,-31.701,2.1048e-08,4
372611,21608,10,3594.4,6030.6,21608,49,18771.0,7168.0,-0.018436,-31.675,1.4334e-09,4
372621,21609,10,3597.7,6061.6,21609,49,14488.0,1701.4,1.3579,-31.592,4.5266e-09,4


## Analysis of Attack 4 (`Label == 8`)

In [8]:
# Show one attack session (Label == 8) between the first attacking sender
# and its first receiver.
t = data_atk8[data_atk8['Label'] == 8]
t_sender_ID = np.unique(t['tr_ID'].values)
tt = t[t['tr_ID'] == t_sender_ID[0]]
t_recevier_ID = np.unique(tt['re_ID'].values)
tt[tt['re_ID'] == t_recevier_ID[0]]

Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label
376868,21601,10,3584.4,5811.5,21601,49,3837.8,5704.2,-2.0983,-31.609,5.4779e-09,8
376886,21602,10,3584.3,5843.0,21602,49,3443.7,5916.5,-2.0985,-31.612,4.8415e-08,8
376901,21603,10,3584.3,5874.4,21603,49,3537.1,5876.8,-1.2382,-31.693,5.9954e-09,8
376913,21604,10,3585.2,5905.8,21604,49,3579.7,6034.2,-1.2328,-31.556,3.7467e-07,8
376930,21605,10,3586.8,5937.1,21605,49,3742.2,5808.9,-0.018429,-31.663,2.5045e-08,8
376949,21607,10,3591.4,5999.5,21607,49,3699.3,5850.9,-0.018451,-31.701,2.0345e-08,8
376960,21608,10,3594.4,6030.6,21608,49,3335.5,6004.7,-0.018436,-31.675,1.6087e-09,8
376970,21609,10,3597.7,6061.6,21609,49,3726.6,5726.4,1.3579,-31.592,5.0486e-09,8


## Analysis of Attack 5 (`Label == 16`)

In [9]:
# Show one attack session (Label == 16) between the first attacking sender
# and its first receiver.
t = data_atk16[data_atk16['Label'] == 16]
t_sender_ID = np.unique(t['tr_ID'].values)
tt = t[t['tr_ID'] == t_sender_ID[0]]
t_recevier_ID = np.unique(tt['re_ID'].values)
tt[tt['re_ID'] == t_recevier_ID[0]]

Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label
374977,21601,10,3584.4,5811.5,21601,49,3596.8,5986.8,-2.0983,-31.609,4.6176e-09,16
374995,21602,10,3584.3,5843.0,21602,49,3594.7,5955.2,-2.0985,-31.612,4.9973e-08,16
375010,21603,10,3584.3,5874.4,21603,49,3592.8,5923.5,-1.2382,-31.693,6.2617e-09,16
375023,21604,10,3585.2,5905.8,21604,49,3592.8,5923.5,-1.2328,-31.556,3.5969e-07,16
375040,21605,10,3586.8,5937.1,21605,49,3592.8,5923.5,-0.018429,-31.663,2.6213e-08,16
375059,21607,10,3591.4,5999.5,21607,49,3592.8,5923.5,-0.018451,-31.701,1.9388e-08,16
375070,21608,10,3594.4,6030.6,21608,49,3592.8,5923.5,-0.018436,-31.675,1.6214e-09,16
375080,21609,10,3597.7,6061.6,21609,49,3592.8,5923.5,1.3579,-31.592,5.0201e-09,16


## Feature 1: Location Plausibility Check 
Feature 1 indicates the reliability of the sender's reported location. Intuitively, the location should be consistent with the motion. We take the previous location and its velocity, predict the current location, compare the real location with the predicted one, and obtain a location-plausibility score.

The score is in the range [0, 4]: the x and y directions each contribute a value in [0, 2]. If the real location lies within the 95% confidence range of the predicted location, that direction scores 0; between the 95% and 99% ranges it scores 1; outside the 99% range it scores 2.

That is, the lower the score, the more plausible the reported location.

In [10]:
def location_plausibility(receiver_of_sender):
    """Return the mean location-plausibility score of one session.

    For every pair of consecutive messages the next reported position is
    compared, separately in x and y, with a band predicted from the previous
    position and velocity.  A direction scores 0 when the new position is
    strictly inside the 95% band, 1 when it is outside the 95% band but
    inside the 99% band, and 2 when it is outside the 99% band.  The score
    of a message is the x score plus the y score (0..4).

    The first message of a session has no predecessor and is always scored 2.

    Parameters
    ----------
    receiver_of_sender : pandas.DataFrame
        Messages of a single sender/receiver session, in chronological
        order, with the columns ``re_time``, ``tr_x``, ``tr_y``, ``tr_vx``
        and ``tr_vy``.

    Returns
    -------
    float
        Mean score over all messages of the session.  Sessions with at most
        one message yield 2.0 (a float, like every other session, rather
        than the one-element list returned previously).
    """
    # Band half-widths per direction.  Earlier, tighter values were
    # x_95 = [-5.6983, 5.2265], x_99 = [-7.1795, 7.7077],
    # y_95 = [-8.1203, 8.0501], y_99 = [-12.1629, 12.0927].
    x_95 = [-10, 10]
    x_99 = [-18, 18]
    y_95 = [-10, 10]
    y_99 = [-18, 18]

    times = receiver_of_sender['re_time'].values.astype(float)
    time_interval = np.diff(times)  # elapsed time between consecutive messages

    def axis_score(position, velocity, band_95, band_99):
        # Score one direction for every message except the first.
        previous = position[:-1]
        current = position[1:]
        speed = velocity[:-1]

        def outside(band):
            # NOTE(review): the 0.1 factor comes from the original
            # implementation; its rationale is not documented.
            lower = previous + time_interval * (speed + band[0] * time_interval * 0.1)
            upper = previous + time_interval * (speed + band[1] * time_interval * 0.1)
            return (current <= lower) | (current >= upper)

        return np.where(outside(band_99), 2, np.where(outside(band_95), 1, 0))

    x_score = axis_score(receiver_of_sender['tr_x'].values.astype(float),
                         receiver_of_sender['tr_vx'].values.astype(float),
                         x_95, x_99)
    # The upper 99% bound in y now really uses y_99 (it used y_95 before,
    # which collapsed the 95-99% band on the upper side).
    y_score = axis_score(receiver_of_sender['tr_y'].values.astype(float),
                         receiver_of_sender['tr_vy'].values.astype(float),
                         y_95, y_99)

    # for the start of the series, we think it is two.
    score = np.concatenate(([2], x_score + y_score))
    return np.mean(score)

## Feature 2: Movement plausibility check


In [11]:
def movement_plausibility(receiver_of_sender):
    """Flag a session whose sender claims to move but never changes position.

    Returns 1.0 when the reported position of the first and the last message
    is identical while the mean reported velocity is non-zero in x or y, and
    0.0 otherwise.

    NOTE: a session with at most one message cannot be judged; it gets a
    random integer flag (0 or 1), so the output is not reproducible unless
    numpy's random generator is seeded.
    """
    if receiver_of_sender.shape[0] <= 1:
        return np.random.randint(2)

    first = receiver_of_sender.iloc[0]
    last = receiver_of_sender.iloc[-1]
    dx = last['tr_x'] - first['tr_x']
    dy = last['tr_y'] - first['tr_y']

    mean_vx = np.average(receiver_of_sender['tr_vx'].values)
    mean_vy = np.average(receiver_of_sender['tr_vy'].values)

    stationary = dx == 0 and dy == 0
    claims_motion = mean_vx != 0 or mean_vy != 0
    if stationary and claims_motion:
        return 1.
    return 0.

In [13]:
def quantititative_information(receiver_of_sender):
    """Quantify the mismatch between reported positions and velocities.

    Two estimates of the sender's average velocity over the session are
    compared: one from the displacement between the first and last reported
    position, and one from the reported velocities weighted by the time
    until the next message.

    Parameters
    ----------
    receiver_of_sender : pandas.DataFrame
        Messages of a single sender/receiver session, in chronological
        order, with the columns ``re_time``, ``tr_x``, ``tr_y``, ``tr_vx``
        and ``tr_vy``.

    Returns
    -------
    tuple of four floats
        ``(feature3, feature4, feature5, feature6)``:
        absolute difference of the two average velocities in x, the same in
        y, the Euclidean norm of those two differences, and the absolute
        difference between the displacement length and the length of the
        velocity-integrated path.  Sessions with fewer than two messages
        yield four zeros (scalars, like every other session).

    Notes
    -----
    If the first and last message share the same ``re_time`` the division
    by the elapsed time produces inf/NaN, as in the original implementation.
    """
    length = receiver_of_sender.shape[0]

    # Nothing can be measured from zero or one message.
    if length <= 1:
        return 0.0, 0.0, 0.0, 0.0

    times = receiver_of_sender['re_time'].values.astype(float)
    pos_x = receiver_of_sender['tr_x'].values.astype(float)
    pos_y = receiver_of_sender['tr_y'].values.astype(float)
    vel_x = receiver_of_sender['tr_vx'].values.astype(float)
    vel_y = receiver_of_sender['tr_vy'].values.astype(float)

    # Average velocity implied by the overall displacement.
    time_interval = times[-1] - times[0]
    v_bar_dist_x = (pos_x[-1] - pos_x[0]) / time_interval
    v_bar_dist_y = (pos_y[-1] - pos_y[0]) / time_interval

    # Path implied by the reported velocities: each velocity is held until
    # the next message arrives (the last message's velocity is unused).
    delta_t = np.diff(times)
    v_bar_velocity_all_x = np.sum(vel_x[:-1] * delta_t)
    v_bar_velocity_all_y = np.sum(vel_y[:-1] * delta_t)
    v_bar_velo_x = v_bar_velocity_all_x / time_interval
    v_bar_velo_y = v_bar_velocity_all_y / time_interval

    v_measure_x = np.abs(v_bar_dist_x - v_bar_velo_x)
    v_measure_y = np.abs(v_bar_dist_y - v_bar_velo_y)
    v_mag = np.linalg.norm([v_measure_x, v_measure_y])

    v_total = np.abs(np.linalg.norm([v_bar_dist_x * time_interval, v_bar_dist_y * time_interval])
                     - np.linalg.norm([v_bar_velocity_all_x, v_bar_velocity_all_y]))

    return v_measure_x, v_measure_y, v_mag, v_total


In [12]:
def distance_check(receiver_of_sender, threshold):
    """Return the fraction of messages sent from at least ``threshold`` away.

    Parameters
    ----------
    receiver_of_sender : pandas.DataFrame
        Messages of a single sender/receiver session with the columns
        ``tr_x``, ``tr_y`` (reported sender position) and ``re_x``, ``re_y``
        (receiver position).
    threshold : float
        Distance at or beyond which a message is counted.  The value passed
        by the caller is now honoured (it used to be ignored in favour of a
        hard-coded 800).

    Returns
    -------
    float
        Share of messages, in [0, 1], whose sender/receiver Euclidean
        distance is ``>= threshold``; 0.0 for a session without messages.
    """
    if receiver_of_sender.shape[0] == 0:
        # Avoid the NaN (and RuntimeWarning) of a mean over nothing.
        return 0.0

    dx = (receiver_of_sender['tr_x'] - receiver_of_sender['re_x']).values.astype(float)
    dy = (receiver_of_sender['tr_y'] - receiver_of_sender['re_y']).values.astype(float)
    distance = np.sqrt(dx * dx + dy * dy)
    return np.mean(distance >= threshold)

In [14]:
# Sanity check: compute all seven features for a single Label == 16 session
# (first attacking sender, first receiver) and print them.
t = data_atk16[data_atk16['Label'] == 16]
t_sender_ID = np.unique(t['tr_ID'].values)
tt = t[t['tr_ID'] == t_sender_ID[0]]
t_recevier_ID = np.unique(tt['re_ID'].values)
ttt = tt[tt['re_ID'] == t_recevier_ID[0]]
a = location_plausibility(ttt)
b = movement_plausibility(ttt)
c, d, e, f = quantititative_information(ttt)
g = distance_check(ttt, 800)
print(a, b, c, d, e, f, g)

1.75 0.0 0.34269312499999993 23.733999999999977 23.736473928912044 189.83548571012344 0.0


In [15]:
def add_feature_vectors(data):
    """Build one feature row per sender/receiver session.

    The messages are grouped by sender (``tr_ID``) and receiver (``re_ID``).
    For every session the seven features are computed from all of its
    messages and attached, as columns ``feature_1`` .. ``feature_7``, to a
    copy of the session's first message.

    Parameters
    ----------
    data : pandas.DataFrame
        Messages with the columns used by the feature functions plus
        ``tr_ID`` and ``re_ID``.

    Returns
    -------
    pandas.DataFrame
        One row per session, ordered by sender ID and then receiver ID,
        keeping the original index label of each session's first message.
        An input without rows yields an empty frame that already has the
        seven feature columns.
    """
    feature_names = ['feature_%d' % k for k in range(1, 8)]
    session_rows = []

    # sort=True reproduces the sorted sender / sorted receiver iteration order.
    for _, session in data.groupby(['tr_ID', 're_ID'], sort=True):
        feature_1 = location_plausibility(session)
        feature_2 = movement_plausibility(session)
        feature_3, feature_4, feature_5, feature_6 = quantititative_information(session)
        feature_7 = distance_check(session, 800)
        values = [feature_1, feature_2, feature_3, feature_4,
                  feature_5, feature_6, feature_7]

        # Work on an explicit copy so the new columns are not written to a
        # view of ``data`` (avoids pandas' SettingWithCopyWarning).
        row = session.head(1).copy()
        for column, value in zip(feature_names, values):
            row[column] = value
        session_rows.append(row)

    if not session_rows:
        empty = data.head(0).copy()
        for column in feature_names:
            empty[column] = pd.Series(dtype=float)
        return empty

    # Concatenate once instead of growing the frame inside the loop.
    return pd.concat(session_rows)

In [16]:
#export dataframe to a csv file
import os
def put_csv(data, attack_type, outdir='./dataset'):
    """Compute the feature vectors of ``data`` and write them to a CSV file.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw messages of one attack dataset; passed to ``add_feature_vectors``.
    attack_type : int or str
        Attack identifier used in the file name
        ``attack<attack_type>with7FeatureVector.csv``.
    outdir : str, optional
        Directory the file is written to; created if it does not exist.
        Defaults to ``./dataset``, the location used previously.
    """
    data_with_features = add_feature_vectors(data)
    outname = 'attack' + str(attack_type) + 'with7FeatureVector.csv'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(outdir, exist_ok=True)
    fullname = os.path.join(outdir, outname)
    data_with_features.to_csv(fullname, encoding='utf-8', index=False)

In [17]:
# Extract the feature vectors of the attack-1 dataset and write them to CSV.
put_csv(data_atk1, 1)

In [18]:
# Extract the feature vectors of the attack-2 dataset and write them to CSV.
put_csv(data_atk2, 2)

In [19]:
# Extract the feature vectors of the attack-4 dataset and write them to CSV.
put_csv(data_atk4, 4)

In [20]:
# Extract the feature vectors of the attack-8 dataset and write them to CSV.
put_csv(data_atk8, 8)

In [21]:
# Extract the feature vectors of the attack-16 dataset and write them to CSV.
put_csv(data_atk16, 16)