# Feature Extraction

We use this module to extract the features we used to feed into NN. Remember, take all the data into consideration.

In [277]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [278]:
# Importing the data
col_index = [1,2,3,4,6,7,9,10,12,13,15,16]
col_names = ['re_time','re_ID','re_x','re_y','tr_time','tr_ID','tr_x','tr_y','tr_vx','tr_vy','RSSI','Label']
data = pd.read_csv('dataset/attack1withlabels.csv', usecols = col_index, header = None, names = col_names)
data_atk1 = data.dropna(axis = 0, how = 'any')
data_atk1 = data_atk1.head(100000)

In [279]:
# Extract the sender ID and receiver ID and get their information. 
recevier_ID = np.unique(np.array(data_atk1.iloc[:,1]))
sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))
sender_ID

array([   7,   13,   19,   25,   31,   37,   43,   49,   55,   61,   67,
         73,   79,   85,   91,   97,  103,  115,  121,  127,  133,  139,
        145,  151,  157,  163,  169,  175,  181,  187,  193,  199,  205,
        211,  217,  223,  229,  235,  241,  247,  253,  259,  265,  271,
        277,  283,  289,  295,  301,  307,  313,  319,  325,  331,  337,
        343,  349,  355,  361,  367,  373,  379,  385,  391,  397,  403,
        409,  415,  421,  433,  439,  445,  451,  457,  463,  469,  475,
        481,  487,  493,  499,  505,  511,  523,  529,  535,  541,  547,
        553,  559,  565,  571,  577,  583,  589,  595,  601,  607,  613,
        619,  625,  631,  637,  643,  649,  655,  661,  667,  673,  679,
        685,  691,  697,  703,  709,  715,  721,  727,  733,  739,  745,
        751,  757,  763,  769,  775,  781,  787,  793,  799,  805,  811,
        817,  823,  835,  841,  847,  853,  859,  865,  871,  877,  883,
        889,  895,  907,  913,  919,  925,  931,  9

## Feature 1: Location Plausibility Check 
Give a confidential score about the realibility of location. The scoure is in the range  [0, 4], where for x and y direction range in [0,2] individually.
1. for each unique transmitter ID, create its metrics.

In [280]:
def location_plausibility(receiver_of_sender):
    x_95 = [-4.6983, 5.2265]
    x_99 = [-7.1795, 7.7077]
    y_95 = [-8.1203, 8.0501]
    y_99 = [-12.1629, 12.0927]
    score = []
    length = receiver_of_sender.shape[0]
    score.append(0)
    if length <=1:
        return score
    for k in range(length-1):
            time_interval = (receiver_of_sender.iloc[k+1]['re_time'] - receiver_of_sender.iloc[k]['re_time'])

            x_pre_95_low = receiver_of_sender.iloc[k]['tr_x'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vx'] +  x_95 [0]*time_interval*0.1)
            x_pre_95_up  = receiver_of_sender.iloc[k]['tr_x'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vx'] +  x_95 [1]*time_interval*0.1)
            x_pre_99_low = receiver_of_sender.iloc[k]['tr_x'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vx'] +  x_99 [0]*time_interval*0.1)
            x_pre_99_up  = receiver_of_sender.iloc[k]['tr_x'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vx'] +  x_99 [1]*time_interval*0.1)

            y_pre_95_low = receiver_of_sender.iloc[k]['tr_y'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vy'] +  y_95 [0]*time_interval*0.1)
            y_pre_95_up  = receiver_of_sender.iloc[k]['tr_y'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vy'] +  y_95 [1]*time_interval*0.1)
            y_pre_99_low = receiver_of_sender.iloc[k]['tr_y'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vy'] +  y_99 [0]*time_interval*0.1)
            y_pre_99_up  = receiver_of_sender.iloc[k]['tr_y'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vy'] +  y_95 [1]*time_interval*0.1)

            t_x = 0
            t_y = 0
            #print(receiver_of_sender.iloc[k+1]['tr_x'])
            if receiver_of_sender.iloc[k+1]['tr_x']<=x_pre_95_low or receiver_of_sender.iloc[k+1]['tr_x'] >= x_pre_95_up:
                t_x = 1

            if receiver_of_sender.iloc[k+1]['tr_x']<=x_pre_99_low or receiver_of_sender.iloc[k+1]['tr_x'] >= x_pre_99_up:
                t_x = 2

            if receiver_of_sender.iloc[k+1]['tr_y']<=y_pre_95_low or receiver_of_sender.iloc[k+1]['tr_y'] >= y_pre_95_up:
                t_y = 1

            if receiver_of_sender.iloc[k+1]['tr_y']<=y_pre_99_low or receiver_of_sender.iloc[k+1]['tr_y'] >= y_pre_99_up:
                t_y = 2  

            score.append(t_x+t_y)
    return score
def add_feature_1(data_atk1):
    sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))
    number_id_s = len(sender_ID)

    for i in range(number_id_s):
        b = data_atk1.loc[(data_atk1['tr_ID']==sender_ID[i])]
        feature_1 = location_plausibility(b)
        if(i==0):
            data_atk1["feature_1"] = pd.Series(feature_1, index=b.index)
        else:
            data_atk1["feature_1"].update(pd.Series(feature_1, index=b.index))
    return data_atk1
        

## Feature 2: Movement plausibility check


This feature is a plausibility check for constant location

In [281]:
def movement_plausibility(receiver_of_sender):
    score = []
    flag  = 0
    length = receiver_of_sender.shape[0]
    score.append(1)
    if length <=1:
        return score
    x_placement = receiver_of_sender.iloc[-1]['tr_x'] - receiver_of_sender.iloc[0]['tr_x']
    y_placement = receiver_of_sender.iloc[-1]['tr_y'] - receiver_of_sender.iloc[0]['tr_y']
    #time_duration = (receiver_of_sender.iloc[-1]['re_time'] - receiver_of_sender.iloc[0]['re_time']) * 0.1
    #average_velocity_x = x_placement/time_duration
    #average_velocity_y = y_placement/time_duration
    average_velocity_x = np.average(receiver_of_sender['tr_vx'].values)
    average_velocity_y = np.average(receiver_of_sender['tr_vy'].values)
    if(x_placement==0 and y_placement==0 and average_velocity_x!=0 and average_velocity_y!=0):
        flag = 1
    else:
        flag = 0
    for i in range(length -1):
        score.append(flag)
    return score

def add_feature_2(data_atk1):
    sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))
    s_number_id = len(sender_ID)
    for i in range(s_number_id):
        b = data_atk1.loc[(data_atk1['tr_ID']==sender_ID[i])]
        feature_2 = movement_plausibility(b)
        if(i==0):
            data_atk1["feature_2"] = pd.Series(feature_2, index=b.index)
        else:
            data_atk1["feature_2"].update(pd.Series(feature_2, index=b.index))
    return data_atk1

In [282]:
#data_atk1 = data_atk1[data_atk1['tr_ID']==2761]
#np.average(data_atk1['tr_vx'].values)

In [283]:
#b = data_atk1.loc[(data_atk1['tr_ID']==2761)]
#feature_2 = movement_plausibility(b)


## Feature 3,4,5,6: Quantititative Information: 

Feature 3,4,5,6 are not plausibilitu checks, in effect, they are numerical features describing the behavior of the vehicle.

In [284]:
#function : feature of v_bar_distance of x
def quantititative_information_3(receiver_of_sender):
    v_bar_dist_list =[]
    length = receiver_of_sender.shape[0]
    if length <=1:
        return v_bar_dist_list.append(receiver_of_sender.iloc[0]['tr_x'])
    time_interval = receiver_of_sender.iloc[-1]['re_time'] - receiver_of_sender.iloc[0]['re_time']
    v_bar_dist = (receiver_of_sender.iloc[-1]['tr_x'] - receiver_of_sender.iloc[0]['tr_x'])/(time_interval*0.1)
    
    for k in range(length):
        v_bar_dist_list.append(v_bar_dist)    
    
    return v_bar_dist_list
def add_feature_3(data_atk1):
    sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))
    s_number_id = len(sender_ID)
    for i in range(s_number_id):
        #print(sender_ID[i])
        #if(sender_ID[i]==97): break
        b = data_atk1.loc[(data_atk1['tr_ID']==sender_ID[i])]
        feature_3 = quantititative_information_3(b)
        if(i==0):
            data_atk1["feature_3"] = pd.Series(feature_3, index=b.index)
        else:
            data_atk1["feature_3"].update(pd.Series(feature_3, index=b.index))
    return data_atk1
##################################################################################################################
#function : feature of  v_bar_distance of y
def quantititative_information_4(receiver_of_sender):
    v_bar_dist_list =[]
    length = receiver_of_sender.shape[0]
    if length <=1:
        return v_bar_dist_list.append(receiver_of_sender.iloc[0]['tr_x'])
    time_interval = receiver_of_sender.iloc[-1]['re_time'] - receiver_of_sender.iloc[0]['re_time']
    v_bar_dist = (receiver_of_sender.iloc[-1]['tr_y'] - receiver_of_sender.iloc[0]['tr_y'])/(time_interval*0.1)
    for k in range(length):
        v_bar_dist_list.append(v_bar_dist)    
    
    return v_bar_dist_list
def add_feature_4(data_atk1):
    sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))
    s_number_id = len(sender_ID)
    for i in range(s_number_id):
        #print(sender_ID[i])
        #if(sender_ID[i]==97): break
        b = data_atk1.loc[(data_atk1['tr_ID']==sender_ID[i])]
        feature_4 = quantititative_information_4(b)
        if(i==0):
            data_atk1["feature_4"] = pd.Series(feature_4, index=b.index)
        else:
            data_atk1["feature_4"].update(pd.Series(feature_4, index=b.index))
    return data_atk1


In [285]:
#function : feature of v_bar_veclocity of x
def quantititative_information_5(receiver_of_sender):
    v_bar_veclocity_list =[]
    length = receiver_of_sender.shape[0]
    #v_bar_veclocity_list.append(recevier_of_sender.iloc[0]['tr_vx'])
    if length <=1:
        return v_bar_veclocity_list.append(receiver_of_sender.iloc[0]['tr_vx'])
    vt_list = []
    total_time = receiver_of_sender.iloc[-1]['re_time'] - receiver_of_sender.iloc[0]['re_time']
    for k in range(length-1):    
        time_interval = receiver_of_sender.iloc[k+1]['re_time'] - receiver_of_sender.iloc[k]['re_time']
        v_j           = receiver_of_sender.iloc[k]['tr_vx']
        vt            = v_j * time_interval
        vt_list.append(vt)
    v_bar = sum(vt_list)/total_time
    for k in range(length):
        v_bar_veclocity_list.append(v_bar)
   
    return v_bar_veclocity_list
def add_feature_5(data_atk1):
    sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))
    s_number_id = len(sender_ID)
    for i in range(s_number_id):
        #print("sender ",sender_ID[i])
        b = data_atk1.loc[(data_atk1['tr_ID']==sender_ID[i])]
        recevier_ID = np.unique(np.array(b.iloc[:,1]))
        for j in range(len(recevier_ID)):
            #print("receiver",recevier_ID[j])
            c = b.loc[(b['re_ID']==recevier_ID[j])]
            feature_5 = quantititative_information_5(c)
            if(i==0):
                data_atk1["feature_5"] = pd.Series(feature_5, index=c.index)
            else:
                data_atk1["feature_5"].update(pd.Series(feature_5, index=c.index))
    return data_atk1
###################################################################################################################
#function : feature of v_bar_veclocity of y   
def quantititative_information_6(receiver_of_sender):
    v_bar_veclocity_list =[]
    length = receiver_of_sender.shape[0]
    #v_bar_veclocity_list.append(recevier_of_sender.iloc[0]['tr_vx'])
    if length <=1:
        return v_bar_veclocity_list.append(receiver_of_sender.iloc[0]['tr_vy'])
    vt_list = []
    #print('--------------------')
    total_time = receiver_of_sender.iloc[-1]['re_time'] - receiver_of_sender.iloc[0]['re_time']
    for k in range(length-1):    
        time_interval = receiver_of_sender.iloc[k+1]['re_time'] - receiver_of_sender.iloc[k]['re_time']
        v_j           = receiver_of_sender.iloc[k]['tr_vy']
        vt            = v_j * time_interval
        vt_list.append(vt)
    v_bar = sum(vt_list)/total_time
    for k in range(length):
        v_bar_veclocity_list.append(v_bar)
    #print("----------------",v_bar_veclocity_list)
    return v_bar_veclocity_list
def add_feature_6(data_atk1):
    sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))
    s_number_id = len(sender_ID)
    

    for i in range(s_number_id):
        b = data_atk1.loc[(data_atk1['tr_ID']==sender_ID[i])]
        recevier_ID = np.unique(np.array(b.iloc[:,1]))
        #print("sender \n ",sender_ID[i])
        #print("b \n",b)
        for j in range(len(recevier_ID)):
            #print("receiver \n",recevier_ID[j])
            c = b.loc[(b['re_ID']==recevier_ID[j])]
            feature_6 = quantititative_information_6(c)
            if(i==0):
                data_atk1["feature_6"] = pd.Series(feature_6, index=c.index)
            else:
                data_atk1["feature_6"].update(pd.Series(feature_6, index=c.index))
            #print("c",c)    
    return data_atk1

In [286]:
#print("sender id \n",sender_ID)
#print("receiver id \n",recevier_ID)
add_feature_6(data_atk1)

Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label,feature_6
0,21606,562,5607.3,5965.1,21606,253,5367.5,5930.1,1.022900,1.994000,1.420000e-08,0,
1,21608,562,5594.2,5973.5,21608,547,5373.5,5934.6,-6.183700,-12.053000,3.120000e-09,0,-12.053000
2,21609,562,5583.8,5980.2,21609,547,5368.1,5924.2,-4.662800,-9.088500,2.260000e-09,0,-12.053000
3,21893,3166,3642.0,5183.0,21893,2761,5560.0,5820.0,-13.608000,-2.843200,3.790000e-09,1,-2.843200
4,21893,3166,3642.0,5183.0,21893,2929,3781.6,5256.0,-11.941000,-3.984800,3.150000e-09,0,-3.479507
6,21893,3166,3642.0,5183.0,21893,2791,4168.2,5286.1,-12.956000,-2.708300,2.940000e-09,0,
7,21893,3166,3642.0,5183.0,21893,3073,3609.1,5420.4,-4.221500,36.586000,2.070000e-08,0,36.718412
8,21893,3166,3641.4,5186.5,21893,1951,3485.7,5192.2,-0.424860,-0.083085,3.580000e-09,0,-0.010386
9,21893,3166,3640.9,5190.1,21893,3097,5560.0,5820.0,-0.023292,-40.018000,1.580000e-09,1,-39.854438
10,21893,3166,3640.9,5190.1,21893,3043,3596.1,5514.8,-3.297800,40.256000,1.240000e-08,0,40.243286


## Feature 7: Distance between transmitter and receiver

This feature show the distance between tranitter and receiver, in our case, we set a threshold which equals 800 meters

In [287]:
#add a distance feature 7


def distance_7(receiver_of_sender):
    distance_list = []
    length = receiver_of_sender.shape[0]
    #print("length = ",length)
    for i in range(length):    
        distance = np.linalg.norm([receiver_of_sender.iloc[i]['re_x']-receiver_of_sender.iloc[i]['tr_x'],receiver_of_sender.iloc[i]['re_y']-receiver_of_sender.iloc[i]['tr_y']])
    
        if (distance >800):
            distance_list.append(1)
        else:
            distance_list.append(0)

        #print(distance)
        #distance_list.append(distance)
    return distance_list

def add_feature_7(data_atk1):
    sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))
    s_number_id = len(sender_ID)
    for i in range(s_number_id):
        b = data_atk1.loc[(data_atk1['tr_ID']==sender_ID[i])]
        feature_7 = distance_7(b)
        if(i==0):
            data_atk1["feature_7"] = pd.Series(feature_7, index=b.index)
        else:
            data_atk1["feature_7"].update(pd.Series(feature_7, index=b.index))
    return data_atk1

In [288]:
add_feature_1(data_atk1)
add_feature_2(data_atk1)
add_feature_3(data_atk1)
add_feature_4(data_atk1)


  


Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label,feature_6,feature_1,feature_2,feature_3,feature_4
0,21606,562,5607.3,5965.1,21606,253,5367.5,5930.1,1.022900,1.994000,1.420000e-08,0,,0.0,1.0,8.666667,53.666667
1,21608,562,5594.2,5973.5,21608,547,5373.5,5934.6,-6.183700,-12.053000,3.120000e-09,0,-12.053000,0.0,1.0,-40.571429,-39.428571
2,21609,562,5583.8,5980.2,21609,547,5368.1,5924.2,-4.662800,-9.088500,2.260000e-09,0,-12.053000,4.0,0.0,-40.571429,-39.428571
3,21893,3166,3642.0,5183.0,21893,2761,5560.0,5820.0,-13.608000,-2.843200,3.790000e-09,1,-2.843200,0.0,1.0,-0.000000,-0.000000
4,21893,3166,3642.0,5183.0,21893,2929,3781.6,5256.0,-11.941000,-3.984800,3.150000e-09,0,-3.479507,0.0,1.0,-94.000000,-22.343750
6,21893,3166,3642.0,5183.0,21893,2791,4168.2,5286.1,-12.956000,-2.708300,2.940000e-09,0,,0.0,1.0,-130.916667,-15.500000
7,21893,3166,3642.0,5183.0,21893,3073,3609.1,5420.4,-4.221500,36.586000,2.070000e-08,0,36.718412,0.0,1.0,-7.764706,367.176471
8,21893,3166,3641.4,5186.5,21893,1951,3485.7,5192.2,-0.424860,-0.083085,3.580000e-09,0,-0.010386,0.0,1.0,-99.479167,-11.750000
9,21893,3166,3640.9,5190.1,21893,3097,5560.0,5820.0,-0.023292,-40.018000,1.580000e-09,1,-39.854438,0.0,1.0,0.000000,0.000000
10,21893,3166,3640.9,5190.1,21893,3043,3596.1,5514.8,-3.297800,40.256000,1.240000e-08,0,40.243286,0.0,1.0,0.071429,402.928571


In [289]:
add_feature_5(data_atk1)
add_feature_6(data_atk1)
add_feature_7(data_atk1)

Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label,feature_6,feature_1,feature_2,feature_3,feature_4,feature_5,feature_7
0,21606,562,5607.3,5965.1,21606,253,5367.5,5930.1,1.022900,1.994000,1.420000e-08,0,,0.0,1.0,8.666667,53.666667,,0.0
1,21608,562,5594.2,5973.5,21608,547,5373.5,5934.6,-6.183700,-12.053000,3.120000e-09,0,-12.053000,0.0,1.0,-40.571429,-39.428571,-6.183700,0.0
2,21609,562,5583.8,5980.2,21609,547,5368.1,5924.2,-4.662800,-9.088500,2.260000e-09,0,-12.053000,4.0,0.0,-40.571429,-39.428571,-6.183700,0.0
3,21893,3166,3642.0,5183.0,21893,2761,5560.0,5820.0,-13.608000,-2.843200,3.790000e-09,1,-2.843200,0.0,1.0,-0.000000,-0.000000,-13.608000,1.0
4,21893,3166,3642.0,5183.0,21893,2929,3781.6,5256.0,-11.941000,-3.984800,3.150000e-09,0,-3.479507,0.0,1.0,-94.000000,-22.343750,-12.543067,0.0
6,21893,3166,3642.0,5183.0,21893,2791,4168.2,5286.1,-12.956000,-2.708300,2.940000e-09,0,,0.0,1.0,-130.916667,-15.500000,,0.0
7,21893,3166,3642.0,5183.0,21893,3073,3609.1,5420.4,-4.221500,36.586000,2.070000e-08,0,36.718412,0.0,1.0,-7.764706,367.176471,-1.073500,0.0
8,21893,3166,3641.4,5186.5,21893,1951,3485.7,5192.2,-0.424860,-0.083085,3.580000e-09,0,-0.010386,0.0,1.0,-99.479167,-11.750000,-0.053108,0.0
9,21893,3166,3640.9,5190.1,21893,3097,5560.0,5820.0,-0.023292,-40.018000,1.580000e-09,1,-39.854438,0.0,1.0,0.000000,0.000000,2.841806,1.0
10,21893,3166,3640.9,5190.1,21893,3043,3596.1,5514.8,-3.297800,40.256000,1.240000e-08,0,40.243286,0.0,1.0,0.071429,402.928571,-0.429768,0.0


In [290]:
#export dataframe to a csv file
import os

outname = './attack1withlabels_7feature.csv'

outdir = './dataset'
if not os.path.exists(outdir):
    os.mkdir(outdir)
fullname = os.path.join(outdir, outname)    
data_atk1.to_csv("./dataset/attack1withlabels_7feature.csv", encoding='utf-8', index=False)
data_atk1

Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label,feature_6,feature_1,feature_2,feature_3,feature_4,feature_5,feature_7
0,21606,562,5607.3,5965.1,21606,253,5367.5,5930.1,1.022900,1.994000,1.420000e-08,0,,0.0,1.0,8.666667,53.666667,,0.0
1,21608,562,5594.2,5973.5,21608,547,5373.5,5934.6,-6.183700,-12.053000,3.120000e-09,0,-12.053000,0.0,1.0,-40.571429,-39.428571,-6.183700,0.0
2,21609,562,5583.8,5980.2,21609,547,5368.1,5924.2,-4.662800,-9.088500,2.260000e-09,0,-12.053000,4.0,0.0,-40.571429,-39.428571,-6.183700,0.0
3,21893,3166,3642.0,5183.0,21893,2761,5560.0,5820.0,-13.608000,-2.843200,3.790000e-09,1,-2.843200,0.0,1.0,-0.000000,-0.000000,-13.608000,1.0
4,21893,3166,3642.0,5183.0,21893,2929,3781.6,5256.0,-11.941000,-3.984800,3.150000e-09,0,-3.479507,0.0,1.0,-94.000000,-22.343750,-12.543067,0.0
6,21893,3166,3642.0,5183.0,21893,2791,4168.2,5286.1,-12.956000,-2.708300,2.940000e-09,0,,0.0,1.0,-130.916667,-15.500000,,0.0
7,21893,3166,3642.0,5183.0,21893,3073,3609.1,5420.4,-4.221500,36.586000,2.070000e-08,0,36.718412,0.0,1.0,-7.764706,367.176471,-1.073500,0.0
8,21893,3166,3641.4,5186.5,21893,1951,3485.7,5192.2,-0.424860,-0.083085,3.580000e-09,0,-0.010386,0.0,1.0,-99.479167,-11.750000,-0.053108,0.0
9,21893,3166,3640.9,5190.1,21893,3097,5560.0,5820.0,-0.023292,-40.018000,1.580000e-09,1,-39.854438,0.0,1.0,0.000000,0.000000,2.841806,1.0
10,21893,3166,3640.9,5190.1,21893,3043,3596.1,5514.8,-3.297800,40.256000,1.240000e-08,0,40.243286,0.0,1.0,0.071429,402.928571,-0.429768,0.0
