# Feature Extraction

We use this module to extract the features we used to feed into NN. Remember, take all the data into consideration.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [100]:
# Importing the data
col_index = [1,2,3,4,6,7,9,10,12,13,15,16]
col_names = ['re_time','re_ID','re_x','re_y','tr_time','tr_ID','tr_x','tr_y','tr_vx','tr_vy','RSSI','Label']
data_atk1 = pd.read_csv('dataset/attack1withlabels.csv', usecols = col_index, header = None, names = col_names)
data_atk1 = data_atk1.dropna(axis = 0, how = 'any')
data_atk1 = data_atk1[data_atk1['tr_ID']==2761]
data_atk1

Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label
3,21893,3166,3642.0,5183.0,21893,2761,5560.0,5820.0,-13.608000,-2.843200,3.790000e-09,1
32,21894,3166,3636.5,5218.3,21894,2761,5560.0,5820.0,-13.518000,-2.824400,1.800000e-09,1
1560,21894,2914,3644.5,5222.8,21894,2761,5560.0,5820.0,-13.518000,-2.824400,4.420000e-09,1
13017,21950,3628,3642.3,5202.3,21950,2761,5560.0,5820.0,-13.681000,-1.821500,7.450000e-08,1
13044,21951,3628,3637.6,5232.2,21951,2761,5560.0,5820.0,-13.783000,-1.835000,1.790000e-08,1
13072,21952,3628,3633.0,5261.9,21952,2761,5560.0,5820.0,-13.739000,-1.596500,1.920000e-08,1
13100,21953,3628,3628.4,5291.6,21953,2761,5560.0,5820.0,-13.845000,-1.608900,3.200000e-08,1
13151,21955,3628,3620.4,5351.2,21955,2761,5560.0,5820.0,-13.769000,-1.688900,1.080000e-08,1
13181,21956,3628,3617.0,5380.9,21956,2761,5560.0,5820.0,-13.819000,-1.705900,1.870000e-08,1
13214,21957,3628,3613.6,5410.7,21957,2761,5560.0,5820.0,-13.674000,-1.841000,1.800000e-08,1


In [18]:
# Extract the sender ID and receiver ID and get their information. 
recevier_ID = np.unique(np.array(data_atk1.iloc[:,1]))
sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))
sender_ID

array([   7,   13,   19,   25,   31,   37,   43,   49,   55,   61,   67,
         73,   79,   85,   91,   97,  103,  115,  121,  127,  133,  139,
        145,  151,  157,  163,  169,  175,  181,  187,  193,  199,  205,
        211,  217,  223,  229,  235,  241,  247,  253,  259,  265,  271,
        277,  283,  289,  295,  301,  307,  313,  319,  325,  331,  337,
        343,  349,  355,  361,  367,  373,  379,  385,  391,  397,  403,
        409,  415,  421,  427,  433,  439,  445,  451,  457,  463,  469,
        475,  481,  487,  493,  499,  505,  511,  517,  523,  529,  535,
        541,  547,  553,  559,  565,  571,  577,  583,  589,  595,  601,
        607,  613,  619,  625,  631,  637,  643,  649,  655,  661,  667,
        673,  679,  685,  691,  697,  703,  709,  715,  721,  727,  733,
        739,  745,  751,  757,  763,  769,  775,  781,  787,  793,  799,
        805,  811,  817,  823,  829,  835,  841,  847,  853,  859,  865,
        871,  877,  883,  889,  895,  901,  907,  9

## Feature 1: Location Plausibility Check 
Give a confidential score about the realibility of location. The scoure is in the range  [0, 4], where for x and y direction range in [0,2] individually.
1. for each unique transmitter ID, create its metrics.

In [87]:
def location_plausibility(receiver_of_sender):
    x_95 = [-4.6983, 5.2265]
    x_99 = [-7.1795, 7.7077]
    y_95 = [-8.1203, 8.0501]
    y_99 = [-12.1629, 12.0927]
    score = []
    length = receiver_of_sender.shape[0]
    score.append(0)
    if length <=1:
        return score
    for k in range(length-1):
            time_interval = (receiver_of_sender.iloc[k+1]['re_time'] - receiver_of_sender.iloc[k]['re_time'])

            x_pre_95_low = receiver_of_sender.iloc[k]['tr_x'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vx'] +  x_95 [0]*time_interval*0.1)
            x_pre_95_up  = receiver_of_sender.iloc[k]['tr_x'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vx'] +  x_95 [1]*time_interval*0.1)
            x_pre_99_low = receiver_of_sender.iloc[k]['tr_x'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vx'] +  x_99 [0]*time_interval*0.1)
            x_pre_99_up  = receiver_of_sender.iloc[k]['tr_x'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vx'] +  x_99 [1]*time_interval*0.1)

            y_pre_95_low = receiver_of_sender.iloc[k]['tr_y'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vy'] +  y_95 [0]*time_interval*0.1)
            y_pre_95_up  = receiver_of_sender.iloc[k]['tr_y'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vy'] +  y_95 [1]*time_interval*0.1)
            y_pre_99_low = receiver_of_sender.iloc[k]['tr_y'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vy'] +  y_99 [0]*time_interval*0.1)
            y_pre_99_up  = receiver_of_sender.iloc[k]['tr_y'] + time_interval*0.1 * (receiver_of_sender.iloc[k]['tr_vy'] +  y_95 [1]*time_interval*0.1)

            t_x = 0
            t_y = 0
            #print(receiver_of_sender.iloc[k+1]['tr_x'])
            if receiver_of_sender.iloc[k+1]['tr_x']<=x_pre_95_low or receiver_of_sender.iloc[k+1]['tr_x'] >= x_pre_95_up:
                t_x = 1

            if receiver_of_sender.iloc[k+1]['tr_x']<=x_pre_99_low or receiver_of_sender.iloc[k+1]['tr_x'] >= x_pre_99_up:
                t_x = 2

            if receiver_of_sender.iloc[k+1]['tr_y']<=y_pre_95_low or receiver_of_sender.iloc[k+1]['tr_y'] >= y_pre_95_up:
                t_y = 1

            if receiver_of_sender.iloc[k+1]['tr_y']<=y_pre_99_low or receiver_of_sender.iloc[k+1]['tr_y'] >= y_pre_99_up:
                t_y = 2  

            score.append(t_x+t_y)
    return score
def add_figure_1(data_atk1):
    sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))
    number_id = len(sender_ID)
    
    for i in range(number_id):
        #print(sender_ID[i])
        #if(sender_ID[i]==97): break
        b = data_atk1.loc[(data_atk1['tr_ID']==sender_ID[i])]
        figure_1 = location_plausibility(b)
        data_atk1['figure_1'] = pd.Series(figure_1, index=b.index)
    return data_atk1
        

In [127]:
#test
b = data_atk1.loc[(data_atk1['tr_ID']==3475)]
figure_1 = location_plausibility(b)
#figure_1

In [129]:
# to add figure 1 
c = add_figure_1(data_atk1)

In [130]:
c

Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label,figure_1,figure_2
3,21893,3166,3642.0,5183.0,21893,2761,5560.0,5820.0,-13.608000,-2.843200,3.790000e-09,1,0,1
32,21894,3166,3636.5,5218.3,21894,2761,5560.0,5820.0,-13.518000,-2.824400,1.800000e-09,1,4,1
1560,21894,2914,3644.5,5222.8,21894,2761,5560.0,5820.0,-13.518000,-2.824400,4.420000e-09,1,4,1
13017,21950,3628,3642.3,5202.3,21950,2761,5560.0,5820.0,-13.681000,-1.821500,7.450000e-08,1,0,1
13044,21951,3628,3637.6,5232.2,21951,2761,5560.0,5820.0,-13.783000,-1.835000,1.790000e-08,1,4,1
13072,21952,3628,3633.0,5261.9,21952,2761,5560.0,5820.0,-13.739000,-1.596500,1.920000e-08,1,4,1
13100,21953,3628,3628.4,5291.6,21953,2761,5560.0,5820.0,-13.845000,-1.608900,3.200000e-08,1,4,1
13151,21955,3628,3620.4,5351.2,21955,2761,5560.0,5820.0,-13.769000,-1.688900,1.080000e-08,1,2,1
13181,21956,3628,3617.0,5380.9,21956,2761,5560.0,5820.0,-13.819000,-1.705900,1.870000e-08,1,4,1
13214,21957,3628,3613.6,5410.7,21957,2761,5560.0,5820.0,-13.674000,-1.841000,1.800000e-08,1,4,1


## Feature 2: Movement plausibility check


This feature is a plausibility check for constant location

In [131]:
def movement_plausibility(receiver_of_sender):
    score = []
    flag  = 0
    length = receiver_of_sender.shape[0]
    score.append(1)
    if length <=1:
        return score
    x_placement = receiver_of_sender.iloc[-1]['tr_x'] - receiver_of_sender.iloc[0]['tr_x']
    y_placement = receiver_of_sender.iloc[-1]['tr_y'] - receiver_of_sender.iloc[0]['tr_y']
    #time_duration = (receiver_of_sender.iloc[-1]['re_time'] - receiver_of_sender.iloc[0]['re_time']) * 0.1
    #average_velocity_x = x_placement/time_duration
    #average_velocity_y = y_placement/time_duration
    average_velocity_x = np.average(receiver_of_sender['tr_vx'].values)
    average_velocity_y = np.average(receiver_of_sender['tr_vy'].values)
    if(x_placement==0 and y_placement==0 and average_velocity_x!=0 and average_velocity_y!=0):
        flag = 1
    else:
        flag = 0
    for i in range(length -1):
        score.append(flag)
    return score

def add_figure_2(data_atk1):
    sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))
    number_id = len(sender_ID)
    for i in range(number_id):
        print(sender_ID[i])
        if(sender_ID[i]==97): break
        b = data_atk1.loc[(data_atk1['tr_ID']==sender_ID[i])]
        figure_2 = movement_plausibility(b)
        data_atk1['figure_2'] = pd.Series(figure_2, index=b.index)
    return data_atk1

In [125]:
#data_atk1 = data_atk1[data_atk1['tr_ID']==2761]
#np.average(data_atk1['tr_vx'].values)

In [124]:
#b = data_atk1.loc[(data_atk1['tr_ID']==2761)]
#figure_2 = movement_plausibility(b)


In [126]:
# to add figure 2 
c = add_figure_2(data_atk1)

2761


## Getting the feature

## Feature 2: Movement Plausibility Check 

In [87]:
n = 0
m = np.array([])

In [89]:
np.append(m,n)

array([0.])