# Feature Extraction

We use this script to extract the features from the raw dataset, then feed them into Neural Network. 
Notice that we take all data into consideration.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Importing the data
col_index = [1,2,3,4,6,7,9,10,12,13,15,16]
col_names = ['re_time','re_ID','re_x','re_y','tr_time','tr_ID','tr_x','tr_y','tr_vx','tr_vy','RSSI','Label']
data = pd.read_csv('dataset/attack1withlabels.csv', usecols = col_index, header = None, names = col_names)
data_atk1 = data.dropna(axis = 0, how = 'any')
# data_atk1 = data_atk1.head(2000)

In [3]:
# Extract the sender ID and receiver ID and get their information. 
recevier_ID = np.unique(np.array(data_atk1.iloc[:,1]))
sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))

## Analysis of Attack 1
For the attack 1, the position of receiver is always \[5560, 5820\], the parameter is constant. 

In [4]:
# we present an attack, notice that the location does not change, but the velocity is non-zeron
t = data_atk1.loc[(data_atk1['Label']==1)]
t_recevier_ID = np.unique(np.array(t.iloc[:,1]))
t_sender_ID = np.unique(np.array(t.iloc[:,5]))
tt = t.loc[t['re_ID'] == t_recevier_ID[0]]
tt.loc[tt['tr_ID']==t_sender_ID[1]]

Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label
382286,21601,10,3584.4,5821.0,21601,61,5560.0,5820.0,1.4375,-33.474,1.61e-08,1
382303,21602,10,3584.3,5852.5,21602,61,5560.0,5820.0,1.4179,-33.017,2.67e-09,1
382318,21603,10,3584.3,5883.9,21603,61,5560.0,5820.0,1.4051,-32.719,8.19e-09,1
382332,21604,10,3585.6,5915.2,21604,61,5560.0,5820.0,2.7163,-32.266,9.66e-09,1
382347,21605,10,3587.5,5946.5,21605,61,5560.0,5820.0,2.6963,-32.028,9.36e-09,1
382356,21606,10,3589.8,5977.7,21606,61,5560.0,5820.0,2.6773,-31.803,4.39e-09,1
382367,21607,10,3592.1,6008.8,21607,61,5560.0,5820.0,2.6528,-31.511,2.04e-09,1
382377,21608,10,3595.4,6039.9,21608,61,5560.0,5820.0,2.6357,-31.309,1.44e-09,1


## Feature 1: Location Plausibility Check 
Feature 1 is used to indicate the realibility of the location of sender. Intuitionly, the location should be consistent with the motion. We consider the previous location and its velocity, predict the current location, compare the real location with the predicted location, then obtain the score of location plausibility. 

The scoure is in the range  [0, 4], such that for x and y direction range in [0,2] individually. If the real location is within the range in 95% confidence of predicted location, we score it as 0, from 95-99%, we score it as 1, when the range is out of 99%, we score it as 2. 

That is, with lower score, 

In [5]:
def location_plausibility(receiver_of_sender):
    #x_95 = [-5.6983, 5.2265]
    #x_99 = [-7.1795, 7.7077]
    #y_95 = [-8.1203, 8.0501]
    #y_99 = [-12.1629, 12.0927]
    
    x_95 = [-10, 10]
    x_99 = [-18, 18]
    y_95 = [-10, 10]
    y_99 = [-18, 18]
    
    score = []
    length = receiver_of_sender.shape[0]
    # for the start of the series, we think it is two. 
    score.append(2) 
    if length <=1:
        return score
    for k in range(length-1):
            time_interval = (receiver_of_sender.iloc[k+1]['re_time'] - receiver_of_sender.iloc[k]['re_time'])

            x_pre_95_low = receiver_of_sender.iloc[k]['tr_x'] + time_interval * (receiver_of_sender.iloc[k]['tr_vx'] +  x_95 [0] * time_interval * 0.1)
            x_pre_95_up  = receiver_of_sender.iloc[k]['tr_x'] + time_interval * (receiver_of_sender.iloc[k]['tr_vx'] +  x_95 [1] * time_interval * 0.1)
            x_pre_99_low = receiver_of_sender.iloc[k]['tr_x'] + time_interval * (receiver_of_sender.iloc[k]['tr_vx'] +  x_99 [0] * time_interval * 0.1)
            x_pre_99_up  = receiver_of_sender.iloc[k]['tr_x'] + time_interval * (receiver_of_sender.iloc[k]['tr_vx'] +  x_99 [1] * time_interval * 0.1)

            y_pre_95_low = receiver_of_sender.iloc[k]['tr_y'] + time_interval * (receiver_of_sender.iloc[k]['tr_vy'] +  y_95 [0] * time_interval * 0.1)
            y_pre_95_up  = receiver_of_sender.iloc[k]['tr_y'] + time_interval * (receiver_of_sender.iloc[k]['tr_vy'] +  y_95 [1] * time_interval * 0.1)
            y_pre_99_low = receiver_of_sender.iloc[k]['tr_y'] + time_interval * (receiver_of_sender.iloc[k]['tr_vy'] +  y_99 [0] * time_interval * 0.1)
            y_pre_99_up  = receiver_of_sender.iloc[k]['tr_y'] + time_interval * (receiver_of_sender.iloc[k]['tr_vy'] +  y_95 [1] * time_interval * 0.1)

            t_x = 0
            t_y = 0
            
            #print(receiver_of_sender.iloc[k+1]['tr_x'])
            if receiver_of_sender.iloc[k+1]['tr_x']<=x_pre_95_low or receiver_of_sender.iloc[k+1]['tr_x'] >= x_pre_95_up:
                t_x = 1

            if receiver_of_sender.iloc[k+1]['tr_x']<=x_pre_99_low or receiver_of_sender.iloc[k+1]['tr_x'] >= x_pre_99_up:
                t_x = 2

            if receiver_of_sender.iloc[k+1]['tr_y']<=y_pre_95_low or receiver_of_sender.iloc[k+1]['tr_y'] >= y_pre_95_up:
                t_y = 1

            if receiver_of_sender.iloc[k+1]['tr_y']<=y_pre_99_low or receiver_of_sender.iloc[k+1]['tr_y'] >= y_pre_99_up:
                t_y = 2  
                
            score.append(t_x+t_y)
    return score

## Feature 2: Movement plausibility check


In [6]:
def movement_plausibility(receiver_of_sender):
    score = []
    flag  = 0
    length = receiver_of_sender.shape[0]
    
    if length <=1:
        score.append(np.random.randint(2))
        return score
    
    x_placement = receiver_of_sender.iloc[-1]['tr_x'] - receiver_of_sender.iloc[0]['tr_x']
    y_placement = receiver_of_sender.iloc[-1]['tr_y'] - receiver_of_sender.iloc[0]['tr_y']

    average_velocity_x = np.average(receiver_of_sender['tr_vx'].values)
    average_velocity_y = np.average(receiver_of_sender['tr_vy'].values)
    if(x_placement==0 and y_placement==0):
        if (average_velocity_x!=0 or average_velocity_y!=0):
            flag = 1

    for i in range(length):
        score.append(flag)
    return score

In [7]:
def quantititative_information(receiver_of_sender):
    feature3 = []
    feature4 = []
    feature5 = []
    feature6 = []
    
    length = receiver_of_sender.shape[0]
    
    if length == 1:
        feature3 = [0]
        feature4 = [0]
        feature5 = [0]
        feature6 = [0]
        return feature3, feature4, feature5, feature6
    
    time_interval = receiver_of_sender.iloc[-1]['re_time'] - receiver_of_sender.iloc[0]['re_time']
    v_bar_dist_x = (receiver_of_sender.iloc[-1]['tr_x'] - receiver_of_sender.iloc[0]['tr_x'])/(time_interval)
    v_bar_dist_y = (receiver_of_sender.iloc[-1]['tr_y'] - receiver_of_sender.iloc[0]['tr_y'])/(time_interval)
    
    # Calculate the v_velocity
    v_bar_velocity_all_x = 0
    v_bar_velocity_all_y = 0
    for i in range(length-1):
        delta_t = receiver_of_sender.iloc[i+1]['re_time'] - receiver_of_sender.iloc[i]['re_time']
        v_bar_velocity_all_x = v_bar_velocity_all_x + receiver_of_sender.iloc[i]['tr_vx'] * delta_t
        v_bar_velocity_all_y = v_bar_velocity_all_y + receiver_of_sender.iloc[i]['tr_vy'] * delta_t
    v_bar_velo_x = v_bar_velocity_all_x/time_interval
    v_bar_velo_y = v_bar_velocity_all_y/time_interval
    
    v_measure_x = np.abs(v_bar_dist_x - v_bar_velo_x)
    v_measure_y = np.abs(v_bar_dist_y - v_bar_velo_y)
    v_mag = np.linalg.norm([v_measure_x, v_measure_y])

    v_total = np.linalg.norm([v_bar_dist_x*time_interval, v_bar_dist_y*time_interval]) - np.linalg.norm([v_bar_velocity_all_x, v_bar_velocity_all_y])
    
    
    for k in range(length):
        feature3.append(v_measure_x) 
        feature4.append(v_measure_y)
        feature5.append(v_mag)
        feature6.append(v_total)
    return feature3, feature4, feature5, feature6


In [8]:
t = data_atk1 #.loc[(data_atk1['Label']==1)]
t_sender_ID = np.unique(np.array(t.iloc[:,5]))
tt = t.loc[t['tr_ID'] == 73]
t_recevier_ID = np.unique(np.array(tt.iloc[:,1]))
ttt = tt.loc[tt['re_ID']==2050]
a,b,c,d = quantititative_information(ttt)
print(a,b,c,d)
ttt.shape[0]

[0] [0] [0] [0]


1

In [9]:
def distance_check(receiver_of_sender, threshold):
    length = receiver_of_sender.shape[0]
    distance_score = []
    for i in range(length):
        distance_score.append(0)
        x = receiver_of_sender.iloc[i]['tr_x'] - receiver_of_sender.iloc[i]['re_x']
        y = receiver_of_sender.iloc[i]['tr_y'] - receiver_of_sender.iloc[i]['re_y']
        distance = np.linalg.norm([x,y])
        if distance >= 800:
            distance_score[i] = 1
    return distance_score

In [10]:
def add_feature(data_atk1):
    # for each sender, 
    sender_ID = np.unique(np.array(data_atk1.iloc[:,5]))
    number_id_tr_s = len(sender_ID)

    for i in range(number_id_tr_s):
        this = data_atk1.loc[(data_atk1['tr_ID'] == sender_ID[i])]
        this_recevier_ID = np.unique(np.array(this.iloc[:,1]))
        number_id_re_s = len(this_recevier_ID)
        for j in range(number_id_re_s):
            b = this.loc[this['re_ID'] == this_recevier_ID[j]]
            
            feature_1 = location_plausibility(b)
            feature_2 = movement_plausibility(b)
            feature_3,feature_4,feature_5,feature_6 = quantititative_information(b)
            feature_7 = distance_check(b, 800)
            
            b['feature_1'] = pd.Series(feature_1, index=b.index)
            b['feature_2'] = pd.Series(feature_2, index=b.index)
            b['feature_3'] = pd.Series(feature_3, index=b.index)
            b['feature_4'] = pd.Series(feature_4, index=b.index)
            b['feature_5'] = pd.Series(feature_5, index=b.index)
            b['feature_6'] = pd.Series(feature_6, index=b.index)
            b['feature_7'] = pd.Series(feature_7, index=b.index)
            
            if i==0 and j==0:
                data_atk1_new = b
            else:
                data_atk1_new = pd.concat([data_atk1_new, b])
    return data_atk1_new

In [11]:
data_atk1_new = add_feature(data_atk1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

In [12]:
#export dataframe to a csv file
import os

outname = './attack1withlabels_7feature_guodong.csv'

outdir = './dataset'
if not os.path.exists(outdir):
    os.mkdir(outdir)
fullname = os.path.join(outdir, outname)    
data_atk1_new.to_csv("./dataset/attack1withlabels_7feature_guodong.csv", encoding='utf-8', index=False)
data_atk1_new

Unnamed: 0,re_time,re_ID,re_x,re_y,tr_time,tr_ID,tr_x,tr_y,tr_vx,tr_vy,RSSI,Label,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7
125033,21600,16,3587.6,5846.8,21600,7,3584.4,5805.2,-0.030508,31.477,4.620000e-08,0,2,0,0.299677,0.010714,0.299869,0.004903,0
125049,21601,16,3587.6,5877.7,21601,7,3584.4,5836.7,-0.030526,31.496,2.950000e-07,0,0,0,0.299677,0.010714,0.299869,0.004903,0
125066,21602,16,3588.7,5908.6,21602,7,3584.3,5868.2,-0.030407,31.373,9.760000e-08,0,0,0,0.299677,0.010714,0.299869,0.004903,0
125079,21603,16,3590.3,5939.5,21603,7,3585.0,5899.5,1.337800,31.344,1.410000e-07,0,0,0,0.299677,0.010714,0.299869,0.004903,0
125092,21604,16,3592.6,5970.4,21604,7,3586.3,5930.8,1.417700,31.207,4.270000e-07,0,0,0,0.299677,0.010714,0.299869,0.004903,0
125103,21605,16,3594.9,6001.3,21605,7,3588.6,5962.1,2.320800,31.212,3.270000e-07,0,0,0,0.299677,0.010714,0.299869,0.004903,0
125113,21606,16,3597.9,6032.1,21606,7,3591.0,5993.2,2.317400,31.166,2.960000e-09,0,0,0,0.299677,0.010714,0.299869,0.004903,0
125122,21607,16,3601.2,6062.9,21607,7,3593.8,6024.4,3.280300,31.043,1.770000e-07,0,0,0,0.299677,0.010714,0.299869,0.004903,0
392124,21601,22,3621.7,5340.2,21601,7,3584.4,5836.7,-0.030526,31.496,3.300000e-09,0,2,0,0.631183,0.077000,0.635862,-0.326928,0
392187,21604,22,3611.4,5429.3,21604,7,3586.3,5930.8,1.417700,31.207,4.240000e-09,0,0,0,0.631183,0.077000,0.635862,-0.326928,0
