## Libraries

In [None]:
%matplotlib inline

In [None]:
# Standard library
import datetime
import os
import re
import sys
import time
from importlib import reload

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_profiling as pdp
import pycm
import seaborn as sns
import sklearn
import sklearn.metrics
import sklearn.model_selection
import sklearn.preprocessing
from cycler import cycler
from scipy.io import loadmat
from sklearn import mixture
from sklearn import neighbors

# Project-local
from V2XMD import data_evaluation
from V2XMD import data_processing
from V2XMD import data_visualisation
from V2XMD import plausibility_checks

In [None]:
# Apply seaborn's 'dark' style to the figures drawn after this cell.
sns.set_style('dark')

In [None]:
# Names given to the columns of each loaded attack matrix (used as
# `columns=` when the DataFrames are built), in column order.
columns_names = [
    # message envelope / receiver side
    "type", "reception_time",
    "receiver_id",
    "receiver_x_position", "receiver_y_position", "receiver_z_position",
    # transmitter side
    "transmission_time", "transmitter_id", "bsm_id",
    "transmitter_x_position", "transmitter_y_position", "transmitter_z_position",
    "transmitter_x_velocity", "transmitter_y_velocity", "transmitter_z_velocity",
    # signal strength and ground-truth label
    "rssi", "label",
]
# Accumulator for the per-attack DataFrames.
dfs = []

In [None]:
import V2XMD

In [None]:
# Load every attack<N>withlabels.mat file, tag its rows with the matching
# entry of V2XMD.data_visualisation.attack_types, and stack everything into
# one DataFrame. Rows containing NaN are dropped after the index is rebuilt.
dfs = []
for attack_id in (1, 2, 4, 8, 16):
    mat_path = './data/WiSec_DataModifiedVeremi_Dataset-master/attack'+str(attack_id)+'withlabels.mat'
    mat_key = 'attack'+str(attack_id)+'withlabels'
    attack_frame = pd.DataFrame(loadmat(mat_path)[mat_key], columns=columns_names)
    dfs.append(attack_frame.assign(dataset=V2XMD.data_visualisation.attack_types[attack_id]))
df = pd.concat(dfs).reset_index(drop=True).dropna()

In [None]:
# Replace each raw label value by its entry in attack_types.
# NOTE(review): not idempotent - a second run would look up the already
# mapped values; re-run the loading cell before re-running this one.
df['label'] = df['label'].apply(
    lambda raw_label: V2XMD.data_visualisation.attack_types[raw_label]
)

In [None]:
# Optional: uncomment the next line to work on a 1% random sample
# (processing the whole dataset takes a long time).
# df=df.sample(frac=0.01)

# Summary statistics

In [None]:
# Summary statistics, transposed so each described column is one row;
# floats are rendered with thousands separators and two decimals.
with pd.option_context('display.float_format', '{:,.2f}'.format):
    display(df.describe().transpose())

# Histograms

In [None]:
# Subset of df restricted to the positional, timing, velocity and RSSI
# columns; used by the histogram cell below.
rdf = df.loc[:, [
    'receiver_x_position', 'transmitter_x_position',
    'receiver_y_position', 'transmitter_y_position',
    'transmission_time', "reception_time",
    'transmitter_x_velocity', 'transmitter_y_velocity',
    'rssi',
]]

In [None]:
# Random 90/10 split of the rows, used only by the split-illustration figure
# below. The seed is fixed so that figure is reproducible across re-runs.
train, test = sklearn.model_selection.train_test_split(
    df,
    test_size=0.1,
    random_state=42,
)

In [None]:
# The 10 bsm_id values with the most rows in the test part.
bsm_ids = test['bsm_id'].value_counts().head(10).index

In [None]:
# Every row of the full dataset whose bsm_id is one of the selected ids.
bsm_ids_df = df.loc[df['bsm_id'].isin(bsm_ids)]

In [None]:
# Encoder mapping the selected bsm_id values to consecutive integers
# (used as x positions in the scatter plot below).
label_encoder = sklearn.preprocessing.LabelEncoder()
label_encoder.fit(bsm_ids_df['bsm_id'])

In [None]:
# Illustrate how receptions of the same message (bsm_id) are spread over the
# two parts of the random split: large coloured dots are all receptions,
# small black / white dots mark the rows that landed in train / test.
os.makedirs("out", exist_ok=True)  # savefig fails if the folder is missing

train_selected = train[train.bsm_id.isin(bsm_ids)]
test_selected = test[test.bsm_id.isin(bsm_ids)]
message_codes = label_encoder.transform(bsm_ids_df.bsm_id)

fig, ax = plt.subplots()
ax.scatter(message_codes, bsm_ids_df.receiver_id,
           c=message_codes, cmap=plt.cm.tab10, s=150)
# The +/-0.05 offsets keep the train and test markers from overlapping.
ax.scatter(label_encoder.transform(train_selected.bsm_id) - 0.05,
           train_selected.receiver_id, c='black', s=10, label='Train')
ax.scatter(label_encoder.transform(test_selected.bsm_id) + 0.05,
           test_selected.receiver_id, c='white', s=10, label='Validation')
ax.set_xlabel('Message ID')
ax.set_ylabel('Receiver ID')
ax.legend(fontsize='x-large', facecolor='#9b0000', framealpha=1, frameon=True)
fig.savefig("out/train_split.png", dpi=300, bbox_inches='tight')

In [None]:
# One panel per feature: histogram on the left y-axis, kernel density
# estimate (red) on a twin right y-axis.
hist_columns = [
    'receiver_x_position',
    'transmitter_x_position',
    'receiver_y_position',
    'transmitter_y_position',
    'transmitter_x_velocity',
    'transmitter_y_velocity',
]
fig, axes = plt.subplots(3, 2, figsize=(8, 7.5))
for hist_ax, cname in zip(axes.ravel(), hist_columns):
    rdf.hist(bins=100, xrot=45, column=cname, ax=hist_ax)
    hist_ax.set_ylabel('N° occurrence')
    hist_ax.set_xlabel('feature value')
    # Label the twin axis explicitly instead of relying on pyplot's
    # "current axes" happening to be the twin.
    kde_ax = hist_ax.twinx()
    sns.kdeplot(rdf[cname], color='red', label='kde', ax=kde_ax)
    kde_ax.set_ylabel('kde value')
fig.tight_layout()
fig.savefig("out/hist_features.png", dpi=300, bbox_inches='tight')

# Box plots

In [None]:
# Box plots of related feature pairs, one pair per panel, side by side.
boxplot_groups = [
    ['receiver_x_position', 'transmitter_x_position'],
    ['receiver_y_position', 'transmitter_y_position'],
    ['transmission_time', "reception_time"],
    ['transmitter_x_velocity', 'transmitter_y_velocity'],
]
fig, axes = plt.subplots(1, len(boxplot_groups), figsize=(8, 6))
for box_ax, group_columns in zip(axes, boxplot_groups):
    # rot=90 keeps the long column names readable on the x axis.
    df[group_columns].boxplot(ax=box_ax, rot=90)
# Only the left-most panel carries the y-axis label.
axes[0].set_ylabel('Value')
fig.tight_layout()
fig.savefig("out/boxplots_features.png", dpi=300, bbox_inches='tight')

## Message redundancy

In [None]:
# Distribution of the number of rows recorded per bsm_id: histogram on the
# left axis, kernel density estimate (red) on a twin right axis.
rows_per_message = df.groupby('bsm_id').size()  # computed once, used twice

hist_ax = rows_per_message.hist(bins=20, figsize=(8, 4))
hist_ax.set_ylabel('Number of messages')
hist_ax.set_xlabel("Number of broadcasts")

kde_ax = hist_ax.twinx()
sns.kdeplot(rows_per_message, color='red', label='kde', ax=kde_ax)
kde_ax.legend()
kde_ax.set_ylabel('Kde')
plt.savefig("out/messaging_rates.png", dpi=600, bbox_inches='tight');

# Machine learning application

In [None]:
from V2XMD.models import TrainModel

In [None]:
"""
This part of the notebook contains all experiments.
We vary the following parameters in a hierarchical setup, in this order:
    MBD type (message-based and communication-based)
    Split type (random and our split)
    Feature engineering (with or without the extracted features)
    One-class detection (one model per class vs. a single model for all classes at once)

We then plot the results for train and test performance.
"""

### Message based detection

## Random split

#### No feature engineering
 


##### One-class detection at a time

In [None]:
# Experiment: random split, no extracted features, multi_class disabled
# (per the notebook outline: one model per class).
rnd_nofeature_oneclass_model = TrainModel(
    df,
    split='random',
    multi_class=False,
    features=False,
)
rnd_nofeature_oneclass_model.train()

In [None]:
# Show the model's train_results attribute as the cell output.
rnd_nofeature_oneclass_model.train_results

## train results

In [None]:
# Visualise the results for the 'train' set (implemented in V2XMD.models.TrainModel).
rnd_nofeature_oneclass_model.results_visualisation('train')

### test results

In [None]:
# Visualise the results for the 'test' set (implemented in V2XMD.models.TrainModel).
rnd_nofeature_oneclass_model.results_visualisation('test')

##### Multi class detection

In [None]:
# Experiment: random split, no extracted features, multi_class enabled
# (per the notebook outline: one model for all classes at once).
rnd_nofeature_multiclass_model = TrainModel(
    df,
    split='random',
    multi_class=True,
    features=False,
)
rnd_nofeature_multiclass_model.train()

In [None]:
# Show the model's train_results attribute as the cell output.
rnd_nofeature_multiclass_model.train_results

## train results

In [None]:
# Visualise the results for the 'train' set (implemented in V2XMD.models.TrainModel).
rnd_nofeature_multiclass_model.results_visualisation('train')

### test results

In [None]:
# Visualise the results for the 'test' set (implemented in V2XMD.models.TrainModel).
rnd_nofeature_multiclass_model.results_visualisation('test')

## Feature engineering


##### One-class detection at a time

In [None]:
# Experiment: random split, extracted features enabled, multi_class disabled
# (per the notebook outline: one model per class).
rnd_feature_oneclass_model = TrainModel(
    df,
    split='random',
    multi_class=False,
    features=True,
)
rnd_feature_oneclass_model.train()

In [None]:
# Show the model's train_results attribute as the cell output.
rnd_feature_oneclass_model.train_results

## train results

In [None]:
# Visualise the results for the 'train' set (implemented in V2XMD.models.TrainModel).
rnd_feature_oneclass_model.results_visualisation('train')

### test results

In [None]:
# Visualise the results for the 'test' set (implemented in V2XMD.models.TrainModel).
rnd_feature_oneclass_model.results_visualisation('test')

##### Multi class detection

In [None]:
# Experiment: random split, extracted features enabled, multi_class enabled
# (per the notebook outline: one model for all classes at once).
rnd_feature_multiclass_model = TrainModel(
    df,
    split='random',
    multi_class=True,
    features=True,
)
rnd_feature_multiclass_model.train()

In [None]:
# Show the model's train_results attribute as the cell output.
rnd_feature_multiclass_model.train_results

## train results

In [None]:
# Visualise the results for the 'train' set (implemented in V2XMD.models.TrainModel).
rnd_feature_multiclass_model.results_visualisation('train')

### test results

In [None]:
# Visualise the results for the 'test' set (implemented in V2XMD.models.TrainModel).
rnd_feature_multiclass_model.results_visualisation('test')

## Our split

#### No feature engineering
 


##### One-class detection at a time

In [None]:
# Experiment: 'our' split, no extracted features, multi_class disabled
# (per the notebook outline: one model per class).
tmp_nofeature_oneclass_model = TrainModel(
    df,
    split='our',
    multi_class=False,
    features=False,
)
tmp_nofeature_oneclass_model.train()

In [None]:
# Show the model's train_results attribute as the cell output.
tmp_nofeature_oneclass_model.train_results

## train results

In [None]:
# Visualise the results for the 'train' set (implemented in V2XMD.models.TrainModel).
tmp_nofeature_oneclass_model.results_visualisation('train')

### test results

In [None]:
# Visualise the results for the 'test' set (implemented in V2XMD.models.TrainModel).
tmp_nofeature_oneclass_model.results_visualisation('test')

##### Multi class detection

In [None]:
# Experiment: 'our' split, no extracted features, multi_class enabled
# (per the notebook outline: one model for all classes at once).
tmp_nofeature_multiclass_model = TrainModel(
    df,
    split='our',
    multi_class=True,
    features=False,
)
tmp_nofeature_multiclass_model.train()

In [None]:
# Show the model's train_results attribute as the cell output.
tmp_nofeature_multiclass_model.train_results

## train results

In [None]:
# Visualise the results for the 'train' set (implemented in V2XMD.models.TrainModel).
tmp_nofeature_multiclass_model.results_visualisation('train')

### test results

In [None]:
# Visualise the results for the 'test' set (implemented in V2XMD.models.TrainModel).
tmp_nofeature_multiclass_model.results_visualisation('test')

## Feature engineering


##### One-class detection at a time

In [None]:
# Experiment: 'our' split, extracted features enabled, multi_class disabled
# (per the notebook outline: one model per class).
tmp_feature_oneclass_model = TrainModel(
    df,
    split='our',
    multi_class=False,
    features=True,
)
tmp_feature_oneclass_model.train()

In [None]:
# Show the model's train_results attribute as the cell output.
tmp_feature_oneclass_model.train_results

## train results

In [None]:
# Visualise the results for the 'train' set (implemented in V2XMD.models.TrainModel).
tmp_feature_oneclass_model.results_visualisation('train')

### test results

In [None]:
# Visualise the results for the 'test' set (implemented in V2XMD.models.TrainModel).
tmp_feature_oneclass_model.results_visualisation('test')

##### Multi class detection

In [None]:
# Experiment: 'our' split, extracted features enabled, multi_class enabled
# (per the notebook outline: one model for all classes at once).
tmp_feature_multiclass_model = TrainModel(
    df,
    split='our',
    multi_class=True,
    features=True,
)
tmp_feature_multiclass_model.train()

In [None]:
# Show the model's train_results attribute as the cell output.
tmp_feature_multiclass_model.train_results

## train results

In [None]:
# Visualise the results for the 'train' set (implemented in V2XMD.models.TrainModel).
tmp_feature_multiclass_model.results_visualisation('train')

### test results

In [None]:
# Visualise the results for the 'test' set (implemented in V2XMD.models.TrainModel).
tmp_feature_multiclass_model.results_visualisation('test')

# Comparison 

In [None]:
# Stack the train and test result tables of all eight experiments, in the
# order: random split first, then 'our' split; within each split, no-feature
# models before feature models, one-class before multi-class.
trained_models = (
    rnd_nofeature_oneclass_model,
    rnd_nofeature_multiclass_model,
    rnd_feature_oneclass_model,
    rnd_feature_multiclass_model,
    tmp_nofeature_oneclass_model,
    tmp_nofeature_multiclass_model,
    tmp_feature_oneclass_model,
    tmp_feature_multiclass_model,
)
all_results = pd.concat([
    result_table
    for trained_model in trained_models
    for result_table in (trained_model.train_results, trained_model.test_results)
])

In [None]:
# Sort the index so that the tuple/slice based .loc selections used in the
# following cells operate on a sorted MultiIndex.
all_results=all_results.sort_index()

In [None]:
# Heat maps comparing the two split strategies: a cell is True when the
# f1-score obtained with 'our' split is strictly greater than the one
# obtained with the 'random' split, for one (row, algorithm) pair.
# Rows of panels: with / without features. Columns of panels: the 'train'
# results (titled 'Validation') and the 'test' results.
n_rows = 2
n_cols = 2
dropped_index_levels = ['split', 'Set', 'features', 'multiclass']
y_label_suffix = {'True': 'with features', 'False': 'without features'}


def _f1_table(split_kind, subset, feature, multiclass):
    """Return the f1-score columns of all_results for one configuration.

    The selected index levels are dropped so that tables coming from the two
    split strategies share the same labels and can be compared element-wise.
    """
    rows = (slice(None), split_kind, subset, feature, multiclass)
    f1_columns = (slice(None), 'f1-score')
    return all_results.loc[rows, f1_columns].reset_index(dropped_index_levels, drop=True)


fig, axes = plt.subplots(n_rows, n_cols, figsize=(8, 8))
panels = iter(enumerate(axes.flat, start=1))
for feature in ['True', 'False']:
    for multiclass in ['False']:
        for subset in ['train', 'test']:
            id_c, ax = next(panels)
            matrix = (_f1_table('our', subset, feature, multiclass)
                      > _f1_table('random', subset, feature, multiclass))
            ax.imshow(matrix, cmap=plt.cm.RdYlGn)

            # Grey separators between neighbouring cells, sized from the data
            # instead of a hard-coded count.
            n_matrix_rows, n_matrix_cols = matrix.shape
            for x in range(n_matrix_cols - 1):
                ax.axvline(x + 0.5, c='grey')
            for y in range(n_matrix_rows - 1):
                ax.axhline(y + 0.5, c='grey')

            # Only the bottom row of panels shows the algorithm names.
            if id_c > n_cols * (n_rows - 1):
                ax.set_xticks(range(n_matrix_cols))
                ax.set_xticklabels(list(matrix.droplevel(1, axis=1).columns), rotation=90)
                ax.set_xlabel('Algorithm')
            else:
                ax.set_xticks([])

            # Left column ('train' results) carries the row labels.
            if subset == 'train':
                ax.set_yticks(range(n_matrix_rows))
                ax.set_yticklabels(list(matrix.index), rotation=0)
                ax.set_title('Validation')
                ax.set_ylabel('Attack (' + y_label_suffix[feature] + ')')
            else:
                ax.set_yticks([])
                ax.set_title('Test')
fig.tight_layout()
fig.savefig("out/heatmap_results.png", dpi=600, bbox_inches='tight');

In [None]:
# f1-score of every algorithm on the 'test' set (features == 'True',
# multiclass == 'False'): the random split is drawn with '--' style and '*'
# markers, 'our' split with '>' markers, both on the same axes.
ax = None
for split_kind, line_options in (
    ('random', dict(marker='*', style='--', figsize=(9, 6))),
    ('our', dict(marker='>')),
):
    f1_by_split = (
        all_results
        .loc[(slice(None), split_kind, 'test', 'True', 'False'), (slice(None), 'f1-score')]
        .droplevel(['multiclass', 'features', 'Set'])
        .droplevel(1, axis=1)
        .unstack('split')
    )
    ax = f1_by_split.plot(rot=90, ax=ax, cmap=plt.cm.tab10, **line_options)
ax.grid()
ax.set_ylabel('F1-Score')
ax.set_title('ml_methods_comparison')

plt.savefig("out/ml_methods_comparison.png", dpi=600, bbox_inches='tight');


### results table

In [None]:
# f1-score columns for 'our' split, 'train' set, multiclass == 'False'
# (all feature settings), with the index levels turned into columns.
all_results.loc[(slice(None),'our','train',slice(None),'False'),(slice(None),'f1-score')].reset_index()

In [None]:
# Best f1-score (maximum over the f1-score columns) for every setup under
# 'our' split: 'train' results (labelled Validation) against 'test' results.
f1_columns = (slice(None), 'f1-score')
validation_f1 = all_results.loc[(slice(None), 'our', 'train', slice(None), 'False'), f1_columns]
test_f1 = all_results.loc[(slice(None), 'our', 'test', slice(None), 'False'), f1_columns]

fig, ax = plt.subplots(figsize=(5, 3.3))
validation_f1.max(axis=1).plot(ax=ax, rot=90, use_index=False, label='Validation')
test_f1.max(axis=1).plot(ax=ax, rot=90, use_index=False, label='Test')
ax.set_ylim(0.75, 1)
ax.legend()

# Every second row is wrapped in '* ... *'; presumably these are the
# features == 'True' rows of the sorted index - TODO confirm.
attack_names = validation_f1.reset_index()[['Attack']].Attack.values
nms = ['* ' + name + ' *' if i % 2 == 1 else name
       for i, name in enumerate(attack_names)]
# Tick count follows the data instead of a hard-coded 12.
ax.set_xticks(range(len(nms)))
ax.set_xticklabels(nms)
ax.set_xlabel('Setup')
ax.set_ylabel('f1-score')
ax.grid()
ax.set_title('our_split_results')

fig.savefig("out/our_split_results.png", dpi=600, bbox_inches='tight');

In [None]:
# Best f1-score (maximum over the f1-score columns) for every setup under
# the 'random' split: 'train' results (labelled Validation) against 'test'.
f1_columns = (slice(None), 'f1-score')
validation_f1 = all_results.loc[(slice(None), 'random', 'train', slice(None), 'False'), f1_columns]
test_f1 = all_results.loc[(slice(None), 'random', 'test', slice(None), 'False'), f1_columns]

fig, ax = plt.subplots(figsize=(5, 3.3))
validation_f1.max(axis=1).plot(ax=ax, rot=90, use_index=False, label='Validation')
test_f1.max(axis=1).plot(ax=ax, rot=90, use_index=False, label='Test')
ax.set_ylim(0.75, 1)
ax.legend()

# Every second row is wrapped in '* ... *'; presumably these are the
# features == 'True' rows of the sorted index - TODO confirm.
attack_names = validation_f1.reset_index()[['Attack']].Attack.values
nms = ['* ' + name + ' *' if i % 2 == 1 else name
       for i, name in enumerate(attack_names)]
# Tick count follows the data instead of a hard-coded 12.
ax.set_xticks(range(len(nms)))
ax.set_xticklabels(nms)
ax.set_xlabel('Setup')
ax.grid()
ax.set_ylabel('f1-score')
ax.set_title('Random_split_results')

fig.savefig("out/Random_split_results.png", dpi=600, bbox_inches='tight');

In [None]:
# Effect of the extracted features under 'our' split: best f1-score
# (maximum over the f1-score columns) on the 'test' set, without and with
# features.
f1_columns = (slice(None), 'f1-score')
without_features_f1 = all_results.loc[(slice(None), 'our', 'test', 'False', 'False'), f1_columns]
with_features_f1 = all_results.loc[(slice(None), 'our', 'test', 'True', 'False'), f1_columns]

fig, ax = plt.subplots(figsize=(5, 3.3))
without_features_f1.max(axis=1).plot(ax=ax, rot=90, use_index=False, label='without features')
with_features_f1.max(axis=1).plot(ax=ax, rot=90, use_index=False, label='with features')
ax.set_ylim(0.75, 1)
ax.legend()

nms = with_features_f1.reset_index()[['Attack']].Attack.values
# Tick count follows the data instead of a hard-coded 6.
ax.set_xticks(range(len(nms)))
ax.set_xticklabels(nms)
ax.set_xlabel('Attack')
ax.set_ylabel('f1-score')
ax.grid()
ax.set_title('our_split_features_results')

fig.savefig("out/our_split_features_results.png", dpi=600, bbox_inches='tight');

In [None]:
# Effect of the extracted features under the 'random' split: best f1-score
# (maximum over the f1-score columns) on the 'test' set, without and with
# features.
f1_columns = (slice(None), 'f1-score')
without_features_f1 = all_results.loc[(slice(None), 'random', 'test', 'False', 'False'), f1_columns]
with_features_f1 = all_results.loc[(slice(None), 'random', 'test', 'True', 'False'), f1_columns]

fig, ax = plt.subplots(figsize=(5, 3.3))
without_features_f1.max(axis=1).plot(ax=ax, rot=90, use_index=False, label='without features')
with_features_f1.max(axis=1).plot(ax=ax, rot=90, use_index=False, label='with features')
ax.set_ylim(0.75, 1)

nms = with_features_f1.reset_index()[['Attack']].Attack.values
# Tick count follows the data instead of a hard-coded 6.
ax.set_xticks(range(len(nms)))
ax.set_xticklabels(nms)
ax.legend()
ax.set_xlabel('Attack')
ax.set_ylabel('f1-score')
ax.grid()
ax.set_title('Random_split_features_results')
fig.savefig("out/Random_split_features_results.png", dpi=600, bbox_inches='tight');