# Anamoly Detection

In [1]:
import numpy as np
import pandas as pd
import sklearn
import scipy
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report,accuracy_score
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

In [2]:
data = pd.read_csv('New_way_data _base.csv',sep=',')
data.head()

Unnamed: 0,BAROMETRIC_PRESSURE,ENGINE_COOLANT_TEMP,ENGINE_RPM,INTAKE_MANIFOLD_PRESSURE,MAF,SPEED,THROTTLE_POS,class
0,101.0,89.0,1173.0,35.0,3.47,9.0,0.184,0
1,95.317414,104.70254,4793.486993,69.295708,51.247763,56.748608,0.700599,1
2,96.340461,104.444472,3649.884731,58.358234,68.126434,11.383728,0.637216,1
3,100.0,84.0,1582.0,82.0,26.92,44.0,0.369,0
4,99.0,85.0,1657.0,37.0,9.29,36.0,0.278,0


In [3]:
data.describe()

Unnamed: 0,BAROMETRIC_PRESSURE,ENGINE_COOLANT_TEMP,ENGINE_RPM,INTAKE_MANIFOLD_PRESSURE,MAF,SPEED,THROTTLE_POS,class
count,11999.0,11999.0,11999.0,11999.0,11999.0,11999.0,11999.0,11999.0
mean,98.674875,92.866203,2602.941311,42.961829,29.152501,36.107847,0.453011,0.366781
std,1.805359,10.045511,1551.962101,17.183507,24.736596,19.872697,0.250767,0.481946
min,95.001729,59.0,0.0,17.0,0.0,0.0,0.173,0.0
25%,97.052433,83.0,1388.0,30.874198,5.29,21.0,0.231,0.0
50%,100.0,89.0,1973.0,39.306032,17.56,39.0,0.325,0.0
75%,100.0,103.344989,4139.780165,55.09494,56.286875,52.0,0.705217,1.0
max,101.0,109.989668,5499.408407,101.0,69.99416,98.0,0.933288,1.0


In [4]:
data.shape

(11999, 8)

In [5]:
data.isnull().values.any()

False

In [6]:
## Get the anomaly and the normal dataset 

anamoly = data[data['class']==1]

normal = data[data['class']==0]

In [7]:
data1=data

In [8]:
anomaly = data1[data1['class']==1]

valid = data1[data1['class']==0]

outlier_fraction = len(anomaly)/float(len(valid))

In [10]:
columns = data1.columns.tolist()
# Filtering the columns to remove data we do not want 
columns = [c for c in columns if c not in ["class"] ]
target = "class"

# Define a random state 
state = np.random.RandomState(42)

X = data1[columns]
Y = data1[target]

print(columns)
columns[0]

['BAROMETRIC_PRESSURE', 'ENGINE_COOLANT_TEMP', 'ENGINE_RPM', 'INTAKE_MANIFOLD_PRESSURE', 'MAF', 'SPEED', 'THROTTLE_POS']


'BAROMETRIC_PRESSURE'

## Model Prediction


### Isolation Forest Algorithm :

### Local Outlier Factor(LOF) Algorithm


In [11]:
##Define the outlier detection methods

classifiers = {
    "Isolation Forest":IsolationForest(n_estimators=100, max_samples=len(X), 
                                       random_state=state,contamination=outlier_fraction),
    "Local Outlier Factor":LocalOutlierFactor(n_neighbors=20, algorithm='auto', 
                                              leaf_size=30, metric='minkowski',
                                              p=2, metric_params=None)  
}

In [12]:
y_final_loc = []
y_final_if = []
    
for i, (clf_name,clf) in enumerate(classifiers.items()):
    
    #Fit the data and tag outliers
    if clf_name == "Local Outlier Factor":
        y_pred = clf.fit_predict(X)
        scores_prediction = clf.negative_outlier_factor_
    else:    
        clf.fit(X)
        scores_prediction = clf.decision_function(X)
        y_pred = clf.predict(X)
    #Reshaping the prediction values
    y_pred[y_pred == 1] = 0
    y_pred[y_pred == -1] = 1

    if clf_name == "Local Outlier Factor":
        y_final_loc.append(y_pred)
    else:
        y_final_if.append(y_pred)
     
    # Run Classification Metrics
    print(clf_name)
    print("Accuracy Score :")
    print(accuracy_score(Y,y_pred))
    print("Classification report :")
    print(classification_report(Y,y_pred))
    print("\n")

    
y_loc = np.array(y_final_loc)
y_if =  np.array(y_final_if)

Isolation Forest
Accuracy Score :
0.7875656304692058
Classification report :
              precision    recall  f1-score   support

           0       1.00      0.66      0.80      7598
           1       0.63      1.00      0.78      4401

    accuracy                           0.79     11999
   macro avg       0.82      0.83      0.79     11999
weighted avg       0.87      0.79      0.79     11999



Local Outlier Factor
Accuracy Score :
0.6229685807150596
Classification report :
              precision    recall  f1-score   support

           0       0.63      0.98      0.77      7598
           1       0.00      0.00      0.00      4401

    accuracy                           0.62     11999
   macro avg       0.31      0.49      0.38     11999
weighted avg       0.40      0.62      0.49     11999





In [13]:
y_loc

array([[0, 0, 0, ..., 0, 0, 0]])

In [14]:
print(y_loc.shape)

y_loc_t = np.transpose(y_loc)

(1, 11999)


In [15]:
y_loc.shape[1]

11999

In [16]:
for i in range(0,y_loc.shape[1]):
    if y_loc_t[i] == 1:
        print(i)

160
246
350
518
617
646
731
879
901
1037
1272
1352
1543
1700
1704
1905
2114
2154
2448
2451
2462
2772
2825
2844
2855
2956
3014
3109
3219
3266
3413
3533
3543
3590
3724
3767
3809
3831
3864
3916
3988
4144
4314
4385
4467
4483
4564
4737
4868
4887
4929
4939
5060
5105
5180
5192
5426
5580
5649
5732
5780
5810
5878
6053
6115
6118
6159
6247
6475
6591
7035
7042
7203
7213
7392
7444
7876
7898
7915
8212
8224
8248
8389
8422
8496
8638
8639
8675
8799
8802
9028
9090
9112
9211
9214
9241
9317
9339
9385
9421
9604
9623
9626
9769
10031
10141
10166
10259
10312
10390
10545
10583
10701
10714
10757
11011
11195
11318
11384
11404
11510
11813
11952


In [17]:
y_if.shape
y_if_t = np.transpose(y_if)

In [18]:
for i in range(0,y_loc.shape[1]):
    if y_if_t[i] == 1:
        print(i)

0
1
2
8
10
13
14
15
16
17
18
19
22
25
27
29
30
32
33
34
37
39
42
43
44
45
46
47
48
49
50
51
53
55
57
59
62
65
66
67
71
72
73
75
77
80
81
85
86
87
88
96
97
99
100
102
103
105
106
111
113
114
115
118
119
120
121
122
123
124
125
126
128
130
131
132
133
135
136
139
140
141
142
144
146
148
151
152
153
155
157
159
160
161
162
163
164
167
168
169
171
172
173
174
176
179
181
186
187
190
193
199
201
203
206
207
208
209
211
212
214
216
219
221
222
224
225
229
230
231
233
236
237
239
240
242
243
246
249
250
251
256
257
260
261
264
265
266
268
272
274
277
278
280
284
286
289
291
293
294
295
297
298
300
301
302
304
306
307
308
310
311
313
316
317
318
320
321
325
327
328
331
333
334
335
336
337
345
347
348
349
351
352
353
354
355
356
358
359
361
365
366
367
369
370
374
375
378
379
380
381
382
384
387
388
391
392
394
396
397
399
402
412
415
418
419
420
421
422
423
424
426
431
432
434
435
437
441
442
443
444
447
448
450
451
452
454
455
456
457
458
460
461
466
467
468
469
471
473
475
476
478
479
480
48

3474
3475
3477
3479
3481
3488
3489
3490
3491
3492
3496
3501
3504
3505
3506
3507
3510
3512
3513
3514
3516
3519
3520
3521
3524
3526
3527
3530
3532
3535
3537
3538
3539
3544
3546
3548
3550
3554
3555
3557
3558
3560
3562
3564
3565
3567
3568
3569
3572
3573
3575
3576
3577
3579
3580
3584
3585
3586
3587
3588
3590
3591
3593
3594
3596
3597
3598
3600
3601
3603
3604
3605
3609
3612
3613
3614
3618
3619
3621
3622
3623
3624
3625
3627
3630
3631
3634
3635
3639
3640
3647
3649
3653
3655
3657
3658
3659
3662
3663
3664
3665
3666
3667
3668
3672
3673
3674
3675
3680
3682
3684
3686
3687
3691
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3706
3708
3710
3712
3713
3715
3716
3717
3718
3720
3722
3723
3724
3725
3726
3727
3730
3731
3733
3734
3735
3740
3745
3747
3748
3751
3752
3753
3754
3755
3756
3758
3759
3762
3764
3765
3766
3767
3769
3770
3771
3773
3778
3784
3787
3788
3790
3791
3792
3794
3796
3801
3802
3804
3805
3807
3809
3811
3813
3816
3817
3821
3822
3823
3824
3827
3829
3831
3833
3834
3835
3837
3838
3842


6411
6412
6413
6415
6417
6420
6421
6422
6423
6424
6427
6428
6432
6434
6436
6437
6438
6439
6441
6445
6447
6448
6450
6451
6452
6453
6454
6455
6458
6459
6460
6461
6463
6464
6465
6467
6468
6470
6471
6472
6474
6475
6476
6477
6478
6479
6480
6481
6483
6484
6485
6486
6487
6488
6489
6490
6493
6498
6500
6503
6505
6506
6510
6511
6512
6513
6514
6515
6518
6522
6523
6524
6526
6527
6528
6533
6534
6536
6538
6539
6540
6546
6550
6552
6553
6555
6556
6559
6562
6564
6565
6570
6572
6574
6575
6576
6577
6579
6580
6585
6587
6589
6590
6591
6593
6594
6595
6597
6599
6601
6602
6605
6606
6607
6608
6610
6611
6615
6617
6620
6621
6623
6625
6627
6631
6632
6633
6634
6635
6637
6638
6639
6640
6641
6644
6646
6647
6648
6649
6650
6652
6653
6654
6655
6656
6657
6660
6661
6662
6663
6666
6667
6668
6669
6671
6672
6673
6674
6676
6677
6680
6682
6683
6688
6689
6697
6700
6701
6704
6706
6707
6709
6710
6711
6712
6714
6716
6717
6718
6720
6722
6724
6727
6728
6729
6730
6731
6733
6734
6735
6736
6738
6739
6742
6743
6744
6745
6748
6751
6752


9394
9395
9398
9399
9400
9401
9404
9405
9407
9408
9410
9413
9414
9415
9418
9419
9421
9422
9423
9424
9425
9426
9427
9428
9429
9430
9431
9432
9437
9439
9442
9444
9446
9448
9452
9454
9455
9456
9457
9458
9459
9460
9464
9468
9470
9473
9474
9480
9481
9482
9483
9485
9486
9487
9488
9490
9493
9494
9496
9497
9498
9499
9501
9502
9503
9504
9505
9506
9507
9508
9514
9519
9520
9522
9523
9524
9525
9526
9527
9528
9531
9532
9534
9535
9539
9541
9543
9546
9548
9550
9551
9553
9556
9557
9558
9559
9564
9568
9569
9570
9572
9573
9574
9575
9578
9580
9581
9582
9583
9588
9590
9591
9592
9593
9594
9596
9597
9598
9601
9602
9603
9605
9607
9608
9609
9610
9611
9613
9618
9623
9624
9627
9629
9631
9634
9636
9641
9643
9644
9645
9646
9647
9648
9649
9650
9652
9653
9654
9655
9657
9658
9659
9662
9663
9664
9665
9666
9667
9668
9670
9671
9672
9673
9674
9677
9679
9680
9681
9682
9684
9687
9688
9690
9692
9696
9699
9700
9702
9703
9704
9705
9707
9714
9717
9719
9720
9721
9722
9723
9725
9726
9727
9728
9729
9731
9732
9733
9734
9735
9737
