/
ndpiReader.c
6158 lines (5044 loc) · 206 KB
/
ndpiReader.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* ndpiReader.c
*
* Copyright (C) 2011-24 - ntop.org
*
* nDPI is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* nDPI is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with nDPI. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "ndpi_config.h"
#ifdef __linux__
#include <sched.h>
#endif
#include "ndpi_api.h"
#include "../src/lib/third_party/include/uthash.h"
#include "../src/lib/third_party/include/ahocorasick.h"
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <float.h> /* FLT_EPSILON */
#ifdef WIN32
#include <winsock2.h> /* winsock.h is included automatically */
#include <windows.h>
#include <ws2tcpip.h>
#include <process.h>
#include <io.h>
#else
#include <unistd.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <sys/socket.h>
#include <sys/mman.h>
#endif
#include <string.h>
#include <stdarg.h>
#include <search.h>
#include <pcap.h>
#include <signal.h>
#include <time.h>
#include <pthread.h>
#include <assert.h>
#include <math.h>
#include <sys/stat.h>
#include <fcntl.h>
#ifndef _MSC_BUILD
#include <libgen.h>
#endif
#include <errno.h>
#include "reader_util.h"
/*
 * 64-bit counterpart of ntohl(): each 32-bit half of the value goes through
 * ntohl() and the two halves are then exchanged.
 *
 * The argument is widened to uint64_t and fully parenthesized before it is
 * shifted, so compound expressions (e.g. ntohl64(a | b)) and arguments
 * narrower than 64 bits are handled correctly.
 * NOTE: the argument is evaluated twice; do not pass expressions with side
 * effects.
 */
#define ntohl64(x) ( ( (uint64_t)ntohl( (uint32_t)((uint64_t)(x) & 0xFFFFFFFFULL) ) << 32) | \
                     (uint64_t)ntohl( (uint32_t)((uint64_t)(x) >> 32) ) )
/* The transformation is its own inverse, so the same macro serves both directions */
#define htonl64(x) ntohl64(x)
#define HEURISTICS_CODE 1
/** Client parameters **/
static char *_pcap_file[MAX_NUM_READER_THREADS]; /**< Ingress pcap file/interfaces */
#ifndef USE_DPDK
static FILE *playlist_fp[MAX_NUM_READER_THREADS] = { NULL }; /**< Ingress playlist */
#endif
static FILE *results_file = NULL; /**< Results stream; flowGetBDMeanandVariance() falls back to stdout when this is NULL */
static char *results_path = NULL; /**< presumably the path results_file was opened from - TODO confirm */
static char * bpfFilter = NULL; /**< bpf filter */
static char *_protoFilePath = NULL; /**< Protocol file path (passed to ndpi_load_protocols_file() when not NULL) */
static char *_customCategoryFilePath= NULL; /**< Custom categories file path */
static char *_maliciousJA3Path = NULL; /**< Malicious JA3 signatures */
static char *_maliciousSHA1Path = NULL; /**< Malicious SSL certificate SHA1 fingerprints */
static char *_riskyDomainFilePath = NULL; /**< Risky domain files */
static char *_categoriesDirPath = NULL; /**< Directory containing domain files */
static u_int8_t live_capture = 0; /**< presumably non-zero when reading from a device rather than a file - TODO confirm */
static u_int8_t undetected_flows_deleted = 0;
static FILE *csv_fp = NULL; /**< for CSV export (printCSVHeader() is a no-op while this is NULL) */
static FILE *serialization_fp = NULL; /**< for TLV,CSV,JSON export */
static ndpi_serialization_format serialization_format = ndpi_serialization_format_unknown;
static char* domain_to_check = NULL;
static char* ip_port_to_check = NULL;
static u_int8_t ignore_vlanid = 0;
/** User preferences **/
/* extcap_exit is set to 1 by the extcap_interfaces()/extcap_dlts()/extcap_config() handlers */
u_int8_t enable_realtime_output = 0, enable_protocol_guess = NDPI_GIVEUP_GUESS_BY_PORT | NDPI_GIVEUP_GUESS_BY_IP, enable_payload_analyzer = 0, num_bin_clusters = 0, extcap_exit = 0;
/* enable_flow_stats gates the byte-distribution/entropy output (text and CSV columns) */
u_int8_t verbose = 0, enable_flow_stats = 0;
struct cfg {
char *proto;
char *param;
char *value;
};
#define MAX_NUM_CFGS 32
static struct cfg cfgs[MAX_NUM_CFGS];
static int num_cfgs = 0;
int reader_log_level = 0;
char *_disabled_protocols = NULL;
static u_int8_t stats_flag = 0;
/* Printed by help() as the default of '-e' (min human readable string match len) */
u_int8_t human_readeable_string_len = 5;
/* Printed by help() as the defaults of '-U' (UDP) and '-T' (TCP): max packets processed before giving up */
u_int8_t max_num_udp_dissected_pkts = 24 /* 8 is enough for most protocols, Signal and SnapchatCall require more */, max_num_tcp_dissected_pkts = 80 /* due to telnet */;
static u_int32_t pcap_analysis_duration = (u_int32_t)-1;
static u_int32_t risk_stats[NDPI_MAX_RISK] = { 0 }, risks_found = 0, flows_with_risks = 0;
static struct ndpi_stats cumulative_stats;
static u_int16_t decode_tunnels = 0;
static u_int16_t num_loops = 1;
static u_int8_t shutdown_app = 0, quiet_mode = 0;
/* Note: help() forces this back to 1 before dumping the protocol list */
static u_int8_t num_threads = 1;
static struct timeval startup_time, begin, end;
#ifdef __linux__
static int core_affinity[MAX_NUM_READER_THREADS];
#endif
static struct timeval pcap_start = { 0, 0}, pcap_end = { 0, 0 };
#ifndef USE_DPDK
static struct bpf_program bpf_code;
#endif
static struct bpf_program *bpf_cfilter = NULL;
/** Detection parameters **/
static time_t capture_for = 0;
static time_t capture_until = 0;
static u_int32_t num_flows;
extern u_int8_t enable_doh_dot_detection;
/* Defined in another translation unit; help() prints them as the '-P' defaults */
extern u_int32_t max_num_packets_per_flow, max_packet_payload_dissection, max_num_reported_top_payloads;
extern u_int16_t min_pattern_len, max_pattern_len;
u_int8_t dump_internal_stats;
/* Histogram of allocation sizes, updated by ndpi_malloc_wrapper() */
static struct ndpi_bin malloc_bins;
/* ndpi_malloc_wrapper() records a sample only when both enable_malloc_bins and malloc_size_stats are non-zero */
static int enable_malloc_bins = 0;
/* Number of size slots used by reader_slot_malloc_bins() */
static int max_malloc_bins = 14;
int malloc_size_stats = 0;
/* Sortable reference to a flow (see cmpFlows()) */
struct flow_info {
struct ndpi_flow_info *flow; /* Flow descriptor; cmpFlows() compares entries through this pointer */
u_int16_t thread_id; /* presumably the reader thread the flow belongs to - TODO confirm */
};
static struct flow_info *all_flows;
/* Per-address counter entry (element type of port_stats.top_ip_addrs[]) */
struct info_pair {
u_int32_t addr; /* IP address */
u_int8_t version; /* IP version */
char proto[16]; /* Application level protocol */
int count; /* Counter associated with this address */
};
/* Tree node carrying the same payload as struct info_pair
   (node type of port_stats.addr_tree) */
typedef struct node_a {
u_int32_t addr; /* IP address */
u_int8_t version; /* IP version */
char proto[16]; /* Application level protocol */
int count; /* Counter associated with this address */
struct node_a *left, *right; /* Child links */
}addr_node;
// struct used to collect additional statistics in function printFlowStats
typedef struct hash_stats{
char* domain_name; /* domain name this entry counts */
int occurency; /* how many times the domain name occurs in the flows */
UT_hash_handle hh; /* uthash handle: makes this structure hashable */
}hash_stats;
/* Per-port traffic statistics, stored in a uthash table */
struct port_stats {
u_int32_t port; /* we'll use this field as the key */
u_int32_t num_pkts, num_bytes; /* packet and byte counters */
u_int32_t num_flows; /* flow counter */
u_int32_t num_addr; /* number of distinct IP addresses */
u_int32_t cumulative_addr; /* cumulative sum of IP addresses */
addr_node *addr_tree; /* tree of distinct IP addresses */
struct info_pair top_ip_addrs[MAX_NUM_IP_ADDRESS];
u_int8_t hasTopHost; /* boolean flag */
u_int32_t top_host; /* host that contributes > 95% of the traffic */
u_int8_t version; /* top host's IP version */
char proto[16]; /* application level protocol of the top host */
UT_hash_handle hh; /* makes this structure hashable */
};
/* Heads of the two port_stats hash tables (presumably keyed by source and destination port - TODO confirm) */
struct port_stats *srcStats = NULL, *dstStats = NULL;
// struct to hold the count of flows received by destination ports
struct port_flow_info {
u_int32_t port; /* key */
u_int32_t num_flows; /* number of flows counted for this port */
UT_hash_handle hh; /* makes this structure hashable */
};
// struct to hold single-packet TCP flows sent by a source IP address
struct single_flow_info {
u_int32_t saddr; /* key */
u_int8_t version; /* IP version */
struct port_flow_info *ports; /* head of the port_flow_info hash table for this source */
u_int32_t tot_flows; /* total flow counter for this source */
UT_hash_handle hh; /* makes this structure hashable */
};
/* Head of the single_flow_info hash table */
struct single_flow_info *scannerHosts = NULL;
// struct to hold top receiver hosts
struct receiver {
u_int32_t addr; /* key */
u_int8_t version; /* IP version */
u_int32_t num_pkts; /* packet counter for this host */
UT_hash_handle hh; /* makes this structure hashable */
};
/* Heads of the two receiver hash tables */
struct receiver *receivers = NULL, *topReceivers = NULL;
/* Value stored in ndpi_packet_trailer.magic */
#define WIRESHARK_NTOP_MAGIC 0x19680924
/* Per-packet trailer carrying the nDPI classification.
   NOTE(review): PACK_ON/PACK_OFF are defined elsewhere; presumably they
   request a packed layout - confirm */
PACK_ON
struct ndpi_packet_trailer {
u_int32_t magic; /* WIRESHARK_NTOP_MAGIC */
u_int16_t master_protocol /* e.g. HTTP */, app_protocol /* e.g. FaceBook */;
ndpi_risk flow_risk; /* risk value of the flow */
u_int16_t flow_score; /* score of the flow */
char name[16]; /* short text label (16 bytes) - exact content not visible here */
} PACK_OFF;
/* extcap state: extcap_capture() opens a "dead" pcap handle (extcap_fifo_h)
   and a dumper (extcap_dumper) that writes to the path in extcap_capture_fifo */
static pcap_dumper_t *extcap_dumper = NULL;
static pcap_t *extcap_fifo_h = NULL;
static char extcap_buf[16384];
static char *extcap_capture_fifo = NULL;
static u_int16_t extcap_packet_filter = (u_int16_t)-1;
// per-thread state: associates a workflow with the thread that runs it
struct reader_thread {
struct ndpi_workflow *workflow; /* workflow of this thread */
pthread_t pthread; /* thread handle */
u_int64_t last_idle_scan_time; /* time of the last idle-flow scan (unit not visible here) */
u_int32_t idle_scan_idx; /* index used by the idle-flow scan */
u_int32_t num_idle_flows; /* number of entries of idle_flows[] in use - TODO confirm */
struct ndpi_flow_info *idle_flows[IDLE_SCAN_BUDGET]; /* flows collected by the idle scan */
};
// one entry per reader thread
static struct reader_thread ndpi_thread_info[MAX_NUM_READER_THREADS];
// ID tracking: binds an IPv4 address to its nDPI id structure
typedef struct ndpi_id {
u_int8_t ip[4]; // IP address (4 bytes)
struct ndpi_id_struct *ndpi_id; // nDPI worker structure
} ndpi_id_t;
// used memory counters, maintained by ndpi_malloc_wrapper():
// current_ndpi_memory accumulates every requested size, max_ndpi_memory keeps its peak
static u_int32_t current_ndpi_memory = 0, max_ndpi_memory = 0;
#ifdef USE_DPDK
static int dpdk_port_id = 0, dpdk_run_capture = 1;
#endif
void test_lib(); /* Forward */
extern void ndpi_report_payload_stats(FILE *out);
extern int parse_proto_name_list(char *str, NDPI_PROTOCOL_BITMASK *bitmask, int inverted_logic);
/* ********************************** */
// #define DEBUG_TRACE
#ifdef DEBUG_TRACE
FILE *trace = NULL;
#endif
/* ***************************************************** */
/**
 * @brief Map an allocation size to its histogram slot
 *
 * Slot k collects the sizes up to 2^(k+1); everything larger than the last
 * power of two ends up in the final slot (max_malloc_bins - 1).
 */
static u_int32_t reader_slot_malloc_bins(u_int64_t v)
{
  int slot = 0;

  /* 0-2,3-4,5-8,9-16,17-32,33-64,65-128,129-256,257-512,513-1024,1025-2048,2049-4096,4097-8192,8193- */
  while(slot < max_malloc_bins - 1) {
    if(v <= (1ULL << (slot + 1)))
      break;

    slot++;
  }

  return slot;
}
/**
 * @brief Accounting front-end for malloc()
 *
 * Adds the requested size to the memory counters (tracking the peak) and,
 * when size statistics are enabled, records the size in the malloc histogram
 * before handing the request to the C library.
 */
static void *ndpi_malloc_wrapper(size_t size) {
  void *mem;

  current_ndpi_memory += size;

  if(max_ndpi_memory < current_ndpi_memory)
    max_ndpi_memory = current_ndpi_memory;

  if(enable_malloc_bins) {
    if(malloc_size_stats)
      ndpi_inc_bin(&malloc_bins, reader_slot_malloc_bins(size), 1);
  }

  mem = malloc(size); /* Don't change to ndpi_malloc !!!!! */

  return(mem);
}
/* ***************************************************** */
/**
 * @brief Release a buffer through the C library free()
 *
 * No accounting is done here: the memory counters are not decremented.
 */
static void free_wrapper(void *freeable) {
  /* Don't change to ndpi_free !!!!! */
  free(freeable);
}
/* ***************************************************** */
/* Number of reference (centroid) bins used by the DoH similarity check */
#define NUM_DOH_BINS 2
/* Bins compared against a flow's bin by check_bin_doh_similarity() */
static struct ndpi_bin doh_ndpi_bins[NUM_DOH_BINS];
/* Static centroid values; init_doh_bins() installs each row as the bins8
   storage of the matching doh_ndpi_bins[] entry */
static u_int8_t doh_centroids[NUM_DOH_BINS][PLEN_NUM_BINS] = {
{ 23,25,3,0,26,0,0,0,0,0,0,0,0,0,2,0,0,15,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
{ 35,30,21,0,0,0,2,4,0,0,5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }
};
/* A value returned by ndpi_bin_similarity() that is <= this threshold counts as a match */
static float doh_max_distance = 35.5;
static void init_doh_bins() {
u_int i;
for(i=0; i<NUM_DOH_BINS; i++) {
ndpi_init_bin(&doh_ndpi_bins[i], ndpi_bin_family8, PLEN_NUM_BINS);
ndpi_free_bin(&doh_ndpi_bins[i]); /* Hack: we use static bins (see below), so we need to free the dynamic ones just allocated */
doh_ndpi_bins[i].u.bins8 = doh_centroids[i];
}
}
/* *********************************************** */
/**
 * @brief Compare a bin against the DoH reference bins
 *
 * @param bin         bin to test
 * @param similarity  out: value of the comparison that decided the result
 *                    (the matching one, the failing one, or the lowest seen
 *                    when nothing matched)
 * @return 1 when a reference bin is within doh_max_distance, 0 otherwise
 *         (including when ndpi_bin_similarity() reports an error)
 */
static u_int check_bin_doh_similarity(struct ndpi_bin *bin, float *similarity) {
  float best = 9999999999.0f;
  u_int idx;

  for(idx = 0; idx < NUM_DOH_BINS; idx++) {
    float dist = ndpi_bin_similarity(&doh_ndpi_bins[idx], bin, 0, 0);

    *similarity = dist;

    if(dist < 0) /* Error */
      return(0);

    if(dist <= doh_max_distance)
      return(1);

    if(dist < best)
      best = dist;
  }

  *similarity = best;

  return(0);
}
/* *********************************************** */
/**
 * @brief Check a host name against the nDPI string (sub)protocol matcher
 *
 * Builds a temporary detection module with every protocol enabled, looks the
 * string up with ndpi_match_string_subprotocol() and prints the outcome on
 * stdout. The module is destroyed before returning.
 *
 * @param testChar  host/domain string to test; NULL is ignored
 */
void ndpiCheckHostStringMatch(char *testChar) {
  ndpi_protocol_match_result match = { NDPI_PROTOCOL_UNKNOWN,
    NDPI_PROTOCOL_CATEGORY_UNSPECIFIED, NDPI_PROTOCOL_UNRATED };
  int testRes;
  char appBufStr[64];
  ndpi_protocol detected_protocol;
  struct ndpi_detection_module_struct *ndpi_str;
  NDPI_PROTOCOL_BITMASK all;

  if(!testChar)
    return;

  ndpi_str = ndpi_init_detection_module(NULL);

  /* Initialization can fail: do not hand a NULL module to the library */
  if(ndpi_str == NULL) {
    fprintf(stderr, "Unable to initialize the nDPI detection module\n");
    return;
  }

  NDPI_BITMASK_SET_ALL(all);
  ndpi_set_protocol_detection_bitmask2(ndpi_str, &all);
  ndpi_finalize_initialization(ndpi_str);

  testRes = ndpi_match_string_subprotocol(ndpi_str,
                                          testChar, strlen(testChar), &match);

  if(testRes) {
    memset(&detected_protocol, 0, sizeof(ndpi_protocol) );

    detected_protocol.app_protocol = match.protocol_id;
    detected_protocol.master_protocol = 0;
    detected_protocol.category = match.protocol_category;

    ndpi_protocol2name(ndpi_str, detected_protocol, appBufStr,
                       sizeof(appBufStr));

    printf("Match Found for string [%s] -> P(%d) B(%d) C(%d) => %s %s %s\n",
           testChar, match.protocol_id, match.protocol_breed,
           match.protocol_category,
           appBufStr,
           ndpi_get_proto_breed_name(match.protocol_breed ),
           ndpi_category_get_name(ndpi_str, match.protocol_category));
  } else
    printf("Match NOT Found for string: %s\n\n", testChar );

  ndpi_exit_detection_module(ndpi_str);
}
/* *********************************************** */
/**
 * @brief Translate a configuration error code into a readable message
 *
 * @return a static string; "Unknown" for values not handled below
 */
static char const *
ndpi_cfg_error2string(ndpi_cfg_error const err)
{
  char const *text = "Unknown";

  /* No default label on purpose: unhandled values keep the "Unknown" text */
  switch (err)
    {
    case NDPI_CFG_OK:
      text = "Success";
      break;
    case NDPI_CFG_INVALID_CONTEXT:
      text = "Invalid context";
      break;
    case NDPI_CFG_NOT_FOUND:
      text = "Configuration not found";
      break;
    case NDPI_CFG_INVALID_PARAM:
      text = "Invalid configuration parameter";
      break;
    case NDPI_CFG_CONTEXT_ALREADY_INITIALIZED:
      text = "Configuration context already initialized";
      break;
    case NDPI_CFG_CALLBACK_ERROR:
      text = "Configuration callback error";
      break;
    }

  return text;
}
/**
 * @brief Check an "<ip>[:<port>]" string against the nDPI IP/port matcher
 *
 * Builds a temporary detection module (all protocols enabled, optional
 * protocol file and the collected cfgs[] overrides applied), looks the
 * address/port up with ndpi_network_port_ptree_match() and prints the outcome
 * on stdout. The module is destroyed on every return path.
 *
 * A configuration override that cannot be applied terminates the program.
 *
 * @param testChar  string to test; it is tokenized in place (the ':' is
 *                  overwritten). NULL is ignored.
 */
static void ndpiCheckIPMatch(char *testChar) {
  struct ndpi_detection_module_struct *ndpi_str;
  u_int16_t ret = NDPI_PROTOCOL_UNKNOWN;
  u_int16_t port = 0;
  char *saveptr, *ip_str, *port_str;
  struct in_addr addr;
  char appBufStr[64];
  ndpi_protocol detected_protocol;
  int i;
  ndpi_cfg_error rc;
  NDPI_PROTOCOL_BITMASK all;

  if(!testChar)
    return;

  ndpi_str = ndpi_init_detection_module(NULL);

  /* Initialization can fail: do not hand a NULL module to the library */
  if(ndpi_str == NULL) {
    fprintf(stderr, "Unable to initialize the nDPI detection module\n");
    return;
  }

  NDPI_BITMASK_SET_ALL(all);
  ndpi_set_protocol_detection_bitmask2(ndpi_str, &all);

  if(_protoFilePath != NULL)
    ndpi_load_protocols_file(ndpi_str, _protoFilePath);

  for(i = 0; i < num_cfgs; i++) {
    rc = ndpi_set_config(ndpi_str,
                         cfgs[i].proto, cfgs[i].param, cfgs[i].value);

    if (rc != NDPI_CFG_OK) {
      fprintf(stderr, "Error setting config [%s][%s][%s]: %s (%d)\n",
              (cfgs[i].proto != NULL ? cfgs[i].proto : ""),
              cfgs[i].param, cfgs[i].value, ndpi_cfg_error2string(rc), rc);
      exit(-1);
    }
  }

  ndpi_finalize_initialization(ndpi_str);

  ip_str = strtok_r(testChar, ":", &saveptr);

  if(!ip_str) {
    /* Nothing to look up (e.g. empty string): release the module before leaving */
    ndpi_exit_detection_module(ndpi_str);
    return;
  }

  addr.s_addr = inet_addr(ip_str);

  /* The port is optional: when it is missing, port 0 is used */
  port_str = strtok_r(NULL, "\n", &saveptr);

  if(port_str)
    port = atoi(port_str);

  ret = ndpi_network_port_ptree_match(ndpi_str, &addr, htons(port));

  if(ret != NDPI_PROTOCOL_UNKNOWN) {
    memset(&detected_protocol, 0, sizeof(ndpi_protocol));

    detected_protocol.app_protocol = ndpi_map_ndpi_id_to_user_proto_id(ndpi_str, ret);

    ndpi_protocol2name(ndpi_str, detected_protocol, appBufStr,
                       sizeof(appBufStr));

    printf("Match Found for IP %s, port %d -> %s (%d)\n",
           ip_str, port, appBufStr, detected_protocol.app_protocol);
  } else {
    /* testChar was cut at the ':' by strtok_r(), so only the IP part is printed */
    printf("Match NOT Found for IP: %s\n", testChar);
  }

  ndpi_exit_detection_module(ndpi_str);
}
/********************** FUNCTIONS ********************* */
static double ndpi_flow_get_byte_count_entropy(const uint32_t byte_count[256],
unsigned int num_bytes)
{
int i;
double sum = 0.0;
for(i=0; i<256; i++) {
double tmp = (double) byte_count[i] / (double) num_bytes;
if(tmp > FLT_EPSILON) {
sum -= tmp * logf(tmp);
}
}
return(sum / log(2.0));
}
/**
* @brief Set main components necessary to the detection
*/
static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle,
struct ndpi_global_context *g_ctx);
/**
 * @brief Print the byte-distribution statistics of a flow
 *
 * Reads flow->last_entropy and, when enable_flow_stats is set, writes the
 * byte-distribution mean, its standard deviation, the entropy and the total
 * entropy (entropy * number of bytes) either as four CSV columns on csv_fp or
 * as text on the results stream (results_file, or stdout when it is NULL).
 * The results stream is flushed first; nothing else is done when the flow has
 * no entropy data.
 *
 * Note: the local named `variance` holds a standard deviation once sqrt()
 * has been applied, which is how it is labelled in the output (byte_dist_std).
 */
static void
flowGetBDMeanandVariance(struct ndpi_flow_info* flow) {
FILE *out = results_file ? results_file : stdout;
const uint32_t *array = NULL;
uint32_t tmp[256], i;
unsigned int num_bytes;
double mean = 0.0, variance = 0.0;
struct ndpi_entropy *last_entropy = flow->last_entropy;
fflush(out);
if(!last_entropy)
return;
/*
* Sum up the byte_count array for outbound and inbound flows,
* if this flow is bidirectional
*/
/* TODO: we could probably use ndpi_data_* generic functions to simplify the code and
to get rid of `ndpi_flow_get_byte_count_entropy()` */
if (!flow->bidirectional) {
/* Unidirectional: only the src->dst counters are used */
array = last_entropy->src2dst_byte_count;
num_bytes = last_entropy->src2dst_l4_bytes;
/* tmp is filled here as well, although `array` points at the source counters in this branch */
for(i=0; i<256; i++) {
tmp[i] = last_entropy->src2dst_byte_count[i];
}
if (last_entropy->src2dst_num_bytes != 0) {
mean = last_entropy->src2dst_bd_mean;
variance = last_entropy->src2dst_bd_variance/(last_entropy->src2dst_num_bytes - 1);
variance = sqrt(variance);
/* With a single byte the divisor above is zero: the result is replaced by 0 */
if (last_entropy->src2dst_num_bytes == 1) {
variance = 0.0;
}
}
} else {
/* Bidirectional: merge the per-direction histograms */
for(i=0; i<256; i++) {
tmp[i] = last_entropy->src2dst_byte_count[i] + last_entropy->dst2src_byte_count[i];
}
array = tmp;
num_bytes = last_entropy->src2dst_l4_bytes + last_entropy->dst2src_l4_bytes;
if (last_entropy->src2dst_num_bytes + last_entropy->dst2src_num_bytes != 0) {
/* Mean and variance of the two directions, each weighted by its share of the bytes */
mean = ((double)last_entropy->src2dst_num_bytes)/((double)(last_entropy->src2dst_num_bytes+last_entropy->dst2src_num_bytes))*last_entropy->src2dst_bd_mean +
((double)last_entropy->dst2src_num_bytes)/((double)(last_entropy->dst2src_num_bytes+last_entropy->src2dst_num_bytes))*last_entropy->dst2src_bd_mean;
variance = ((double)last_entropy->src2dst_num_bytes)/((double)(last_entropy->src2dst_num_bytes+last_entropy->dst2src_num_bytes))*last_entropy->src2dst_bd_variance +
((double)last_entropy->dst2src_num_bytes)/((double)(last_entropy->dst2src_num_bytes+last_entropy->src2dst_num_bytes))*last_entropy->dst2src_bd_variance;
variance = variance/((double)(last_entropy->src2dst_num_bytes + last_entropy->dst2src_num_bytes - 1));
variance = sqrt(variance);
/* With a single byte the divisor above is zero: the result is replaced by 0 */
if (last_entropy->src2dst_num_bytes + last_entropy->dst2src_num_bytes == 1) {
variance = 0.0;
}
}
}
if(enable_flow_stats) {
/* Output the mean */
if(num_bytes != 0) {
double entropy = ndpi_flow_get_byte_count_entropy(array, num_bytes);
if(csv_fp) {
fprintf(csv_fp, ",%.3f,%.3f,%.3f,%.3f", mean, variance, entropy, entropy * num_bytes);
} else {
fprintf(out, "[byte_dist_mean: %.3f", mean);
fprintf(out, "][byte_dist_std: %.3f]", variance);
fprintf(out, "[entropy: %.3f]", entropy);
fprintf(out, "[total_entropy: %.3f]", entropy * num_bytes);
}
} else {
/* No payload bytes: the CSV row still gets its four columns; the text output prints nothing */
if(csv_fp)
fprintf(csv_fp, ",%.3f,%.3f,%.3f,%.3f", 0.0, 0.0, 0.0, 0.0);
}
}
}
/**
 * @brief Print help instructions and terminate the program
 *
 * Prints the usage text, the protocol configuration parameters (dumped from a
 * temporary detection module that also loads the protocol file, when one has
 * been specified) and the extcap options. With long_help set, the supported
 * protocols and risks are listed as well.
 *
 * @param long_help  non-zero to append the protocol/risk listings
 *
 * This function does not return: it exits with status 0 when long_help is
 * non-zero and 1 otherwise (or 1 if the detection module cannot be created).
 */
static void help(u_int long_help) {
  NDPI_PROTOCOL_BITMASK all;
  struct ndpi_detection_module_struct *ndpi_str;

  printf("Welcome to nDPI %s\n\n", ndpi_revision());

  printf("ndpiReader "
#ifndef USE_DPDK
         "-i <file|device> "
#endif
         "[-f <filter>][-s <duration>][-m <duration>][-b <num bin clusters>]\n"
         " [-p <protos>][-l <loops>][-q][-d][-h][-H][-D][-e <len>][-E][-t][-v <level>]\n"
         " [-n <threads>][-w <file>][-c <file>][-C <file>][-j <file>][-x <file>]\n"
         " [-r <file>][-R][-j <file>][-S <file>][-T <num>][-U <num>] [-x <domain>]\n"
         " [-a <mode>][-B proto_list]\n\n"
         "Usage:\n"
         " -i <file.pcap|device> | Specify a pcap file/playlist to read packets from or a\n"
         " | device for live capture (comma-separated list)\n"
         " -f <BPF filter> | Specify a BPF filter for filtering selected traffic\n"
         " -s <duration> | Maximum capture duration in seconds (live traffic capture only)\n"
         " -m <duration> | Split analysis duration in <duration> max seconds\n"
         " -p <file>.protos | Specify a protocol file (eg. protos.txt)\n"
         " -l <num loops> | Number of detection loops (test only)\n"
         " -n <num threads> | Number of threads. Default: number of interfaces in -i.\n"
         " | Ignored with pcap files.\n"
         " -b <num bin clusters> | Number of bin clusters\n"
         " -k <file> | Specify a file to write serialized detection results\n"
         " -K <format> | Specify the serialization format for `-k'\n"
         " | Valid formats are tlv, csv or json (default)\n"
#ifdef __linux__
         " -g <id:id...> | Thread affinity mask (one core id per thread)\n"
#endif
         " -a <mode> | Generates option values for GUIs\n"
         " | 0 - List known protocols\n"
         " | 1 - List known categories\n"
         " | 2 - List known risks\n"
         " -d | Disable protocol guess (by ip and by port) and use only DPI.\n"
         " | It is a shortcut to --cfg=dpi.guess_on_giveup,0\n"
         " -e <len> | Min human readeable string match len. Default %u\n"
         " -q | Quiet mode\n"
         " -F | Enable flow stats\n"
         " -t | Dissect GTP/TZSP tunnels\n"
         " -P <a>:<b>:<c>:<d>:<e> | Enable payload analysis:\n"
         " | <a> = min pattern len to search\n"
         " | <b> = max pattern len to search\n"
         " | <c> = max num packets per flow\n"
         " | <d> = max packet payload dissection\n"
         " | <e> = max num reported payloads\n"
         " | Default: %u:%u:%u:%u:%u\n"
         " -c <path> | Load custom categories from the specified file\n"
         " -C <path> | Write output in CSV format on the specified file\n"
         " -r <path> | Load risky domain file\n"
         " -R | Print detected realtime protocols\n"
         " -j <path> | Load malicious JA3 fingeprints\n"
         " -S <path> | Load malicious SSL certificate SHA1 fingerprints\n"
         " -G <dir> | Bind domain names to categories loading files from <dir>\n"
         " -w <path> | Write test output on the specified file. This is useful for\n"
         " | testing purposes in order to compare results across runs\n"
         " -h | This help\n"
         " -H | This help plus some information about supported protocols/risks\n"
         " -v <1|2|3|4> | Verbose 'unknown protocol' packet print.\n"
         " | 1 = verbose\n"
         " | 2 = very verbose\n"
         " | 3 = port stats\n"
         " | 4 = hash stats\n"
         " -V <0-4> | nDPI logging level\n"
         " | 0 - error, 1 - trace, 2 - debug, 3 - extra debug\n"
         " | >3 - extra debug + log enabled for all protocols (i.e. '-u all')\n"
         " -u all|proto|num[,...] | Enable logging only for such protocol(s)\n"
         " | If this flag is present multiple times (directly, or via '-V'),\n"
         " | only the last instance will be considered\n"
         " -B all|proto|num[,...] | Disable such protocol(s). By defaul all protocols are enabled\n"
         " -T <num> | Max number of TCP processed packets before giving up [default: %u]\n"
         " -U <num> | Max number of UDP processed packets before giving up [default: %u]\n"
         " -D | Enable DoH traffic analysis based on content (no DPI)\n"
         " -x <domain> | Check domain name [Test only]\n"
         " -I | Ignore VLAN id for flow hash calculation\n"
         " -A | Dump internal statistics (LRU caches / Patricia trees / Ahocarasick automas / ...\n"
         " -M | Memory allocation stats on data-path (only by the library).\n"
         " | It works only on single-thread configuration\n"
         " --cfg=proto,param,value | Configure the specific attribute of this protocol\n"
         ,
         /* One value per %u above, in order of appearance */
         human_readeable_string_len,
         min_pattern_len, max_pattern_len, max_num_packets_per_flow, max_packet_payload_dissection,
         max_num_reported_top_payloads, max_num_tcp_dissected_pkts, max_num_udp_dissected_pkts);

  ndpi_str = ndpi_init_detection_module(NULL);

  /* Initialization can fail: do not hand a NULL module to the library */
  if(ndpi_str == NULL) {
    fprintf(stderr, "Error: unable to initialize the nDPI detection module\n");
    exit(1);
  }

  NDPI_BITMASK_SET_ALL(all);
  ndpi_set_protocol_detection_bitmask2(ndpi_str, &all);

  if(_protoFilePath != NULL)
    ndpi_load_protocols_file(ndpi_str, _protoFilePath);

  ndpi_finalize_initialization(ndpi_str);

  printf("\nProtocols configuration parameters:\n");
  ndpi_dump_config(ndpi_str, stdout);

#ifndef WIN32
  printf("\nExcap (wireshark) options:\n"
         " --extcap-interfaces\n"
         " --extcap-version\n"
         " --extcap-dlts\n"
         " --extcap-interface <name>\n"
         " --extcap-config\n"
         " --capture\n"
         " --extcap-capture-filter <filter>\n"
         " --fifo <path to file or pipe>\n"
         " --ndpi-proto-filter <protocol>\n"
    );
#endif

  if(long_help) {
    printf("\n\nSize of nDPI Flow structure: %u\n"
           "Sizeof of nDPI Flow protocol union: %zu\n",
           ndpi_detection_get_sizeof_ndpi_flow_struct(),
           sizeof(((struct ndpi_flow_struct *)0)->protos));

    printf("\n\nnDPI supported protocols:\n");
    printf("%3s %8s %-22s %-10s %-8s %-12s %s\n",
           "Id", "Userd-id", "Protocol", "Layer_4", "Nw_Proto", "Breed", "Category");
    num_threads = 1;

    ndpi_dump_protocols(ndpi_str, stdout);

    printf("\n\nnDPI supported risks:\n");
    ndpi_dump_risks_score(stdout);
  }

  ndpi_exit_detection_module(ndpi_str);

  /* Plain -h is reported as a failure status, -H as success */
  exit(!long_help);
}
/* Value returned for "--cfg": it lies outside the single-character range, so
   the option has no one-letter equivalent in this table */
#define OPTLONG_VALUE_CFG 3000
/*
 * Long option table in the getopt `struct option` layout:
 * { name, argument requirement, flag pointer (always NULL here), returned value }.
 * The extcap options map to the digit characters '0'..'7' and '9'; the
 * ndpiReader options map to a letter.
 */
static struct option longopts[] = {
/* mandatory extcap options */
{ "extcap-interfaces", no_argument, NULL, '0'},
{ "extcap-version", optional_argument, NULL, '1'},
{ "extcap-dlts", no_argument, NULL, '2'},
{ "extcap-interface", required_argument, NULL, '3'},
{ "extcap-config", no_argument, NULL, '4'},
{ "capture", no_argument, NULL, '5'},
{ "extcap-capture-filter", required_argument, NULL, '6'},
{ "fifo", required_argument, NULL, '7'},
{ "ndpi-proto-filter", required_argument, NULL, '9'},
/* ndpiReader options */
/* NOTE(review): help() documents -d as "Disable protocol guess", the opposite of this long name - confirm */
{ "enable-protocol-guess", no_argument, NULL, 'd'},
{ "categories", required_argument, NULL, 'c'},
{ "csv-dump", required_argument, NULL, 'C'},
{ "interface", required_argument, NULL, 'i'},
{ "filter", required_argument, NULL, 'f'},
/* NOTE(review): help() lists -F without an argument, while this entry requires one - confirm */
{ "flow-stats", required_argument, NULL, 'F'},
{ "cpu-bind", required_argument, NULL, 'g'},
{ "load-categories", required_argument, NULL, 'G'},
{ "loops", required_argument, NULL, 'l'},
{ "num-threads", required_argument, NULL, 'n'},
{ "ignore-vlanid", no_argument, NULL, 'I'},
{ "protos", required_argument, NULL, 'p'},
{ "capture-duration", required_argument, NULL, 's'},
{ "decode-tunnels", no_argument, NULL, 't'},
/* NOTE(review): "revision" and "version" both return 'r' with no argument, while help()
   documents -r <path> as "Load risky domain file" - confirm against the option parser */
{ "revision", no_argument, NULL, 'r'},
{ "verbose", required_argument, NULL, 'v'},
{ "version", no_argument, NULL, 'r'},
{ "ndpi-log-level", required_argument, NULL, 'V'},
{ "dbg-proto", required_argument, NULL, 'u'},
{ "help", no_argument, NULL, 'h'},
{ "long-help", no_argument, NULL, 'H'},
{ "serialization-outfile", required_argument, NULL, 'k'},
{ "serialization-format", required_argument, NULL, 'K'},
{ "payload-analysis", required_argument, NULL, 'P'},
{ "result-path", required_argument, NULL, 'w'},
{ "quiet", no_argument, NULL, 'q'},
{ "cfg", required_argument, NULL, OPTLONG_VALUE_CFG},
/* Terminator entry */
{0, 0, 0, 0}
};
/* ********************************** */
/**
 * @brief extcap: print the extcap version line and the single "ndpi"
 *        interface, then flag the extcap request as served
 */
void extcap_interfaces() {
  const char *revision = ndpi_revision();

  printf("extcap {version=%s}{help=https://github.com/ntop/nDPI/tree/dev/wireshark}\n", revision);
  printf("interface {value=ndpi}{display=nDPI interface}\n");

  extcap_exit = 1;
}
/* ********************************** */
void extcap_dlts() {
u_int dlts_number = DLT_EN10MB;
printf("dlt {number=%u}{name=%s}{display=%s}\n", dlts_number, "ndpi", "nDPI Interface");
extcap_exit = 1;
}
/* ********************************** */
/* Protocol (id, name) pair used to sort protocols by name */
struct ndpi_proto_sorter {
  int id;        /* protocol id */
  char name[16]; /* NUL-terminated protocol name (sort key) */
};

/* ********************************** */

/**
 * @brief qsort() comparator: orders ndpi_proto_sorter entries by name
 *
 * @return the strcmp() result of the two names
 */
int cmpProto(const void *_a, const void *_b) {
  const struct ndpi_proto_sorter *lhs = _a;
  const struct ndpi_proto_sorter *rhs = _b;

  return(strcmp(lhs->name, rhs->name));
}
/* ********************************** */
int cmpFlows(const void *_a, const void *_b) {
struct ndpi_flow_info *fa = ((struct flow_info*)_a)->flow;
struct ndpi_flow_info *fb = ((struct flow_info*)_b)->flow;
uint64_t a_size = fa->src2dst_bytes + fa->dst2src_bytes;
uint64_t b_size = fb->src2dst_bytes + fb->dst2src_bytes;
if(a_size != b_size)
return a_size < b_size ? 1 : -1;
// copy from ndpi_workflow_node_cmp();
if(fa->ip_version < fb->ip_version ) return(-1); else { if(fa->ip_version > fb->ip_version ) return(1); }
if(fa->protocol < fb->protocol ) return(-1); else { if(fa->protocol > fb->protocol ) return(1); }
if(htonl(fa->src_ip) < htonl(fb->src_ip) ) return(-1); else { if(htonl(fa->src_ip) > htonl(fb->src_ip) ) return(1); }
if(htons(fa->src_port) < htons(fb->src_port)) return(-1); else { if(htons(fa->src_port) > htons(fb->src_port)) return(1); }
if(htonl(fa->dst_ip) < htonl(fb->dst_ip) ) return(-1); else { if(htonl(fa->dst_ip) > htonl(fb->dst_ip) ) return(1); }
if(htons(fa->dst_port) < htons(fb->dst_port)) return(-1); else { if(htons(fa->dst_port) > htons(fb->dst_port)) return(1); }
if(fa->vlan_id < fb->vlan_id) return(-1); else { if(fa->vlan_id > fb->vlan_id) return(1); }
return(0);
}
/* ********************************** */
/**
 * @brief extcap: print the configuration arguments of the "ndpi" interface
 *
 * Two arguments are emitted, both mapped to '-i': a capture interface name
 * and a pcap file selector. The protocol-filter selector is compiled out
 * (see the #if 0 sections). The extcap request is then flagged as served.
 */
void extcap_config() {
  int argidx = 0;
  struct ndpi_detection_module_struct *ndpi_str;
#if 0
  struct ndpi_proto_sorter *protos;
  u_int ndpi_num_supported_protocols;
  int i;
  ndpi_proto_defaults_t *proto_defaults;
#endif

  ndpi_str = ndpi_init_detection_module(NULL);

#if 0
  ndpi_num_supported_protocols = ndpi_get_ndpi_num_supported_protocols(ndpi_str);
  proto_defaults = ndpi_get_proto_defaults(ndpi_str);
#endif

  /* -i <interface> */
  printf("arg {number=%d}{call=-i}{display=Capture Interface}{type=string}{group=Live Capture}"
         "{tooltip=The interface name}\n", argidx);
  argidx++;

  /* -i <pcap file> */
  printf("arg {number=%d}{call=-i}{display=Pcap File to Analyze}{type=fileselect}{mustexist=true}{group=Pcap}"
         "{tooltip=The pcap file to analyze (if the interface is unspecified)}\n", argidx);
  argidx++;

#if 0
  /* Removed as it breaks! extcap */
  protos = (struct ndpi_proto_sorter*)ndpi_malloc(sizeof(struct ndpi_proto_sorter) * ndpi_num_supported_protocols);
  if(!protos) exit(0);
  printf("arg {number=%d}{call=--ndpi-proto-filter}{display=nDPI Protocol Filter}{type=selector}{group=Filter}"
         "{tooltip=nDPI Protocol to be filtered}\n", argidx);
  printf("value {arg=%d}{value=%d}{display=%s}{default=true}\n", argidx, 0, "No nDPI filtering");
  for(i=0; i<(int) ndpi_num_supported_protocols; i++) {
    protos[i].id = i;
    ndpi_snprintf(protos[i].name, sizeof(protos[i].name), "%s", proto_defaults[i].protoName);
  }
  qsort(protos, ndpi_num_supported_protocols, sizeof(struct ndpi_proto_sorter), cmpProto);
  for(i=0; i<(int)ndpi_num_supported_protocols; i++)
    printf("value {arg=%d}{value=%d}{display=%s (%d)}{default=false}{enabled=true}\n", argidx, protos[i].id,
           protos[i].name, protos[i].id);
  ndpi_free(protos);
#endif

  ndpi_exit_detection_module(ndpi_str);

  extcap_exit = 1;
}
/* ********************************** */
/**
 * @brief extcap: open the pcap dumper that writes to the capture fifo
 *
 * Creates a "dead" Ethernet pcap handle (extcap_fifo_h) and opens a dumper
 * (extcap_dumper) on extcap_capture_fifo. On failure a message is written to
 * stderr and the function returns with the handle that failed left NULL.
 */
void extcap_capture() {
#ifdef DEBUG_TRACE
  if(trace) fprintf(trace, " #### %s #### \n", __FUNCTION__);
#endif

  extcap_fifo_h = pcap_open_dead(DLT_EN10MB, 16384 /* MTU */);

  if(extcap_fifo_h == NULL) {
    fprintf(stderr, "Error pcap_open_dead");

#ifdef DEBUG_TRACE
    if(trace) fprintf(trace, "Error pcap_open_dead\n");
#endif
    return;
  }

  extcap_dumper = pcap_dump_open(extcap_fifo_h, extcap_capture_fifo);

  if(extcap_dumper == NULL) {
    fprintf(stderr, "Unable to open the pcap dumper on %s", extcap_capture_fifo);

#ifdef DEBUG_TRACE
    if(trace) fprintf(trace, "Unable to open the pcap dumper on %s\n",
                      extcap_capture_fifo);
#endif
    return;
  }

#ifdef DEBUG_TRACE
  if(trace) fprintf(trace, "Starting packet capture [%p]\n", extcap_dumper);
#endif
}
/* ********************************** */
/**
 * @brief Write the CSV header line on csv_fp
 *
 * Does nothing when CSV export is not active (csv_fp is NULL). The
 * byte-distribution/entropy columns are appended only when flow stats are
 * enabled.
 */
void printCSVHeader() {
  /* Header pieces, written back to back in this order */
  static const char * const header_chunks[] = {
    "#flow_id,protocol,first_seen,last_seen,duration,src_ip,src_port,dst_ip,dst_port,ndpi_proto_num,ndpi_proto,proto_by_ip,server_name_sni,",
    "c_to_s_pkts,c_to_s_bytes,c_to_s_goodput_bytes,s_to_c_pkts,s_to_c_bytes,s_to_c_goodput_bytes,",
    "data_ratio,str_data_ratio,c_to_s_goodput_ratio,s_to_c_goodput_ratio,",

    /* IAT (Inter Arrival Time) */
    "iat_flow_min,iat_flow_avg,iat_flow_max,iat_flow_stddev,",
    "iat_c_to_s_min,iat_c_to_s_avg,iat_c_to_s_max,iat_c_to_s_stddev,",
    "iat_s_to_c_min,iat_s_to_c_avg,iat_s_to_c_max,iat_s_to_c_stddev,",

    /* Packet Length */
    "pktlen_c_to_s_min,pktlen_c_to_s_avg,pktlen_c_to_s_max,pktlen_c_to_s_stddev,",
    "pktlen_s_to_c_min,pktlen_s_to_c_avg,pktlen_s_to_c_max,pktlen_s_to_c_stddev,",

    /* TCP flags */
    "cwr,ece,urg,ack,psh,rst,syn,fin,",
    "c_to_s_cwr,c_to_s_ece,c_to_s_urg,c_to_s_ack,c_to_s_psh,c_to_s_rst,c_to_s_syn,c_to_s_fin,",
    "s_to_c_cwr,s_to_c_ece,s_to_c_urg,s_to_c_ack,s_to_c_psh,s_to_c_rst,s_to_c_syn,s_to_c_fin,",

    /* TCP window */
    "c_to_s_init_win,s_to_c_init_win,",

    /* Flow info */
    "server_info,",
    "tls_version,quic_version,ja3c,tls_client_unsafe,",
    "ja3s,tls_server_unsafe,",
    "advertised_alpns,negotiated_alpn,tls_supported_versions,",
#if 0
    "tls_issuerDN,tls_subjectDN,",
#endif
    "ssh_client_hassh,ssh_server_hassh,flow_info,plen_bins,http_user_agent"
  };
  size_t k;

  if(!csv_fp) return;

  for(k = 0; k < sizeof(header_chunks) / sizeof(header_chunks[0]); k++)
    fputs(header_chunks[k], csv_fp);

  if(enable_flow_stats)
    fputs(",byte_dist_mean,byte_dist_std,entropy,total_entropy", csv_fp);

  fputs("\n", csv_fp);
}
static int parse_three_strings(char *param, char **s1, char **s2, char **s3)
{
char *saveptr, *tmp_str, *s1_str, *s2_str = NULL, *s3_str;
int num_commas;
unsigned int i;
tmp_str = ndpi_strdup(param);
if(tmp_str) {
/* First parameter might be missing */
num_commas = 0;
for(i = 0; i < strlen(tmp_str); i++) {
if(tmp_str[i] == ',')
num_commas++;
}
if(num_commas == 1) {
s1_str = NULL;
s2_str = strtok_r(tmp_str, ",", &saveptr);
} else if(num_commas == 2) {
s1_str = strtok_r(tmp_str, ",", &saveptr);
if(s1_str) {
s2_str = strtok_r(NULL, ",", &saveptr);
}
} else {
ndpi_free(tmp_str);
return -1;
}
if(s2_str) {
s3_str = strtok_r(NULL, ",", &saveptr);
if(s3_str) {
*s1 = ndpi_strdup(s1_str);
*s2 = ndpi_strdup(s2_str);
*s3 = ndpi_strdup(s3_str);
ndpi_free(tmp_str);
if(!s1 || !s2 || !s3) {
ndpi_free(s1);
ndpi_free(s2);
ndpi_free(s3);