-
Notifications
You must be signed in to change notification settings - Fork 62.7k
Expand file tree
/
Copy pathbpf.h
More file actions
1755 lines (1563 loc) · 55.1 KB
/
Copy pathbpf.h
File metadata and controls
1755 lines (1563 loc) · 55.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
*/
#ifndef _LINUX_BPF_H
#define _LINUX_BPF_H 1
#include <uapi/linux/bpf.h>
#include <linux/workqueue.h>
#include <linux/file.h>
#include <linux/percpu.h>
#include <linux/err.h>
#include <linux/rbtree_latch.h>
#include <linux/numa.h>
#include <linux/mm_types.h>
#include <linux/wait.h>
#include <linux/u64_stats_sync.h>
#include <linux/refcount.h>
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/capability.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
struct perf_event;
struct bpf_prog;
struct bpf_prog_aux;
struct bpf_map;
struct sock;
struct seq_file;
struct btf;
struct btf_type;
struct exception_table_entry;
struct seq_operations;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
int (*map_alloc_check)(union bpf_attr *attr);
struct bpf_map *(*map_alloc)(union bpf_attr *attr);
void (*map_release)(struct bpf_map *map, struct file *map_file);
void (*map_free)(struct bpf_map *map);
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
void (*map_release_uref)(struct bpf_map *map);
void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_lookup_and_delete_batch)(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);
/* funcs callable from userspace and from eBPF programs */
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
int (*map_pop_elem)(struct bpf_map *map, void *value);
int (*map_peek_elem)(struct bpf_map *map, void *value);
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
int fd);
void (*map_fd_put_ptr)(void *ptr);
u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
struct seq_file *m);
int (*map_check_btf)(const struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
/* Prog poke tracking helpers. */
int (*map_poke_track)(struct bpf_map *map, struct bpf_prog_aux *aux);
void (*map_poke_untrack)(struct bpf_map *map, struct bpf_prog_aux *aux);
void (*map_poke_run)(struct bpf_map *map, u32 key, struct bpf_prog *old,
struct bpf_prog *new);
/* Direct value access helpers. */
int (*map_direct_value_addr)(const struct bpf_map *map,
u64 *imm, u32 off);
int (*map_direct_value_meta)(const struct bpf_map *map,
u64 imm, u32 *off);
int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma);
__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
struct poll_table_struct *pts);
};
struct bpf_map_memory {
u32 pages;
struct user_struct *user;
};
struct bpf_map {
/* The first two cachelines with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
*/
const struct bpf_map_ops *ops ____cacheline_aligned;
struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
void *security;
#endif
enum bpf_map_type map_type;
u32 key_size;
u32 value_size;
u32 max_entries;
u32 map_flags;
int spin_lock_off; /* >=0 valid offset, <0 error */
u32 id;
int numa_node;
u32 btf_key_type_id;
u32 btf_value_type_id;
struct btf *btf;
struct bpf_map_memory memory;
char name[BPF_OBJ_NAME_LEN];
u32 btf_vmlinux_value_type_id;
bool bypass_spec_v1;
bool frozen; /* write-once; write-protected by freeze_mutex */
/* 22 bytes hole */
/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
atomic64_t refcnt ____cacheline_aligned;
atomic64_t usercnt;
struct work_struct work;
struct mutex freeze_mutex;
u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */
};
static inline bool map_value_has_spin_lock(const struct bpf_map *map)
{
return map->spin_lock_off >= 0;
}
static inline void check_and_init_map_lock(struct bpf_map *map, void *dst)
{
if (likely(!map_value_has_spin_lock(map)))
return;
*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
(struct bpf_spin_lock){};
}
/* copy everything but bpf_spin_lock */
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
if (unlikely(map_value_has_spin_lock(map))) {
u32 off = map->spin_lock_off;
memcpy(dst, src, off);
memcpy(dst + off + sizeof(struct bpf_spin_lock),
src + off + sizeof(struct bpf_spin_lock),
map->value_size - off - sizeof(struct bpf_spin_lock));
} else {
memcpy(dst, src, map->value_size);
}
}
void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
struct bpf_offload_dev;
struct bpf_offloaded_map;
struct bpf_map_dev_ops {
int (*map_get_next_key)(struct bpf_offloaded_map *map,
void *key, void *next_key);
int (*map_lookup_elem)(struct bpf_offloaded_map *map,
void *key, void *value);
int (*map_update_elem)(struct bpf_offloaded_map *map,
void *key, void *value, u64 flags);
int (*map_delete_elem)(struct bpf_offloaded_map *map, void *key);
};
struct bpf_offloaded_map {
struct bpf_map map;
struct net_device *netdev;
const struct bpf_map_dev_ops *dev_ops;
void *dev_priv;
struct list_head offloads;
};
static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
{
return container_of(map, struct bpf_offloaded_map, map);
}
static inline bool bpf_map_offload_neutral(const struct bpf_map *map)
{
return map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
}
static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
{
return (map->btf_value_type_id || map->btf_vmlinux_value_type_id) &&
map->ops->map_seq_show_elem;
}
int map_check_no_btf(const struct bpf_map *map,
const struct btf *btf,
const struct btf_type *key_type,
const struct btf_type *value_type);
extern const struct bpf_map_ops bpf_map_offload_ops;
/* function argument constraints */
enum bpf_arg_type {
ARG_DONTCARE = 0, /* unused argument in helper function */
/* the following constraints used to prototype
* bpf_map_lookup/update/delete_elem() functions
*/
ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */
ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */
ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */
ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */
/* the following constraints used to prototype bpf_memcmp() and other
* functions that access data on eBPF program stack
*/
ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */
ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */
ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized,
* helper function must fill all bytes or clear
* them in error case.
*/
ARG_CONST_SIZE, /* number of bytes accessed from memory */
ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */
ARG_PTR_TO_CTX, /* pointer to context */
ARG_PTR_TO_CTX_OR_NULL, /* pointer to context or NULL */
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
ARG_PTR_TO_INT, /* pointer to int */
ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
};
/* type of values returned from helper functions */
enum bpf_return_type {
RET_INTEGER, /* function returns integer */
RET_VOID, /* function doesn't return anything */
RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */
RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */
};
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
* to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
* instructions after verifying
*/
struct bpf_func_proto {
u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
bool gpl_only;
bool pkt_access;
enum bpf_return_type ret_type;
union {
struct {
enum bpf_arg_type arg1_type;
enum bpf_arg_type arg2_type;
enum bpf_arg_type arg3_type;
enum bpf_arg_type arg4_type;
enum bpf_arg_type arg5_type;
};
enum bpf_arg_type arg_type[5];
};
int *btf_id; /* BTF ids of arguments */
};
/* bpf_context is intentionally undefined structure. Pointer to bpf_context is
* the first argument to eBPF programs.
* For socket filters: 'struct bpf_context *' == 'struct sk_buff *'
*/
struct bpf_context;
enum bpf_access_type {
BPF_READ = 1,
BPF_WRITE = 2
};
/* types of values stored in eBPF registers */
/* Pointer types represent:
* pointer
* pointer + imm
* pointer + (u16) var
* pointer + (u16) var + imm
* if (range > 0) then [ptr, ptr + range - off) is safe to access
* if (id > 0) means that some 'var' was added
* if (off > 0) means that 'imm' was added
*/
enum bpf_reg_type {
NOT_INIT = 0, /* nothing was written into register */
SCALAR_VALUE, /* reg doesn't contain a valid pointer */
PTR_TO_CTX, /* reg points to bpf_context */
CONST_PTR_TO_MAP, /* reg points to struct bpf_map */
PTR_TO_MAP_VALUE, /* reg points to map element value */
PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
PTR_TO_STACK, /* reg == frame_pointer + offset */
PTR_TO_PACKET_META, /* skb->data - meta_len */
PTR_TO_PACKET, /* reg points to skb->data */
PTR_TO_PACKET_END, /* skb->data + headlen */
PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
PTR_TO_SOCKET, /* reg points to struct bpf_sock */
PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */
PTR_TO_SOCK_COMMON, /* reg points to sock_common */
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */
PTR_TO_BTF_ID, /* reg points to kernel struct */
PTR_TO_BTF_ID_OR_NULL, /* reg points to kernel struct or NULL */
PTR_TO_MEM, /* reg points to valid memory region */
PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */
};
/* The information passed from prog-specific *_is_valid_access
* back to the verifier.
*/
struct bpf_insn_access_aux {
enum bpf_reg_type reg_type;
union {
int ctx_field_size;
u32 btf_id;
};
struct bpf_verifier_log *log; /* for verbose logs */
};
static inline void
bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
{
aux->ctx_field_size = size;
}
struct bpf_prog_ops {
int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
};
struct bpf_verifier_ops {
/* return eBPF function prototype for verification */
const struct bpf_func_proto *
(*get_func_proto)(enum bpf_func_id func_id,
const struct bpf_prog *prog);
/* return true if 'size' wide access at offset 'off' within bpf_context
* with 'type' (read or write) is allowed
*/
bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
const struct bpf_prog *prog);
int (*gen_ld_abs)(const struct bpf_insn *orig,
struct bpf_insn *insn_buf);
u32 (*convert_ctx_access)(enum bpf_access_type type,
const struct bpf_insn *src,
struct bpf_insn *dst,
struct bpf_prog *prog, u32 *target_size);
int (*btf_struct_access)(struct bpf_verifier_log *log,
const struct btf_type *t, int off, int size,
enum bpf_access_type atype,
u32 *next_btf_id);
};
struct bpf_prog_offload_ops {
/* verifier basic callbacks */
int (*insn_hook)(struct bpf_verifier_env *env,
int insn_idx, int prev_insn_idx);
int (*finalize)(struct bpf_verifier_env *env);
/* verifier optimization callbacks (called after .finalize) */
int (*replace_insn)(struct bpf_verifier_env *env, u32 off,
struct bpf_insn *insn);
int (*remove_insns)(struct bpf_verifier_env *env, u32 off, u32 cnt);
/* program management callbacks */
int (*prepare)(struct bpf_prog *prog);
int (*translate)(struct bpf_prog *prog);
void (*destroy)(struct bpf_prog *prog);
};
struct bpf_prog_offload {
struct bpf_prog *prog;
struct net_device *netdev;
struct bpf_offload_dev *offdev;
void *dev_priv;
struct list_head offloads;
bool dev_state;
bool opt_failed;
void *jited_image;
u32 jited_len;
};
enum bpf_cgroup_storage_type {
BPF_CGROUP_STORAGE_SHARED,
BPF_CGROUP_STORAGE_PERCPU,
__BPF_CGROUP_STORAGE_MAX
};
#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
/* The longest tracepoint has 12 args.
* See include/trace/bpf_probe.h
*/
#define MAX_BPF_FUNC_ARGS 12
struct bpf_prog_stats {
u64 cnt;
u64 nsecs;
struct u64_stats_sync syncp;
} __aligned(2 * sizeof(u64));
struct btf_func_model {
u8 ret_size;
u8 nr_args;
u8 arg_size[MAX_BPF_FUNC_ARGS];
};
/* Restore arguments before returning from trampoline to let original function
* continue executing. This flag is used for fentry progs when there are no
* fexit progs.
*/
#define BPF_TRAMP_F_RESTORE_REGS BIT(0)
/* Call original function after fentry progs, but before fexit progs.
* Makes sense for fentry/fexit, normal calls and indirect calls.
*/
#define BPF_TRAMP_F_CALL_ORIG BIT(1)
/* Skip current frame and return to parent. Makes sense for fentry/fexit
* programs only. Should not be used with normal calls and indirect calls.
*/
#define BPF_TRAMP_F_SKIP_FRAME BIT(2)
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
*/
#define BPF_MAX_TRAMP_PROGS 40
struct bpf_tramp_progs {
struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS];
int nr_progs;
};
/* Different use cases for BPF trampoline:
* 1. replace nop at the function entry (kprobe equivalent)
* flags = BPF_TRAMP_F_RESTORE_REGS
* fentry = a set of programs to run before returning from trampoline
*
* 2. replace nop at the function entry (kprobe + kretprobe equivalent)
* flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME
* orig_call = fentry_ip + MCOUNT_INSN_SIZE
* fentry = a set of program to run before calling original function
* fexit = a set of program to run after original function
*
* 3. replace direct call instruction anywhere in the function body
* or assign a function pointer for indirect call (like tcp_congestion_ops->cong_avoid)
* With flags = 0
* fentry = a set of programs to run before returning from trampoline
* With flags = BPF_TRAMP_F_CALL_ORIG
* orig_call = original callback addr or direct function addr
* fentry = a set of program to run before calling original function
* fexit = a set of program to run after original function
*/
int arch_prepare_bpf_trampoline(void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_progs *tprogs,
void *orig_call);
/* these two functions are called from generated trampoline */
u64 notrace __bpf_prog_enter(void);
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
struct bpf_ksym {
unsigned long start;
unsigned long end;
char name[KSYM_NAME_LEN];
struct list_head lnode;
struct latch_tree_node tnode;
bool prog;
};
enum bpf_tramp_prog_type {
BPF_TRAMP_FENTRY,
BPF_TRAMP_FEXIT,
BPF_TRAMP_MODIFY_RETURN,
BPF_TRAMP_MAX,
BPF_TRAMP_REPLACE, /* more than MAX */
};
struct bpf_trampoline {
/* hlist for trampoline_table */
struct hlist_node hlist;
/* serializes access to fields of this trampoline */
struct mutex mutex;
refcount_t refcnt;
u64 key;
struct {
struct btf_func_model model;
void *addr;
bool ftrace_managed;
} func;
/* if !NULL this is BPF_PROG_TYPE_EXT program that extends another BPF
* program by replacing one of its functions. func.addr is the address
* of the function it replaced.
*/
struct bpf_prog *extension_prog;
/* list of BPF programs using this trampoline */
struct hlist_head progs_hlist[BPF_TRAMP_MAX];
/* Number of attached programs. A counter per kind. */
int progs_cnt[BPF_TRAMP_MAX];
/* Executable image of trampoline */
void *image;
u64 selector;
struct bpf_ksym ksym;
};
#define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
struct bpf_dispatcher_prog {
struct bpf_prog *prog;
refcount_t users;
};
struct bpf_dispatcher {
/* dispatcher mutex */
struct mutex mutex;
void *func;
struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX];
int num_progs;
void *image;
u32 image_off;
struct bpf_ksym ksym;
};
static __always_inline unsigned int bpf_dispatcher_nop_func(
const void *ctx,
const struct bpf_insn *insnsi,
unsigned int (*bpf_func)(const void *,
const struct bpf_insn *))
{
return bpf_func(ctx, insnsi);
}
#ifdef CONFIG_BPF_JIT
struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
int bpf_trampoline_link_prog(struct bpf_prog *prog);
int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
void bpf_trampoline_put(struct bpf_trampoline *tr);
#define BPF_DISPATCHER_INIT(_name) { \
.mutex = __MUTEX_INITIALIZER(_name.mutex), \
.func = &_name##_func, \
.progs = {}, \
.num_progs = 0, \
.image = NULL, \
.image_off = 0, \
.ksym = { \
.name = #_name, \
.lnode = LIST_HEAD_INIT(_name.ksym.lnode), \
}, \
}
#define DEFINE_BPF_DISPATCHER(name) \
noinline unsigned int bpf_dispatcher_##name##_func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
unsigned int (*bpf_func)(const void *, \
const struct bpf_insn *)) \
{ \
return bpf_func(ctx, insnsi); \
} \
EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \
struct bpf_dispatcher bpf_dispatcher_##name = \
BPF_DISPATCHER_INIT(bpf_dispatcher_##name);
#define DECLARE_BPF_DISPATCHER(name) \
unsigned int bpf_dispatcher_##name##_func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
unsigned int (*bpf_func)(const void *, \
const struct bpf_insn *)); \
extern struct bpf_dispatcher bpf_dispatcher_##name;
#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func
#define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name)
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
struct bpf_prog *to);
/* Called only from JIT-enabled code, so there's no need for stubs. */
void *bpf_jit_alloc_exec_page(void);
void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
#else
static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
return NULL;
}
static inline int bpf_trampoline_link_prog(struct bpf_prog *prog)
{
return -ENOTSUPP;
}
static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
{
return -ENOTSUPP;
}
static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
#define DEFINE_BPF_DISPATCHER(name)
#define DECLARE_BPF_DISPATCHER(name)
#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nop_func
#define BPF_DISPATCHER_PTR(name) NULL
static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
struct bpf_prog *from,
struct bpf_prog *to) {}
static inline bool is_bpf_image_address(unsigned long address)
{
return false;
}
#endif
struct bpf_func_info_aux {
u16 linkage;
bool unreliable;
};
enum bpf_jit_poke_reason {
BPF_POKE_REASON_TAIL_CALL,
};
/* Descriptor of pokes pointing /into/ the JITed image. */
struct bpf_jit_poke_descriptor {
void *ip;
union {
struct {
struct bpf_map *map;
u32 key;
} tail_call;
};
bool ip_stable;
u8 adj_off;
u16 reason;
};
/* reg_type info for ctx arguments */
struct bpf_ctx_arg_aux {
u32 offset;
enum bpf_reg_type reg_type;
};
struct bpf_prog_aux {
atomic64_t refcnt;
u32 used_map_cnt;
u32 max_ctx_offset;
u32 max_pkt_offset;
u32 max_tp_access;
u32 stack_depth;
u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
u32 attach_btf_id; /* in-kernel BTF type id to attach to */
u32 ctx_arg_info_size;
const struct bpf_ctx_arg_aux *ctx_arg_info;
struct bpf_prog *linked_prog;
bool verifier_zext; /* Zero extensions has been inserted by verifier. */
bool offload_requested;
bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
bool func_proto_unreliable;
enum bpf_tramp_prog_type trampoline_prog_type;
struct bpf_trampoline *trampoline;
struct hlist_node tramp_hlist;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
/* function name for valid attach_btf_id */
const char *attach_func_name;
struct bpf_prog **func;
void *jit_data; /* JIT specific data. arch dependent */
struct bpf_jit_poke_descriptor *poke_tab;
u32 size_poke_tab;
struct bpf_ksym ksym;
const struct bpf_prog_ops *ops;
struct bpf_map **used_maps;
struct bpf_prog *prog;
struct user_struct *user;
u64 load_time; /* ns since boottime */
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
char name[BPF_OBJ_NAME_LEN];
#ifdef CONFIG_SECURITY
void *security;
#endif
struct bpf_prog_offload *offload;
struct btf *btf;
struct bpf_func_info *func_info;
struct bpf_func_info_aux *func_info_aux;
/* bpf_line_info loaded from userspace. linfo->insn_off
* has the xlated insn offset.
* Both the main and sub prog share the same linfo.
* The subprog can access its first linfo by
* using the linfo_idx.
*/
struct bpf_line_info *linfo;
/* jited_linfo is the jited addr of the linfo. It has a
* one to one mapping to linfo:
* jited_linfo[i] is the jited addr for the linfo[i]->insn_off.
* Both the main and sub prog share the same jited_linfo.
* The subprog can access its first jited_linfo by
* using the linfo_idx.
*/
void **jited_linfo;
u32 func_info_cnt;
u32 nr_linfo;
/* subprog can use linfo_idx to access its first linfo and
* jited_linfo.
* main prog always has linfo_idx == 0
*/
u32 linfo_idx;
u32 num_exentries;
struct exception_table_entry *extable;
struct bpf_prog_stats __percpu *stats;
union {
struct work_struct work;
struct rcu_head rcu;
};
};
struct bpf_array_aux {
/* 'Ownership' of prog array is claimed by the first program that
* is going to use this map or by the first program which FD is
* stored in the map to make sure that all callers and callees have
* the same prog type and JITed flag.
*/
enum bpf_prog_type type;
bool jited;
/* Programs with direct jumps into programs part of this array. */
struct list_head poke_progs;
struct bpf_map *map;
struct mutex poke_mutex;
struct work_struct work;
};
struct bpf_struct_ops_value;
struct btf_type;
struct btf_member;
#define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64
struct bpf_struct_ops {
const struct bpf_verifier_ops *verifier_ops;
int (*init)(struct btf *btf);
int (*check_member)(const struct btf_type *t,
const struct btf_member *member);
int (*init_member)(const struct btf_type *t,
const struct btf_member *member,
void *kdata, const void *udata);
int (*reg)(void *kdata);
void (*unreg)(void *kdata);
const struct btf_type *type;
const struct btf_type *value_type;
const char *name;
struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
u32 type_id;
u32 value_id;
};
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
#define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA))
const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id);
void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log);
bool bpf_struct_ops_get(const void *kdata);
void bpf_struct_ops_put(const void *kdata);
int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
void *value);
static inline bool bpf_try_module_get(const void *data, struct module *owner)
{
if (owner == BPF_MODULE_OWNER)
return bpf_struct_ops_get(data);
else
return try_module_get(owner);
}
static inline void bpf_module_put(const void *data, struct module *owner)
{
if (owner == BPF_MODULE_OWNER)
bpf_struct_ops_put(data);
else
module_put(owner);
}
#else
static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
{
return NULL;
}
static inline void bpf_struct_ops_init(struct btf *btf,
struct bpf_verifier_log *log)
{
}
static inline bool bpf_try_module_get(const void *data, struct module *owner)
{
return try_module_get(owner);
}
static inline void bpf_module_put(const void *data, struct module *owner)
{
module_put(owner);
}
static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
void *key,
void *value)
{
return -EINVAL;
}
#endif
struct bpf_array {
struct bpf_map map;
u32 elem_size;
u32 index_mask;
struct bpf_array_aux *aux;
union {
char value[0] __aligned(8);
void *ptrs[0] __aligned(8);
void __percpu *pptrs[0] __aligned(8);
};
};
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 32
#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \
BPF_F_RDONLY_PROG | \
BPF_F_WRONLY | \
BPF_F_WRONLY_PROG)
#define BPF_MAP_CAN_READ BIT(0)
#define BPF_MAP_CAN_WRITE BIT(1)
static inline u32 bpf_map_flags_to_cap(struct bpf_map *map)
{
u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
/* Combination of BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG is
* not possible.
*/
if (access_flags & BPF_F_RDONLY_PROG)
return BPF_MAP_CAN_READ;
else if (access_flags & BPF_F_WRONLY_PROG)
return BPF_MAP_CAN_WRITE;
else
return BPF_MAP_CAN_READ | BPF_MAP_CAN_WRITE;
}
static inline bool bpf_map_flags_access_ok(u32 access_flags)
{
return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) !=
(BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
}
struct bpf_event_entry {
struct perf_event *event;
struct file *perf_file;
struct file *map_file;
struct rcu_head rcu;
};
bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
int bpf_prog_calc_tag(struct bpf_prog *fp);
const char *kernel_type_name(u32 btf_type_id);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
unsigned long off, unsigned long len);
typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
const struct bpf_insn *src,
struct bpf_insn *dst,
struct bpf_prog *prog,
u32 *target_size);
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
/* an array of programs to be executed under rcu_lock.
*
* Typical usage:
* ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN);
*
* the structure returned by bpf_prog_array_alloc() should be populated
* with program pointers and the last pointer must be NULL.
* The user has to keep refcnt on the program and make sure the program
* is removed from the array before bpf_prog_put().
* The 'struct bpf_prog_array *' should only be replaced with xchg()
* since other cpus are walking the array of pointers in parallel.
*/
struct bpf_prog_array_item {
struct bpf_prog *prog;
struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
};
struct bpf_prog_array {
struct rcu_head rcu;
struct bpf_prog_array_item items[];
};
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
void bpf_prog_array_free(struct bpf_prog_array *progs);
int bpf_prog_array_length(struct bpf_prog_array *progs);
bool bpf_prog_array_is_empty(struct bpf_prog_array *array);
int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
__u32 __user *prog_ids, u32 cnt);
void bpf_prog_array_delete_safe(struct bpf_prog_array *progs,
struct bpf_prog *old_prog);
int bpf_prog_array_copy_info(struct bpf_prog_array *array,
u32 *prog_ids, u32 request_cnt,
u32 *prog_cnt);
int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,
struct bpf_prog_array **new_array);
#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
({ \
struct bpf_prog_array_item *_item; \
struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
u32 _ret = 1; \
migrate_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
if (unlikely(check_non_null && !_array))\
goto _out; \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
bpf_cgroup_storage_set(_item->cgroup_storage); \
_ret &= func(_prog, ctx); \
_item++; \
} \
_out: \
rcu_read_unlock(); \
migrate_enable(); \
_ret; \
})
/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
* so BPF programs can request cwr for TCP packets.
*
* Current cgroup skb programs can only return 0 or 1 (0 to drop the
* packet. This macro changes the behavior so the low order bit
* indicates whether the packet should be dropped (0) or not (1)
* and the next bit is a congestion notification bit. This could be
* used by TCP to call tcp_enter_cwr()
*
* Hence, new allowed return values of CGROUP EGRESS BPF programs are:
* 0: drop packet
* 1: keep packet
* 2: drop packet and cn
* 3: keep packet and cn
*
* This macro then converts it to one of the NET_XMIT or an error
* code that is then interpreted as drop packet (and no cn):
* 0: NET_XMIT_SUCCESS skb should be transmitted
* 1: NET_XMIT_DROP skb should be dropped and cn
* 2: NET_XMIT_CN skb should be transmitted and cn
* 3: -EPERM skb should be dropped
*/
#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
({ \
struct bpf_prog_array_item *_item; \
struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
u32 ret; \
u32 _ret = 1; \
u32 _cn = 0; \
migrate_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
_item = &_array->items[0]; \
while ((_prog = READ_ONCE(_item->prog))) { \
bpf_cgroup_storage_set(_item->cgroup_storage); \
ret = func(_prog, ctx); \
_ret &= (ret & 1); \
_cn |= (ret & 2); \
_item++; \
} \
rcu_read_unlock(); \
migrate_enable(); \
if (_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \
_ret = (_cn ? NET_XMIT_DROP : -EPERM); \
_ret; \
})