forked from illumos/illumos-gate
-
-
Notifications
You must be signed in to change notification settings - Fork 88
Expand file tree
/
Copy pathvnode.h
More file actions
1596 lines (1455 loc) · 57.6 KB
/
vnode.h
File metadata and controls
1596 lines (1455 loc) · 57.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2020 Joyent, Inc.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright 2017 RackTop Systems.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
* The Regents of the University of California
* All Rights Reserved
*
* University Acknowledgment- Portions of this document are derived from
* software developed by the University of California, Berkeley, and its
* contributors.
*/
#ifndef _SYS_VNODE_H
#define _SYS_VNODE_H
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/rwstlock.h>
#include <sys/time_impl.h>
#include <sys/cred.h>
#include <sys/uio.h>
#include <sys/resource.h>
#include <vm/seg_enum.h>
#include <sys/kstat.h>
#include <sys/kmem.h>
#include <sys/list.h>
#ifdef _KERNEL
#include <sys/buf.h>
#include <sys/sdt.h>
#endif /* _KERNEL */
#ifdef __cplusplus
extern "C" {
#endif
/*
* Statistics for all vnode operations.
* All operations record number of ops (since boot/mount/zero'ed).
* Certain I/O operations (read, write, readdir) also record number
* of bytes transferred.
* This appears in two places in the system: one is embedded in each
* vfs_t. There is also an array of vopstats_t structures allocated
* on a per-fstype basis.
*/
#define VOPSTATS_STR "vopstats_" /* Initial string for vopstat kstats */
/*
 * One named kstat per vnode operation. Every n<op> member is the number of
 * calls of the VOP named in its trailing comment; the three *_bytes members
 * hold the bytes transferred by the read, write and readdir operations.
 */
typedef struct vopstats {
kstat_named_t nopen; /* VOP_OPEN */
kstat_named_t nclose; /* VOP_CLOSE */
kstat_named_t nread; /* VOP_READ */
kstat_named_t read_bytes; /* bytes transferred by reads */
kstat_named_t nwrite; /* VOP_WRITE */
kstat_named_t write_bytes; /* bytes transferred by writes */
kstat_named_t nioctl; /* VOP_IOCTL */
kstat_named_t nsetfl; /* VOP_SETFL */
kstat_named_t ngetattr; /* VOP_GETATTR */
kstat_named_t nsetattr; /* VOP_SETATTR */
kstat_named_t naccess; /* VOP_ACCESS */
kstat_named_t nlookup; /* VOP_LOOKUP */
kstat_named_t ncreate; /* VOP_CREATE */
kstat_named_t nremove; /* VOP_REMOVE */
kstat_named_t nlink; /* VOP_LINK */
kstat_named_t nrename; /* VOP_RENAME */
kstat_named_t nmkdir; /* VOP_MKDIR */
kstat_named_t nrmdir; /* VOP_RMDIR */
kstat_named_t nreaddir; /* VOP_READDIR */
kstat_named_t readdir_bytes; /* bytes transferred by readdirs */
kstat_named_t nsymlink; /* VOP_SYMLINK */
kstat_named_t nreadlink; /* VOP_READLINK */
kstat_named_t nfsync; /* VOP_FSYNC */
kstat_named_t ninactive; /* VOP_INACTIVE */
kstat_named_t nfid; /* VOP_FID */
kstat_named_t nrwlock; /* VOP_RWLOCK */
kstat_named_t nrwunlock; /* VOP_RWUNLOCK */
kstat_named_t nseek; /* VOP_SEEK */
kstat_named_t ncmp; /* VOP_CMP */
kstat_named_t nfrlock; /* VOP_FRLOCK */
kstat_named_t nspace; /* VOP_SPACE */
kstat_named_t nrealvp; /* VOP_REALVP */
kstat_named_t ngetpage; /* VOP_GETPAGE */
kstat_named_t nputpage; /* VOP_PUTPAGE */
kstat_named_t nmap; /* VOP_MAP */
kstat_named_t naddmap; /* VOP_ADDMAP */
kstat_named_t ndelmap; /* VOP_DELMAP */
kstat_named_t npoll; /* VOP_POLL */
kstat_named_t ndump; /* VOP_DUMP */
kstat_named_t npathconf; /* VOP_PATHCONF */
kstat_named_t npageio; /* VOP_PAGEIO */
kstat_named_t ndumpctl; /* VOP_DUMPCTL */
kstat_named_t ndispose; /* VOP_DISPOSE */
kstat_named_t nsetsecattr; /* VOP_SETSECATTR */
kstat_named_t ngetsecattr; /* VOP_GETSECATTR */
kstat_named_t nshrlock; /* VOP_SHRLOCK */
kstat_named_t nvnevent; /* VOP_VNEVENT */
kstat_named_t nreqzcbuf; /* VOP_REQZCBUF */
kstat_named_t nretzcbuf; /* VOP_RETZCBUF */
} vopstats_t;
/*
* The vnode is the focus of all file activity in UNIX.
* A vnode is allocated for each active file, each current
* directory, each mounted-on file, and the root.
*
* Each vnode is usually associated with a file-system-specific node (for
* UFS, this is the in-memory inode). Generally, a vnode and an fs-node
* should be created and destroyed together as a pair.
*
* If a vnode is reused for a new file, it should be reinitialized by calling
* either vn_reinit() or vn_recycle().
*
* vn_reinit() resets the entire vnode as if it was returned by vn_alloc().
* The caller is responsible for setting up the entire vnode after calling
* vn_reinit(). This is important when using kmem caching where the vnode is
* allocated by a constructor, for instance.
*
* vn_recycle() is used when the file system keeps some state around in both
* the vnode and the associated FS-node. In UFS, for example, the inode of
* a deleted file can be reused immediately. The v_data, v_vfsp, v_op, etc.
* remain the same but certain fields related to the previous instance need
* to be reset. In particular:
* v_femhead
* v_path
* v_rdcnt, v_wrcnt
* v_mmap_read, v_mmap_write
*/
/*
* vnode types. VNON means no type. These values are unrelated to
* values in on-disk inodes.
*/
/*
 * The numeric value of every vnode type is written out explicitly; VNON (0)
 * stands for "no type".
 */
enum vtype {
	VNON	= 0,
	VREG	= 1,
	VDIR	= 2,
	VBLK	= 3,
	VCHR	= 4,
	VLNK	= 5,
	VFIFO	= 6,
	VDOOR	= 7,
	VPROC	= 8,
	VSOCK	= 9,
	VPORT	= 10,
	VBAD	= 11
};
typedef enum vtype vtype_t;
/*
* VSD - Vnode Specific Data
* Used to associate additional private data with a vnode.
*/
/*
 * Container for the private data attached to one vnode; struct vnode refers
 * to it through its v_vsd member.
 */
struct vsd_node {
list_node_t vs_nodes; /* linkage on the list of all VSD nodes */
uint_t vs_nkeys; /* number of entries in the vs_value array */
/* NOTE(review): presumably indexed by VSD key -- confirm in vnode.c */
void **vs_value; /* array of value/key */
};
/*
* Many of the fields in the vnode are read-only once they are initialized
* at vnode creation time. Other fields are protected by locks.
*
* IMPORTANT: vnodes should be created ONLY by calls to vn_alloc(). They
* may not be embedded into the file-system specific node (inode). The
* size of vnodes may change.
*
* The v_lock protects:
* v_flag
* v_stream
* v_count
* v_shrlocks
* v_path
* v_phantom_count
* v_vsd
* v_xattrdir
*
* A special lock (implemented by vn_vfswlock in vnode.c) protects:
* v_vfsmountedhere
*
* The global flock_lock mutex (in flock.c) protects:
* v_filocks
*
* IMPORTANT NOTE:
*
* The following vnode fields are considered public and may safely be
* accessed by file systems or other consumers:
*
* v_lock
* v_flag
* v_count
* v_phantom_count
* v_data
* v_vfsp
* v_stream
* v_type
* v_rdev
*
* ALL OTHER FIELDS SHOULD BE ACCESSED ONLY BY THE OWNER OF THAT FIELD.
* In particular, file systems should not access other fields; they may
* change or even be removed. The functionality which was once provided
* by these fields is available through vn_* functions.
*
* VNODE PATH THEORY:
* In each vnode, the v_path field holds a cached version of the canonical
* filesystem path which that node represents. Because vnodes lack contextual
* information about their own name or position in the VFS hierarchy, this path
* must be calculated when the vnode is instantiated by operations such as
* fop_create, fop_lookup, or fop_mkdir. During said operations, both the
* parent vnode (and its cached v_path) and future name are known, so the
* v_path of the resulting object can easily be set.
*
* The caching nature of v_path is complicated in the face of directory
* renames. Filesystem drivers are responsible for calling vn_renamepath when
* a fop_rename operation succeeds. While the v_path on the renamed vnode will
* be updated, existing children of the directory (direct, or at deeper levels)
* will now possess v_path caches which are stale.
*
* It is expensive (and for non-directories, impossible) to recalculate stale
* v_path entries during operations such as vnodetopath. The best time during
* which to correct such wrongs is the same as when v_path is first
* initialized: during fop_create/fop_lookup/fop_mkdir/etc, where adequate
* context is available to generate the current path.
*
* In order to quickly detect stale v_path entries (without full lookup
* verification) to trigger a v_path update, the v_path_stamp field has been
* added to vnode_t. As part of successful fop_create/fop_lookup/fop_mkdir
* operations, where the name and parent vnode are available, the following
* rules are used to determine updates to the child:
*
* 1. If the parent lacks a v_path, clear any existing v_path and v_path_stamp
* on the child. Until the parent v_path is refreshed to a valid state, the
* child v_path must be considered invalid too.
*
* 2. If the child lacks a v_path (implying v_path_stamp == 0), it inherits the
* v_path_stamp value from its parent and its v_path is updated.
*
* 3. If the child v_path_stamp is less than v_path_stamp in the parent, it is
* an indication that the child v_path is stale. The v_path is updated and
* v_path_stamp in the child is set to the current hrtime().
*
* It does _not_ inherit the parent v_path_stamp in order to propagate the
* time of v_path invalidation through the directory structure. This
* prevents concurrent invalidations (operating with a now-incorrect v_path)
* at deeper levels in the tree from persisting.
*
* 4. If the child v_path_stamp is greater or equal to the parent, no action
* needs to be taken.
*
* Note that fop_rename operations do not follow this ruleset. They perform an
* explicit update of v_path and v_path_stamp (setting it to the current time)
*
* With these constraints in place, v_path invalidations and updates should
* proceed in a timely manner as vnodes are accessed. While there still are
* limited cases where vnodetopath operations will fail, the risk is minimized.
*/
struct fem_head; /* from fem.h */
/*
 * The in-core vnode. Members ahead of the "PRIVATE FIELDS" marker are the
 * public ones enumerated in the block comment preceding this structure;
 * the members after the marker are not for use by file systems.
 *
 * NOTE(review): the locking notes preceding this structure list v_vsd under
 * v_lock, yet the v_vsd_lock member is commented as protecting v_vsd --
 * confirm which of the two is authoritative.
 */
typedef struct vnode {
kmutex_t v_lock; /* protects vnode fields */
uint_t v_flag; /* vnode flags (see below); under v_lock */
uint_t v_count; /* reference count; under v_lock */
/* non vn_count() ref count (see below); under v_lock */
uint_t v_phantom_count;
void *v_data; /* private data for fs */
struct vfs *v_vfsp; /* ptr to containing VFS */
struct stdata *v_stream; /* associated stream; under v_lock */
enum vtype v_type; /* vnode type */
dev_t v_rdev; /* device (VCHR, VBLK) */
/* PRIVATE FIELDS BELOW - DO NOT USE */
/* v_vfsmountedhere is guarded by the vn_vfswlock mechanism in vnode.c */
struct vfs *v_vfsmountedhere; /* ptr to vfs mounted here */
struct vnodeops *v_op; /* vnode operations */
struct page *v_pages; /* vnode pages list */
/* v_filocks is guarded by the global flock_lock mutex in flock.c */
struct filock *v_filocks; /* ptr to filock list */
struct shrlocklist *v_shrlocks; /* ptr to shrlock list; under v_lock */
krwlock_t v_nbllock; /* sync for NBMAND locks */
kcondvar_t v_cv; /* synchronize locking */
void *v_locality; /* hook for locality info */
struct fem_head *v_femhead; /* fs monitoring */
char *v_path; /* cached path; under v_lock */
/* compared against the parent's stamp to detect a stale v_path */
hrtime_t v_path_stamp; /* timestamp for cached path */
uint_t v_rdcnt; /* open for read count (VREG only) */
uint_t v_wrcnt; /* open for write count (VREG only) */
u_longlong_t v_mmap_read; /* mmap read count */
u_longlong_t v_mmap_write; /* mmap write count */
void *v_mpssdata; /* info for large page mappings */
void *v_fopdata; /* list of file ops event watches */
kmutex_t v_vsd_lock; /* protects v_vsd field */
struct vsd_node *v_vsd; /* vnode specific data */
/* v_xattrdir is listed among the members guarded by v_lock */
struct vnode *v_xattrdir; /* unnamed extended attr dir (GFS) */
uint_t v_count_dnlc; /* dnlc reference count */
} vnode_t;
/*
 * Non-zero when the type of vp is VCHR, VBLK or VFIFO. The argument can be
 * expanded up to three times, so it must not carry side effects.
 */
#define	IS_DEVVP(vp)				\
	((vp)->v_type == VCHR ||		\
	(vp)->v_type == VBLK ||			\
	(vp)->v_type == VFIFO)
#define	VNODE_ALIGN		64
/*
 * Number of low-order zero bits in a vnode pointer, derived from the vnode's
 * size and alignment: 8 when _LP64 is defined, 7 otherwise.
 */
#ifdef _LP64
#define	VNODE_ALIGN_LOG2	8
#else
#define	VNODE_ALIGN_LOG2	7
#endif
/*
* vnode flags.
*/
#define	VROOT		0x0001	/* vnode is the root of its file system */
#define	VNOCACHE	0x0002	/* cache pages are not kept on the vnode */
#define	VNOMAP		0x0004	/* file may not be mapped/faulted */
#define	VDUP		0x0008	/* file should be dup'ed rather than opened */
#define	VNOSWAP		0x0010	/* file is unusable as a virtual swap device */
#define	VNOMOUNT	0x0020	/* a mount may not cover this file */
#define	VISSWAP		0x0040	/* vnode is in use for swap */
#define	VSWAPLIKE	0x0080	/* vnode acts like swap (but may not be) */
/* Evaluates to 1 when VISSWAP or VSWAPLIKE is set in v_flag, else to 0. */
#define	IS_SWAPVP(vp)	(((vp)->v_flag & (VSWAPLIKE | VISSWAP)) != 0)
#ifdef _KERNEL
/*
 * Kernel-only lock entry made of an rwstlock (ve_lock), an opaque pointer
 * (ve_vpvfs), a link to another entry (ve_next) and a reference count
 * (ve_refcnt).
 * NOTE(review): ve_vpvfs presumably identifies the vnode or vfs the lock
 * belongs to -- confirm against vnode.c.
 */
typedef struct vn_vfslocks_entry {
rwstlock_t ve_lock;
void *ve_vpvfs;
struct vn_vfslocks_entry *ve_next;
uint32_t ve_refcnt;
/*
 * The pad is 64 bytes minus the size of the members above, which brings the
 * structure to 64 bytes provided the compiler adds no padding of its own.
 */
char pad[64 - sizeof (rwstlock_t) - 2 * sizeof (void *) - \
sizeof (uint32_t)];
} vn_vfslocks_entry_t;
#endif
/*
* The following two flags are used to lock the v_vfsmountedhere field
*/
#define	VVFSLOCK	0x00100
#define	VVFSWAIT	0x00200
/*
 * Serializes VM operations on a vnode.
 */
#define	VVMLOCK		0x00400
/*
 * Makes vn_open() pass a directory open-for-writing on to VOP_OPEN(), so
 * that the file system does the checking, rather than failing it outright.
 */
#define	VDIROPEN	0x00800
/*
 * Hint for the VM system: the file has been mmap()ed EXEC at some time and
 * is therefore most likely a binary or a shared library.
 */
#define	VVMEXEC		0x01000
#define	VPXFS		0x02000	/* clustering: global fs proxy vnode */
/* Yields the masked flag bit itself (0 or VPXFS), not a 0/1 value. */
#define	IS_PXFSVP(vp)	((vp)->v_flag & VPXFS)
#define	V_XATTRDIR	0x04000	/* attribute unnamed directory */
/* Yields the masked flag bit itself (0 or V_XATTRDIR), not a 0/1 value. */
#define	IS_XATTRDIR(vp)	((vp)->v_flag & V_XATTRDIR)
#define	V_LOCALITY	0x08000	/* whether locality aware */
/*
 * Asks the VM to keep the v_pages list ordered with all modified pages at
 * one end and the unmodified ones at the other. Dirty pages that need
 * writing back are then found much faster; the price is a minor fault on the
 * first store instruction that touches a writable page.
 */
#define	VMODSORT	0x10000
#define	IS_VMODSORT(vp)							\
	(pvn_vmodsort_supported != 0 && ((vp)->v_flag & VMODSORT) != 0)
#define	VISSWAPFS	0x20000	/* vnode is being used for swapfs */
/*
 * The mdb memstat command relies on IS_SWAPFSVP looking at nothing but the
 * vnode's v_flag field. Should that change, the extra fields have to be
 * cached in mdb; see vn_get in mdb/common/modules/genunix/memory.c
 */
#define	IS_SWAPFSVP(vp)	(((vp)->v_flag & VISSWAPFS) != 0)
#define	V_SYSATTR	0x40000	/* vnode is a GFS system attribute */
/*
 * Marks a vnode whose VOP_LOOKUP operations may return results from a
 * different VFS instance. It mainly serves to suppress the v_path
 * calculation logic when file systems such as procfs emit results that defy
 * expectations about normal VFS behavior.
 */
#define	VTRAVERSE	0x80000
/*
* Vnode attributes. A bit-mask is supplied as part of the
* structure to indicate the attributes the caller wants to
* set (setattr) or extract (getattr).
*/
/*
* Note that va_nodeid and va_nblocks are 64bit data type.
* We support large files over NFSV3. With Solaris client and
* Server that generates 64bit ino's and sizes these fields
* will overflow if they are 32 bit sizes.
*/
/*
 * Attribute set exchanged with getattr/setattr. va_mask is the bit-mask
 * telling which attributes the caller wants to set or extract. va_nodeid
 * and va_nblocks are 64-bit quantities.
 */
typedef struct vattr {
uint_t va_mask; /* bit-mask of attributes */
vtype_t va_type; /* vnode type (for create) */
mode_t va_mode; /* file access mode */
uid_t va_uid; /* owner user id */
gid_t va_gid; /* owner group id */
dev_t va_fsid; /* file system id (dev for now) */
u_longlong_t va_nodeid; /* node id */
nlink_t va_nlink; /* number of references to file */
u_offset_t va_size; /* file size in bytes */
timestruc_t va_atime; /* time of last access */
timestruc_t va_mtime; /* time of last modification */
timestruc_t va_ctime; /* time of last status change */
dev_t va_rdev; /* device the file represents */
uint_t va_blksize; /* fundamental block size */
u_longlong_t va_nblocks; /* # of blocks allocated */
uint_t va_seq; /* sequence number */
} vattr_t;
#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */
/*
 * Structure of all optional attributes.
 * It is embedded in xvattr_t as xva_xoptattrs.
 * NOTE(review): the members appear to correspond by name to the XAT0_*
 * attribute bits defined later in this file -- confirm the mapping.
 */
typedef struct xoptattr {
timestruc_t xoa_createtime; /* Create time of file */
uint8_t xoa_archive;
uint8_t xoa_system;
uint8_t xoa_readonly;
uint8_t xoa_hidden;
uint8_t xoa_nounlink;
uint8_t xoa_immutable;
uint8_t xoa_appendonly;
uint8_t xoa_nodump;
uint8_t xoa_opaque;
uint8_t xoa_av_quarantined;
uint8_t xoa_av_modified;
uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ]; /* AV_SCANSTAMP_SZ bytes */
uint8_t xoa_reparse;
uint64_t xoa_generation;
uint8_t xoa_offline;
uint8_t xoa_sparse;
uint8_t xoa_projinherit;
uint64_t xoa_projid;
} xoptattr_t;
/*
* The xvattr structure is really a variable length structure that
* is made up of:
* - The classic vattr_t (xva_vattr)
* - a 32 bit quantity (xva_mapsize) that specifies the size of the
* attribute bitmaps in 32 bit words.
* - A pointer to the returned attribute bitmap (needed because the
* element ahead of that bitmap, the requested attribute bitmap, is variable
* length).
* - The requested attribute bitmap, which is an array of 32 bit words.
* Callers use the XVA_SET_REQ() macro to set the bits corresponding to
* the attributes that are being requested.
* - The returned attribute bitmap, which is an array of 32 bit words.
* File systems that support optional attributes use the XVA_SET_RTN()
* macro to set the bits corresponding to the attributes that are being
* returned.
* - The xoptattr_t structure which contains the attribute values
*
* xva_mapsize determines how many words in the attribute bitmaps.
* Immediately following the attribute bitmaps is the xoptattr_t.
* xva_getxoptattr() is used to get the pointer to the xoptattr_t
* section.
*/
#define XVA_MAPSIZE 3 /* Size of attr bitmaps */
#define XVA_MAGIC 0x78766174 /* Magic # for verification */
/*
* The xvattr structure is an extensible structure which permits optional
* attributes to be requested/returned. File systems may or may not support
* optional attributes. They do so at their own discretion but if they do
* support optional attributes, they must register the VFSFT_XVATTR feature
* so that the optional attributes can be set/retrieved.
*
* The fields of the xvattr structure are:
*
* xva_vattr - The first element of an xvattr is a legacy vattr structure
* which includes the common attributes. If AT_XVATTR is set in the va_mask
* then the entire structure is treated as an xvattr. If AT_XVATTR is not
* set, then only the xva_vattr structure can be used.
*
* xva_magic - 0x78766174 (hex for "xvat"). Magic number for verification.
*
* xva_mapsize - Size of requested and returned attribute bitmaps.
*
* xva_rtnattrmapp - Pointer to xva_rtnattrmap[]. We need this since the
* size of the array before it, xva_reqattrmap[], could change which means
* the location of xva_rtnattrmap[] could change. This will allow unbundled
* file systems to find the location of xva_rtnattrmap[] when the sizes change.
*
* xva_reqattrmap[] - Array of requested attributes. Attributes are
* represented by a specific bit in a specific element of the attribute
* map array. Callers set the bits corresponding to the attributes
* that the caller wants to get/set.
*
* xva_rtnattrmap[] - Array of attributes that the file system was able to
* process. Not all file systems support all optional attributes. This map
* informs the caller which attributes the underlying file system was able
* to set/get. (Same structure as the requested attributes array in terms
* of each attribute corresponding to specific bits and array elements.)
*
* xva_xoptattrs - Structure containing values of optional attributes.
* These values are only valid if the corresponding bits in xva_reqattrmap
* are set and the underlying file system supports those attributes.
*/
/*
 * Extensible attribute structure: a legacy vattr_t followed by the magic
 * number, the bitmap size, a pointer to the returned-attribute bitmap, the
 * requested and returned bitmaps (XVA_MAPSIZE words each) and the optional
 * attribute values.
 */
typedef struct xvattr {
vattr_t xva_vattr; /* Embedded vattr structure */
uint32_t xva_magic; /* Magic Number */
uint32_t xva_mapsize; /* Size of attr bitmap (32-bit words) */
uint32_t *xva_rtnattrmapp; /* Ptr to xva_rtnattrmap[] */
uint32_t xva_reqattrmap[XVA_MAPSIZE]; /* Requested attrs */
uint32_t xva_rtnattrmap[XVA_MAPSIZE]; /* Returned attrs */
xoptattr_t xva_xoptattrs; /* Optional attributes */
} xvattr_t;
#ifdef _SYSCALL32
/*
 * For bigtypes time_t changed to 64 bit on the 64-bit kernel.
 * Define an old version for user/kernel interface
 */
/*
 * The structure is packed to 4 bytes when the native long long alignment is
 * 8 while the 32-bit one is 4.
 * NOTE(review): presumably so the layout matches what a 32-bit caller
 * sees -- confirm.
 */
#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
#pragma pack(4)
#endif
typedef struct vattr32 {
uint32_t va_mask; /* bit-mask of attributes */
vtype_t va_type; /* vnode type (for create) */
mode32_t va_mode; /* file access mode */
uid32_t va_uid; /* owner user id */
gid32_t va_gid; /* owner group id */
dev32_t va_fsid; /* file system id (dev for now) */
u_longlong_t va_nodeid; /* node id */
nlink_t va_nlink; /* number of references to file */
u_offset_t va_size; /* file size in bytes */
timestruc32_t va_atime; /* time of last access */
timestruc32_t va_mtime; /* time of last modification */
timestruc32_t va_ctime; /* time of last status change */
dev32_t va_rdev; /* device the file represents */
uint32_t va_blksize; /* fundamental block size */
u_longlong_t va_nblocks; /* # of blocks allocated */
uint32_t va_seq; /* sequence number */
} vattr32_t;
#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
#pragma pack()
#endif
#else /* not _SYSCALL32 */
/* Without _SYSCALL32 the 32-bit names are plain aliases of vattr. */
#define vattr32 vattr
typedef vattr_t vattr32_t;
#endif /* _SYSCALL32 */
/*
* Attributes of interest to the caller of setattr or getattr.
*/
#define	AT_TYPE		0x00001
#define	AT_MODE		0x00002
#define	AT_UID		0x00004
#define	AT_GID		0x00008
#define	AT_FSID		0x00010
#define	AT_NODEID	0x00020
#define	AT_NLINK	0x00040
#define	AT_SIZE		0x00080
#define	AT_ATIME	0x00100
#define	AT_MTIME	0x00200
#define	AT_CTIME	0x00400
#define	AT_RDEV		0x00800
#define	AT_BLKSIZE	0x01000
#define	AT_NBLOCKS	0x02000
/* 0x04000 is unused */
#define	AT_SEQ		0x08000
/*
 * AT_XVATTR announces that the attribute bitmap of an xvattr_t holds further
 * bits to process; while it is clear that bitmap MUST be ignored. The bit
 * is not part of AT_ALL, so it always has to be set or cleared explicitly.
 */
#define	AT_XVATTR	0x10000

/* The three timestamp attributes. */
#define	AT_TIMES	(AT_ATIME | AT_MTIME | AT_CTIME)
/* Every attribute below AT_BLKSIZE (0x00001 through 0x00800). */
#define	AT_STAT								\
	(AT_TYPE | AT_MODE | AT_UID | AT_GID | AT_FSID | AT_NODEID |	\
	AT_NLINK | AT_SIZE | AT_TIMES | AT_RDEV)
/* Every assigned attribute bit except AT_XVATTR. */
#define	AT_ALL		(AT_STAT | AT_BLKSIZE | AT_NBLOCKS | AT_SEQ)
/* NOTE(review): presumably the attributes setattr may not change -- confirm */
#define	AT_NOSET							\
	(AT_TYPE | AT_FSID | AT_NODEID | AT_NLINK | AT_RDEV |		\
	AT_BLKSIZE | AT_NBLOCKS | AT_SEQ)
/*
* Attribute bits used in the extensible attribute's (xva's) attribute
* bitmaps. Note that the bitmaps are made up of a variable length number
* of 32-bit words. The convention is to use XAT{n}_{attrname} where "n"
* is the element in the bitmap (starting at 0). This convention is for
* the convenience of the maintainer to keep track of which element each
* attribute belongs to.
*
* NOTE THAT CONSUMERS MUST *NOT* USE THE XATn_* DEFINES DIRECTLY. CONSUMERS
* MUST USE THE XAT_* DEFINES.
*/
/*
 * Attribute bits held in element 0 of the attribute bitmaps. XAT0_INDEX is
 * a long long so that the XAT_* values built from it are 64 bits wide.
 */
#define	XAT0_INDEX		0LL	/* Index into bitmap for XAT0 attrs */
#define	XAT0_CREATETIME		0x00000001	/* Create time of file */
#define	XAT0_ARCHIVE		0x00000002	/* Archive */
#define	XAT0_SYSTEM		0x00000004	/* System */
#define	XAT0_READONLY		0x00000008	/* Readonly */
#define	XAT0_HIDDEN		0x00000010	/* Hidden */
#define	XAT0_NOUNLINK		0x00000020	/* Nounlink */
#define	XAT0_IMMUTABLE		0x00000040	/* immutable */
#define	XAT0_APPENDONLY		0x00000080	/* appendonly */
#define	XAT0_NODUMP		0x00000100	/* nodump */
#define	XAT0_OPAQUE		0x00000200	/* opaque */
#define	XAT0_AV_QUARANTINED	0x00000400	/* anti-virus quarantine */
#define	XAT0_AV_MODIFIED	0x00000800	/* anti-virus modified */
#define	XAT0_AV_SCANSTAMP	0x00001000	/* anti-virus scanstamp */
#define	XAT0_REPARSE		0x00002000	/* FS reparse point */
#define	XAT0_GEN		0x00004000	/* object generation number */
#define	XAT0_OFFLINE		0x00008000	/* offline */
#define	XAT0_SPARSE		0x00010000	/* sparse */
#define	XAT0_PROJINHERIT	0x00020000	/* Create with parent projid */
#define	XAT0_PROJID		0x00040000	/* Project ID */
/*
 * Union of every XAT0_* attribute bit above (0x0007ffff). The generation
 * bit used to be misspelled "XATO_GEN" (letter O), an undefined name that
 * made any expansion of this macro fail to compile.
 */
#define	XAT0_ALL_ATTRS							\
	(XAT0_CREATETIME | XAT0_ARCHIVE | XAT0_SYSTEM |			\
	XAT0_READONLY | XAT0_HIDDEN | XAT0_NOUNLINK |			\
	XAT0_IMMUTABLE | XAT0_APPENDONLY | XAT0_NODUMP |		\
	XAT0_OPAQUE | XAT0_AV_QUARANTINED | XAT0_AV_MODIFIED |		\
	XAT0_AV_SCANSTAMP | XAT0_REPARSE | XAT0_GEN |			\
	XAT0_OFFLINE | XAT0_SPARSE | XAT0_PROJINHERIT | XAT0_PROJID)
/* Support for XAT_* optional attributes */
#define	XVA_MASK	0xffffffff	/* Used to mask off 32 bits */
#define	XVA_SHFT	32		/* Used to shift index */
/*
 * Used to pry out the index and attribute bits from the XAT_* attributes.
 * XVA_INDEX() yields bits 32..63 of attr (the bitmap element) and
 * XVA_ATTRBIT() yields bits 0..31 (the bit within that element); both
 * results are uint32_t.
 *
 * The argument is widened to 64 bits before it is shifted: shifting a
 * 32-bit operand by XVA_SHFT (32) is undefined behavior in C, which is what
 * happened if a bare XAT0_* bit was passed instead of an XAT_* value. Such
 * an argument now gives index 0.
 */
#define	XVA_INDEX(attr)							\
	((uint32_t)((((uint64_t)(attr)) >> XVA_SHFT) & XVA_MASK))
#define	XVA_ATTRBIT(attr)	((uint32_t)((attr) & XVA_MASK))
/*
* The following defines present a "flat namespace" so that consumers don't
* need to keep track of which element belongs to which bitmap entry.
*
* NOTE THAT THESE MUST NEVER BE OR-ed TOGETHER
*/
/*
 * Each value is the XAT0_* bit in the low 32 bits combined with the bitmap
 * index placed XVA_SHFT bits higher up.
 */
#define	XAT_CREATETIME		(XAT0_CREATETIME | (XAT0_INDEX << XVA_SHFT))
#define	XAT_ARCHIVE		(XAT0_ARCHIVE | (XAT0_INDEX << XVA_SHFT))
#define	XAT_SYSTEM		(XAT0_SYSTEM | (XAT0_INDEX << XVA_SHFT))
#define	XAT_READONLY		(XAT0_READONLY | (XAT0_INDEX << XVA_SHFT))
#define	XAT_HIDDEN		(XAT0_HIDDEN | (XAT0_INDEX << XVA_SHFT))
#define	XAT_NOUNLINK		(XAT0_NOUNLINK | (XAT0_INDEX << XVA_SHFT))
#define	XAT_IMMUTABLE		(XAT0_IMMUTABLE | (XAT0_INDEX << XVA_SHFT))
#define	XAT_APPENDONLY		(XAT0_APPENDONLY | (XAT0_INDEX << XVA_SHFT))
#define	XAT_NODUMP		(XAT0_NODUMP | (XAT0_INDEX << XVA_SHFT))
#define	XAT_OPAQUE		(XAT0_OPAQUE | (XAT0_INDEX << XVA_SHFT))
#define	XAT_AV_QUARANTINED						\
	(XAT0_AV_QUARANTINED | (XAT0_INDEX << XVA_SHFT))
#define	XAT_AV_MODIFIED		(XAT0_AV_MODIFIED | (XAT0_INDEX << XVA_SHFT))
#define	XAT_AV_SCANSTAMP	(XAT0_AV_SCANSTAMP | (XAT0_INDEX << XVA_SHFT))
#define	XAT_REPARSE		(XAT0_REPARSE | (XAT0_INDEX << XVA_SHFT))
#define	XAT_GEN			(XAT0_GEN | (XAT0_INDEX << XVA_SHFT))
#define	XAT_OFFLINE		(XAT0_OFFLINE | (XAT0_INDEX << XVA_SHFT))
#define	XAT_SPARSE		(XAT0_SPARSE | (XAT0_INDEX << XVA_SHFT))
#define	XAT_PROJINHERIT		(XAT0_PROJINHERIT | (XAT0_INDEX << XVA_SHFT))
#define	XAT_PROJID		(XAT0_PROJID | (XAT0_INDEX << XVA_SHFT))
/*
 * The returned attribute map array (xva_rtnattrmap[]) is located past the
 * requested attribute map array (xva_reqattrmap[]). Its location changes
 * when the array sizes change. We use a separate pointer in a known location
 * (xva_rtnattrmapp) to hold the location of xva_rtnattrmap[]. This is
 * set in xva_init()
 *
 * XVA_RTNATTRMAP() expands to that pointer member itself, so the returned
 * map is always reached through xva_rtnattrmapp rather than by naming
 * xva_rtnattrmap[] directly.
 */
#define XVA_RTNATTRMAP(xvap) ((xvap)->xva_rtnattrmapp)
/*
* XVA_SET_REQ() sets an attribute bit in the proper element in the bitmap
* of requested attributes (xva_reqattrmap[]).
*/
/*
 * Statement macro: ORs the bit XVA_ATTRBIT(attr) into element
 * XVA_INDEX(attr) of xvap->xva_reqattrmap[].
 *
 * The body is wrapped in do { } while (0) so that the macro is a single
 * statement; as three bare statements, only the first ASSERT was controlled
 * by an unbraced if/else or loop. The first assertion tests AT_XVATTR with
 * '&': the former '|' made it always true. Callers must therefore have
 * AT_XVATTR set in va_mask and XVA_MAGIC in xva_magic.
 */
#define	XVA_SET_REQ(xvap, attr)						\
	do {								\
		ASSERT((xvap)->xva_vattr.va_mask & AT_XVATTR);		\
		ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
		(xvap)->xva_reqattrmap[XVA_INDEX(attr)] |=		\
		    XVA_ATTRBIT(attr);					\
	} while (0)
/*
* XVA_CLR_REQ() clears an attribute bit in the proper element in the bitmap
* of requested attributes (xva_reqattrmap[]).
*/
/*
 * Statement macro: clears the bit XVA_ATTRBIT(attr) in element
 * XVA_INDEX(attr) of xvap->xva_reqattrmap[].
 *
 * The body is wrapped in do { } while (0) so that the macro is a single
 * statement; as three bare statements, only the first ASSERT was controlled
 * by an unbraced if/else or loop. The first assertion tests AT_XVATTR with
 * '&': the former '|' made it always true. Callers must therefore have
 * AT_XVATTR set in va_mask and XVA_MAGIC in xva_magic.
 */
#define	XVA_CLR_REQ(xvap, attr)						\
	do {								\
		ASSERT((xvap)->xva_vattr.va_mask & AT_XVATTR);		\
		ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
		(xvap)->xva_reqattrmap[XVA_INDEX(attr)] &=		\
		    ~XVA_ATTRBIT(attr);					\
	} while (0)
/*
* XVA_SET_RTN() sets an attribute bit in the proper element in the bitmap
* of returned attributes (xva_rtnattrmap[]).
*/
/*
 * Statement macro: ORs the bit XVA_ATTRBIT(attr) into element
 * XVA_INDEX(attr) of the returned-attribute map reached through
 * XVA_RTNATTRMAP(xvap).
 *
 * The body is wrapped in do { } while (0) so that the macro is a single
 * statement; as three bare statements, only the first ASSERT was controlled
 * by an unbraced if/else or loop. The first assertion tests AT_XVATTR with
 * '&': the former '|' made it always true. Callers must therefore have
 * AT_XVATTR set in va_mask and XVA_MAGIC in xva_magic.
 */
#define	XVA_SET_RTN(xvap, attr)						\
	do {								\
		ASSERT((xvap)->xva_vattr.va_mask & AT_XVATTR);		\
		ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
		(XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] |=		\
		    XVA_ATTRBIT(attr);					\
	} while (0)
/*
* XVA_ISSET_REQ() checks the requested attribute bitmap (xva_reqattrmap[])
* to see if the corresponding attribute bit is set. If so, returns non-zero.
*/
/*
 * Expression macro: yields the masked bit (non-zero) when attr is set in
 * xvap->xva_reqattrmap[], and 0 otherwise. The bitmap is consulted only if
 * AT_XVATTR is set in va_mask, xva_magic equals XVA_MAGIC and the index of
 * attr lies below xva_mapsize.
 *
 * AT_XVATTR is tested with '&'. The former '|' was always true, so the
 * bitmap was read even when AT_XVATTR was clear, although it MUST be
 * ignored in that case.
 */
#define	XVA_ISSET_REQ(xvap, attr)					\
	(((((xvap)->xva_vattr.va_mask & AT_XVATTR) != 0) &&		\
	((xvap)->xva_magic == XVA_MAGIC) &&				\
	((xvap)->xva_mapsize > XVA_INDEX(attr))) ?			\
	((xvap)->xva_reqattrmap[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) : 0)
/*
* XVA_ISSET_RTN() checks the returned attribute bitmap (xva_rtnattrmap[])
* to see if the corresponding attribute bit is set. If so, returns non-zero.
*/
#define XVA_ISSET_RTN(xvap, attr) \
((((xvap)->xva_vattr.va_mask | AT_XVATTR) && \
((xvap)->xva_magic == XVA_MAGIC) && \
((xvap)->xva_mapsize > XVA_INDEX(attr))) ? \
((XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) : 0)
/*
 * Modes. Some values same as S_xxx entries from stat.h for convenience.
 * Each is a distinct single-bit octal mask; together with PERMMASK they
 * make up MODEMASK. VSGID is also the bit tested by MANDMODE().
 */
#define VSUID 04000 /* set user id on execution */
#define VSGID 02000 /* set group id on execution */
#define VSVTX 01000 /* save swapped text even after use */
/*
 * Permissions.
 *
 * VREAD, VWRITE and VEXEC are the octal 0400, 0200 and 0100 bits.
 * PERMMASK covers the low nine bits (0777); MODEMASK covers those plus the
 * VSUID, VSGID and VSVTX bits (07000).
 */
#define VREAD 00400
#define VWRITE 00200
#define VEXEC 00100
#define MODEMASK 07777 /* mode bits plus permission bits */
#define PERMMASK 00777 /* permission bits */
/*
 * VOP_ACCESS flags
 * Distinct single-bit values, so both may be OR-ed into one flags word.
 */
#define V_ACE_MASK 0x1 /* mask represents NFSv4 ACE permissions */
#define V_APPEND 0x2 /* want to do append only check */
/*
 * Check whether mandatory file locking is enabled.
 *
 * MANDMODE(mode) is true when the VSGID bit is set in mode and the
 * (VEXEC >> 3) bit -- octal 00010 -- is clear.
 * MANDLOCK(vp, mode) additionally requires (vp)->v_type to be VREG.
 * Each argument is evaluated exactly once by either macro.
 */
#define MANDMODE(mode) (((mode) & (VSGID|(VEXEC>>3))) == VSGID)
#define MANDLOCK(vp, mode) ((vp)->v_type == VREG && MANDMODE(mode))
/*
 * Flags for vnode operations.
 *
 * Every enumeration is available both through its enum tag and through the
 * matching *_t typedef.
 */

/* rm or rmdir (remove) */
typedef enum rm {
	RMFILE = 0,
	RMDIRECTORY = 1
} rm_t;

/* follow symlinks (or not) */
typedef enum symfollow {
	NO_FOLLOW = 0,
	FOLLOW = 1
} symfollow_t;

/* (non)excl create */
typedef enum vcexcl {
	NONEXCL = 0,
	EXCL = 1
} vcexcl_t;

/* reason for create */
typedef enum create {
	CRCREAT = 0,
	CRMKNOD = 1,
	CRMKDIR = 2
} create_t;
/*
 * Vnode Events - Used by VOP_VNEVENT
 *
 * The VE_PRE_RENAME_* events are delivered before the rename operation takes
 * place.  They exist mainly for specialized consumers, such as NFSv4
 * delegation, that must learn about a rename before it occurs.
 */
enum vnevent {
	/* Query */
	VE_SUPPORT = 0,
	/* Rename, with vnode as source */
	VE_RENAME_SRC = 1,
	/* Rename, with vnode as target/destination */
	VE_RENAME_DEST = 2,
	/* Remove of vnode's name */
	VE_REMOVE = 3,
	/* Remove of directory vnode's name */
	VE_RMDIR = 4,
	/* Create with vnode's name which exists */
	VE_CREATE = 5,
	/* Link with vnode's name as source */
	VE_LINK = 6,
	/* Rename with vnode as target dir */
	VE_RENAME_DEST_DIR = 7,
	/* File or Filesystem got mounted over vnode */
	VE_MOUNTEDOVER = 8,
	/* Truncate */
	VE_TRUNCATE = 9,
	/* Pre-rename, with vnode as source */
	VE_PRE_RENAME_SRC = 10,
	/* Pre-rename, with vnode as target/dest. */
	VE_PRE_RENAME_DEST = 11,
	/* Pre-rename with vnode as target dir */
	VE_PRE_RENAME_DEST_DIR = 12,
	/* Rename with vnode as source dir */
	VE_RENAME_SRC_DIR = 13,
	/* Resize/truncate to non-zero offset */
	VE_RESIZE = 14
};
typedef enum vnevent vnevent_t;
/*
 * Values for checking vnode open and map counts
 *
 * v_mode_t (enum v_mode) names the mode being checked; V_TRUE and V_FALSE
 * are plain integer constants 1 and 0.
 */
typedef enum v_mode {
	V_READ = 0,
	V_WRITE = 1,
	V_RDORWR = 2,
	V_RDANDWR = 3
} v_mode_t;

#define	V_TRUE	1
#define	V_FALSE	0
/*
 * vsecattr_t: the structure handed to the VOP_GETSECATTR and
 * VOP_SETSECATTR operations.
 */
struct vsecattr {
	/* VSA_* bits; see the "vsa_mask values" list */
	uint_t	vsa_mask;
	/* ACL entry count */
	int	vsa_aclcnt;
	/* pointer to ACL entries */
	void	*vsa_aclentp;
	/* default ACL entry count */
	int	vsa_dfaclcnt;
	/* pointer to default ACL entries */
	void	*vsa_dfaclentp;
	/* ACE size in bytes of vsa_aclentp */
	size_t	vsa_aclentsz;
	/* ACE ACL flags */
	uint_t	vsa_aclflags;
};
typedef struct vsecattr vsecattr_t;
/*
 * vsa_mask values.
 * Each value is a distinct single bit, so several may be OR-ed together.
 */
#define VSA_ACL 0x0001
#define VSA_ACLCNT 0x0002
#define VSA_DFACL 0x0004
#define VSA_DFACLCNT 0x0008
#define VSA_ACE 0x0010
#define VSA_ACECNT 0x0020
#define VSA_ACE_ALLTYPES 0x0040
#define VSA_ACE_ACLFLAGS 0x0080 /* get/set ACE ACL flags */
/*
 * caller_context_t: passed to various vnode operations so that they can
 * determine the context (pid, host, identity) of the caller.
 *
 * cc_caller_id identifies one or more callers that invoke operations,
 * possibly on behalf of others.  For example, the NFS server could have
 * its own cc_caller_id, which vnode/vfs operations or (FEM) monitors on
 * those operations can detect.  New caller IDs are generated by
 * fs_new_caller_id().
 */
struct caller_context {
	/* Process ID of the caller */
	pid_t		cc_pid;
	/* System ID, used for remote calls */
	int		cc_sysid;
	/* Identifier for (set of) caller(s) */
	u_longlong_t	cc_caller_id;
	ulong_t		cc_flags;
};
typedef struct caller_context caller_context_t;
/*
 * Flags for caller context. The caller sets CC_DONTBLOCK if it does not
 * want to block inside of a FEM monitor. The monitor will set CC_WOULDBLOCK
 * and return EAGAIN if the operation would have blocked.
 *
 * The two values are distinct single bits.
 * NOTE(review): presumably these are stored in caller_context_t's cc_flags
 * member -- confirm against the users of that field.
 */
#define CC_WOULDBLOCK 0x01
#define CC_DONTBLOCK 0x02
/*
 * Structure tags for function prototypes, defined elsewhere.
 *
 * These are forward declarations only; they allow prototypes in this header
 * to take pointers to the structures without their full definitions.
 */
struct pathname;
struct fid;
struct flock64;
struct flk_callback;
struct shrlock;
struct page;
struct seg;
struct as;
struct pollhead;
struct taskq;
#ifdef _KERNEL
/*
* VNODE_OPS defines all the vnode operations. It is used to define
* the vnodeops structure (below) and the fs_func_p union (vfs_opreg.h).
*/
#define VNODE_OPS \
int (*vop_open)(vnode_t **, int, cred_t *, \
caller_context_t *); \
int (*vop_close)(vnode_t *, int, int, offset_t, cred_t *, \
caller_context_t *); \
int (*vop_read)(vnode_t *, uio_t *, int, cred_t *, \
caller_context_t *); \
int (*vop_write)(vnode_t *, uio_t *, int, cred_t *, \
caller_context_t *); \
int (*vop_ioctl)(vnode_t *, int, intptr_t, int, cred_t *, \
int *, caller_context_t *); \
int (*vop_setfl)(vnode_t *, int, int, cred_t *, \
caller_context_t *); \
int (*vop_getattr)(vnode_t *, vattr_t *, int, cred_t *, \
caller_context_t *); \
int (*vop_setattr)(vnode_t *, vattr_t *, int, cred_t *, \
caller_context_t *); \
int (*vop_access)(vnode_t *, int, int, cred_t *, \
caller_context_t *); \
int (*vop_lookup)(vnode_t *, char *, vnode_t **, \
struct pathname *, \
int, vnode_t *, cred_t *, \
caller_context_t *, int *, \
struct pathname *); \
int (*vop_create)(vnode_t *, char *, vattr_t *, vcexcl_t, \
int, vnode_t **, cred_t *, int, \
caller_context_t *, vsecattr_t *); \
int (*vop_remove)(vnode_t *, char *, cred_t *, \
caller_context_t *, int); \
int (*vop_link)(vnode_t *, vnode_t *, char *, cred_t *, \
caller_context_t *, int); \
int (*vop_rename)(vnode_t *, char *, vnode_t *, char *, \
cred_t *, caller_context_t *, int); \
int (*vop_mkdir)(vnode_t *, char *, vattr_t *, vnode_t **, \
cred_t *, caller_context_t *, int, \
vsecattr_t *); \
int (*vop_rmdir)(vnode_t *, char *, vnode_t *, cred_t *, \
caller_context_t *, int); \
int (*vop_readdir)(vnode_t *, uio_t *, cred_t *, int *, \
caller_context_t *, int); \
int (*vop_symlink)(vnode_t *, char *, vattr_t *, char *, \
cred_t *, caller_context_t *, int); \
int (*vop_readlink)(vnode_t *, uio_t *, cred_t *, \
caller_context_t *); \
int (*vop_fsync)(vnode_t *, int, cred_t *, \
caller_context_t *); \
void (*vop_inactive)(vnode_t *, cred_t *, \
caller_context_t *); \
int (*vop_fid)(vnode_t *, struct fid *, \
caller_context_t *); \
int (*vop_rwlock)(vnode_t *, int, caller_context_t *); \
void (*vop_rwunlock)(vnode_t *, int, caller_context_t *); \
int (*vop_seek)(vnode_t *, offset_t, offset_t *, \
caller_context_t *); \
int (*vop_cmp)(vnode_t *, vnode_t *, caller_context_t *); \
int (*vop_frlock)(vnode_t *, int, struct flock64 *, \
int, offset_t, \
struct flk_callback *, cred_t *, \
caller_context_t *); \
int (*vop_space)(vnode_t *, int, struct flock64 *, \
int, offset_t, \
cred_t *, caller_context_t *); \
int (*vop_realvp)(vnode_t *, vnode_t **, \
caller_context_t *); \
int (*vop_getpage)(vnode_t *, offset_t, size_t, uint_t *, \
struct page **, size_t, struct seg *, \
caddr_t, enum seg_rw, cred_t *, \
caller_context_t *); \
int (*vop_putpage)(vnode_t *, offset_t, size_t, \
int, cred_t *, caller_context_t *); \
int (*vop_map)(vnode_t *, offset_t, struct as *, \
caddr_t *, size_t, \
uchar_t, uchar_t, uint_t, cred_t *, \
caller_context_t *); \
int (*vop_addmap)(vnode_t *, offset_t, struct as *, \
caddr_t, size_t, \
uchar_t, uchar_t, uint_t, cred_t *, \
caller_context_t *); \
int (*vop_delmap)(vnode_t *, offset_t, struct as *, \
caddr_t, size_t, \
uint_t, uint_t, uint_t, cred_t *, \
caller_context_t *); \
int (*vop_poll)(vnode_t *, short, int, short *, \
struct pollhead **, \
caller_context_t *); \
int (*vop_dump)(vnode_t *, caddr_t, offset_t, offset_t, \
caller_context_t *); \
int (*vop_pathconf)(vnode_t *, int, ulong_t *, cred_t *, \
caller_context_t *); \
int (*vop_pageio)(vnode_t *, struct page *, \
u_offset_t, size_t, int, cred_t *, \
caller_context_t *); \
int (*vop_dumpctl)(vnode_t *, int, offset_t *, \