-
Notifications
You must be signed in to change notification settings - Fork 3
/
mvfs_base.h
4041 lines (3539 loc) · 113 KB
/
mvfs_base.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* * (C) Copyright IBM Corporation 2006, 2013. */
/*
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
Author: IBM Corporation
This module is part of the IBM (R) Rational (R) ClearCase (R)
Multi-version file system (MVFS).
For support, please visit http://www.ibm.com/software/support
*/
#ifndef MVFS_BASE_H_
#define MVFS_BASE_H_
#include "view_rpc_kernel.h"
#include "mfs_stats.h"
#include "mfs_ioctl.h"
#include "mfs_mount.h"
#include "mfs_audit.h"
/* Size limits used by the MFS code (both are 8192 bytes here). */
#define MFS_MAXRPCDATA 8192 /* Max data in clnt calls */
#define MFS_BLOCKSIZE 8192 /* FS block size */
/* Character types for pathname and hostname strings; both are plain char. */
typedef char mfs_pn_char_t; /* Pathname character type */
typedef char mfs_hn_char_t; /* Hostname character type */
/* Pathname string duplicate/free: direct aliases for STRDUP/STRFREE. */
#define PN_STRDUP(s) STRDUP(s)
#define PN_STRFREE(s) STRFREE(s)
/*
 * STRCASECMP: case-insensitive string comparison; an alias for
 * mvfs_ansi_strcasecmp.
 */
#define STRCASECMP mvfs_ansi_strcasecmp
/*
 * PN_STRCMP: compare two pathname strings.
 *   ci     - boolean; non-zero selects the case-insensitive compare
 *   s1, s2 - the strings to compare
 * Evaluates to whatever the selected comparison routine returns.
 */
#define PN_STRCMP(ci,s1,s2) \
    ((ci) ? STRCASECMP((s1), (s2)) : STRCMP((s1), (s2)))
/*
 * PNPAIR_STRFREE: release the strings held in the kpn and upn members
 * of a pathname pair.
 *
 * The two members may point at the same string.  PN_STRFREE NULLs the
 * pointer it is given, so the "same string?" comparison has to be made
 * before anything is freed; it is therefore the outermost test.
 *   - shared:   free the string once through kpn, then clear upn
 *   - distinct: free each non-NULL string on its own
 */
#define PNPAIR_STRFREE(p) { \
    if ((p)->kpn.s == (p)->upn.s) { /* One string, two pointers */ \
        if ((p)->kpn.s != NULL) { \
            PN_STRFREE((p)->kpn.s); \
        } \
        (p)->upn.s = NULL; \
    } else { /* Two strings: free both */ \
        if ((p)->kpn.s != NULL) { \
            PN_STRFREE((p)->kpn.s); \
        } \
        if ((p)->upn.s != NULL) { \
            PN_STRFREE((p)->upn.s); \
        } \
    } \
}
/*
 * Case-correction hooks.  As defined here they are stubs: the "set" form
 * evaluates to 0 and the "get" form evaluates to the name it was given.
 */
#define PN_SET_CASE_CORRECT_COMP(pnp, nm) (0)
#define PN_GET_CASE_CORRECT_COMP(pnp, nm, bufp) (nm)
/* Hostname string helpers: direct aliases for STRDUP/STRFREE/STRCMP. */
#define HN_STRDUP(s) STRDUP(s)
#define HN_STRFREE(s) STRFREE(s)
#define HN_STRCMP(s1,s2) STRCMP(s1,s2)
/*
 * MVFS_INIT_TIMEVAL: zero the tv_sec and tv_usec members of tv.
 *   tv - an lvalue (not a pointer) of a type with tv_sec/tv_usec members
 * The argument is parenthesized so that expressions such as *tvp or
 * arr[i] can be passed; without the parentheses "*tvp" would expand to
 * "*tvp.tv_sec" and fail to compile.
 * Expands to a braced block, so it is used as a statement.
 */
#define MVFS_INIT_TIMEVAL(tv) { \
    (tv).tv_sec = 0; \
    (tv).tv_usec = 0; \
}
/*
* PROTOTYPES and subroutine definitions
*/
#include <stdarg.h>
#include "mvfs_debug.h"
#include "mvfs_param.h"
/*
 * Common list macros for circularly linked object lists.
 *
 * LISTHDR    - declare a list head "name" whose next/prev point at
 *              objects of type t.  The head is an anonymous struct that
 *              holds only the two link pointers.
 * INIT_LIST  - make the list empty: next = prev = &listhdr (cast to t *).
 *              Any lock guarding the list must be initialized separately.
 * ADD_TO_EOL - add an object to the end of the list (before the head).
 * ADD_TO_BOL - add an object to the beginning of the list (after the head).
 * RM_LIST    - remove an object from its list and NULL its links.  An
 *              object whose next pointer is already NULL is treated as
 *              "not on a list" and left alone.
 *
 * lp is a pointer to the list head; item_p is a pointer to an object
 * with next and prev members.  Arguments are evaluated more than once,
 * so they must not have side effects.
 *
 * The original notes also mention a second version for a "freelist"
 * (mnodes and name cache entries); it is not defined in this section.
 */
#define LISTHDR(name, t) \
    struct { \
        t *next; \
        t *prev; \
    } name
#define INIT_LIST(name, t) \
    (name).next = (name).prev = (t *) &(name)
#define ADD_TO_EOL(lp, item_p) { \
    (item_p)->prev = (lp)->prev; \
    (item_p)->next = (lp)->prev->next; /* i.e. back to the head */ \
    (lp)->prev->next = (item_p); \
    (lp)->prev = (item_p); \
}
#define ADD_TO_BOL(lp, item_p) { \
    (item_p)->next = (lp)->next; \
    (item_p)->prev = (lp)->next->prev; /* i.e. back to the head */ \
    (lp)->next->prev = (item_p); \
    (lp)->next = (item_p); \
}
/*
 * Written as "if (not linked) {} else { unlink }" rather than a bare
 * "if (linked) { unlink }" so that an "else" following the macro in the
 * caller's code cannot attach to the macro's own if.
 */
#define RM_LIST(item_p) \
    if ((item_p)->next == NULL) { \
        /* Not on a list: nothing to do */ \
    } else { \
        (item_p)->next->prev = (item_p)->prev; \
        (item_p)->prev->next = (item_p)->next; \
        (item_p)->next = NULL; \
        (item_p)->prev = NULL; \
    }
/*
 * Flag manipulation macros.
 *   MVFS_FLAGON(flags, v)    - 1 if any bit of v is set in flags, else 0
 *   MVFS_FLAGOFF(flags, v)   - 1 if no bit of v is set in flags, else 0
 *   MVFS_SETFLAG(flags, v)   - turn the bits of v on in flags
 *   MVFS_CLEARFLAG(flags, v) - turn the bits of v off in flags
 * The set/clear forms expand to a braced block and modify flags in place.
 */
#define MVFS_FLAGON(flags, flagvalue) (0 != ((flagvalue) & (flags)))
#define MVFS_FLAGOFF(flags, flagvalue) (0 == ((flagvalue) & (flags)))
#define MVFS_SETFLAG(flags, flagvalue) { (flags) |= (flagvalue); }
#define MVFS_CLEARFLAG(flags, flagvalue) { (flags) &= ~(flagvalue); }
/*
 * MFS structures for mfscall().
 * mfs_callinfo: from procedure code. Indicates the 'RPC trait' which
 * is being called for mfscall() error messages.
 * Currently one of mfs_viewcall, mfs_vobcall.
 * mfs_svr: from an object (like view obj or vob mount point).
 * Identifies whom to make the call to.
 * mfs_retryinfo: from the mount point. Indicates retry handling
 * parameters for the call.
 */
/*
 * Per-trait call description: protocol identity, printable names, and
 * three callbacks that read or write fields of a request/response.
 * NOTE(review): P1()/PN() are defined outside this section; they look
 * like prototype-argument wrapper macros -- confirm.
 */
struct mfs_callinfo {
u_long proto; /* Server protocol */
u_long version; /* Server protocol version */
char *svrname; /* Trait name */
char **opnames; /* Trait operation names */
int *optimeoshft; /* Trait timeout shift per-op */
int (*get_status)(P1(void *resp)); /* Get status from response */
XID_T (*get_xid)(P1(void *resp)); /* Get XID from response */
void (*set_xid)(P1(void *req)
PN(time_t bt)
PN(XID_T xid)); /* Set XID in rqst */
};
/* Following are used in mnt args from mount command too */
/*
 * Server identity and state: status bits, address, and the local, remote
 * and network pathnames associated with the server.
 */
struct mfs_svr {
u_int down : 1; /* Server down */
u_int dprinted : 1; /* Server down msg printed */
u_int uprinted : 1; /* Server down msg printed to user */
u_int svrbound : 1; /* Server addr valid (else find) */
u_int mbz : 28; /* Padding; the five bit-fields total 32 bits */
ks_sockaddr_storage_t addr; /* Server address */
mfs_strbufpn_pair_t lpn; /* Local pathname (to server dir) */
mfs_hn_char_t *host; /* Server host name */
mfs_pn_char_t *rpn; /* Remote pathname (for svr to use) */
mfs_pn_char_t *net_pn; /* Remote pathname (to server dir) */
tbs_uuid_t uuid; /* UID for albd (location daemon) */
};
/* Retry-handling parameters for a call: mount-style flags plus timing. */
struct mfs_retryinfo {
u_int soft : 1; /* Server soft mount */
u_int nointr : 1; /* Don't allow intr on RPC calls */
u_int rebind : 1; /* Return EAGAIN and timeo for rebind */
u_int mbz : 29; /* Padding; the four bit-fields total 32 bits */
u_long timeo; /* Server timeout base in .1 secs */
u_long retries; /* Server retry count */
};
/*
 * RPC traits used from MFS code.  Declarations only; the objects are
 * defined in another translation unit.
 */
extern struct mfs_callinfo *mfs_viewcall;
extern struct mfs_callinfo *mfs_albdcall;
/* A cache of RPC handles, number of handles to keep is tunable */
/*
 * One cache slot: the protocol/version the handle was created for, the
 * handle itself, and bookkeeping fields.
 * NOTE(review): the exact meaning of inuse vs. used is not visible in
 * this section -- confirm against the code that manages the cache.
 */
typedef struct client_cache {
int proto;
int version;
int inuse;
int used;
/*
 * XXX worry about 64-bit alignment/padding? usually the cache
 * isn't huge, probably not a lot of wasted space.
 */
ks_uint32_t boottime;
CLIENT *client;
} client_cache_t;
/* Cache size choices (number of handles). */
#define CLIENT_CACHE_SIZE_SMALL 5
#define CLIENT_CACHE_SIZE_LARGE 10
#define CLIENT_CACHE_SIZE_AUTOMAX 240
/*
 * RPC client-cache state: a lock, the address family recorded for the
 * cache, and a pointer to the client_cache_t entries.
 * NOTE(review): presumably mfs_client_lock guards the cache -- confirm.
 */
typedef struct mvfs_rpc_data {
LOCK_T mfs_client_lock;
sa_family_t mvfs_client_cache_family;
client_cache_t *mvfs_client_cache;
} mvfs_rpc_data_t;
/*
* Define the structure of an MFS FID & Export FID
* For non-Atria (XFID) support, the fid is limited to 12 bytes
* (including the len field) so that NFS can construct a file handle from it.
*/
#define MFS_UNK_DBID 0 /* Unkown value for partial vfh */
#define MFS_UNK_GEN 0 /* Unkown value for partial vfh */
#define MFS_NULL_DBID 0 /* Value for "null" fid */
#define MFS_NULL_GEN 0 /* Value for "null" gen */
/*
 * MFS file ID.  Two unions: the first holds a database ID, a cleartext
 * vnode pointer, or an mnode number; the second holds a generation
 * number or a VFS pointer.  Per the member comments, the pointer forms
 * are the LOOP case.  The mf_* macros below are shorthand for the union
 * members.
 */
struct mfs_fid {
union {
u_long dbid; /* Database ID (VOB) */
CLR_VNODE_T *realvp; /* Cleartext ptr (LOOP) */
u_long mnum; /* Mnode number (others) */
} fid_un;
union {
ks_uint32_t gen; /* Generation number (others) */
VFS_T *realvfsp; /* realvp VFS pointer (LOOP) */
} fid_un2;
};
/* Accessor shorthands: use as fid.mf_dbid, fid.mf_gen, etc. */
#define mf_dbid fid_un.dbid
#define mf_realvp fid_un.realvp
#define mf_mnum fid_un.mnum
#define mf_gen fid_un2.gen
#define mf_realvfsp fid_un2.realvfsp
typedef struct mfs_fid mfs_fid_t;
/*
 * MFS_FIDEQ: 1 when two fids (passed as objects, not pointers) have the
 * same mf_dbid and the same mf_gen, else 0.
 */
#define MFS_FIDEQ(fid1, fid2) \
    (!((fid1).mf_dbid != (fid2).mf_dbid || (fid1).mf_gen != (fid2).mf_gen))
/*
 * MFS_FIDNULL: 1 when the fid holds the "null" dbid and the "null"
 * generation, else 0.
 */
#define MFS_FIDNULL(fid) \
    (!((fid).mf_dbid != MFS_NULL_DBID || (fid).mf_gen != MFS_NULL_GEN))
/*
 * MFS_FID_TO_PARTIAL_VFH: fill in a "partial" view file handle from a fid.
 *   fid  - source fid (object); supplies the version dbid and generation
 *   vfsp - VFS whose mount info supplies the VOB uuid
 *   vw   - view vnode; its history-mode bit selects the flags value
 *   vfh  - file handle (object) to fill in
 * The element dbid is not known from a fid, so it is set to MFS_UNK_DBID.
 */
#define MFS_FID_TO_PARTIAL_VFH(fid, vfsp, vw, vfh) { \
    (vfh).vob_uuid = VFS_TO_MMI(vfsp)->mmi_svr.uuid; \
    (vfh).ver_dbid = (fid).mf_dbid; \
    (vfh).elem_dbid = MFS_UNK_DBID; \
    (vfh).gen = (fid).mf_gen; \
    if (VTOM(vw)->mn_view.hm) { \
        (vfh).flags = VIEW_FHANDLE_FLAGS_HISTORY_MODE; \
    } else { \
        (vfh).flags = 0; \
    } \
    (vfh).pad0 = 0; \
}
/*
 * MFS_UPDATE_PARTIAL_VFH: complete a partial file handle (pvfh) from a
 * full one (fvfh).  Both are passed as objects.
 *   - an unknown element dbid is copied from fvfh; if fvfh has none
 *     either, an informational message is logged
 *   - an unknown generation is copied from fvfh
 *   - the flags are copied only when both handles agree on the
 *     history-mode bit
 */
#define MFS_UPDATE_PARTIAL_VFH(pvfh, fvfh) { \
    if ((pvfh).elem_dbid == MFS_UNK_DBID) { \
        (pvfh).elem_dbid = (fvfh).elem_dbid; \
        if ((fvfh).elem_dbid == MFS_UNK_DBID) { \
            mvfs_log(MFS_LOG_INFO, \
                     "update vfh: no elem dbid for dbid 0x%x\n", \
                     (pvfh).ver_dbid); \
        } \
    } \
    if ((pvfh).gen == MFS_UNK_GEN) { \
        (pvfh).gen = (fvfh).gen; \
    } \
    if (((fvfh).flags & VIEW_FHANDLE_FLAGS_HISTORY_MODE) == \
        ((pvfh).flags & VIEW_FHANDLE_FLAGS_HISTORY_MODE)) { \
        (pvfh).flags = (fvfh).flags; \
    } \
}
/*
 * NFS (non-atria) fid format. Only VOB objects are "exportable".
 *
 */
/*
 * Field order and widths matter here: mfx_len must line up with the
 * system's struct fid.  NOTE(review): with 16-bit u_short and 32-bit
 * ks_uint32_t this is 12 bytes, matching the 12-byte limit noted for
 * the export fid -- confirm the type widths.
 */
struct mfs_xfid { /* FID for NFS support (vfs_vget/vop_fid ops) */
u_short mfx_len; /* Must match struct fid in vfs.h */
u_short mfx_vid; /* View id of view object is in */
ks_uint32_t mfx_dbid; /* Database ID (VOB objs only) */
ks_uint32_t mfx_gen; /* Generation number (for stale detection) */
};
typedef struct mfs_xfid mfs_xfid_t;
/* mfs_xfid_t data size (size of fields after mfx_len) */
#define MFS_XFIDDATASZ (sizeof(mfs_xfid_t) - sizeof(u_short))
/* MFS_VIDINRANGE: 1 when vid lies in [0, 0xfffe], else 0. */
#define MFS_VIDINRANGE(vid) (!((vid) < 0 || (vid) > 0xfffe))
/* Reserved dbid range and the identity used for an unbound VOB root. */
#define MFS_RESERVED_DBIDS 0xfffffc00 /* Reserved for MFS above here */
#define MVFS_ROOTDBID 0xffffffff /* unbound Vob Root dbid */
#define MVFS_ROOTGEN 0 /* unbound Vob Root generation */
#define MFS_NULLVID 0 /* Null view id */
typedef fsid_t mfs_fsid_t; /* FSID from mount table */
typedef int mfs_class_t; /* Holds one of the MFS_*CLAS class values */
/*
 * MVFS_LH(x): as defined here it discards its argument, so anything
 * wrapped in MVFS_LH() is compiled out.
 */
#define MVFS_LH(x) /**/
/*
* MFS vnode info structure. All the info structs must
* have the same "hdr" in them so they can be manipulated
* in basic ways without testing the type of MVFS object.
*
* This is also used to determine (with other optional structures)
* the size of FS-dependent data to allocate.
*
* Locking:
* Different fields of the mnode are protected by different locks.
* The "hdr" fields (with exceptions listed below) are protected by
* the mnode header lock found in each header. These are not marked.
*
* The next and prev pointers are protected by the hash chain lock
* for the hash chain on which they reside. Marked with "HS".
*
* The free_next and free_prev link the mnode to either the free list
* lock or the destroy list lock. Think of both these lists as lists
* where free mnodes are linked; the vobfreelist is a special freelist
* used only by vob mnodes that still have a view attached.
* Marked with "FD".
*
* The mcount and mnum are protected by the header lock (not marked).
*
* The mfree and trans_destroy flags are protected by the mfs_vobfreelock.
* mfree marks the mnode as being on the freelist without searching
* down the free chain. Similarly, trans_destroy indicates the mnode
* is (almost) on the destroy list and should be ignored in searches.
* Marked with "F".
*
* The on_destroy flag is protected by the mvfs_mndestroylock. Once set,
* it is never cleared. Marked with a "D".
*
* The stale flag is protected by the mnode lock as is the rest of the flag
* word. Marked with "M".
*
* Several offsets: mclass, msize, dncgen are set in mvfs_mnnew when
* the mnode is allocated. Since no one can have the mnode yet, no lock
* is used. If future enhancements required a lock for these offsets, it
* should be the header lock. Not marked.
*
* The lock offset (mnode lock) needs to be taken *without* the
* mnode header lock for lock ordering reasons. Marked with "*".
*
* The vfsp is set in mvfs_mninit_new_mnode under no lock when the mnode
* is allocated. It is not cleared until the mnode is destroyed, so it
* never changes for the life of the mnode. Not marked.
*
* The fid is set in mvfs_mninit_new_mnode under no lock when the mnode
* is allocated. And largely it is read under the mnode lock.
* However, it is set to mnp->mn_hdr.mnum in the case that a fid wasn't
* sent in (in mvfs_mninit_new_mnode) prior to the mnum being set.
* XXX This seems like a bug, but has always been this way.
* The mf_gen portion of the fid is updated in mfs_getmnode (mvfs_vfsops.c)
* under the mnode lock. It is returned by mfs_vp2dbid in mvfs_utils.c
* look at this further. XXX Marked with an "M".
*
* The vp is protected by the mnode lock. (It is set to null in
* mvfs_mnnew under no lock.) Marked with an "M".
*
* The realvp continues to be protected by the mnode lock. Marked "M".
*
* The viewvp is set in mvfs_mninit_new_mnode and shouldn't change for
* the life of the mnode. (There is a bit of code in mfs_makevobnode
* that thinks it is updating it, but I don't think we ever hit this.)
* It is cleared in mvfs_mnclean. Not marked.
*
* The m_bhv is protected by the mnode lock. Marked with an "M".
*
* freelist_time is set under the header lock. Not marked.
*
* Beware that some systems cannot lock bitfields
* on less than quadword (8 byte) boundaries due to vagaries of the
* compiler load/modify/store code sequences. All processors
* without direct memory operations cannot lock anything less than
* a byte (or more likely, a 4-byte word).
*
*/
/*
 * Common header at the start of every mnode.
 * Lock markers in the left margin (from the locking notes for this
 * structure):
 *   HS - hash chain lock           FD - free list / destroy list lock
 *   F  - mfs_vobfreelock           D  - mvfs_mndestroylock
 *   M  - mnode lock                *  - taken without the header lock
 *   (unmarked) - header lock, or set once at allocation
 */
struct mfs_mnhdr { /* Basic hdr info for all mfs objects */
/*HS*/ struct mfs_mnode *next; /* Next ptr */
/*HS*/ struct mfs_mnode *prev; /* Prev ptr */
/*FD*/ struct mfs_mnode *free_next; /* Free list next */
/*FD*/ struct mfs_mnode *free_prev; /* Free list previous */
LOCK_T hdr_lock; /* Mnode header lock (see above) */
u_int mcount; /* Reference count */
mfs_class_t mclass; /* Mnode Class of MFS object */
int mnum; /* Mnode table number */
/*F*/ u_int mfree; /* Mnode is on vobfree list flag */
/*F*/ u_int trans_destroy;/* Mnode moving to destroy list flag */
/*D*/ u_int on_destroy; /* Mnode is on destroy list flag */
/*M*/ u_int stale; /* Flag to mark stale mnodes in hash */
/*M*/ u_int cached_pages : 1; /* vnode has cached pages */
/*M*/ u_int clear_dirty : 1; /* cleartext pages are dirty */
/*M*/ u_int clear_mmap : 1; /* mmap switched to cleartext */
/*M*/ u_int pad : 29; /* unused flag bits */
size_t msize; /* Size (in bytes) of this mnode */
/***/ LOCK_T lock; /* Lock on structure */
/* With MVFS_LH defined empty, as in this header, lock_high is compiled out. */
MVFS_LH(LOCK_T lock_high;) /* High Level lock on structure (for I/O) */
VFS_T *vfsp; /* VFSP of object */
/*M*/ mfs_fid_t fid; /* File ID */
u_long dncgen; /* Mnode generation # for name cache */
/*M*/ VNODE_T *vp; /* Back ptr to mfs vnode */
/*M*/ CLR_VNODE_T *realvp; /* Real object or cleartext vnode ptr */
#ifdef NFSV4_SHADOW_VNODE
/*M*/ CLR_VNODE_T *realvp_master; /* Master "shadow" cleartext vnode ptr */
#endif
VNODE_T *viewvp; /* View for object (may be NULL) */
time_t freelist_time; /* Time added to freelist */
};
/*
 * Classes of MFS objects, and macros to test for them.
 * "Class" is used to avoid confusion with object "type".
 */
#define MFS_SDEVCLAS 1 /* Special device vnode for ioctl's */
#define MFS_VIEWCLAS 2 /* Inode which represents a "view" itself */
#define MFS_LOOPCLAS 3 /* Cover vnode for auditing non-mfs files */
#define MFS_VIEWDIRCLAS 4 /* Dir containing "views" (view root for now) */
#define MFS_VOBRTCLAS 5 /* VOB root placehold inode */
#define MFS_VOBCLAS 6 /* Object in vob/view */
#define MFS_NTVWCLAS 7 /* New non-loopback NT-style view-tags */
#define MFS_MAXCLAS 7 /* Max class number */
/* Helper: 1 when the mnode's header class equals cls, else 0. */
#define MFS_MNCLASS_IS(mnp, cls) ((mnp)->mn_hdr.mclass == (cls))
/* One predicate per class; each takes a pointer to an mnode. */
#define MFS_ISSDEV(mnp) MFS_MNCLASS_IS(mnp, MFS_SDEVCLAS)
#define MFS_ISVOBRT(mnp) MFS_MNCLASS_IS(mnp, MFS_VOBRTCLAS)
#define MFS_ISLOOP(mnp) MFS_MNCLASS_IS(mnp, MFS_LOOPCLAS)
#define MFS_ISLOOPVIEW(mnp) MFS_MNCLASS_IS(mnp, MFS_VIEWCLAS)
#define MFS_ISVIEWDIR(mnp) MFS_MNCLASS_IS(mnp, MFS_VIEWDIRCLAS)
#define MFS_ISVOB(mnp) MFS_MNCLASS_IS(mnp, MFS_VOBCLAS)
/* NT-style view for VOB tag mounts */
#define MFS_ISNTVIEW(mnp) MFS_MNCLASS_IS(mnp, MFS_NTVWCLAS)
/* Either kind of view class (loopback or NT-style) */
#define MFS_ISVIEW(mnp) (MFS_ISLOOPVIEW(mnp) || MFS_ISNTVIEW(mnp))
/* Operation-vector pointers; declared here, defined in another file. */
extern V_OP_T *mfs_vopp; /* vnode ops ptr */
extern VFSOPS_T *mfs_vfsopp; /* vfs ops ptr */
/* see mdep or mvfs_systm.h for MFS_VPISMFS() */
/*
 * MFS_ISROOTSYNONYM: is vp a synonym for the root identified by rootdbid?
 *
 * The rule depends on whether vp's view is in history mode:
 *   Not history mode: any version of the "mount point" element counts,
 *                     so only the element dbid has to match.
 *   History mode:     both the element dbid and the version dbid must
 *                     match.  (Versions of the directory two levels
 *                     down, e.g. <root>/main/2, carry the root's element
 *                     dbid but a different version dbid.)
 *
 * Evaluates to 1 or 0.  vp is evaluated more than once.
 */
#define MFS_ISROOTSYNONYM(vp, rootdbid) \
    (VTOM(MFS_VIEW(vp))->mn_view.hm \
        ? (VTOM(vp)->mn_vob.vfh.elem_dbid == (rootdbid) && \
           VTOM(vp)->mn_vob.vfh.ver_dbid == (rootdbid)) \
        : (VTOM(vp)->mn_vob.vfh.elem_dbid == (rootdbid)))
/*
 * Directory version ("rebind") cache entry: two status bits, the build
 * handle the entry was made under, the fid it rebinds to, and the event
 * time of the directory rebound to.
 *
 * The pad field is 30 bits so that the three bit-fields total 32, like
 * the other flag words in this header.  The previous width of 31 made
 * the total 33 bits, which cannot fit one 32-bit u_int and so pushed
 * the pad into a second storage unit.
 */
struct mfs_rebindent { /* Dir version cache entry */
    u_int valid : 1; /* Entry valid */
    u_int self : 1; /* Entry rebinds to self */
    u_int mbz : 30; /* unused */
    view_bhandle_t bh; /* BuildHandle for entry */
    mfs_fid_t fid; /* Rebind vnode fid */
    struct timeval evtime; /* Event time of dir rebound to */
};
/* Singly linked list node holding one credential pointer. */
typedef struct mvfs_clr_creds {
struct mvfs_clr_creds *next;
CRED_T *cred;
} mvfs_clr_creds_t;
/*
 * Per-object cleartext state: the cleartext pathname, a list of creds
 * already checked against it, status bits, and cached attributes.
 */
struct mfs_clearinfo { /* Cleartext information */
mfs_pn_char_t *nm; /* Cleartext pname */
LOCK_T cl_info_lock; /* Lock for cred cache */
mvfs_clr_creds_t *ok_creds; /* creds that have looked up the name */
u_long revalidate_time; /* Time (secs) to do revalidate */
u_int isvob : 1; /* Cleartext in vob */
u_int rwerr : 1; /* Cleartext RW error */
u_int purge_nm : 1; /* Cleartext pathname wrong */
u_int purge_cvp : 1; /* Cleartext vp wrong */
u_int hadonce : 1; /* had cltxt in past (for stats) */
u_int used : 1; /* used cltxt since last reclaim */
u_int delete_on_close : 1; /* cltxt is marked for deletion */
u_int ostale_logged : 1; /* have logged open stale warning */
u_int pad : 24; /* Pad space; the nine bit-fields total 32 bits */
VATTR_T va; /* Stat of cleartext */
time_t atime_pushed; /* vob container setattr time, for scrubber */
};
#define MVFS_CTXT_ATIME_REFRESH_DEF 3600 /* default interval 1 hr between forced setattr */
/*
 * MFS_REBINDINVAL: invalidate an mnode's rebind cache entry by clearing
 * both its "self" and "valid" bits.  mnp is a pointer to the mnode.
 */
#define MFS_REBINDINVAL(mnp) { \
    (mnp)->mn_vob.rebind.self = 0; \
    (mnp)->mn_vob.rebind.valid = 0; \
}
/*
 * On unix, we need to check access to the cleartext path as well as to the
 * MVFS path. This gets tedious, but we try to keep track of creds we've already
 * looked at to save some redundant work. See the comments near mfs_getcleartext()
 * for a full explanation.
 */
#define MVFS_CRED_HASHSZ 61 /* FIXME: nice prime number? */
/*
 * System-wide cred list state: MVFS_CRED_HASHSZ hash chains, a chain of
 * free entries, and the spinlock that covers the system-wide chains.
 */
typedef struct mvfs_credlist_data {
/* System-wide credlist chains and spinlock.
 * Mnode cred lists are protected by the mnode locks.
 */
mvfs_clr_creds_t *mvfs_sys_credlist[MVFS_CRED_HASHSZ];
mvfs_clr_creds_t *mvfs_free_creds;
SPLOCK_T mvfs_sys_credlist_lock;
#ifdef MVFS_DEBUG
/* buckets for chain length accounting. Useful for determining if the hash
 * algorithm is distributing well.
 */
ks_uint32_t mvfs_sys_crlen[MVFS_CRED_HASHSZ];
#endif
} mvfs_credlist_data_t;
/*
 * Index cache hooks.  As defined here there is no index cache: the
 * lookup reports the requested offset o unchanged through both output
 * pointers (bip and bop), and add/flush/destroy expand to nothing.
 */
#define MFS_INDEX_CACHE_LOOKUP_BEST(vp,o,bip,bop) { \
    *(bop) = (o); \
    *(bip) = *(bop); \
}
#define mfs_index_cache_add(vp,i,o) /* do nothing */
#define mfs_index_cache_flush(vp) /* do nothing */
#define mfs_index_cache_destroy(mnp) /* do nothing */
/*
 * One cached block of readdir results, together with the offsets that
 * bracket it and the sizes of the valid data and of the allocation.
 */
struct mvfs_rce { /* readdir cache entry */
tbs_boolean_t valid; /* entry valid? */
tbs_boolean_t eof; /* is this the last block in dir? */
MOFFSET_T offset; /* uio_offset for this block */
MOFFSET_T endoffset; /* uio_offset after reading this block */
size_t size; /* size of entries to copy */
size_t bsize; /* size of allocated block */
void *block; /* block of entries */
};
/*
 * Variable-length readdir cache: a count followed by that many entries.
 * The array is declared with one element; the RDDIR_CACHE_SIZE macros
 * compute the real allocation size for nentries entries.
 */
struct mvfs_rddir_cache {
int nentries;
struct mvfs_rce entries[1]; /* really nentries long */
};
/*
 * Allocation size, in bytes, of a readdir cache.
 *   RDDIR_CACHE_SIZE(mrc) - for an existing cache, from its nentries
 *   RDDIR_CACHE_SIZE_N(N) - for a cache that is to hold N entries
 * The structure already contains one entry, so only the remaining
 * (count - 1) entries are added to its size.
 */
#define RDDIR_CACHE_SIZE(mrc) \
    (((mrc)->nentries - 1) * sizeof((mrc)->entries[0]) + sizeof(*(mrc)))
#define RDDIR_CACHE_SIZE_N(N) \
    (((N) - 1) * sizeof(((struct mvfs_rddir_cache*)0)->entries[0]) + \
     sizeof(struct mvfs_rddir_cache))
/*
 * Readdir cache interface.  Only the prototypes are visible here; the
 * notes below restate the signatures and the locking remarks that
 * accompany them, and mark anything inferred from names as such.
 */
/* NOTE(review): presumably releases the mnode's readdir cache -- confirm. */
EXTERN void
mvfs_rddir_cache_destroy(struct mfs_mnode *mnp);
/* call with mnode locked */
EXTERN void
mvfs_rddir_cache_flush(struct mfs_mnode *mnp);
/*
 * Takes the mnode, a uio and a cred; returns a boolean and has two int
 * out-parameters (eofp, errorp).
 * NOTE(review): presumably the boolean reports a cache hit -- confirm.
 */
EXTERN tbs_boolean_t
mvfs_rddir_cache_get(
struct mfs_mnode *mnp,
struct uio *uiop,
CRED_T *cred,
int *eofp,
int *errorp
);
/* Takes an mnode and a readdir cache entry; see also the _mnlocked form. */
EXTERN void
mvfs_rddir_cache_enter(
struct mfs_mnode *mnp,
struct mvfs_rce *entryp
);
/* Expects, and asserts, that the given mnode is locked.
 */
EXTERN void
mvfs_rddir_cache_enter_mnlocked(
struct mfs_mnode *mnp,
struct mvfs_rce *entryp
);
/* Cache-size set/get/compute; each returns an int (presumably an error code -- confirm). */
EXTERN int
mvfs_rddir_cache_setcaches(mvfs_cache_sizes_t *szp);
EXTERN int
mvfs_rddir_cache_getcaches(mvfs_cache_sizes_t *szp);
EXTERN int
mvfs_rddir_compute_caches(
ks_int32_t scale,
mvfs_cache_sizes_t *szp
);
/* Subsystem init (given cache sizes) and unload. */
EXTERN void
mvfs_rddir_cache_init(mvfs_cache_sizes_t *szp);
EXTERN void
mvfs_rddir_cache_unload(void);
/*
 * MVFS_CL(x): as defined here it discards its argument, so the
 * cred_lock member wrapped in it below is compiled out.
 */
#define MVFS_CL(x) /**/
/*
 * State for an object in a VOB/view: status bits, the view file handle,
 * cleartext info, open counts, audit (choid) bookkeeping, delayed-remove
 * info, rebind and readdir caches, symlink text and cached attributes.
 */
struct mfs_vobnode {
u_int rcred : 1; /* cred has read credentials */
u_int wcred : 1; /* cred has write credentials */
u_int dir_eof : 1; /* rddir_off (eof cookie) valid */
u_int choid_audited: 1; /* choid'd under audit */
u_int sync_mtime : 1; /* Mtime needs update */
u_int sync_ctime : 1; /* Ctime needs update (indirectly) */
u_int pad : 26; /* Padding; the seven bit-fields total 32 bits */
view_fhandle_t vfh; /* View object file handle */
struct mfs_clearinfo cleartext; /* Cleartext info */
int open_count; /* Count of open's done */
int open_wcount; /* Count of open's done with FWRITE */
view_bhandle_t choid_bh; /* BH choid'd under */
u_long choid_bh_seq; /* Audit sequence # for choid logic */
MVFS_CL(LOCK_T cred_lock;) /* Lock while changing cred */
CRED_T *cred; /* Cred for delayed IO/setattr */
mfs_pn_char_t *rmv_name; /* Name to remove on mfs_inactive */
VNODE_T *rmv_dvp; /* Dir vnode for remove */
CRED_T *rmv_cred; /* Credentials for remove */
struct mfs_rebindent rebind; /* Rebind info */
u_long rddir_off; /* rddir EOF offset */
struct mvfs_rddir_cache *rddir_cache; /* readdir results, if any */
mfs_pn_char_t *slinktext; /* Symlink text */
int slinklen; /* Symlink text length */
u_long attrgen; /* Attribute generation number */
timestruc_t attrsettime; /* Last time attrs set */
timestruc_t attrtime; /* Time attributes valid until */
struct timeval lvut; /* last VOB update time from getattr */
view_vstat_t attr; /* Cached attributes */
/*
 * The user and group identities in the view_vstat_t struct are not
 * in native form. Therefore the following two fields were added
 * to store the converted native ids. They are to be kept in sync
 * with view_vstat_t.
 */
MVFS_USER_ID user_id; /* uid or union of uid/user sid on NT */
MVFS_GROUP_ID group_id; /* gid or union of gid/group sid on NT */
int pages_mapped; /* number of pages mapped */
};
/*
 * Name cache locking macros (may be overridden on a per-platform basis).
 * As defined here the lock type is a spinlock, but the init, lock,
 * unlock and free operations all expand to nothing.
 */
#define DNCLOCK_T SPLOCK_T
#define INITDNCLOCK(lnm, lstr)
#define DNCLOCK(lnm,s)
#define DNCUNLOCK(lnm,s)
#define FREEDNCLOCK(lnm)
/* Spinlock pools
 * To reduce contention for spinlocks on MP systems, in some places
 * we use a pool of spinlocks in place of one single lock.
 * Then, given some value indicating one instance of an object, a mapping
 * function (or macro) is used to select one of the spinlocks to protect
 * that instance.
 * For example, rather than using a single spinlock for an entire hash
 * table, we can use a pool of locks such that every n chains (n may be 1)
 * has its own spinlock. This is designed to allow greater parallelism on
 * MP machines.
 */
/* Pool anchor: selector function, lock count, and the lock array itself. */
typedef struct splock_pool {
SPLOCK_T *(*spl_func)(struct splock_pool *, unsigned int); /* ptr to selector function */
int spl_count; /* num spinlocks in pool */
SPLOCK_T *spl_table; /* dynamic splock table */
} splock_pool_t;
/*
 * SPLOCK_SELECT: pick a spinlock from a spinlock pool.
 *   IN:     pool   - ptr to the struct anchoring the pool
 *   IN:     val    - value handed to the selection function
 *   IN:     func   - macro or function that chooses the lock from
 *                    (pool, val)
 *   IN/OUT: lockpp - ptr to lock ptr (SPLOCK_T **); receives the address
 *                    of the chosen lock, to be used later for the unlock
 *
 * Use SPLOCK, SPUNLOCK to obtain/release the returned lock.
 */
#define SPLOCK_SELECT(pool, val, func, lockpp) \
    (*(lockpp) = func((pool), (val)))
/*
 * Mapping macros for hash-table spinlock pools, for use as the func
 * argument of SPLOCK_SELECT().
 *   HASH_SPLOCK_PER_CHAIN    - lock number hash_val (one lock per chain)
 *   HASH_SPLOCK_PER_GROUP    - lock number hash_val modulo the pool's
 *                              lock count (chains share locks)
 *   HASH_SPLOCK_SET_POOLSIZE - store the pool size to use for a hash
 *                              table of hash_size chains; as defined
 *                              here that is simply hash_size
 * The original notes describe a default of 2 SPLOCK_T per table that a
 * platform can override through HASH_SPLOCK_MAP (set to
 * HASH_SPLOCK_PER_CHAIN or HASH_SPLOCK_PER_GROUP) and
 * HASH_SPLOCK_RATIO (hash chains per SPLOCK_T); neither name is
 * defined or used in this section.
 */
#define HASH_SPLOCK_PER_CHAIN(pool, hash_val) \
    (&(pool)->spl_table[(hash_val)])
#define HASH_SPLOCK_PER_GROUP(pool, hash_val) \
    (&(pool)->spl_table[(hash_val) % (pool)->spl_count])
#define HASH_SPLOCK_SET_POOLSIZE(sp_poolsize, hash_size) { \
    (sp_poolsize) = (hash_size); \
}
/* mvfs_lock pools
 * To reduce lock contention on MP systems, in some places
 * we use a pool of LOCK_T's in place of one single lock.
 * Then, given some value indicating one instance of an object, a mapping
 * function (or macro) is used to select one of the mvfs_lock to protect
 * that instance.
 * For example, rather than using a single mvfs_lock for an entire hash
 * table, we can use a pool of locks such that every n chains (n may be 1)
 * has its own mvfs_lock. This is designed to allow greater parallelism on
 * MP machines.
 * The difference between spinlock pools and mvfs_lock pools is that
 * mvfs_lock pools have locks that can pend; use this type of lock pool
 * when you have the potential to pend while holding the lock.
 */
/* Pool anchor: selector function, lock count, and the lock array itself. */
typedef struct mvfs_lock_pool {
LOCK_T *(*mlp_func)(struct mvfs_lock_pool *, unsigned int); /* selector func ptr */
int mlp_count; /* num mvfs_locks in pool */
LOCK_T *mlp_table; /* dynamic mvfs_lock table */
} mvfs_lock_pool_t;
/*
 * MVFS_LOCK_SELECT: pick a mvfs_lock from a mvfs_lock pool.
 *   IN:     pool   - ptr to the struct anchoring the pool
 *   IN:     val    - value handed to the selection function
 *   IN:     func   - macro or function that chooses the lock from
 *                    (pool, val)
 *   IN/OUT: lockpp - ptr to lock ptr (LOCK_T **); receives the address
 *                    of the chosen lock, to be used later for the unlock
 *
 * Use MVFS_LOCK, MVFS_UNLOCK to obtain/release the returned lock.
 */
#define MVFS_LOCK_SELECT(pool, val, func, lockpp) \
    (*(lockpp) = func((pool), (val)))
/*
 * Mapping macros for hash-table mvfs_lock pools, for use as the func
 * argument of MVFS_LOCK_SELECT().
 *   HASH_MVFS_LOCK_PER_CHAIN - lock number hash_val (one lock per chain)
 *   HASH_MVFS_LOCK_PER_GROUP - lock number hash_val modulo the pool's
 *                              lock count (chains share locks)
 *   HASH_MVFS_LOCK_MAP       - the mapping in use; PER_GROUP here
 *
 * HASH_MVFS_LOCK_RATIO selects the pool size that
 * HASH_MVFS_LOCK_SET_POOLSIZE stores:
 *   < 0  : a fixed pool of -(ratio) locks (default -2: 2 locks in total)
 *   == 0 : a single lock
 *   == 1 : one lock per hash chain (pool size = hash size)
 *   > 1  : hash_size / ratio + 1 locks
 * A platform may define HASH_MVFS_LOCK_RATIO before this point to
 * override the default.
 */
#define HASH_MVFS_LOCK_PER_CHAIN(pool, hash_val) \
    (&(pool)->mlp_table[(hash_val)])
#define HASH_MVFS_LOCK_PER_GROUP(pool, hash_val) \
    (&(pool)->mlp_table[(hash_val) % (pool)->mlp_count])
#define HASH_MVFS_LOCK_MAP HASH_MVFS_LOCK_PER_GROUP
#ifndef HASH_MVFS_LOCK_RATIO
#define HASH_MVFS_LOCK_RATIO -2 /* default: 2 locks in total */
#endif
#if HASH_MVFS_LOCK_RATIO < 0
#define HASH_MVFS_LOCK_SET_POOLSIZE(mlp_poolsize, hash_size) { \
    (mlp_poolsize) = -(HASH_MVFS_LOCK_RATIO); \
}
#elif HASH_MVFS_LOCK_RATIO == 0
#define HASH_MVFS_LOCK_SET_POOLSIZE(mlp_poolsize, hash_size) { \
    (mlp_poolsize) = 1; \
}
#elif HASH_MVFS_LOCK_RATIO == 1
#define HASH_MVFS_LOCK_SET_POOLSIZE(mlp_poolsize, hash_size) { \
    (mlp_poolsize) = (hash_size); \
}
#else
#define HASH_MVFS_LOCK_SET_POOLSIZE(mlp_poolsize, hash_size) { \
    (mlp_poolsize) = ((hash_size)/HASH_MVFS_LOCK_RATIO) + 1; \
}
#endif
/*
 * Audit / cleartext helper macros.  Each takes a pointer to an mnode
 * (mnp); MFS_REMEMBER_CHOID_BH also takes a pointer to the thread
 * state (mth).
 *
 * The original note for this group: a file needs copy-on-write (a
 * "choid") when it is an "audited file" and has not yet been choided
 * under the current build handle.
 *
 * MFS_FSTAT_AUDITED     - 1 when the cached attribute mode has the
 *                         TBS_FMODE_AUDITED_OBJ bit set, else 0.
 * MFS_REMEMBER_CHOID_BH - when auditing is on for the thread, mark the
 *                         mnode as choided under audit and record the
 *                         thread's build handle and audit sequence
 *                         number; otherwise do nothing.
 * MFS_CLRTEXT_RO        - the mnode's cleartext "isvob" bit.
 */
#define MFS_FSTAT_AUDITED(mnp) \
    (((mnp)->mn_vob.attr.fstat.mode & TBS_FMODE_AUDITED_OBJ) != 0)
/*
 * Written as "if (audit off) {} else { record }" rather than a bare
 * "if (audit on) { record }" so that an "else" following the macro in
 * the caller's code cannot attach to the macro's own if.
 */
#define MFS_REMEMBER_CHOID_BH(mth, mnp) \
    if (!((mth)->thr_auditon)) { \
        /* Auditing is off: nothing to remember */ \
    } else { \
        (mnp)->mn_vob.choid_audited = 1; \
        (mnp)->mn_vob.choid_bh = (mth)->thr_bh; \
        (mnp)->mn_vob.choid_bh_seq = (mth)->thr_aud_seq; \
    }
#define MFS_CLRTEXT_RO(mnp) \
    ((mnp)->mn_vob.cleartext.isvob)
/* A vobstamp structure holds a last VOB update time (LVUT) plus its
 * update timestamp. All vobstamps in all view nodes, as well as all
 * vobstamp_next fields, are accessed under the mvfs_vobstamp_lock.
 */
extern SPLOCK_T mvfs_vobstamp_lock;
/* One LVUT record: the time itself, the VOB it belongs to, and an expiry. */
struct mvfs_vobstamp {
struct timeval lvut; /* the LVUT itself */
tbs_uuid_t vobuuid; /* VOB uuid for this VOB */
time_t valid_thru; /* how long is this valid? */
};
#define MVFS_NUM_VOB_STAMPS 20 /* FIXME: select a size? */
/* Structure for MVFS statistics. The statistics are maintained on a per-CPU
 * basis. They are allocated and initialized in mvfs_misc_init when the
 * viewroot is mounted. Macros for initializing and manipulating the values
 * are also defined in this header file. Preemption disabling is used to ensure
 * data integrity. MVFS makes RPC calls to various view_servers and to the
 * albd server. We keep stats only on the view RPCs for performance evaluation.
 * The ALBD RPCs are not considered an area of concern for performance.
 */
typedef struct mvfs_statistics_data {
tbs_boolean_t zero_me; /* set if stats should be zeroed */
struct mfs_mnstat mfs_mnstat; /* mnode statistics */
struct mfs_dncstat mfs_dncstat; /* name cache stats */
struct mfs_rvcstat mfs_rvcstat; /* RVC stats separate from DNLC */
struct mfs_clntstat mfs_clntstat; /* RPC stats */
struct mfs_acstat mfs_acstat; /* Attr cache */
struct mfs_rlstat mfs_rlstat; /* Readlink cache */
struct mfs_clearstat mfs_clearstat; /* Cleartext operations */
struct mfs_austat mfs_austat; /* Audit operations */
/* Per-operation counters, indexed by operation number. */
MVFS_STAT_CNT_T mfs_vnopcnt[MFS_VNOPCNT]; /* Vnode op calls counted */
MVFS_STAT_CNT_T mfs_vfsopcnt[MFS_VFSOPCNT];/* VFS op calls counted */
MVFS_STAT_CNT_T mfs_viewopcnt[VIEW_NUM_PROCS]; /* RPC ops to viewserver */
timestruc_t mfs_viewoptime[VIEW_NUM_PROCS];/* time for the RPCs */
struct mfs_rpchist mfs_viewophist; /* Histogram of the RPC times */
} mvfs_stats_data_t;
/*
 * Per-view statistics structure. These are maintained on a per-view basis and
 * not on a per-CPU basis, so a lock is needed to protect them. The mnode and
 * vnode structs could be allocated from pageable memory. Taking this
 * pvstatlock, which is a spin lock, and then accessing paged memory could lead
 * to problems. So, wherever this lock is used, care should be taken not to
 * touch any paged memory after taking this lock. In some of the macros below
 * where this lock is used, the per-view stat pointer is read into a local
 * variable before taking the lock, and that local is used to access the stats
 * after the lock is taken.
 */
struct mvfs_pvstat {
SPLOCK_T mvfs_pvstatlock; /* spin lock protecting the counters below */
struct mfs_clntstat clntstat; /* Client Statistics */
struct mfs_acstat acstat; /* Attribute Cache stats */
struct mfs_dncstat dncstat; /* DNC stats */
};
/* Histogram of RPC delays. This is used to initialize the corresponding
 * structure (mfs_viewophist) in mvfs_statistics_data.
 */
extern struct mfs_rpchist mvfs_init_viewophist;
/* View objects - vnodes that describe a view itself.
 * This is the 'view' member of the mn_un union in struct mfs_mnode. */
struct mfs_viewnode {
view_handle_t vh; /* View handle */
/* One-bit flags; the nine flags plus the 23-bit pad add up to 32 bits. */
u_int hm : 1; /* History mode flag */
u_int nocfg : 1; /* No config spec error printed */
u_int needs_recovery : 1; /* Needs recovery printed */
u_int needs_reformat : 1; /* Needs reformat printed */
u_int always_cover : 1; /* Always make loopback vnodes */
u_int zombie_view : 1; /* View stg error printed */
u_int windows_view : 1; /* View on Windows NT or 2k */
u_int lfs_view : 1; /* LFS view */
u_int downrev_view : 1; /* view at prev release */
u_int pad : 23; /* unused bits; shrink this when adding a flag above */
mfs_pn_char_t *viewname; /* View tag name */
struct mfs_svr svr; /* View server info */
u_int id; /* View index in /view (for making inums, etc) */
u_int exid; /* View export ID (for making xfid's) */
MVFS_USER_ID cuid; /* Creator's uid or uid/SID for nt view */
MVFS_GROUP_ID cgid; /* Creator's gid or gid/SID for nt view */
timestruc_t ctime; /* Created time */
time_t usedtime; /* Last used time (for cleanup) */
/* NOTE(review): the comment on struct mvfs_vobstamp says vobstamps and
 * vobstamp_next are accessed under the global mvfs_vobstamp_lock; confirm
 * whether this per-view lock is still used. */
LOCK_T stamplock; /* Lock on vobstamps */
struct mvfs_vobstamp
vobstamps[MVFS_NUM_VOB_STAMPS]; /* VOB update timestamps */
int vobstamp_next; /* round-robin replacement ptr; presumably an index into vobstamps[] */
time_t rpctime; /* Last RPC time (for cleanup) */
void *mdep_datap; /* Machine dep data */
struct mvfs_pvstat
*pvstat; /* Per-view statistics */
};
/* One entry of a ramdir table (see the 'ents' member of struct
 * mfs_ramdirnode): a name paired with a vnode pointer. */
struct mfs_ramdirent {
mfs_pn_char_t *nm; /* Ptr to name */
VNODE_T *vp; /* Ptr to vnode */
};
typedef struct mfs_ramdirent mfs_ramdirent_t;
/* Ramdir node data: a table of mfs_ramdirent_t entries plus an export
 * table and directory timestamps. This is the 'ramdir' member of the mn_un
 * union in struct mfs_mnode, where it is labelled "viewdir". */
struct mfs_ramdirnode {
int max; /* Max cnt of elements allowed */
int hwm; /* High water mark for table */
mfs_ramdirent_t *ents; /* Ptr to table of entries */
timestruc_t atime; /* Accessed time */
timestruc_t mtime; /* Last modified time */
timestruc_t ctime; /* Created time */
int export_hwm; /* High water mark for export table */
int *export_ents; /* Export table mappings */
int lnk_cnt; /* Current # of directory entries */
};
/* Growth increment, in entries, for the export table (export_ents). */
#define MVFS_EXPORT_CHUNK 16 /* grow export table in chunks of 16 */
/* MFS objects - have different extra info based on 'kind' of MFS object.
   Loopback, vobroot vnodes only have the header (no extra info).
   The common header comes first, followed by a union holding the
   kind-specific data. */
struct mfs_mnode {
struct mfs_mnhdr mn_hdr; /* MUST BE FIRST! */
union mn_u {
struct mfs_vobnode vob; /* VOB object data */
struct mfs_viewnode view; /* view object data */
struct mfs_ramdirnode ramdir; /* viewdir */
} mn_un;
};
typedef struct mfs_mnode mfs_mnode_t;
/* Shorthands past the union, so code can write mnp->mn_vob instead of
   mnp->mn_un.vob. Cdview's use the
   view structure, Other object classes just use the hdr. */
#define mn_vob mn_un.vob
#define mn_view mn_un.view
#define mn_ramdir mn_un.ramdir
/*
 * Hash lists for mnodes. All in-use mnodes reside on one of 3 hashed mnode
 * lists: mvfs_vobhash, mvfs_cvphash, mvfs_otherhash. Each of these lists is
 * made up of n mfs_mnhash_slot_t's (where n is the hash size for that list).
 *
 * NOTE(review): the inner struct is named mn_hdr and carries only next/prev;
 * presumably this mirrors the leading link fields of struct mfs_mnhdr so a
 * slot can serve as a list head -- confirm against mfs_mnhdr.
 */
typedef struct mfs_mnhash_slot {
struct {
mfs_mnode_t *next; /* forward link of the hash chain */
mfs_mnode_t *prev; /* backward link of the hash chain */
} mn_hdr;
} mfs_mnhash_slot_t;
/*
 * VOB freelist. VOB mnodes are cached on the freelist when no longer in use.
 * The VOB freelist is made up of n mvfs_vobfreehash_slot_t's (where n is the
 * mvfs_vobhashsize).
 *
 * Each slot carries the same next/prev pair as mfs_mnhash_slot_t, followed
 * by a second pair of links for the freelist.
 */
typedef struct mvfs_vobfreehash_slot {
struct {
mfs_mnode_t *next; /* hash-chain links, as in mfs_mnhash_slot_t */
mfs_mnode_t *prev;
mfs_mnode_t *free_next; /* freelist forward link */
mfs_mnode_t *free_prev; /* freelist backward link */
} mn_hdr;
} mvfs_vobfreehash_slot_t;
/*
 * The mnode destroy list. See mvfs_mnode.c for full details on this LRU.
 * The link layout is the same as mvfs_vobfreehash_slot_t; the destroy list
 * is chained through the free_next/free_prev pair.
 */
typedef struct mvfs_mndestroylist {
struct {
mfs_mnode_t *next; /* same position as the hash-chain links in the slot types */
mfs_mnode_t *prev;
mfs_mnode_t *free_next; /* destroy uses the freelist links */
mfs_mnode_t *free_prev; /* destroy uses the freelist links */
} mn_hdr;
} mvfs_mndestroylist_t;
/*
* Audit file structure.
* There is one of these for each active auditfile.
* All processes in the same audit will reference the same auditfile structure,
* sharing one buffer, so that the auditfile entries are ordered, and properly
* appended in a sequential manner to the file.
*
* Lock Ordering: The global mfs_aflock is taken before the individual
* auditfile's lock.
*/
struct mfs_auditfile {
/*
* Following are protected by global mfs_aflock
*/
struct mfs_auditfile *next; /* List of auditfile structs */
struct mfs_auditfile *prev;
/*
* The following 3 flags are locked by the auditfile's own lock
*
* The 'obsolete' field is a little funky. It is used
* to tell other processes (long running daemons spawned
* under an audit) that they should close out their reference
* to this audit file, and stop auditing. Even though it
* is modified and readers hold no lock, this is OK since
* a process will pick up the flag on the next attempted
* audit write.
*/
u_int obsolete; /* Audit stopped, cleanup reference */
u_int destroy; /* destroy flag, 1 marked for destroy */
u_int refcnt; /* Refcnt of procs using file */
LOCK_T lock; /* Lock for the structure */
/*
* The following are protected by the global mfs_aflock,
* and in addition can be read without any locks held
* as long as there is a reference count on the auditfile
* struct.
*/
CLR_VNODE_T *cvp; /* Vnode of audit file */
mfs_pn_char_t *path; /* Audit output file pathname */
mfs_pn_char_t *upath; /* Audit output file pathname in uspace */
CRED_T *cred; /* Credentials from setaudit */