Skip to content

Commit 93848a9

Browse files
Christoph Hellwig, Ben Myers
authored and committed
xfs: add version 3 inode format with CRCs
Add a new inode version with a larger core. The primary objective is to allow for a crc of the inode, and location information (uuid and ino) to verify it was written in the right place. We also extend it by: a creation time (for Samba); a changecount (for NFSv4); a flush sequence (in LSN format for recovery); an additional inode flags field; and some additional padding. These additional fields are not implemented yet, but already laid out in the structure. [dchinner@redhat.com] Added LSN and flags field, some factoring and rework to capture all the necessary information in the crc calculation. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Ben Myers <bpm@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
1 parent 3fe58f3 commit 93848a9

File tree

8 files changed

+254
-77
lines changed

8 files changed

+254
-77
lines changed

fs/xfs/xfs_buf_item.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ extern kmem_zone_t *xfs_buf_item_zone;
4848
#define XFS_BLF_AGF_BUF (1<<6)
4949
#define XFS_BLF_AGFL_BUF (1<<7)
5050
#define XFS_BLF_AGI_BUF (1<<8)
51+
#define XFS_BLF_DINO_BUF (1<<9)
5152

5253
#define XFS_BLF_TYPE_MASK \
5354
(XFS_BLF_UDQUOT_BUF | \
@@ -56,7 +57,8 @@ extern kmem_zone_t *xfs_buf_item_zone;
5657
XFS_BLF_BTREE_BUF | \
5758
XFS_BLF_AGF_BUF | \
5859
XFS_BLF_AGFL_BUF | \
59-
XFS_BLF_AGI_BUF)
60+
XFS_BLF_AGI_BUF | \
61+
XFS_BLF_DINO_BUF)
6062

6163
#define XFS_BLF_CHUNK 128
6264
#define XFS_BLF_SHIFT 7

fs/xfs/xfs_dinode.h

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
#define __XFS_DINODE_H__
2020

2121
#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
22-
#define XFS_DINODE_GOOD_VERSION(v) (((v) == 1 || (v) == 2))
22+
#define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3)
2323

2424
typedef struct xfs_timestamp {
2525
__be32 t_sec; /* timestamp seconds */
@@ -70,10 +70,35 @@ typedef struct xfs_dinode {
7070

7171
/* di_next_unlinked is the only non-core field in the old dinode */
7272
__be32 di_next_unlinked;/* agi unlinked list ptr */
73-
} __attribute__((packed)) xfs_dinode_t;
73+
74+
/* start of the extended dinode, writable fields */
75+
__le32 di_crc; /* CRC of the inode */
76+
__be64 di_changecount; /* number of attribute changes */
77+
__be64 di_lsn; /* flush sequence */
78+
__be64 di_flags2; /* more random flags */
79+
__u8 di_pad2[16]; /* more padding for future expansion */
80+
81+
/* fields only written to during inode creation */
82+
xfs_timestamp_t di_crtime; /* time created */
83+
__be64 di_ino; /* inode number */
84+
uuid_t di_uuid; /* UUID of the filesystem */
85+
86+
/* structure must be padded to 64 bit alignment */
87+
} xfs_dinode_t;
7488

7589
#define DI_MAX_FLUSH 0xffff
7690

91+
/*
92+
* Size of the core inode on disk. Version 1 and 2 inodes have
93+
* the same size, but version 3 has grown a few additional fields.
94+
*/
95+
static inline uint xfs_dinode_size(int version)
96+
{
97+
if (version == 3)
98+
return sizeof(struct xfs_dinode);
99+
return offsetof(struct xfs_dinode, di_crc);
100+
}
101+
77102
/*
78103
* The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
79104
* Since the pathconf interface is signed, we use 2^31 - 1 instead.
@@ -105,7 +130,7 @@ typedef enum xfs_dinode_fmt {
105130
* Inode size for given fs.
106131
*/
107132
#define XFS_LITINO(mp, version) \
108-
((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode)))
133+
((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version)))
109134

110135
#define XFS_BROOT_SIZE_ADJ(ip) \
111136
(XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t))
@@ -133,7 +158,7 @@ typedef enum xfs_dinode_fmt {
133158
* Return pointers to the data or attribute forks.
134159
*/
135160
#define XFS_DFORK_DPTR(dip) \
136-
((char *)(dip) + sizeof(struct xfs_dinode))
161+
((char *)dip + xfs_dinode_size(dip->di_version))
137162
#define XFS_DFORK_APTR(dip) \
138163
(XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip))
139164
#define XFS_DFORK_PTR(dip,w) \

fs/xfs/xfs_ialloc.c

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ xfs_ialloc_inode_init(
167167
int version;
168168
int i, j;
169169
xfs_daddr_t d;
170+
xfs_ino_t ino = 0;
170171

171172
/*
172173
* Loop over the new block(s), filling in the inodes.
@@ -185,13 +186,29 @@ xfs_ialloc_inode_init(
185186
}
186187

187188
/*
188-
* Figure out what version number to use in the inodes we create.
189-
* If the superblock version has caught up to the one that supports
190-
* the new inode format, then use the new inode version. Otherwise
191-
* use the old version so that old kernels will continue to be
192-
* able to use the file system.
189+
* Figure out what version number to use in the inodes we create. If
190+
* the superblock version has caught up to the one that supports the new
191+
* inode format, then use the new inode version. Otherwise use the old
192+
* version so that old kernels will continue to be able to use the file
193+
* system.
194+
*
195+
* For v3 inodes, we also need to write the inode number into the inode,
196+
* so calculate the first inode number of the chunk here as
197+
* XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not
198+
* across multiple filesystem blocks (such as a cluster) and so cannot
199+
* be used in the cluster buffer loop below.
200+
*
201+
* Further, because we are writing the inode directly into the buffer
202+
* and calculating a CRC on the entire inode, we have to log the entire
203+
* inode so that the entire range the CRC covers is present in the log.
204+
* That means for v3 inodes we log the entire buffer rather than just the
205+
* inode cores.
193206
*/
194-
if (xfs_sb_version_hasnlink(&mp->m_sb))
207+
if (xfs_sb_version_hascrc(&mp->m_sb)) {
208+
version = 3;
209+
ino = XFS_AGINO_TO_INO(mp, agno,
210+
XFS_OFFBNO_TO_AGINO(mp, agbno, 0));
211+
} else if (xfs_sb_version_hasnlink(&mp->m_sb))
195212
version = 2;
196213
else
197214
version = 1;
@@ -214,17 +231,32 @@ xfs_ialloc_inode_init(
214231
* individual transactions causing a lot of log traffic.
215232
*/
216233
fbuf->b_ops = &xfs_inode_buf_ops;
217-
xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
234+
xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
218235
for (i = 0; i < ninodes; i++) {
219236
int ioffset = i << mp->m_sb.sb_inodelog;
220-
uint isize = sizeof(struct xfs_dinode);
237+
uint isize = xfs_dinode_size(version);
221238

222239
free = xfs_make_iptr(mp, fbuf, i);
223240
free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
224241
free->di_version = version;
225242
free->di_gen = cpu_to_be32(gen);
226243
free->di_next_unlinked = cpu_to_be32(NULLAGINO);
227-
xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
244+
245+
if (version == 3) {
246+
free->di_ino = cpu_to_be64(ino);
247+
ino++;
248+
uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
249+
xfs_dinode_calc_crc(mp, free);
250+
} else {
251+
/* just log the inode core */
252+
xfs_trans_log_buf(tp, fbuf, ioffset,
253+
ioffset + isize - 1);
254+
}
255+
}
256+
if (version == 3) {
257+
/* need to log the entire buffer */
258+
xfs_trans_log_buf(tp, fbuf, 0,
259+
BBTOB(fbuf->b_length) - 1);
228260
}
229261
xfs_trans_inode_alloc_buf(tp, fbuf);
230262
}

0 commit comments

Comments
 (0)