Skip to content

Commit

Permalink
Adjust the dspace reported from metaslab_class such that we look full…
Browse files Browse the repository at this point in the history
… when

one rotor vector category (SSD or HDD) becomes full.  This is because large
content destined for the HDD cannot spill onto the SSD, and we do not want
small content that belongs on the SSD to spill onto the HDD.
  • Loading branch information
inkdot7 committed Sep 1, 2016
1 parent ea1fe0d commit 40b1210
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 29 deletions.
2 changes: 1 addition & 1 deletion include/sys/metaslab.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ void metaslab_class_histogram_verify(metaslab_class_t *);
uint64_t metaslab_class_fragmentation(metaslab_class_t *);
uint64_t metaslab_class_expandable_space(metaslab_class_t *);

void metaslab_class_space_update(metaslab_class_t *, int64_t, int64_t,
void metaslab_class_space_update(metaslab_class_t *, int, int64_t, int64_t,
int64_t, int64_t);
uint64_t metaslab_class_get_alloc(metaslab_class_t *);
uint64_t metaslab_class_get_space(metaslab_class_t *);
Expand Down
8 changes: 4 additions & 4 deletions include/sys/metaslab_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ struct metaslab_class {
uint64_t mc_aliquotv[METASLAB_CLASS_ROTORS];
int mc_max_nrot; /* highest rotor with member */
uint64_t mc_alloc_groups; /* # of allocatable groups */
uint64_t mc_alloc; /* total allocated space */
uint64_t mc_deferred; /* total deferred frees */
uint64_t mc_space; /* total space (alloc + free) */
uint64_t mc_dspace; /* total deflated space */
uint64_t mc_allocv[METASLAB_CLASS_ROTORS]; /* total allocated space */
uint64_t mc_deferredv[METASLAB_CLASS_ROTORS]; /* total deferred frees */
uint64_t mc_spacev[METASLAB_CLASS_ROTORS]; /* total space (alloc + free) */
uint64_t mc_dspacev[METASLAB_CLASS_ROTORS]; /* total deflated space */
uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE];
};

Expand Down
2 changes: 1 addition & 1 deletion include/sys/vdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ extern void vdev_propagate_state(vdev_t *vd);
extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state,
vdev_aux_t aux);

extern void vdev_space_update(vdev_t *vd,
extern void vdev_space_update(vdev_t *vd, int nrot,
int64_t alloc_delta, int64_t defer_delta, int64_t space_delta);

extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
Expand Down
8 changes: 4 additions & 4 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2134,7 +2134,7 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);

vdev_space_update(dev->l2ad_vdev,
vdev_space_update(dev->l2ad_vdev, -1,
-l2hdr->b_asize, 0, 0);

(void) refcount_remove_many(&dev->l2ad_alloc,
Expand Down Expand Up @@ -6153,7 +6153,7 @@ l2arc_write_done(zio_t *zio)
kmem_cache_free(hdr_l2only_cache, head);
mutex_exit(&dev->l2ad_mtx);

vdev_space_update(dev->l2ad_vdev, -bytes_dropped, 0, 0);
vdev_space_update(dev->l2ad_vdev, -1, -bytes_dropped, 0, 0);

l2arc_do_free_on_write();

Expand Down Expand Up @@ -6707,7 +6707,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize);
ARCSTAT_INCR(arcstat_l2_size, write_sz);
ARCSTAT_INCR(arcstat_l2_asize, stats_size);
vdev_space_update(dev->l2ad_vdev, stats_size, 0, 0);
vdev_space_update(dev->l2ad_vdev, -1, stats_size, 0, 0);

/*
* Bump device hand to the device start if it is approaching the end.
Expand Down Expand Up @@ -7054,7 +7054,7 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
list_create(&adddev->l2ad_buflist, sizeof (arc_buf_hdr_t),
offsetof(arc_buf_hdr_t, b_l2hdr.b_l2node));

vdev_space_update(vd, 0, 0, adddev->l2ad_end - adddev->l2ad_hand);
vdev_space_update(vd, -1, 0, 0, adddev->l2ad_end - adddev->l2ad_hand);
refcount_create(&adddev->l2ad_alloc);

/*
Expand Down
117 changes: 100 additions & 17 deletions module/zfs/metaslab.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <sys/zio.h>
#include <sys/spa_impl.h>
#include <sys/zfeature.h>
#include <linux/kernel.h>

#define WITH_DF_BLOCK_ALLOCATOR

Expand Down Expand Up @@ -218,10 +219,12 @@ metaslab_class_destroy(metaslab_class_t *mc)

for (i = 0; i < METASLAB_CLASS_ROTORS; i++)
ASSERT(mc->mc_rotorv[i] == NULL);
ASSERT(mc->mc_alloc == 0);
ASSERT(mc->mc_deferred == 0);
ASSERT(mc->mc_space == 0);
ASSERT(mc->mc_dspace == 0);
for (i = 0; i < METASLAB_CLASS_ROTORS; i++) {
ASSERT(mc->mc_allocv[i] == 0);
ASSERT(mc->mc_deferredv[i] == 0);
ASSERT(mc->mc_spacev[i] == 0);
ASSERT(mc->mc_dspacev[i] == 0);
}

kmem_free(mc, sizeof (metaslab_class_t));
}
Expand Down Expand Up @@ -256,37 +259,108 @@ metaslab_class_validate(metaslab_class_t *mc)
}

/*
 * Apply signed deltas to a metaslab class's space accounting counters
 * (allocated, deferred, space, deflated space) using atomic_add_64().
 *
 * NOTE(review): this listing looks like a diff rendered without +/-
 * markers, so pre-change and post-change lines are interleaved: the
 * signature is shown both without and with the 'nrot' parameter, and the
 * updates of the scalar mc_* counters are shown next to the updates of the
 * per-rotor mc_*v[nrot] counters.  Confirm against the real file which
 * lines are current.
 */
void
metaslab_class_space_update(metaslab_class_t *mc, int64_t alloc_delta,
metaslab_class_space_update(metaslab_class_t *mc, int nrot,
int64_t alloc_delta,
int64_t defer_delta, int64_t space_delta, int64_t dspace_delta)
{
atomic_add_64(&mc->mc_alloc, alloc_delta);
atomic_add_64(&mc->mc_deferred, defer_delta);
atomic_add_64(&mc->mc_space, space_delta);
atomic_add_64(&mc->mc_dspace, dspace_delta);
/* nrot indexes the per-rotor arrays, so it must be a valid rotor index. */
ASSERT(nrot >= 0 && nrot < METASLAB_CLASS_ROTORS);
atomic_add_64(&mc->mc_allocv[nrot], alloc_delta);
atomic_add_64(&mc->mc_deferredv[nrot], defer_delta);
atomic_add_64(&mc->mc_spacev[nrot], space_delta);
atomic_add_64(&mc->mc_dspacev[nrot], dspace_delta);
}

/*
 * Return the allocated space of the class.  The loop sums mc_allocv[]
 * over all METASLAB_CLASS_ROTORS entries.
 *
 * NOTE(review): the first return below reads the scalar mc_alloc and
 * precedes the summation; it looks like the pre-change line of a diff
 * rendered without +/- markers -- confirm against the real file.
 */
uint64_t
metaslab_class_get_alloc(metaslab_class_t *mc)
{
return (mc->mc_alloc);
uint64_t total_alloc = 0;
int i;

for (i = 0; i < METASLAB_CLASS_ROTORS; i++)
total_alloc += mc->mc_allocv[i];

return total_alloc;
}

/*
 * Return the deferred-free space of the class.  The loop sums
 * mc_deferredv[] over all METASLAB_CLASS_ROTORS entries.
 *
 * NOTE(review): the first return below reads the scalar mc_deferred and
 * precedes the summation; it looks like the pre-change line of a diff
 * rendered without +/- markers -- confirm against the real file.
 */
uint64_t
metaslab_class_get_deferred(metaslab_class_t *mc)
{
return (mc->mc_deferred);
uint64_t total_deferred = 0;
int i;

for (i = 0; i < METASLAB_CLASS_ROTORS; i++)
total_deferred += mc->mc_deferredv[i];

return total_deferred;
}

/*
 * Return the total space (allocated + free) of the class.  The loop sums
 * mc_spacev[] over all METASLAB_CLASS_ROTORS entries.
 *
 * NOTE(review): the first return below reads the scalar mc_space and
 * precedes the summation; it looks like the pre-change line of a diff
 * rendered without +/- markers -- confirm against the real file.
 */
uint64_t
metaslab_class_get_space(metaslab_class_t *mc)
{
return (mc->mc_space);
uint64_t total_space = 0;
int i;

for (i = 0; i < METASLAB_CLASS_ROTORS; i++)
total_space += mc->mc_spacev[i];

return total_space;
}

/*
 * Return the space of the class as used for "is the pool full" decisions:
 * per rotor, the deflated space when spa_deflate() is true, otherwise the
 * raw space.  The total is then adjusted according to the fill ratio of
 * the most-filled rotor (see the comments in the body).
 *
 * NOTE(review): the first return below uses the scalar mc_dspace/mc_space
 * fields and precedes all of the per-rotor logic; it looks like the
 * pre-change line of a diff rendered without +/- markers -- confirm
 * against the real file.
 *
 * NOTE(review): "1000 * mc_allocv[i]", "dspace * ratio" and the products
 * in the final blend are all evaluated in uint64_t, so they wrap silently
 * once an operand exceeds roughly UINT64_MAX / 1000 (or / 500 for the
 * blend).
 */
uint64_t
metaslab_class_get_dspace(metaslab_class_t *mc)
{
return (spa_deflate(mc->mc_spa) ? mc->mc_dspace : mc->mc_space);
uint64_t total_dspace_adj = 0;
uint64_t total_dspace = 0;
int i;
uint64_t max_ratio = 1; /* 1 to avoid division by 0 */

/*
 * When we have a vector of rotors, we (artificially) adjust the total
 * dspace returned to reflect the fill fraction of the most-filled
 * rotor.  This is because the dspace value returned is used to
 * determine whether new writes can be made to the pool, and we do not
 * want writes to continue once one of the rotors has become full.
 *
 * If only one component of the vector is in use, the usual value is
 * returned.
 */

/* Fill ratios are expressed in per-mille (0..1000) for the moment. */

/*
 * Find the highest fill ratio over all rotors.  The "+ 1" in the
 * divisor avoids a division by zero for a rotor with no space.
 */
for (i = 0; i < METASLAB_CLASS_ROTORS; i++) {
uint64_t ratio =
(1000 * mc->mc_allocv[i]) / (mc->mc_spacev[i] + 1);
if (ratio > max_ratio)
max_ratio = ratio;
}
/*
 * Accumulate the plain total and the adjusted total, where each
 * rotor's dspace is scaled by its own fill ratio relative to max_ratio.
 */
for (i = 0; i < METASLAB_CLASS_ROTORS; i++) {
uint64_t dspace =
(spa_deflate(mc->mc_spa) ?
mc->mc_dspacev[i] : mc->mc_spacev[i]);
uint64_t ratio =
(1000 * mc->mc_allocv[i]) / (mc->mc_spacev[i] + 1);
total_dspace_adj += (dspace * ratio) / max_ratio;
total_dspace += dspace;
}

/*
 * When max_ratio is small (there is a *lot* of free space), the
 * adjusted value fluctuates considerably.  That does not matter for
 * the fullness check, since only the values with little free space
 * matter there.
 *
 * However, this value is also what the user sees in e.g. df(1), so it
 * should be reasonably accurate.  Below 25 % fill we return the normal
 * value and above 75 % the adjusted one.  In between we return a
 * linear blend of the two.
 */

/* Below 250 per-mille (25 %): unadjusted total. */
if (max_ratio < 250)
return total_dspace;
/* Above 750 per-mille (75 %): fully adjusted total. */
if (max_ratio > 750)
return total_dspace_adj;

/* In between: the two weights always add up to 500. */
return (total_dspace_adj * (max_ratio - 250) +
total_dspace * (750 - max_ratio)) / 500;
}

void
Expand Down Expand Up @@ -1354,7 +1428,9 @@ metaslab_fini(metaslab_t *msp)
mutex_enter(&msp->ms_lock);

VERIFY(msp->ms_group == NULL);
vdev_space_update(mg->mg_vd, -space_map_allocated(msp->ms_sm),
ASSERT(mg->mg_nrot != -1);
vdev_space_update(mg->mg_vd, mg->mg_nrot,
-space_map_allocated(msp->ms_sm),
0, -msp->ms_size);
space_map_close(msp->ms_sm);

Expand Down Expand Up @@ -2014,7 +2090,12 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
&msp->ms_lock);
}

vdev_space_update(vd, 0, 0, msp->ms_size);
/* Decide which rotor of vector to place in. */
mg->mg_nrot = 0;
if (!mg->mg_vd->vdev_nonrot)
mg->mg_nrot = 1;

vdev_space_update(vd, mg->mg_nrot, 0, 0, msp->ms_size);
}

freed_tree = &msp->ms_freetree[TXG_CLEAN(txg) & TXG_MASK];
Expand All @@ -2024,7 +2105,8 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
defer_delta = range_tree_space(*freed_tree) -
range_tree_space(*defer_tree);

vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0);
vdev_space_update(vd, mg->mg_nrot,
alloc_delta + defer_delta, defer_delta, 0);

ASSERT0(range_tree_space(msp->ms_alloctree[txg & TXG_MASK]));
ASSERT0(range_tree_space(msp->ms_freetree[txg & TXG_MASK]));
Expand Down Expand Up @@ -2413,7 +2495,8 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
metaslab_bias_enabled) {
vdev_stat_t *vs = &vd->vdev_stat;
int64_t vs_free = vs->vs_space - vs->vs_alloc;
int64_t mc_free = mc->mc_space - mc->mc_alloc;
int64_t mc_free = mc->mc_spacev[mg->mg_nrot] -
mc->mc_allocv[mg->mg_nrot];
int64_t ratio;

/*
Expand Down
5 changes: 3 additions & 2 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -3098,7 +3098,8 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
* and the root vdev.
*/
void
vdev_space_update(vdev_t *vd, int64_t alloc_delta, int64_t defer_delta,
vdev_space_update(vdev_t *vd, int nrot,
int64_t alloc_delta, int64_t defer_delta,
int64_t space_delta)
{
int64_t dspace_delta = space_delta;
Expand Down Expand Up @@ -3138,7 +3139,7 @@ vdev_space_update(vdev_t *vd, int64_t alloc_delta, int64_t defer_delta,
ASSERT(rvd == vd->vdev_parent);
ASSERT(vd->vdev_ms_count != 0);

metaslab_class_space_update(mc,
metaslab_class_space_update(mc, nrot,
alloc_delta, defer_delta, space_delta, dspace_delta);
}
}
Expand Down

0 comments on commit 40b1210

Please sign in to comment.