Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dnode_sync panic #519

Open
lundman opened this issue Jul 6, 2016 · 3 comments
Open

dnode_sync panic #519

lundman opened this issue Jul 6, 2016 · 3 comments

Comments

@lundman
Copy link
Contributor

lundman commented Jul 6, 2016

Master has had several panics — MAF-related problems.

One example is

0xffffff9102613a50 : 0xffffff7f96cc6f50 net.lundman.spl : _spl_mutex_enter + 0x10
0xffffff9102613b50 : 0xffffff7f96d47d5c net.lundman.zfs : _dnode_sync + 0x53c
0xffffff9102613bc0 : 0xffffff7f96d2e321 net.lundman.zfs : _dmu_objset_sync_dnodes + 0x91
0xffffff9102613c00 : 0xffffff7f96d2e036 net.lundman.zfs : _dmu_objset_sync + 0x3d6
0xffffff9102613d70 : 0xffffff7f96d62753 net.lundman.zfs : _dsl_pool_sync_mos + 0x43
0xffffff9102613da0 : 0xffffff7f96d62534 net.lundman.zfs : _dsl_pool_sync + 0x3a4
0xffffff9102613e20 : 0xffffff7f96d8fc21 net.lundman.zfs : _spa_sync + 0x391

frame #1: 0xffffff7f96d47d5c zfs`dnode_sync(dn=0xffffff90e807e370, tx=0xffffff90bcc4f680) + 1340 at dnode_sync.c:697
   694
   695      if (dn->dn_num_slots > DNODE_MIN_SLOTS) {
   696          dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
-> 697          mutex_enter(&ds->ds_lock);
   698          ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_DNODE] =
   699              B_TRUE;
   700          mutex_exit(&ds->ds_lock);
(lldb) p ds
(dsl_dataset_t *) $1 = 0x0000000000000000
(lldb) p *dn
(dnode_t) $5 = {
  dn_struct_rwlock = {
    rw_lock = ([0] = 2701131776, [1] = 0, [2] = 3135097598, [3] = 3135097598)
    rw_owner = 0x0000000000000000
    rw_readers = 0
    rw_pad = 305419896
  }
  dn_link = {
    list_next = 0xffffff90e8be8d60
    list_prev = 0xffffff90e8dcdb08
  }
  dn_objset = 0xffffff90ad898100
  dn_object = 517
  dn_dbuf = 0xffffff90af982b88
  dn_handle = 0xffffff801bcc2280
  dn_phys = 0xffffff90e8864a00
  dn_type = DMU_OT_DEADLIST
  dn_bonuslen = 320
  dn_bonustype = '3'
  dn_nblkptr = '\x01'
  dn_checksum = '\0'
  dn_compress = '\0'
  dn_nlevels = '\x01'
  dn_indblkshift = '\x0e'
  dn_datablkshift = '\t'
  dn_moved = '\x01'
  dn_datablkszsec = 1
  dn_datablksz = 512
  dn_maxblkid = 0
  dn_next_type = ([0] = '�', [1] = '\0', [2] = '�', [3] = '�')
  dn_num_slots = '�'
  dn_next_nblkptr = ([0] = '\0', [1] = '\0', [2] = '\0', [3] = '\0')
  dn_next_nlevels = ([0] = '\0', [1] = '\0', [2] = '\0', [3] = '\0')
  dn_next_indblkshift = ([0] = '\0', [1] = '\0', [2] = '\0', [3] = '\0')
  dn_next_bonustype = ([0] = '\0', [1] = '\0', [2] = '\0', [3] = '\0')
  dn_rm_spillblk = ([0] = '\0', [1] = '\0', [2] = '\0', [3] = '\0')
  dn_next_bonuslen = ([0] = 0, [1] = 0, [2] = 0, [3] = 0)
  dn_next_blksz = ([0] = 0, [1] = 0, [2] = 0, [3] = 0)
  dn_dbufs_count = 2
  dn_unlisted_l0_blkid = 1
  dn_dirty_link = {
    [0] = {
      list_next = 0x0000000000000000
      list_prev = 0x0000000000000000
    }
    [1] = {
      list_next = 0x0000000000000000
      list_prev = 0x0000000000000000
    }
    [2] = {
      list_next = 0x0000000000000000
      list_prev = 0x0000000000000000
    }
    [3] = {
      list_next = 0x0000000000000000
      list_prev = 0x0000000000000000
    }
  }
  dn_mtx = {
    m_owner = 0x0000000000000000
    m_lock = {
      opaque = ([0] = 0, [1] = 0, [2] = 0, [3] = 4294967295)
    }
  }
  dn_dirty_records = {
    [0] = {
      list_size = 344
      list_offset = 0
      list_head = {
        list_next = 0xffffff90e807e490
        list_prev = 0xffffff90e807e490
      }
    }
    [1] = {
      list_size = 344
      list_offset = 0
      list_head = {
        list_next = 0xffffff90eabb6540
        list_prev = 0xffffff90eaac6580
      }
    }
    [2] = {
      list_size = 344
      list_offset = 0
      list_head = {
        list_next = 0xffffff90e807e4d0
        list_prev = 0xffffff90e807e4d0
      }
    }
    [3] = {
      list_size = 344
      list_offset = 0
      list_head = {
        list_next = 0xffffff90e807e4f0
        list_prev = 0xffffff90e807e4f0
      }
    }
  }
  dn_free_ranges = {
    [0] = 0x0000000000000000
    [1] = 0x0000000000000000
    [2] = 0x0000000000000000
    [3] = 0x0000000000000000
  }
  dn_allocated_txg = 132
  dn_free_txg = 0
  dn_assigned_txg = 0
  dn_notxholds = (pad = 13465141656313252606)
  dn_dirtyctx = DN_DIRTY_SYNC
  dn_dirtyctx_firstset = 0xffffff90ad3cd628 "\xfffffffe\xffffffbb\xffffffdd\xffffffba\xfffffffe\xffffffca\xffffffdd\xffffffba\xffffffce\xfffffffa\xffffffed\xfffffffe\xfffffffc"
  dn_tx_holds = (rc_count = 0)
  dn_holds = (rc_count = 3)
  dn_dbufs_mtx = {
    m_owner = 0x0000000000000000
    m_lock = {
      opaque = ([0] = 0, [1] = 0, [2] = 0, [3] = 4294967295)
    }
  }
  dn_dbufs = {
    avl_root = 0xffffff90b7246560
    avl_compar = 0xffffff7f96d47120 (zfs`dbuf_compare at dnode.c:68)
    avl_offset = 168
    avl_numnodes = 1
    avl_size = 208
  }
  dn_bonus = 0xffffff90b6d509a0
  dn_have_spill = 0
  dn_zio = 0xffffff90ba2b5e08
  dn_oldused = 0
  dn_oldflags = 0
  dn_olduid = 0
  dn_oldgid = 0
  dn_newuid = 0
  dn_newgid = 0
  dn_id_flags = 0
  dn_zfetch = {
    zf_rwlock = {
      rw_lock = ([0] = 2701131776, [1] = 0, [2] = 3135097598, [3] = 3135097598)
      rw_owner = 0x0000000000000000
      rw_readers = 0
      rw_pad = 305419896
    }
    zf_stream = {
      list_size = 72
      list_offset = 56
      list_head = {
        list_next = 0xffffff90e807e620
        list_prev = 0xffffff90e807e620
      }
    }
    zf_dnode = 0xffffff90e807e370
  }
}

@lundman lundman added the bug label Jul 6, 2016
@lundman
Copy link
Contributor Author

lundman commented Jul 6, 2016

Another:

panic(cpu 2 caller 0xffffff7f86b16efe): "VERIFY3(" "0" " " "==" " " "dmu_free_range(bpo->bpo_os, bpo->bpo_object, (i + 1) * sizeof (blkptr_t), -1ULL, tx)" ") " "failed (" "0" " " "==" " " "2" ")\n"@bpobj.c:260
Backtrace (CPU 2), Frame : Return Address
0xffffff90a06dbbb0 : 0xffffff8004edf792 mach_kernel : _panic + 0xe2
0xffffff90a06dbc30 : 0xffffff7f86b16efe net.lundman.zfs : _bpobj_iterate_impl + 0x4fe
0xffffff90a06dbdb0 : 0xffffff7f86b169f3 net.lundman.zfs : _bpobj_iterate + 0x33
0xffffff90a06dbde0 : 0xffffff7f86b90767 net.lundman.zfs : _spa_sync_deferred_frees + 0x57
0xffffff90a06dbe20 : 0xffffff7f86b8fd10 net.lundman.zfs : _spa_sync + 0x480

@lundman
Copy link
Contributor Author

lundman commented Jul 6, 2016

git bisect and zfs-tester seem to conclude that this is caused by 082fa81 ("Implement large_dnode pool feature"), so we have removed it from master for now and placed it in the large_dnode branch.

Since upstream ZOL does not have these issues, this is probably due to a porting problem.
openzfs/zfs#3542

@ilovezfs
Copy link
Contributor

ilovezfs commented Jul 6, 2016

@evansus you need to remove the offending commit from your branches. master was force pushed.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants