From 5cb3d15ada2910ef174e0cf2d13ca0f81cdf24b5 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 19 Apr 2023 12:47:38 -0600 Subject: [PATCH 01/11] Add Kokkos support for atom sorting on device --- src/KOKKOS/atom_kokkos.cpp | 137 ++++++++++------------- src/KOKKOS/atom_kokkos.h | 4 + src/KOKKOS/atom_vec_angle_kokkos.cpp | 29 +++++ src/KOKKOS/atom_vec_angle_kokkos.h | 1 + src/KOKKOS/atom_vec_atomic_kokkos.cpp | 18 +++ src/KOKKOS/atom_vec_atomic_kokkos.h | 1 + src/KOKKOS/atom_vec_bond_kokkos.cpp | 24 ++++ src/KOKKOS/atom_vec_bond_kokkos.h | 1 + src/KOKKOS/atom_vec_charge_kokkos.cpp | 19 ++++ src/KOKKOS/atom_vec_charge_kokkos.h | 1 + src/KOKKOS/atom_vec_dipole_kokkos.cpp | 20 ++++ src/KOKKOS/atom_vec_dipole_kokkos.h | 1 + src/KOKKOS/atom_vec_dpd_kokkos.cpp | 24 ++++ src/KOKKOS/atom_vec_dpd_kokkos.h | 1 + src/KOKKOS/atom_vec_full_kokkos.cpp | 42 +++++++ src/KOKKOS/atom_vec_full_kokkos.h | 1 + src/KOKKOS/atom_vec_hybrid_kokkos.cpp | 10 ++ src/KOKKOS/atom_vec_hybrid_kokkos.h | 1 + src/KOKKOS/atom_vec_kokkos.h | 8 +- src/KOKKOS/atom_vec_molecular_kokkos.cpp | 41 +++++++ src/KOKKOS/atom_vec_molecular_kokkos.h | 1 + src/KOKKOS/atom_vec_sphere_kokkos.cpp | 21 ++++ src/KOKKOS/atom_vec_sphere_kokkos.h | 1 + src/KOKKOS/atom_vec_spin_kokkos.cpp | 19 ++++ src/KOKKOS/atom_vec_spin_kokkos.h | 2 +- src/KOKKOS/fix_acks2_reaxff_kokkos.cpp | 20 ++++ src/KOKKOS/fix_acks2_reaxff_kokkos.h | 4 +- src/KOKKOS/fix_langevin_kokkos.cpp | 20 ++++ src/KOKKOS/fix_langevin_kokkos.h | 4 +- src/KOKKOS/fix_minimize_kokkos.cpp | 17 +++ src/KOKKOS/fix_minimize_kokkos.h | 4 +- src/KOKKOS/fix_neigh_history_kokkos.cpp | 24 +++- src/KOKKOS/fix_neigh_history_kokkos.h | 1 + src/KOKKOS/fix_qeq_reaxff_kokkos.cpp | 21 +++- src/KOKKOS/fix_qeq_reaxff_kokkos.h | 1 + src/KOKKOS/fix_shake_kokkos.cpp | 24 +++- src/KOKKOS/fix_shake_kokkos.h | 1 + src/KOKKOS/fix_wall_gran_kokkos.cpp | 18 ++- src/KOKKOS/fix_wall_gran_kokkos.h | 1 + src/KOKKOS/kokkos.cpp | 25 +++++ src/KOKKOS/kokkos.h | 2 + src/KOKKOS/kokkos_base.h | 7 ++ src/atom.cpp | 5 + src/atom.h | 2 +- src/fix.cpp | 2 +- src/fix.h | 1 + 46 files changed, 545 insertions(+), 87 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index 938d9709e93..9bbbb2acc33 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -22,6 +22,11 @@ #include "kokkos.h" #include "memory_kokkos.h" #include "update.h" +#include "kokkos_base.h" +#include "modify.h" +#include "fix.h" + +#include using namespace LAMMPS_NS; @@ -103,6 +108,15 @@ AtomKokkos::~AtomKokkos() /* ---------------------------------------------------------------------- */ +void AtomKokkos::init() +{ + Atom::init(); + + sort_classic = lmp->kokkos->sort_classic; +} + +/* ---------------------------------------------------------------------- */ + void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask) { if (space == Device && lmp->kokkos->auto_sync) avecKK->modified(Host, mask); @@ -140,8 +154,36 @@ void AtomKokkos::allocate_type_arrays() void AtomKokkos::sort() { - int i, m, n, ix, iy, iz, ibin, empty; + // check if all fixes with atom-based arrays support sort on device + + if (!sort_classic) { + int flag = 1; + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { + auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; + if (!fix_iextra->sort_device) { + flag = 0; + break; + } + } + if (!flag) { + if (comm->me == 0) { + error->warning(FLERR,"Fix with atom-based arrays not compatible with Kokkos sorting on device, " + "switching to classic host sorting"); + } + } + } + + if (sort_classic) { + sync(Host, ALL_MASK); + Atom::sort(); + modified(Host, ALL_MASK); + } else sort_device(); +} + +/* ---------------------------------------------------------------------- */ +void AtomKokkos::sort_device() +{ // set next timestep for sorting to take place nextsort = (update->ntimestep / sortfreq) * sortfreq + sortfreq; @@ -151,88 +193,32 @@ void AtomKokkos::sort() if (domain->box_change) setup_sort_bins(); if (nbins == 1) return; - // reallocate per-atom vectors if needed - - if (atom->nmax > maxnext) { - memory->destroy(next); - memory->destroy(permute); - maxnext = atom->nmax; - memory->create(next, maxnext, "atom:next"); - memory->create(permute, maxnext, "atom:permute"); - } + auto d_x = k_x.d_view; + sync(Device, X_MASK); - // ensure there is one extra atom location at end of arrays for swaps + // sort - if (nlocal == nmax) avec->grow(0); + int max_bins[3]; + max_bins[0] = nbinx; + max_bins[1] = nbiny; + max_bins[2] = nbinz; - sync(Host, ALL_MASK); - modified(Host, ALL_MASK); + using KeyViewType = DAT::t_x_array; + using BinOp = Kokkos::BinOp3DReverse; + BinOp binner(max_bins, bboxlo, bboxhi); + Kokkos::BinSort Sorter(d_x, 0, nlocal, binner, false); + Sorter.create_permute_vector(LMPDeviceType()); - // bin atoms in reverse order so linked list will be in forward order - - for (i = 0; i < nbins; i++) binhead[i] = -1; - - HAT::t_x_array_const h_x = k_x.view(); - for (i = nlocal - 1; i >= 0; i--) { - ix = static_cast((h_x(i, 0) - bboxlo[0]) * bininvx); - iy = static_cast((h_x(i, 1) - bboxlo[1]) * bininvy); - iz = static_cast((h_x(i, 2) - bboxlo[2]) * bininvz); - ix = MAX(ix, 0); - iy = MAX(iy, 0); - iz = MAX(iz, 0); - ix = MIN(ix, nbinx - 1); - iy = MIN(iy, nbiny - 1); - iz = MIN(iz, nbinz - 1); - ibin = iz * nbiny * nbinx + iy * nbinx + ix; - next[i] = binhead[ibin]; - binhead[ibin] = i; - } + avecKK->sort_kokkos(Sorter); - // permute = desired permutation of atoms - // permute[I] = J means Ith new atom will be Jth old atom + if (atom->nextra_grow) { + for (int iextra = 0; iextra < atom->nextra_grow; iextra++) { + auto fix_iextra = modify->fix[atom->extra_grow[iextra]]; + KokkosBase *kkbase = dynamic_cast(fix_iextra); - n = 0; - for (m = 0; m < nbins; m++) { - i = binhead[m]; - while (i >= 0) { - permute[n++] = i; - i = next[i]; + kkbase->sort_kokkos(Sorter); } } - - // current = current permutation, just reuse next vector - // current[I] = J means Ith current atom is Jth old atom - - int *current = next; - for (i = 0; i < nlocal; i++) current[i] = i; - - // reorder local atom list, when done, current = permute - // perform "in place" using copy() to extra atom location at end of list - // inner while loop processes one cycle of the permutation - // copy before inner-loop moves an atom to end of atom list - // copy after inner-loop moves atom at end of list back into list - // empty = location in atom list that is currently empty - - for (i = 0; i < nlocal; i++) { - if (current[i] == permute[i]) continue; - avec->copy(i, nlocal, 0); - empty = i; - while (permute[empty] != i) { - avec->copy(permute[empty], empty, 0); - empty = current[empty] = permute[empty]; - } - avec->copy(nlocal, empty, 0); - current[empty] = permute[empty]; - } - - // sanity check that current = permute - - //int flag = 0; - //for (i = 0; i < nlocal; i++) - // if (current[i] != permute[i]) flag = 1; - //int flagall; - //MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); - //if (flagall) errorX->all(FLERR,"Atom sort did not operate correctly"); } /* ---------------------------------------------------------------------- @@ -241,7 +227,6 @@ void AtomKokkos::sort() void AtomKokkos::grow(unsigned int mask) { - if (mask & SPECIAL_MASK) { memoryKK->destroy_kokkos(k_special, special); sync(Device, mask); diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 8d2ae47f0e8..84bb7a56ebd 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -22,6 +22,8 @@ namespace LAMMPS_NS { class AtomKokkos : public Atom { public: + bool sort_classic; + DAT::tdual_tagint_1d k_tag; DAT::tdual_int_1d k_type, k_mask; DAT::tdual_imageint_1d k_image; @@ -108,6 +110,7 @@ class AtomKokkos : public Atom { return local; } + void init() override; void allocate_type_arrays() override; void sync(const ExecutionSpace space, unsigned int mask); void modified(const ExecutionSpace space, unsigned int mask); @@ -119,6 +122,7 @@ class AtomKokkos : public Atom { virtual void deallocate_topology(); void sync_modify(ExecutionSpace, unsigned int, unsigned int) override; private: + void sort_device(); class AtomVec *new_avec(const std::string &, int, int &) override; }; diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index f132298c2d3..dd6be164c07 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -155,6 +155,35 @@ void AtomVecAngleKokkos::grow_pointers() h_angle_atom3 = atomKK->k_angle_atom3.h_view; } +/* ---------------------------------------------------------------------- + sort atom arrays on device +------------------------------------------------------------------------- */ + +void AtomVecAngleKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + atomKK->sync(Device, ALL_MASK & ~F_MASK); + + Sorter.sort(LMPDeviceType(), d_tag); + Sorter.sort(LMPDeviceType(), d_type); + Sorter.sort(LMPDeviceType(), d_mask); + Sorter.sort(LMPDeviceType(), d_image); + Sorter.sort(LMPDeviceType(), d_x); + Sorter.sort(LMPDeviceType(), d_v); + Sorter.sort(LMPDeviceType(), d_molecule); + Sorter.sort(LMPDeviceType(), d_num_bond); + Sorter.sort(LMPDeviceType(), d_bond_type); + Sorter.sort(LMPDeviceType(), d_bond_atom); + Sorter.sort(LMPDeviceType(), d_nspecial); + Sorter.sort(LMPDeviceType(), d_special); + Sorter.sort(LMPDeviceType(), d_num_angle); + Sorter.sort(LMPDeviceType(), d_angle_type); + Sorter.sort(LMPDeviceType(), d_angle_atom1); + Sorter.sort(LMPDeviceType(), d_angle_atom2); + Sorter.sort(LMPDeviceType(), d_angle_atom3); + + atomKK->modified(Device, ALL_MASK & ~F_MASK); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h index a1c20c103b5..44f1d824b2b 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.h +++ b/src/KOKKOS/atom_vec_angle_kokkos.h @@ -34,6 +34,7 @@ class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle { void grow(int) override; void grow_pointers() override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, const int & iswap, const DAT::tdual_xfloat_2d &buf, diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index e37779ace51..1ea8377a688 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -100,6 +100,24 @@ void AtomVecAtomicKokkos::grow_pointers() h_f = atomKK->k_f.h_view; } +/* ---------------------------------------------------------------------- + sort atom arrays on device +------------------------------------------------------------------------- */ + +void AtomVecAtomicKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + atomKK->sync(Device, ALL_MASK & ~F_MASK); + + Sorter.sort(LMPDeviceType(), d_tag); + Sorter.sort(LMPDeviceType(), d_type); + Sorter.sort(LMPDeviceType(), d_mask); + Sorter.sort(LMPDeviceType(), d_image); + Sorter.sort(LMPDeviceType(), d_x); + Sorter.sort(LMPDeviceType(), d_v); + + atomKK->modified(Device, ALL_MASK & ~F_MASK); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h index f72af735377..07631dda98e 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.h +++ b/src/KOKKOS/atom_vec_atomic_kokkos.h @@ -35,6 +35,7 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic { void grow(int) override; void grow_pointers() override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index dcbe1876f42..c45bdedf385 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -126,6 +126,30 @@ void AtomVecBondKokkos::grow_pointers() h_bond_atom = atomKK->k_bond_atom.h_view; } +/* ---------------------------------------------------------------------- + sort atom arrays on device +------------------------------------------------------------------------- */ + +void AtomVecBondKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + atomKK->sync(Device, ALL_MASK & ~F_MASK); + + Sorter.sort(LMPDeviceType(), d_tag); + Sorter.sort(LMPDeviceType(), d_type); + Sorter.sort(LMPDeviceType(), d_mask); + Sorter.sort(LMPDeviceType(), d_image); + Sorter.sort(LMPDeviceType(), d_x); + Sorter.sort(LMPDeviceType(), d_v); + Sorter.sort(LMPDeviceType(), d_molecule); + Sorter.sort(LMPDeviceType(), d_num_bond); + Sorter.sort(LMPDeviceType(), d_bond_type); + Sorter.sort(LMPDeviceType(), d_bond_atom); + Sorter.sort(LMPDeviceType(), d_nspecial); + Sorter.sort(LMPDeviceType(), d_special); + + atomKK->modified(Device, ALL_MASK & ~F_MASK); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h index fc3f02e916b..5ed59432de5 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.h +++ b/src/KOKKOS/atom_vec_bond_kokkos.h @@ -34,6 +34,7 @@ class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond { void grow(int) override; void grow_pointers() override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index a9975c1bb4b..22fc63ff910 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -106,6 +106,25 @@ void AtomVecChargeKokkos::grow_pointers() h_q = atomKK->k_q.h_view; } +/* ---------------------------------------------------------------------- + sort atom arrays on device +------------------------------------------------------------------------- */ + +void AtomVecChargeKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + atomKK->sync(Device, ALL_MASK & ~F_MASK); + + Sorter.sort(LMPDeviceType(), d_tag); + Sorter.sort(LMPDeviceType(), d_type); + Sorter.sort(LMPDeviceType(), d_mask); + Sorter.sort(LMPDeviceType(), d_image); + Sorter.sort(LMPDeviceType(), d_x); + Sorter.sort(LMPDeviceType(), d_v); + Sorter.sort(LMPDeviceType(), d_q); + + atomKK->modified(Device, ALL_MASK & ~F_MASK); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h index 072b5e6894a..397a5ee4c01 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.h +++ b/src/KOKKOS/atom_vec_charge_kokkos.h @@ -35,6 +35,7 @@ class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge { void grow(int) override; void grow_pointers() override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp index b2357ccb41f..ad06570cdcc 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp @@ -107,6 +107,26 @@ void AtomVecDipoleKokkos::grow_pointers() h_mu = atomKK->k_mu.h_view; } +/* ---------------------------------------------------------------------- + sort atom arrays on device +------------------------------------------------------------------------- */ + +void AtomVecDipoleKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + atomKK->sync(Device, ALL_MASK & ~F_MASK); + + Sorter.sort(LMPDeviceType(), d_tag); + Sorter.sort(LMPDeviceType(), d_type); + Sorter.sort(LMPDeviceType(), d_mask); + Sorter.sort(LMPDeviceType(), d_image); + Sorter.sort(LMPDeviceType(), d_x); + Sorter.sort(LMPDeviceType(), d_v); + Sorter.sort(LMPDeviceType(), d_q); + Sorter.sort(LMPDeviceType(), d_mu); + + atomKK->modified(Device, ALL_MASK & ~F_MASK); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.h b/src/KOKKOS/atom_vec_dipole_kokkos.h index f9abfc9a2ae..97ec92c6c6f 100644 --- a/src/KOKKOS/atom_vec_dipole_kokkos.h +++ b/src/KOKKOS/atom_vec_dipole_kokkos.h @@ -35,6 +35,7 @@ class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole { void grow(int) override; void grow_pointers() override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 6fa32773508..eda26a92dc2 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -135,6 +135,30 @@ void AtomVecDPDKokkos::grow_pointers() h_duChem = atomKK->k_duChem.h_view; } +/* ---------------------------------------------------------------------- + sort atom arrays on device +------------------------------------------------------------------------- */ + +void AtomVecDPDKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + atomKK->sync(Device, ALL_MASK & ~F_MASK & ~DPDRHO_MASK & ~DUCHEM_MASK & ~DVECTOR_MASK); + + Sorter.sort(LMPDeviceType(), d_tag); + Sorter.sort(LMPDeviceType(), d_type); + Sorter.sort(LMPDeviceType(), d_mask); + Sorter.sort(LMPDeviceType(), d_image); + Sorter.sort(LMPDeviceType(), d_x); + Sorter.sort(LMPDeviceType(), d_v); + Sorter.sort(LMPDeviceType(), d_dpdTheta); + Sorter.sort(LMPDeviceType(), d_uCond); + Sorter.sort(LMPDeviceType(), d_uMech); + Sorter.sort(LMPDeviceType(), d_uChem); + Sorter.sort(LMPDeviceType(), d_uCG); + Sorter.sort(LMPDeviceType(), d_uCGnew); + + atomKK->modified(Device, ALL_MASK & ~F_MASK & ~DPDRHO_MASK & ~DUCHEM_MASK & ~DVECTOR_MASK); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h index c605246ebae..a76d7f908a6 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.h +++ b/src/KOKKOS/atom_vec_dpd_kokkos.h @@ -35,6 +35,7 @@ class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD { void grow(int) override; void grow_pointers() override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, const int & iswap, const DAT::tdual_xfloat_2d &buf, diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index bb61c7fb46b..829ebc75e64 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -225,6 +225,48 @@ void AtomVecFullKokkos::grow_pointers() h_improper_atom4 = atomKK->k_improper_atom4.h_view; } +/* ---------------------------------------------------------------------- + sort atom arrays on device +------------------------------------------------------------------------- */ + +void AtomVecFullKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + atomKK->sync(Device, ALL_MASK & ~F_MASK); + + Sorter.sort(LMPDeviceType(), d_tag); + Sorter.sort(LMPDeviceType(), d_type); + Sorter.sort(LMPDeviceType(), d_mask); + Sorter.sort(LMPDeviceType(), d_image); + Sorter.sort(LMPDeviceType(), d_x); + Sorter.sort(LMPDeviceType(), d_v); + Sorter.sort(LMPDeviceType(), d_q); + Sorter.sort(LMPDeviceType(), d_molecule); + Sorter.sort(LMPDeviceType(), d_num_bond); + Sorter.sort(LMPDeviceType(), d_bond_type); + Sorter.sort(LMPDeviceType(), d_bond_atom); + Sorter.sort(LMPDeviceType(), d_nspecial); + Sorter.sort(LMPDeviceType(), d_special); + Sorter.sort(LMPDeviceType(), d_num_angle); + Sorter.sort(LMPDeviceType(), d_angle_type); + Sorter.sort(LMPDeviceType(), d_angle_atom1); + Sorter.sort(LMPDeviceType(), d_angle_atom2); + Sorter.sort(LMPDeviceType(), d_angle_atom3); + Sorter.sort(LMPDeviceType(), d_num_dihedral); + Sorter.sort(LMPDeviceType(), d_dihedral_type); + Sorter.sort(LMPDeviceType(), d_dihedral_atom1); + Sorter.sort(LMPDeviceType(), d_dihedral_atom2); + Sorter.sort(LMPDeviceType(), d_dihedral_atom3); + Sorter.sort(LMPDeviceType(), d_dihedral_atom4); + Sorter.sort(LMPDeviceType(), d_num_improper); + Sorter.sort(LMPDeviceType(), d_improper_type); + Sorter.sort(LMPDeviceType(), d_improper_atom1); + Sorter.sort(LMPDeviceType(), d_improper_atom2); + Sorter.sort(LMPDeviceType(), d_improper_atom3); + Sorter.sort(LMPDeviceType(), d_improper_atom4); + + atomKK->modified(Device, ALL_MASK & ~F_MASK); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h index e6fcfd7e405..4937ef4152f 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.h +++ b/src/KOKKOS/atom_vec_full_kokkos.h @@ -34,6 +34,7 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull { void grow(int) override; void grow_pointers() override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space) override; diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index 03311d1c324..4e01ab57948 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -51,6 +51,16 @@ void AtomVecHybridKokkos::grow(int n) f = atom->f; } +/* ---------------------------------------------------------------------- + sort atom arrays on device +------------------------------------------------------------------------- */ + +void AtomVecHybridKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + for (int k = 0; k < nstyles; k++) + (dynamic_cast(styles[k]))->sort_kokkos(Sorter); +} + /* ---------------------------------------------------------------------- */ int AtomVecHybridKokkos::pack_comm_kokkos(const int &/*n*/, const DAT::tdual_int_2d &/*k_sendlist*/, diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h index 862b43d80b0..6f81c936736 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.h +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h @@ -34,6 +34,7 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid { AtomVecHybridKokkos(class LAMMPS *); void grow(int) override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, const int & iswap, diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index dfb4aecfcf3..ef6a3fcbc82 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -20,6 +20,8 @@ #include "kokkos_type.h" #include +#include + namespace LAMMPS_NS { union d_ubuf { @@ -38,6 +40,11 @@ class AtomVecKokkos : virtual public AtomVec { AtomVecKokkos(class LAMMPS *); ~AtomVecKokkos() override; + using KeyViewType = DAT::t_x_array; + using BinOp = Kokkos::BinOp3DReverse; + virtual void + sort_kokkos(Kokkos::BinSort &Sorter) = 0; + virtual void sync(ExecutionSpace space, unsigned int mask) = 0; virtual void modified(ExecutionSpace space, unsigned int mask) = 0; virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) = 0; @@ -117,7 +124,6 @@ class AtomVecKokkos : virtual public AtomVec { ExecutionSpace space, DAT::tdual_int_1d &k_indices) = 0; - int no_comm_vel_flag,no_border_vel_flag; int unpack_exchange_indices_flag; int size_exchange; diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 1bb75a1906e..471dd0ad587 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -217,6 +217,47 @@ void AtomVecMolecularKokkos::grow_pointers() h_improper_atom4 = atomKK->k_improper_atom4.h_view; } +/* ---------------------------------------------------------------------- + sort atom arrays on device +------------------------------------------------------------------------- */ + +void AtomVecMolecularKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + atomKK->sync(Device, ALL_MASK & ~F_MASK); + + Sorter.sort(LMPDeviceType(), d_tag); + Sorter.sort(LMPDeviceType(), d_type); + Sorter.sort(LMPDeviceType(), d_mask); + Sorter.sort(LMPDeviceType(), d_image); + Sorter.sort(LMPDeviceType(), d_x); + Sorter.sort(LMPDeviceType(), d_v); + Sorter.sort(LMPDeviceType(), d_molecule); + Sorter.sort(LMPDeviceType(), d_num_bond); + Sorter.sort(LMPDeviceType(), d_bond_type); + Sorter.sort(LMPDeviceType(), d_bond_atom); + Sorter.sort(LMPDeviceType(), d_nspecial); + Sorter.sort(LMPDeviceType(), d_special); + Sorter.sort(LMPDeviceType(), d_num_angle); + Sorter.sort(LMPDeviceType(), d_angle_type); + Sorter.sort(LMPDeviceType(), d_angle_atom1); + Sorter.sort(LMPDeviceType(), d_angle_atom2); + Sorter.sort(LMPDeviceType(), d_angle_atom3); + Sorter.sort(LMPDeviceType(), d_num_dihedral); + Sorter.sort(LMPDeviceType(), d_dihedral_type); + Sorter.sort(LMPDeviceType(), d_dihedral_atom1); + Sorter.sort(LMPDeviceType(), d_dihedral_atom2); + Sorter.sort(LMPDeviceType(), d_dihedral_atom3); + Sorter.sort(LMPDeviceType(), d_dihedral_atom4); + Sorter.sort(LMPDeviceType(), d_num_improper); + Sorter.sort(LMPDeviceType(), d_improper_type); + Sorter.sort(LMPDeviceType(), d_improper_atom1); + Sorter.sort(LMPDeviceType(), d_improper_atom2); + Sorter.sort(LMPDeviceType(), d_improper_atom3); + Sorter.sort(LMPDeviceType(), d_improper_atom4); + + atomKK->modified(Device, ALL_MASK & ~F_MASK); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h index af8a2258e15..eb976e90735 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.h +++ b/src/KOKKOS/atom_vec_molecular_kokkos.h @@ -34,6 +34,7 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular { void grow(int) override; void grow_pointers() override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, const int & iswap, const DAT::tdual_xfloat_2d &buf, diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index 40af56489b8..a9b64fc8354 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -123,6 +123,27 @@ void AtomVecSphereKokkos::grow_pointers() h_torque = atomKK->k_torque.h_view; } +/* ---------------------------------------------------------------------- + sort atom arrays on device +------------------------------------------------------------------------- */ + +void AtomVecSphereKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + atomKK->sync(Device, ALL_MASK & ~F_MASK & ~TORQUE_MASK); + + Sorter.sort(LMPDeviceType(), d_tag); + Sorter.sort(LMPDeviceType(), d_type); + Sorter.sort(LMPDeviceType(), d_mask); + Sorter.sort(LMPDeviceType(), d_image); + Sorter.sort(LMPDeviceType(), d_x); + Sorter.sort(LMPDeviceType(), d_v); + Sorter.sort(LMPDeviceType(), d_radius); + Sorter.sort(LMPDeviceType(), d_rmass); + Sorter.sort(LMPDeviceType(), d_omega); + + atomKK->modified(Device, ALL_MASK & ~F_MASK & ~TORQUE_MASK); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h index 32357fb600e..34529320d9b 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.h +++ b/src/KOKKOS/atom_vec_sphere_kokkos.h @@ -35,6 +35,7 @@ class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere { void grow(int) override; void grow_pointers() override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist, const int & iswap, diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index 662072ead99..ac1c5a9294a 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -129,6 +129,25 @@ void AtomVecSpinKokkos::grow_pointers() h_fm_long = atomKK->k_fm_long.h_view; } +/* ---------------------------------------------------------------------- + sort atom arrays on device +------------------------------------------------------------------------- */ + +void AtomVecSpinKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + atomKK->sync(Device, TAG_MASK|TYPE_MASK|MASK_MASK|IMAGE_MASK|X_MASK|V_MASK|SP_MASK); + + Sorter.sort(LMPDeviceType(), d_tag); + Sorter.sort(LMPDeviceType(), d_type); + Sorter.sort(LMPDeviceType(), d_mask); + Sorter.sort(LMPDeviceType(), d_image); + Sorter.sort(LMPDeviceType(), d_x); + Sorter.sort(LMPDeviceType(), d_v); + Sorter.sort(LMPDeviceType(), d_sp; + + atomKK->modified(Device, TAG_MASK|TYPE_MASK|MASK_MASK|IMAGE_MASK|X_MASK|V_MASK|SP_MASK); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h index 6a48d195a27..d14d01fb628 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.h +++ b/src/KOKKOS/atom_vec_spin_kokkos.h @@ -34,7 +34,7 @@ class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin { AtomVecSpinKokkos(class LAMMPS *); void grow(int) override; void grow_pointers() override; - // input lists to be checked + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap, int pbc_flag, int *pbc, ExecutionSpace space) override; diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp index 3a2447461e2..d49d60d6ce9 100644 --- a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp @@ -49,6 +49,7 @@ FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : FixACKS2ReaxFF(lmp, narg, arg) { kokkosable = 1; + sort_device = 1; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; @@ -1912,6 +1913,25 @@ void FixACKS2ReaxFFKokkos::copy_arrays(int i, int j, int delflag) k_s_hist_X.template modify(); } +/* ---------------------------------------------------------------------- + sort local atom-based arrays +------------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + // always sort on the device + + k_s_hist.sync_device(); + k_s_hist_X.sync_device(); + + Sorter.sort(LMPDeviceType(), k_s_hist.d_view); + Sorter.sort(LMPDeviceType(), k_s_hist_X.d_view); + + k_s_hist.modify_device(); + k_s_hist_X.modify_device(); +} + /* ---------------------------------------------------------------------- pack values in local atom-based array for exchange with another proc ------------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.h b/src/KOKKOS/fix_acks2_reaxff_kokkos.h index 664f4dcb816..f6f787523d9 100644 --- a/src/KOKKOS/fix_acks2_reaxff_kokkos.h +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.h @@ -27,6 +27,7 @@ FixStyle(acks2/reax/kk/host,FixACKS2ReaxFFKokkos); #include "fix_acks2_reaxff.h" #include "kokkos_type.h" +#include "kokkos_base.h" #include "neigh_list.h" #include "neigh_list_kokkos.h" @@ -57,7 +58,7 @@ struct TagACKS2ZeroQGhosts{}; struct TagACKS2CalculateQ{}; template -class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF { +class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { public: typedef DeviceType device_type; typedef double value_type; @@ -252,6 +253,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF { void grow_arrays(int); void copy_arrays(int, int, int); + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_exchange(int, double *); int unpack_exchange(int, double *); void get_chi_field(); diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp index b7305644c98..6b8ffbb2ab6 100644 --- a/src/KOKKOS/fix_langevin_kokkos.cpp +++ b/src/KOKKOS/fix_langevin_kokkos.cpp @@ -44,6 +44,7 @@ FixLangevinKokkos::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a FixLangevin(lmp, narg, arg),rand_pool(seed + comm->me) { kokkosable = 1; + sort_device = 1; atomKK = (AtomKokkos *) atom; int ntypes = atomKK->ntypes; @@ -889,6 +890,25 @@ void FixLangevinKokkos::copy_arrays(int i, int j, int /*delflag*/) } +/* ---------------------------------------------------------------------- + sort local atom-based arrays +------------------------------------------------------------------------- */ + +template +void FixLangevinKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + // always sort on the device + + k_franprev.sync_device(); + k_lv.sync_device(); + + Sorter.sort(LMPDeviceType(), k_franprev.d_view); + Sorter.sort(LMPDeviceType(), k_lv.d_view); + + k_franprev.modify_device(); + k_lv.modify_device(); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/fix_langevin_kokkos.h b/src/KOKKOS/fix_langevin_kokkos.h index f7142e62862..3f23f57cc34 100644 --- a/src/KOKKOS/fix_langevin_kokkos.h +++ b/src/KOKKOS/fix_langevin_kokkos.h @@ -25,6 +25,7 @@ FixStyle(langevin/kk/host,FixLangevinKokkos); #include "fix_langevin.h" #include "kokkos_type.h" +#include "kokkos_base.h" #include "Kokkos_Random.hpp" #include "comm_kokkos.h" @@ -61,7 +62,7 @@ namespace LAMMPS_NS { template struct FixLangevinKokkosTallyEnergyFunctor; template - class FixLangevinKokkos : public FixLangevin { + class FixLangevinKokkos : public FixLangevin, public KokkosBase { public: FixLangevinKokkos(class LAMMPS *, int, char **); ~FixLangevinKokkos() override; @@ -73,6 +74,7 @@ namespace LAMMPS_NS { void reset_dt() override; void grow_arrays(int) override; void copy_arrays(int i, int j, int delflag) override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; double compute_scalar() override; void end_of_step() override; diff --git a/src/KOKKOS/fix_minimize_kokkos.cpp b/src/KOKKOS/fix_minimize_kokkos.cpp index 07c78e86a37..5f4b62d67d5 100644 --- a/src/KOKKOS/fix_minimize_kokkos.cpp +++ b/src/KOKKOS/fix_minimize_kokkos.cpp @@ -27,6 +27,8 @@ using namespace FixConst; FixMinimizeKokkos::FixMinimizeKokkos(LAMMPS *lmp, int narg, char **arg) : FixMinimize(lmp, narg, arg) { + kokkosable = 1; + sort_device = 1; atomKK = (AtomKokkos *) atom; } @@ -217,6 +219,21 @@ void FixMinimizeKokkos::copy_arrays(int i, int j, int /*delflag*/) k_vectors.modify(); } +/* ---------------------------------------------------------------------- + sort local atom-based arrays +------------------------------------------------------------------------- */ + +void FixMinimizeKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + // always sort on the device + + k_vectors.sync_device(); + + Sorter.sort(LMPDeviceType(), k_vectors.d_view); + + k_vectors.modify_device(); +} + /* ---------------------------------------------------------------------- pack values in local atom-based arrays for exchange with another proc ------------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_minimize_kokkos.h b/src/KOKKOS/fix_minimize_kokkos.h index e84cbd1ec29..121711b4e4b 100644 --- a/src/KOKKOS/fix_minimize_kokkos.h +++ b/src/KOKKOS/fix_minimize_kokkos.h @@ -25,10 +25,11 @@ FixStyle(MINIMIZE/kk/host,FixMinimizeKokkos); #include "fix_minimize.h" #include "kokkos_type.h" +#include "kokkos_base.h" namespace LAMMPS_NS { -class FixMinimizeKokkos : public FixMinimize { +class FixMinimizeKokkos : public FixMinimize, public KokkosBase { friend class MinLineSearchKokkos; public: @@ -38,6 +39,7 @@ class FixMinimizeKokkos : public FixMinimize { void grow_arrays(int) override; void copy_arrays(int, int, int) override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_exchange(int, double *) override; int unpack_exchange(int, double *) override; diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index 198ab555f30..ba189e38d93 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -32,7 +32,7 @@ FixNeighHistoryKokkos::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, FixNeighHistory(lmp, narg, arg) { kokkosable = 1; - exchange_comm_device = 1; + exchange_comm_device = sort_device = 1; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; @@ -325,6 +325,28 @@ void FixNeighHistoryKokkos::copy_arrays(int i, int j, int /*delflag* k_valuepartner.modify_host(); } +/* ---------------------------------------------------------------------- + sort local atom-based arrays +------------------------------------------------------------------------- */ + +template +void FixNeighHistoryKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + // always sort on the device + + k_npartner.sync_device(); + k_partner.sync_device(); + k_valuepartner.sync_device(); + + Sorter.sort(LMPDeviceType(), k_npartner.d_view); + Sorter.sort(LMPDeviceType(), k_partner.d_view); + Sorter.sort(LMPDeviceType(), k_valuepartner.d_view); + + k_npartner.modify_device(); + k_partner.modify_device(); + k_valuepartner.modify_device(); +} + /* ---------------------------------------------------------------------- pack values in local atom-based array for exchange with another proc ------------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h index 6f29c817b82..9c07a953c4a 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.h +++ b/src/KOKKOS/fix_neigh_history_kokkos.h @@ -48,6 +48,7 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase { void post_neighbor() override; void grow_arrays(int) override; void copy_arrays(int, int, int) override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_exchange(int, double *) override; int unpack_exchange(int, double *) override; double memory_usage() override; diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index c1695843a7f..e003f4b4f9f 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -58,7 +58,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : { kokkosable = 1; comm_forward = comm_reverse = 2; // fused - forward_comm_device = exchange_comm_device = 1; + forward_comm_device = exchange_comm_device = sort_device = 1; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; @@ -1338,6 +1338,25 @@ void FixQEqReaxFFKokkos::copy_arrays(int i, int j, int /*delflag*/) k_t_hist.template modify(); } +/* ---------------------------------------------------------------------- + sort local atom-based arrays +------------------------------------------------------------------------- */ + +template +void FixQEqReaxFFKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + // always sort on the device + + k_s_hist.sync_device(); + k_t_hist.sync_device(); + + Sorter.sort(LMPDeviceType(), k_s_hist.d_view); + Sorter.sort(LMPDeviceType(), k_t_hist.d_view); + + k_s_hist.modify_device(); + k_t_hist.modify_device(); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 29faefe56b3..9bc38b04921 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -280,6 +280,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { void grow_arrays(int) override; void copy_arrays(int, int, int) override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_exchange(int, double *) override; int unpack_exchange(int, double *) override; void get_chi_field() override; diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index b00195e5fda..465536d63f3 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -53,7 +53,7 @@ FixShakeKokkos::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) : FixShake(lmp, narg, arg) { kokkosable = 1; - forward_comm_device = exchange_comm_device = 1; + forward_comm_device = exchange_comm_device = sort_device = 1; maxexchange = 9; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; @@ -1484,6 +1484,28 @@ void FixShakeKokkos::copy_arrays(int i, int j, int delflag) k_shake_type.modify_host(); } +/* ---------------------------------------------------------------------- + sort local atom-based arrays +------------------------------------------------------------------------- */ + +template +void FixShakeKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + // always sort on the device + + k_shake_flag.sync_device(); + k_shake_atom.sync_device(); + k_shake_type.sync_device(); + + Sorter.sort(LMPDeviceType(), k_shake_flag.d_view); + Sorter.sort(LMPDeviceType(), k_shake_atom.d_view); + Sorter.sort(LMPDeviceType(), k_shake_type.d_view); + + k_shake_flag.modify_device(); + k_shake_atom.modify_device(); + k_shake_type.modify_device(); +} + /* ---------------------------------------------------------------------- initialize one atom's array values, called when atom is created ------------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_shake_kokkos.h b/src/KOKKOS/fix_shake_kokkos.h index 650ad522879..185e69ce865 100644 --- a/src/KOKKOS/fix_shake_kokkos.h +++ b/src/KOKKOS/fix_shake_kokkos.h @@ -61,6 +61,7 @@ class FixShakeKokkos : public FixShake, public KokkosBase { void grow_arrays(int) override; void copy_arrays(int, int, int) override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; void set_arrays(int) override; void update_arrays(int, int) override; void set_molecule(int, tagint, int, double *, double *, double *) override; diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp index 1569065bbee..04994776498 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.cpp +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -32,7 +32,7 @@ FixWallGranKokkos::FixWallGranKokkos(LAMMPS *lmp, int narg, char **a FixWallGranOld(lmp, narg, arg) { kokkosable = 1; - exchange_comm_device = 1; + exchange_comm_device = sort_device = 1; maxexchange = size_history; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; @@ -313,6 +313,22 @@ void FixWallGranKokkos::copy_arrays(int i, int j, int delflag) } } +/* ---------------------------------------------------------------------- + sort local atom-based arrays +------------------------------------------------------------------------- */ + +template +void FixWallGranKokkos::sort_kokkos(Kokkos::BinSort &Sorter) +{ + // always sort on the device + + k_history_one.sync_device(); + + Sorter.sort(LMPDeviceType(), k_history_one.d_view); + + k_history_one.modify_device(); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h index 4d80528fb88..c7d566ec726 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.h +++ b/src/KOKKOS/fix_wall_gran_kokkos.h @@ -47,6 +47,7 @@ class FixWallGranKokkos : public FixWallGranOld, public KokkosBase { void post_force(int) override; void grow_arrays(int) override; void copy_arrays(int, int, int) override; + void sort_kokkos(Kokkos::BinSort &Sorter) override; int pack_exchange(int, double *) override; int unpack_exchange(int, double *) override; diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 8b45c786e50..91ea6d37acb 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -93,6 +93,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) reverse_pair_comm_changed = 0; forward_fix_comm_changed = 0; reverse_comm_changed = 0; + sort_changed = 0; delete memory; memory = new MemoryKokkos(lmp); @@ -250,6 +251,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 0; + sort_classic = 0; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; } else { @@ -264,6 +266,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1; forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 1; + sort_classic = 1; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; } @@ -478,6 +481,14 @@ void KokkosLMP::accelerator(int narg, char **arg) } else error->all(FLERR,"Illegal package kokkos command"); reverse_comm_changed = 0; iarg += 2; + } else if (strcmp(arg[iarg],"sort") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); + else if (strcmp(arg[iarg+1],"no") == 0) sort_classic = 1; + else if (strcmp(arg[iarg+1],"host") == 0) sort_classic = 1; + else if (strcmp(arg[iarg+1],"device") == 0) sort_classic = 0; + else error->all(FLERR,"Illegal package kokkos command"); + sort_changed = 0; + iarg += 2; } else if ((strcmp(arg[iarg],"gpu/aware") == 0) || (strcmp(arg[iarg],"cuda/aware") == 0)) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); @@ -533,6 +544,13 @@ void KokkosLMP::accelerator(int narg, char **arg) } } + if (lmp->pair_only_flag) { + if (sort_classic == 0) { + sort_classic = 1; + sort_changed = 1; + } + } + // if "gpu/aware on" and "pair/only off", and comm flags were changed previously, change them back if (gpu_aware_flag && !lmp->pair_only_flag) { @@ -562,6 +580,13 @@ void KokkosLMP::accelerator(int narg, char **arg) } } + if (lmp->pair_only_flag) { + if (sort_changed) { + sort_classic = 0; + sort_changed = 0; + } + } + #endif // set newton flags diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 08b6730e503..adfc1fc646e 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -33,6 +33,7 @@ class KokkosLMP : protected Pointers { int reverse_pair_comm_classic; int forward_fix_comm_classic; int reverse_comm_classic; + int sort_classic; int exchange_comm_on_host; int forward_comm_on_host; int reverse_comm_on_host; @@ -42,6 +43,7 @@ class KokkosLMP : protected Pointers { int reverse_pair_comm_changed; int forward_fix_comm_changed; int reverse_comm_changed; + int sort_changed; int nthreads,ngpus; int auto_sync; int gpu_aware_flag; diff --git a/src/KOKKOS/kokkos_base.h b/src/KOKKOS/kokkos_base.h index 463b271269f..b78c88eacdc 100644 --- a/src/KOKKOS/kokkos_base.h +++ b/src/KOKKOS/kokkos_base.h @@ -17,6 +17,8 @@ #include "kokkos_type.h" +#include + namespace LAMMPS_NS { class KokkosBase { @@ -51,6 +53,11 @@ class KokkosBase { virtual void unpack_exchange_kokkos(DAT::tdual_xfloat_2d & /*k_buf*/, DAT::tdual_int_1d & /*indices*/, int /*nrecv*/, ExecutionSpace /*space*/) {} + + using KeyViewType = DAT::t_x_array; + using BinOp = Kokkos::BinOp3DReverse; + virtual void + sort_kokkos(Kokkos::BinSort & /*Sorter*/) {} }; } diff --git a/src/atom.cpp b/src/atom.cpp index 29e1bb6305a..00dc0369b0f 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -33,6 +33,7 @@ #include "tokenizer.h" #include "update.h" #include "variable.h" +#include "accelerator_kokkos.h" #include "library.h" @@ -2356,6 +2357,10 @@ void Atom::setup_sort_bins() return; } + if (userbinsize == 0 && lmp->kokkos && lmp->kokkos->ngpus > 0) { + binsize = neighbor->cutneighmax; + } + #ifdef LMP_GPU if (userbinsize == 0.0) { auto ifix = dynamic_cast(modify->get_fix_by_id("package_gpu")); diff --git a/src/atom.h b/src/atom.h index d2e80301086..810a2829ed9 100644 --- a/src/atom.h +++ b/src/atom.h @@ -312,7 +312,7 @@ class Atom : protected Pointers { void create_avec(const std::string &, int, char **, int); virtual AtomVec *new_avec(const std::string &, int, int &); - void init(); + virtual void init(); void setup(); std::string get_style(); diff --git a/src/fix.cpp b/src/fix.cpp index 1d41ad3943e..f0cc8a20ea9 100644 --- a/src/fix.cpp +++ b/src/fix.cpp @@ -109,7 +109,7 @@ Fix::Fix(LAMMPS *lmp, int /*narg*/, char **arg) : datamask_modify = ALL_MASK; kokkosable = 0; - forward_comm_device = exchange_comm_device = 0; + forward_comm_device = exchange_comm_device = sort_device = 0; copymode = 0; } diff --git a/src/fix.h b/src/fix.h index b47cfb2f4ab..334f61ff2b2 100644 --- a/src/fix.h +++ b/src/fix.h @@ -132,6 +132,7 @@ class Fix : protected Pointers { int kokkosable; // 1 if Kokkos fix int forward_comm_device; // 1 if forward comm on Device int exchange_comm_device; // 1 if exchange comm on Device + int sort_device; // 1 if sort on Device ExecutionSpace execution_space; unsigned int datamask_read, datamask_modify; From f5e55bb6d95a3a767931926f96453edfb4035a09 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 19 Apr 2023 12:56:47 -0600 Subject: [PATCH 02/11] Need to set var --- src/KOKKOS/atom_kokkos.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index 9bbbb2acc33..cb932a79edc 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -170,6 +170,7 @@ void AtomKokkos::sort() error->warning(FLERR,"Fix with atom-based arrays not compatible with Kokkos sorting on device, " "switching to classic host sorting"); } + sort_classic = true; } } From cf2e55f4acce00e12f7ee460c91786ba36277125 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 19 Apr 2023 13:16:38 -0600 Subject: [PATCH 03/11] Update docs --- doc/src/atom_modify.rst | 4 +++- doc/src/package.rst | 28 +++++++++++++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/doc/src/atom_modify.rst b/doc/src/atom_modify.rst index 9049a24fde7..f845c15b24e 100644 --- a/doc/src/atom_modify.rst +++ b/doc/src/atom_modify.rst @@ -176,7 +176,9 @@ larger than 1 million, otherwise the default is hash. By default, a "first" group is not defined. By default, sorting is enabled with a frequency of 1000 and a binsize of 0.0, which means the neighbor cutoff will be used to set the bin size. If no neighbor cutoff is -defined, sorting will be turned off. +defined, sorting will be turned off. When running with the KOKKOS +package on one or more GPUs, the default binsize for sorting is twice +the CPU default. ---------- diff --git a/doc/src/package.rst b/doc/src/package.rst index 76bf20a97fa..14d978c3c9a 100644 --- a/doc/src/package.rst +++ b/doc/src/package.rst @@ -71,7 +71,7 @@ Syntax *no_affinity* values = none *kokkos* args = keyword value ... zero or more keyword/value pairs may be appended - keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *neigh/transpose* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/pair/forward* or *comm/fix/forward* or *comm/reverse* or *comm/pair/reverse* or *gpu/aware* or *pair/only* + keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *neigh/transpose* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/pair/forward* or *comm/fix/forward* or *comm/reverse* or *comm/pair/reverse* or *sort* or *gpu/aware* or *pair/only* *neigh* value = *full* or *half* full = full neighbor list half = half neighbor list built in thread-safe manner @@ -102,6 +102,9 @@ Syntax *comm/pair/reverse* value = *no* or *device* *no* = perform communication pack/unpack in non-KOKKOS mode *device* = perform pack/unpack on device (e.g. on GPU) + *sort* value = *no* or *device* + *no* = perform atom sorting in non-KOKKOS mode + *device* = perform atom sorting on device (e.g. on GPU) *gpu/aware* = *off* or *on* *off* = do not use GPU-aware MPI *on* = use GPU-aware MPI (default) @@ -554,6 +557,17 @@ pack/unpack communicated data. When running small systems on a GPU, performing the exchange pack/unpack on the host CPU can give speedup since it reduces the number of CUDA kernel launches. +The *sort* keyword determines whether the host or device performs atom +sorting, see the :doc:`atom_modify sort ` command. The +value options for the *sort* keyword are *no* or *device* similar to the +*comm* keywords above. If a value of *host* is used it will be +automatically be changed to *no* since the *sort* keyword doesn't +support *host* mode. The value of *no* will also always be used when +running on the CPU, i.e. setting the value to *device* will have no +effect if the simulation is running on the CPU. Not all fix styles with +extra atom data support *device* mode and in that case a warning will be +given and atom sorting will run in *no* mode instead. + The *gpu/aware* keyword chooses whether GPU-aware MPI will be used. When this keyword is set to *on*, buffers in GPU memory are passed directly through MPI send/receive calls. This reduces overhead of first copying @@ -705,12 +719,12 @@ script or via the "-pk intel" :doc:`command-line switch `. For the KOKKOS package, the option defaults for GPUs are neigh = full, neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default -value, comm = device, neigh/transpose = off, gpu/aware = on. When -LAMMPS can safely detect that GPU-aware MPI is not available, the -default value of gpu/aware becomes "off". For CPUs or Xeon Phis, the -option defaults are neigh = half, neigh/qeq = half, newton = on, -binsize = 0.0, and comm = no. The option neigh/thread = on when there -are 16K atoms or less on an MPI rank, otherwise it is "off". These +value, comm = device, sort = device, neigh/transpose = off, gpu/aware = +on. When LAMMPS can safely detect that GPU-aware MPI is not available, +the default value of gpu/aware becomes "off". For CPUs or Xeon Phis, the +option defaults are neigh = half, neigh/qeq = half, newton = on, binsize += 0.0, comm = no, and sort = no. The option neigh/thread = on when +there are 16K atoms or less on an MPI rank, otherwise it is "off". These settings are made automatically by the required "-k on" :doc:`command-line switch `. You can change them by using the package kokkos command in your input script or via the :doc:`-pk From b58368dc341cd706a7e543ca8ed111416da8e9c6 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 19 Apr 2023 13:31:37 -0600 Subject: [PATCH 04/11] whitespace --- src/KOKKOS/fix_acks2_reaxff_kokkos.cpp | 10 +++++----- src/KOKKOS/fix_langevin_kokkos.cpp | 2 +- src/KOKKOS/fix_minimize_kokkos.cpp | 10 +++++----- src/KOKKOS/fix_neigh_history_kokkos.cpp | 2 +- src/KOKKOS/fix_qeq_reaxff_kokkos.cpp | 2 +- src/KOKKOS/fix_shake_kokkos.cpp | 2 +- src/KOKKOS/fix_wall_gran_kokkos.cpp | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp index d49d60d6ce9..1280a4d9a77 100644 --- a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp @@ -1919,15 +1919,15 @@ void FixACKS2ReaxFFKokkos::copy_arrays(int i, int j, int delflag) template void FixACKS2ReaxFFKokkos::sort_kokkos(Kokkos::BinSort &Sorter) -{ - // always sort on the device - +{ + // always sort on the device + k_s_hist.sync_device(); k_s_hist_X.sync_device(); - + Sorter.sort(LMPDeviceType(), k_s_hist.d_view); Sorter.sort(LMPDeviceType(), k_s_hist_X.d_view); - + k_s_hist.modify_device(); k_s_hist_X.modify_device(); } diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp index 6b8ffbb2ab6..96fa58f601f 100644 --- a/src/KOKKOS/fix_langevin_kokkos.cpp +++ b/src/KOKKOS/fix_langevin_kokkos.cpp @@ -897,7 +897,7 @@ void FixLangevinKokkos::copy_arrays(int i, int j, int /*delflag*/) template void FixLangevinKokkos::sort_kokkos(Kokkos::BinSort &Sorter) { - // always sort on the device + // always sort on the device k_franprev.sync_device(); k_lv.sync_device(); diff --git a/src/KOKKOS/fix_minimize_kokkos.cpp b/src/KOKKOS/fix_minimize_kokkos.cpp index 5f4b62d67d5..90ef0f45255 100644 --- a/src/KOKKOS/fix_minimize_kokkos.cpp +++ b/src/KOKKOS/fix_minimize_kokkos.cpp @@ -224,13 +224,13 @@ void FixMinimizeKokkos::copy_arrays(int i, int j, int /*delflag*/) ------------------------------------------------------------------------- */ void FixMinimizeKokkos::sort_kokkos(Kokkos::BinSort &Sorter) -{ - // always sort on the device - +{ + // always sort on the device + k_vectors.sync_device(); - + Sorter.sort(LMPDeviceType(), k_vectors.d_view); - + k_vectors.modify_device(); } diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp index ba189e38d93..b4a852ba702 100644 --- a/src/KOKKOS/fix_neigh_history_kokkos.cpp +++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp @@ -332,7 +332,7 @@ void FixNeighHistoryKokkos::copy_arrays(int i, int j, int /*delflag* template void FixNeighHistoryKokkos::sort_kokkos(Kokkos::BinSort &Sorter) { - // always sort on the device + // always sort on the device k_npartner.sync_device(); k_partner.sync_device(); diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index e003f4b4f9f..6517036fa09 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -1345,7 +1345,7 @@ void FixQEqReaxFFKokkos::copy_arrays(int i, int j, int /*delflag*/) template void FixQEqReaxFFKokkos::sort_kokkos(Kokkos::BinSort &Sorter) { - // always sort on the device + // always sort on the device k_s_hist.sync_device(); k_t_hist.sync_device(); diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index 465536d63f3..1ea3ed1c5ae 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -1491,7 +1491,7 @@ void FixShakeKokkos::copy_arrays(int i, int j, int delflag) template void FixShakeKokkos::sort_kokkos(Kokkos::BinSort &Sorter) { - // always sort on the device + // always sort on the device k_shake_flag.sync_device(); k_shake_atom.sync_device(); diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp index 04994776498..f870b0f240f 100644 --- a/src/KOKKOS/fix_wall_gran_kokkos.cpp +++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp @@ -320,7 +320,7 @@ void FixWallGranKokkos::copy_arrays(int i, int j, int delflag) template void FixWallGranKokkos::sort_kokkos(Kokkos::BinSort &Sorter) { - // always sort on the device + // always sort on the device k_history_one.sync_device(); From 28d31dedc8fcb50902269f2a6593dc760ec7a567 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 19 Apr 2023 13:50:08 -0600 Subject: [PATCH 05/11] Add missing BinOp struct --- src/KOKKOS/atom_kokkos.cpp | 2 +- src/KOKKOS/atom_vec_kokkos.h | 2 +- src/KOKKOS/kokkos_base.h | 2 +- src/KOKKOS/kokkos_type.h | 58 ++++++++++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index cb932a79edc..bda046c459d 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -205,7 +205,7 @@ void AtomKokkos::sort_device() max_bins[2] = nbinz; using KeyViewType = DAT::t_x_array; - using BinOp = Kokkos::BinOp3DReverse; + using BinOp = BinOp3DLAMMPS; BinOp binner(max_bins, bboxlo, bboxhi); Kokkos::BinSort Sorter(d_x, 0, nlocal, binner, false); Sorter.create_permute_vector(LMPDeviceType()); diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index ef6a3fcbc82..310f1f4d48e 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -41,7 +41,7 @@ class AtomVecKokkos : virtual public AtomVec { ~AtomVecKokkos() override; using KeyViewType = DAT::t_x_array; - using BinOp = Kokkos::BinOp3DReverse; + using BinOp = BinOp3DLAMMPS; virtual void sort_kokkos(Kokkos::BinSort &Sorter) = 0; diff --git a/src/KOKKOS/kokkos_base.h b/src/KOKKOS/kokkos_base.h index b78c88eacdc..7d9ecb5d803 100644 --- a/src/KOKKOS/kokkos_base.h +++ b/src/KOKKOS/kokkos_base.h @@ -55,7 +55,7 @@ class KokkosBase { ExecutionSpace /*space*/) {} using KeyViewType = DAT::t_x_array; - using BinOp = Kokkos::BinOp3DReverse; + using BinOp = BinOp3DLAMMPS; virtual void sort_kokkos(Kokkos::BinSort & /*Sorter*/) {} }; diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 69ebc91fecc..555c7fa9aef 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -473,6 +473,64 @@ struct alignas(2*sizeof(F_FLOAT)) s_FLOAT2 { }; typedef struct s_FLOAT2 F_FLOAT2; +template +struct BinOp3DLAMMPS { + int max_bins_[3] = {}; + double mul_[3] = {}; + double min_[3] = {}; + + BinOp3DLAMMPS() = default; + + BinOp3DLAMMPS(int max_bins__[], typename KeyViewType::const_value_type min[], + typename KeyViewType::const_value_type max[]) { + max_bins_[0] = max_bins__[0]; + max_bins_[1] = max_bins__[1]; + max_bins_[2] = max_bins__[2]; + mul_[0] = static_cast(max_bins__[0]) / + (static_cast(max[0]) - static_cast(min[0])); + mul_[1] = static_cast(max_bins__[1]) / + (static_cast(max[1]) - static_cast(min[1])); + mul_[2] = static_cast(max_bins__[2]) / + (static_cast(max[2]) - static_cast(min[2])); + min_[0] = static_cast(min[0]); + min_[1] = static_cast(min[1]); + min_[2] = static_cast(min[2]); + } + + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + int ix = static_cast ((keys(i, 0) - min_[0]) * mul_[0]); + int iy = static_cast ((keys(i, 1) - min_[1]) * mul_[1]); + int iz = static_cast ((keys(i, 2) - min_[2]) * mul_[2]); + ix = MAX(ix,0); + iy = MAX(iy,0); + iz = MAX(iz,0); + ix = MIN(ix,max_bins_[0]-1); + iy = MIN(iy,max_bins_[1]-1); + iz = MIN(iz,max_bins_[2]-1); + const int ibin = iz*max_bins_[1]*max_bins_[0] + iy*max_bins_[0] + ix; + return ibin; + } + + KOKKOS_INLINE_FUNCTION + int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; } + + template + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + if (keys(i1, 2) > keys(i2, 2)) + return true; + else if (keys(i1, 2) == keys(i2, 2)) { + if (keys(i1, 1) > keys(i2, 1)) + return true; + else if (keys(i1, 1) == keys(i2, 1)) { + if (keys(i1, 0) > keys(i2, 0)) return true; + } + } + return false; + } +}; + #ifndef PREC_POS #define PREC_POS PRECISION #endif From 313b3a69352f2786f2faa94ace4da39a8a3d0a94 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 19 Apr 2023 14:09:22 -0600 Subject: [PATCH 06/11] Fix typo --- src/KOKKOS/atom_vec_spin_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp index ac1c5a9294a..f5b86973529 100644 --- a/src/KOKKOS/atom_vec_spin_kokkos.cpp +++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp @@ -143,7 +143,7 @@ void AtomVecSpinKokkos::sort_kokkos(Kokkos::BinSort &Sorter) Sorter.sort(LMPDeviceType(), d_image); Sorter.sort(LMPDeviceType(), d_x); Sorter.sort(LMPDeviceType(), d_v); - Sorter.sort(LMPDeviceType(), d_sp; + Sorter.sort(LMPDeviceType(), d_sp); atomKK->modified(Device, TAG_MASK|TYPE_MASK|MASK_MASK|IMAGE_MASK|X_MASK|V_MASK|SP_MASK); } From b511681c2b02e949ad3d36c6d20ac56868e2d043 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 20 Apr 2023 14:07:57 -0600 Subject: [PATCH 07/11] Revert binsize change --- src/atom.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/atom.cpp b/src/atom.cpp index 00dc0369b0f..29e1bb6305a 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -33,7 +33,6 @@ #include "tokenizer.h" #include "update.h" #include "variable.h" -#include "accelerator_kokkos.h" #include "library.h" @@ -2357,10 +2356,6 @@ void Atom::setup_sort_bins() return; } - if (userbinsize == 0 && lmp->kokkos && lmp->kokkos->ngpus > 0) { - binsize = neighbor->cutneighmax; - } - #ifdef LMP_GPU if (userbinsize == 0.0) { auto ifix = dynamic_cast(modify->get_fix_by_id("package_gpu")); From 7c7e62609760bbf133dad16e5c50aacf80761aa4 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 20 Apr 2023 14:34:00 -0600 Subject: [PATCH 08/11] Revert docs --- doc/src/atom_modify.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/src/atom_modify.rst b/doc/src/atom_modify.rst index f845c15b24e..9049a24fde7 100644 --- a/doc/src/atom_modify.rst +++ b/doc/src/atom_modify.rst @@ -176,9 +176,7 @@ larger than 1 million, otherwise the default is hash. By default, a "first" group is not defined. By default, sorting is enabled with a frequency of 1000 and a binsize of 0.0, which means the neighbor cutoff will be used to set the bin size. If no neighbor cutoff is -defined, sorting will be turned off. When running with the KOKKOS -package on one or more GPUs, the default binsize for sorting is twice -the CPU default. +defined, sorting will be turned off. ---------- From 7791ab728fe40ba5c329c1973116893bb0bb3e05 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 27 Apr 2023 11:25:34 -0600 Subject: [PATCH 09/11] Fix small issue --- src/KOKKOS/atom_kokkos.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index 12db89d117f..f9fa31bf2ab 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -223,9 +223,10 @@ void AtomKokkos::sort_device() kkbase->sort_kokkos(Sorter); } + } // convert back to lamda coords - + if (domain->triclinic) domain->x2lamda(nlocal); } From 50adf2b340de20dfd94c61235a112971e44c4548 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 27 Apr 2023 15:17:54 -0600 Subject: [PATCH 10/11] Add a couple notes to the docs --- doc/src/Speed_kokkos.rst | 10 ++++++++++ doc/src/atom_modify.rst | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/doc/src/Speed_kokkos.rst b/doc/src/Speed_kokkos.rst index 569a24f1c28..dd417d7c798 100644 --- a/doc/src/Speed_kokkos.rst +++ b/doc/src/Speed_kokkos.rst @@ -285,6 +285,16 @@ one or more nodes, each with two GPUs: settings. Experimenting with its options can provide a speed-up for specific calculations. For example: +.. note:: + + The default binsize for :doc:`atom sorting ` on GPUs + is equal to the default CPU neighbor binsize (i.e. 2x smaller than the + default neighbor binsize on GPUs). When running simple pair-wise + potentials like Lennard Jones on GPUs, using a 2x larger binsize for + atom sorting (equal to the default binsize for building the neighbor + list on GPUs) and a more frequent sorting than default (e.g. sorting + every 100 time steps instead of 1000) may improve performance. + .. code-block:: bash mpirun -np 2 lmp_kokkos_cuda_openmpi -k on g 2 -sf kk -pk kokkos newton on neigh half binsize 2.8 -in in.lj # Newton on, half neighbor list, set binsize = neighbor ghost cutoff diff --git a/doc/src/atom_modify.rst b/doc/src/atom_modify.rst index 9049a24fde7..1e5a3d49ff5 100644 --- a/doc/src/atom_modify.rst +++ b/doc/src/atom_modify.rst @@ -153,6 +153,13 @@ cache locality will be undermined. order of atoms in a :doc:`dump ` file will also typically change if sorting is enabled. +.. note:: + + When running simple pair-wise potentials like Lennard Jones on GPUs + with the KOKKOS package, using a larger binsize (e.g. 2x larger than + default) and a more frequent reordering than default (e.g. every 100 + time steps) may improve performance. + Restrictions """""""""""" From b17f9ac10e6efecbc4110a0aa5d8896046932c7f Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 27 Apr 2023 15:21:30 -0600 Subject: [PATCH 11/11] Small tweak to docs --- doc/src/Speed_kokkos.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/Speed_kokkos.rst b/doc/src/Speed_kokkos.rst index dd417d7c798..8161e69a1c2 100644 --- a/doc/src/Speed_kokkos.rst +++ b/doc/src/Speed_kokkos.rst @@ -289,11 +289,11 @@ one or more nodes, each with two GPUs: The default binsize for :doc:`atom sorting ` on GPUs is equal to the default CPU neighbor binsize (i.e. 2x smaller than the - default neighbor binsize on GPUs). When running simple pair-wise + default GPU neighbor binsize). When running simple pair-wise potentials like Lennard Jones on GPUs, using a 2x larger binsize for - atom sorting (equal to the default binsize for building the neighbor - list on GPUs) and a more frequent sorting than default (e.g. sorting - every 100 time steps instead of 1000) may improve performance. + atom sorting (equal to the default GPU neighbor binsize) and a more + frequent sorting than default (e.g. sorting every 100 time steps + instead of 1000) may improve performance. .. code-block:: bash