Skip to content

Commit

Permalink
Factor architecture dependent code out of loop.cu
Browse files Browse the repository at this point in the history
Summary:
[libomptarget] Factor architecture dependent code out of loop.cu

Related to the patch series starting D64217. Added subscribers to said series as reviewers. This effort is smaller in scope.

This patch factors out just enough architecture-dependent code from loop.cu to allow the same source to be used with amdgcn, given a different target_impl.h. Testing: the same bitcode (modulo variable names) is generated for libomptarget before and after the refactor, for both nvptx and the out-of-tree amdgcn.

Reviewers: jdoerfert, ABataev, bollu, jfb, tra, grokos, Hahnfeld, guansong, xtian, gregrodgers, ronlieb, hfinkel, gtbercea, guraypp, arpith-jacob

Reviewed By: jdoerfert, ABataev

Subscribers: dexonsmith, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D65836

llvm-svn: 368751
  • Loading branch information
JonChesterfield committed Aug 13, 2019
1 parent 8a503e4 commit ed3324f
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 9 deletions.
17 changes: 8 additions & 9 deletions openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
Expand Up @@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//

#include "omptarget-nvptx.h"
#include "target_impl.h"

////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -381,20 +382,18 @@ public:

// Shuffle a 64-bit value across lanes by splitting it into two 32-bit halves,
// passing each half through __SHFL_SYNC with the same `active` mask and
// `leader` lane, and recombining the results.
// NOTE(review): presumably every participating lane ends up with the value
// held by lane `leader` — confirm against the definition of __SHFL_SYNC.
// NOTE(review): this listing appears to interleave removed and added diff
// lines without +/- markers. The raw inline-asm statements and `return val;`
// look like the pre-change code, and the __kmpc_impl_* calls their
// replacements — confirm against the committed loop.cu.
INLINE static int64_t Shuffle(unsigned active, int64_t val, int leader) {
int lo, hi;
// Inline PTX split of val into lo/hi (same statement as the body of
// __kmpc_impl_unpack in target_impl.h).
asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
// Helper form of the same split.
__kmpc_impl_unpack(val, lo, hi);
// Each 32-bit half is exchanged with its own shuffle.
hi = __SHFL_SYNC(active, hi, leader);
lo = __SHFL_SYNC(active, lo, leader);
// Inline PTX recombination of lo/hi into val (same statement as the body of
// __kmpc_impl_pack in target_impl.h).
asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
return val;
// Helper form of the same recombination; as listed, it follows `return val;`.
return __kmpc_impl_pack(lo, hi);
}

INLINE static uint64_t NextIter() {
unsigned int active = __ACTIVEMASK();
int leader = __ffs(active) - 1;
int change = __popc(active);
unsigned lane_mask_lt;
asm("mov.u32 %0, %%lanemask_lt;" : "=r"(lane_mask_lt));
unsigned int rank = __popc(active & lane_mask_lt);
__kmpc_impl_lanemask_t active = __ACTIVEMASK();
int leader = __kmpc_impl_ffs(active) - 1;
int change = __kmpc_impl_popc(active);
__kmpc_impl_lanemask_t lane_mask_lt = __kmpc_impl_lanemask_lt();
unsigned int rank = __kmpc_impl_popc(active & lane_mask_lt);
uint64_t warp_res;
if (rank == 0) {
warp_res = atomicAdd(
Expand Down
41 changes: 41 additions & 0 deletions openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -0,0 +1,41 @@
//===------------ target_impl.h - NVPTX OpenMP GPU options ------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Definitions of target specific functions
//
//===----------------------------------------------------------------------===//
#ifndef _TARGET_IMPL_H_
#define _TARGET_IMPL_H_

#include <stdint.h>

#include "option.h"

// Split the 64-bit value `val` into two 32-bit halves, returned through the
// reference parameters `lo` and `hi`.
//
// Uses the PTX vector-destination form of mov.b64: the first listed register
// (lo) receives the low 32 bits of the source and the second (hi) receives the
// high 32 bits.
INLINE void __kmpc_impl_unpack(int64_t val, int32_t &lo, int32_t &hi) {
  asm volatile("mov.b64 {%0,%1}, %2;"
               : "=r"(lo), "=r"(hi)
               : "l"(val));
}

// Combine two 32-bit halves into one 64-bit value; the inverse of
// __kmpc_impl_unpack.
//
// Uses the PTX vector-source form of mov.b64: the first listed register (lo)
// supplies the low 32 bits of the result and the second (hi) supplies the
// high 32 bits.
INLINE int64_t __kmpc_impl_pack(int32_t lo, int32_t hi) {
  int64_t packed;
  asm volatile("mov.b64 %0, {%1,%2};"
               : "=l"(packed)
               : "r"(lo), "r"(hi));
  return packed;
}

// Type used for lane masks on this target (32 bits wide here). loop.cu stores
// the result of __ACTIVEMASK() and of __kmpc_impl_lanemask_lt() in it.
typedef uint32_t __kmpc_impl_lanemask_t;

// Read the PTX special register %lanemask_lt and return it as a lane mask.
// Per the PTX ISA, that register has a bit set for every lane position lower
// than the calling thread's own lane.
INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
  __kmpc_impl_lanemask_t lower_lanes;
  asm("mov.u32 %0, %%lanemask_lt;"
      : "=r"(lower_lanes));
  return lower_lanes;
}

// Target wrapper around the CUDA __ffs intrinsic ("find first set").
// Per the CUDA Math API, the result is the 1-based position of the least
// significant set bit of x, or 0 when x has no bits set; callers that need a
// 0-based lane index subtract 1.
INLINE int __kmpc_impl_ffs(uint32_t x) {
  return __ffs(x);
}

// Target wrapper around the CUDA __popc intrinsic (population count).
// Per the CUDA Math API, the result is the number of bits set in x.
INLINE int __kmpc_impl_popc(uint32_t x) {
  return __popc(x);
}

#endif

0 comments on commit ed3324f

Please sign in to comment.