Skip to content

Commit

Permalink
Merge pull request #61 from mpbl/devel
Browse files Browse the repository at this point in the history
Merge gpu best hit
  • Loading branch information
dan131riley committed Oct 14, 2016
2 parents da5624c + 53cf9c0 commit 393b7ea
Show file tree
Hide file tree
Showing 56 changed files with 4,035 additions and 699 deletions.
12 changes: 12 additions & 0 deletions BinInfoUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,37 @@ typedef std::pair<int, int> BinInfo;
typedef std::vector<std::vector<BinInfo>> BinInfoLayerMap;
typedef std::vector<BinInfoLayerMap> BinInfoMap;

#ifdef __CUDACC__
__host__ __device__
#endif
// Fold phi downward by whole turns until it drops below +PI.
inline float downPhi(float phi)
{
  for ( ; phi >= Config::PI; phi -= Config::TwoPI)
  {
    // keep subtracting full 2*PI turns
  }
  return phi;
}

#ifdef __CUDACC__
__host__ __device__
#endif
// Fold phi upward by whole turns until it rises above -PI.
inline float upPhi(float phi)
{
  for ( ; phi <= -Config::PI; phi += Config::TwoPI)
  {
    // keep adding full 2*PI turns
  }
  return phi;
}

#ifdef __CUDACC__
__host__ __device__
#endif
// Normalize phi by folding out whole turns when |phi| >= PI.
// std::fmod is deliberately avoided: for |phi| beyond the boundary it
// would return a value +pi out of phase.
inline float normalizedPhi(float phi)
{
  if (std::abs(phi) < Config::PI) return phi;
  return (phi > 0) ? downPhi(phi) : upPhi(phi);
}

#ifdef __CUDACC__
__host__ __device__
#endif
inline int getPhiPartition(float phi)
{
//assume phi is between -PI and PI
Expand Down
4 changes: 2 additions & 2 deletions Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ namespace Config
// Config for Hit and BinInfoUtils
constexpr int nPhiPart = 1260;
constexpr float fPhiFactor = nPhiPart / TwoPI;
constexpr int nEtaPart = 11;
constexpr int nEtaPart = 11; // 1 is better for GPU best_hit
constexpr int nEtaBin = 2 * nEtaPart - 1;

constexpr float fEtaFull = 2 * Config::fEtaDet;
Expand Down Expand Up @@ -204,7 +204,7 @@ namespace Config
#ifdef __MIC__
#define MPT_SIZE 16
#elif defined USE_CUDA
#define MPT_SIZE 10000
#define MPT_SIZE 8
#else
#define MPT_SIZE 8
#endif
Expand Down
10 changes: 10 additions & 0 deletions Hit.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,17 @@ inline float getInvRad2(float x, float y){
return 1.0f/(x*x + y*y);
}

#ifdef __CUDACC__
__host__ __device__
#endif
// Azimuthal angle of the point (x, y), via std::atan2(y, x).
inline float getPhi(float x, float y)
{
  const float azimuth = std::atan2(y, x);
  return azimuth;
}

#ifdef __CUDACC__
__host__ __device__
#endif
// Polar angle computed as atan2(r, z).
inline float getTheta(float r, float z){
  const float theta = std::atan2(r, z);
  return theta;
}
Expand Down Expand Up @@ -217,6 +223,10 @@ class Hit

const float* posArray() const {return state_.pos_.Array();}
const float* errArray() const {return state_.err_.Array();}
#if __CUDACC__
__device__ float* posArrayCU();
__device__ float* errArrayCU();
#endif

// Non-const versions needed for CopyOut of Matriplex.
SVector3& parameters_nc() {return state_.pos_;}
Expand Down
21 changes: 16 additions & 5 deletions Makefile.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# OSXGCC5 := yes
# To keep Dan's version working
# TBB_PREFIX := tbb
# TBB_PREFIX := ${TBBROOT}

# 1. Use ROOT or not (never used on MIC)
# Comment out to disable root ("yes" is not relevant)
Expand All @@ -34,9 +35,11 @@ endif

# 2.1 Use nvcc to compile cuda code
# CUDA compiler
NV := nvcc
CUBROOT=/home/ml15/tools/cub
NV := nvcc -prec-sqrt=true -I${CUBROOT}
#-g -G -lineinfo
# Comment out to compile for CPU
# USE_CUDA := -DUSE_CUDA
#USE_CUDA := yes

# 3. Optimization
# -O3 implies vectorization and simd (but not AVX)
Expand Down Expand Up @@ -113,12 +116,19 @@ INWARD_FIT := -DINWARDFIT
# Derived settings
################################################################

CPPFLAGS := -I. ${USE_MATRIPLEX} ${USE_INTRINSICS} ${USE_CUDA} -std=c++11
CPPFLAGS := -I. ${USE_MATRIPLEX} ${USE_INTRINSICS} -std=c++11
CXXFLAGS := ${OPT} ${OSX_CXXFLAGS}

LDFLAGS_HOST :=
LDFLAGS_HOST :=
LDFLAGS_MIC :=

ifdef USE_CUDA
CPPFLAGS += -DUSE_CUDA -I/nfs/opt/cuda/include
#CPPFLAGS += -I/home/ml15/tools/cub
CPPFLAGS += -I${CUBROOT}
LDFLAGS_HOST += -L${CUDALIBDIR}
endif

CPPFLAGS += ${USE_STATE_VALIDITY_CHECKS} ${USE_SCATTERING} ${USE_LINEAR_INTERPOLATION} ${ENDTOEND} ${USE_ETA_SEGMENTATION} ${INWARD_FIT} ${GEN_FLAT_ETA}

ifdef USE_VTUNE_NOTIFY
Expand All @@ -130,7 +140,8 @@ endif
endif

ifneq ($(CXX),icc)
#CXXFLAGS += -Wall -Wno-unknown-pragmas
CPPFLAGS += -I/opt/rh/python27/root/usr/include
LDFLAGS_HOST += -L/opt/rh/python27/root/usr/lib64
endif

ifeq ($(CXX),icc)
Expand Down
3 changes: 3 additions & 0 deletions Math/MatrixRepresentationsStatic.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,9 @@ namespace Math {
inline T* Array() { return fArray; }

inline const T* Array() const { return fArray; }
#ifdef __CUDACC__
T* ArrayCU();
#endif

/**
assignment : only symmetric to symmetric allowed
Expand Down
3 changes: 3 additions & 0 deletions Math/SMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,9 @@ class SMatrix {
const T* Array() const;
/// return pointer to internal array
T* Array();
#ifdef __CUDACC__
T* ArrayCU();
#endif

/** @name --- STL-like interface ---
The iterators access the matrix element in the order how they are
Expand Down
3 changes: 3 additions & 0 deletions Math/SVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ class SVector {
const T* Array() const;
/// return non-const pointer to internal array
T* Array();
#ifdef __CUDACC__
T* ArrayCU();
#endif

/** @name --- STL-like interface --- */

Expand Down
75 changes: 75 additions & 0 deletions Matriplex/GenMul.pm
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,75 @@ sub multiply_standard

# ----------------------------------------------------------------------

# Build the C-source string for one product term x*y of a matrix-multiply
# sum, using flat "name[idx*nameN + namen]" addressing (presumably the
# GPU-side Matriplex layout -- see multiply_gpu).
# Returns undef when either factor's pattern is '0' (the term vanishes),
# and simplifies away factors whose pattern is the constant '1'.
sub generate_addend_gpu
{
my ($S, $x, $y) = @_;

# A zero factor kills the whole addend; two unit factors reduce to "1".
return undef if $S->{$x}{pat} eq '0' or $S->{$y}{pat} eq '0';
return "1" if $S->{$x}{pat} eq '1' and $S->{$y}{pat} eq '1';

# Emit each operand as "name[ idx*nameN + namen ]" (linearized index).
my $xstr = sprintf "$S->{$x}{mat}{name}\[%2d*$S->{$x}{mat}{name}N+$S->{$x}{mat}{name}n]", $S->{$x}{idx};
my $ystr = sprintf "$S->{$y}{mat}{name}\[%2d*$S->{$y}{mat}{name}N+$S->{$y}{mat}{name}n]", $S->{$y}{idx};

# A unit factor drops out of the product.
return $xstr if $S->{$y}{pat} eq '1';
return $ystr if $S->{$x}{pat} eq '1';

return "${xstr}*${ystr}";
}

sub multiply_gpu
{
# GPU variant of the standard multiplication - outputs unrolled C code,
# one line per target matrix element, with operands addressed through
# generate_addend_gpu's flat "name[idx*nameN + namen]" indexing.
# Arguments: a, b, c -- all GenMul::MBase with right dimensions.
# Does: c = a * b

check_multiply_arguments(@_);

my ($S, $a, $b, $c) = @_;

# For a symmetric result only the lower triangle is emitted.
my $is_c_symmetric = $c->isa("GenMul::MatrixSym");

# With no_size_check matrices do not have to be compatible.
my $k_max = $a->{N} <= $b->{M} ? $a->{N} : $b->{M};

for (my $i = 0; $i < $c->{M}; ++$i)
{
my $j_max = $is_c_symmetric ? $i + 1 : $c->{N};

for (my $j = 0; $j < $j_max; ++$j)
{
# Linear index of c(i,j) in c's storage.
my $x = $c->idx($i, $j);

printf "$S->{prefix}$c->{name}\[%2d*$c->{name}N+$c->{name}n\] = ", $x;

my @sum;

for (my $k = 0; $k < $k_max; ++$k)
{
# Prepares per-factor idx/pat entries for 'a' and 'b' (defined elsewhere
# in this module) before the addend for this k is generated.
$S->generate_indices_and_patterns_for_multiplication($i, $j, $k);

my $addend = $S->generate_addend_gpu('a', 'b');

# Addends that vanished (pattern '0') come back undef and are skipped.
push @sum, $addend if defined $addend;
}
if (@sum)
{
print join(" + ", @sum), ";";
}
else
{
# Every term vanished: the target element is identically zero.
print "0;"
}
print "\n";
}
}

$S->delete_temporaries();
}

# ----------------------------------------------------------------------

sub load_if_needed
{
my ($S, $x) = @_;
Expand Down Expand Up @@ -709,6 +778,7 @@ sub dump_multiply_std_and_intrinsic
}

print <<"FNORD";
#ifndef __CUDACC__
#ifdef MPLEX_INTRINSICS
for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
Expand All @@ -732,6 +802,11 @@ FNORD
print <<"FNORD";
}
#endif
#else // __CUDACC__
FNORD
$S->multiply_gpu($a, $b, $c);
print <<"FNORD";
#endif // __CUDACC__
FNORD


Expand Down
3 changes: 3 additions & 0 deletions Matriplex/Matriplex.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ class Matriplex
for (int j = 0; j < N; ++j)
{
fArray[i*N + j] = * (const T*) (arr + i*sizeof(T) + vi[j]);
//if(j==2) {
//printf("cpu -- %d : %d, %f\n", i, vi[j], fArray[i*N+j]);
//}
}
}
}
Expand Down
8 changes: 7 additions & 1 deletion Matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,17 @@ inline double dtime()
return( tseconds );
}

#ifdef __CUDACC__
__host__ __device__
#endif
// Hypotenuse: sqrt(x^2 + y^2).
inline float hipo(float x, float y)
{
  const float sum2 = x * x + y * y;
  return std::sqrt(sum2);
}

#ifdef __CUDACC__
__host__ __device__
#endif
inline void sincos4(const float x, float& sin, float& cos)
{
// Had this writen with explicit division by factorial.
Expand All @@ -74,7 +80,7 @@ inline void sincos4(const float x, float& sin, float& cos)
#ifdef __INTEL_COMPILER
#define ASSUME_ALIGNED(a, b) __assume_aligned(a, b)
#else
#define ASSUME_ALIGNED(a, b) __builtin_assume_aligned(a, b)
#define ASSUME_ALIGNED(a, b) a = static_cast<decltype(a)>(__builtin_assume_aligned(a, b))
#endif

#include "Matriplex/MatriplexSym.h"
Expand Down
43 changes: 43 additions & 0 deletions Track.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,11 @@ class Track

const float* posArray() const {return state_.parameters.Array();}
const float* errArray() const {return state_.errors.Array();}
//#ifdef USE_CUDA
#if __CUDACC__
__device__ float* posArrayCU();
__device__ float* errArrayCU();
#endif

// Non-const versions needed for CopyOut of Matriplex.
SVector6& parameters_nc() {return state_.parameters;}
Expand All @@ -149,8 +154,17 @@ class Track
SVector3 position() const {return SVector3(state_.parameters[0],state_.parameters[1],state_.parameters[2]);}
SVector3 momentum() const {return SVector3(state_.parameters[3],state_.parameters[4],state_.parameters[5]);}

#ifdef __CUDACC__
__host__ __device__
#endif
// Charge stored in the track state.  Guard changed from "#if" to "#ifdef":
// it must not depend on __CUDACC__ expanding to a value, and this matches
// every other CUDA guard in the codebase.
int charge() const {return state_.charge;}
#ifdef __CUDACC__
__host__ __device__
#endif
// Accumulated chi2 of the good hits attached via addHitIdx.
// ("#ifdef" for consistency with the other CUDA guards.)
float chi2() const {return chi2_;}
#ifdef __CUDACC__
__host__ __device__
#endif
// Track label set via setLabel.  ("#ifdef" for consistency with the
// other CUDA guards.)
int label() const {return label_;}

float x() const { return state_.parameters[0];}
Expand Down Expand Up @@ -200,12 +214,18 @@ class Track
}
}

#ifdef __CUDACC__
__host__ __device__
#endif
// Append hitIdx to the track's hit-index array.  A non-negative index
// counts as a good hit: it bumps nGoodHitIdx_ and accumulates its chi2
// into the track total.  ("#ifdef" for consistency with the other guards.)
void addHitIdx(int hitIdx,float chi2)
{
hitIdxArr_[++hitIdxPos_] = hitIdx;
if (hitIdx >= 0) { ++nGoodHitIdx_; chi2_+=chi2; }
}

#ifdef __CUDACC__
__host__ __device__
#endif
// Hit index stored at position posHitIdx (no bounds checking).
// ("#ifdef" for consistency with the other CUDA guards.)
int getHitIdx(int posHitIdx) const
{
return hitIdxArr_[posHitIdx];
}
Expand All @@ -222,6 +242,9 @@ class Track
}
}

#ifdef __CUDACC__
__host__ __device__
#endif
// Overwrite the hit index stored at position posHitIdx (no bounds checking).
// ("#ifdef" for consistency with the other CUDA guards.)
void setHitIdx(int posHitIdx, int newIdx) {
hitIdxArr_[posHitIdx] = newIdx;
}
Expand All @@ -233,6 +256,16 @@ class Track
}
}

#ifdef __CUDACC__
__host__ __device__
#endif
// Directly set the good-hit counter (used when the count is produced
// externally, e.g. on the GPU).  ("#ifdef" for guard consistency.)
void setNGoodHitIdx(int nHits) {
nGoodHitIdx_ = nHits;
}

#if __CUDACC__
__host__ __device__
#endif
void resetHits()
{
hitIdxPos_ = -1;
Expand All @@ -250,8 +283,18 @@ class Track
}
return layers;
}

#ifdef __CUDACC__
__host__ __device__
#endif
// Set the charge in the track state.  ("#ifdef" for guard consistency.)
void setCharge(int chg) {state_.charge=chg;}
#ifdef __CUDACC__
__host__ __device__
#endif
// Set the accumulated track chi2.  ("#ifdef" for guard consistency.)
void setChi2(float chi2) {chi2_=chi2;}
#ifdef __CUDACC__
__host__ __device__
#endif
// Set the track label.  ("#ifdef" for guard consistency.)
void setLabel(int lbl) {label_=lbl;}

void setState(const TrackState& newState) {state_=newState;}
Expand Down

0 comments on commit 393b7ea

Please sign in to comment.