Skip to content
This repository has been archived by the owner on Oct 25, 2019. It is now read-only.

Commit

Permalink
Loop unroll
Browse files Browse the repository at this point in the history
  • Loading branch information
tdegeus committed Aug 31, 2017
1 parent 959feb3 commit fd9e545
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 73 deletions.
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ This header-only module provides C++ classes and several accompanying methods to

Bug reports or feature requests can be filed on `GitHub <http://github.com/tdegeus/cppmat>`_.

| (c - MIT) T.W.J. de Geus (Tom) | tom@geus.me | `www.geus.me <http://www.geus.me>`_ | `github.com/tdegeus <http://github.com/tdegeus>`_
(c - MIT) T.W.J. de Geus (Tom) | tom@geus.me | `www.geus.me <http://www.geus.me>`_ | `github.com/tdegeus <http://github.com/tdegeus>`_

Contents
========
Expand Down
192 changes: 120 additions & 72 deletions include/cppmat/tensor3.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,11 @@ template<class X> class tensor3_4
{
X C = static_cast<X>(0);

for ( size_t i = 0 ; i < 81 ; ++i )
C += std::abs(m_data[i]);
for ( size_t i = 0 ; i < 81 ; i += 3 ) {
C += std::abs(m_data[i ]);
C += std::abs(m_data[i+1]);
C += std::abs(m_data[i+2]);
}

return C;
}
Expand Down Expand Up @@ -156,32 +159,44 @@ template<class X> class tensor3_4

// Component-wise in-place multiplication by another tensor:
// m_data[i] *= B[i] for every i in [0, 81).
//
// Every component is updated exactly once; running two successive passes
// over the data would scale each entry by B[i] squared.
//
// B      : right-hand side, read through B[i]
// return : reference to this object, so that expressions can be chained
tensor3_4<X>& operator*= (const tensor3_4<X> &B)
{
  // 81 is a multiple of 3: a stride-3 loop touches each index exactly once
  for ( size_t i = 0 ; i < 81 ; i += 3 ) {
    m_data[i  ] *= B[i  ];
    m_data[i+1] *= B[i+1];
    m_data[i+2] *= B[i+2];
  }

  return *this;
}

// Component-wise in-place division by another tensor:
// m_data[i] /= B[i] for every i in [0, 81).
//
// Every component is divided exactly once; a second pass over the data
// would divide each entry by B[i] a second time. No check is made for
// zero-valued components of B.
//
// B      : right-hand side (divisor), read through B[i]
// return : reference to this object, so that expressions can be chained
tensor3_4<X>& operator/= (const tensor3_4<X> &B)
{
  // 81 is a multiple of 3: a stride-3 loop touches each index exactly once
  for ( size_t i = 0 ; i < 81 ; i += 3 ) {
    m_data[i  ] /= B[i  ];
    m_data[i+1] /= B[i+1];
    m_data[i+2] /= B[i+2];
  }

  return *this;
}

// Component-wise in-place addition of another tensor:
// m_data[i] += B[i] for every i in [0, 81).
//
// Every component is updated exactly once; a second pass over the data
// would add B[i] twice.
//
// B      : right-hand side, read through B[i]
// return : reference to this object, so that expressions can be chained
tensor3_4<X>& operator+= (const tensor3_4<X> &B)
{
  // 81 is a multiple of 3: a stride-3 loop touches each index exactly once
  for ( size_t i = 0 ; i < 81 ; i += 3 ) {
    m_data[i  ] += B[i  ];
    m_data[i+1] += B[i+1];
    m_data[i+2] += B[i+2];
  }

  return *this;
}

// Component-wise in-place subtraction of another tensor:
// m_data[i] -= B[i] for every i in [0, 81).
//
// Every component is updated exactly once; a second pass over the data
// would subtract B[i] twice.
//
// B      : right-hand side, read through B[i]
// return : reference to this object, so that expressions can be chained
tensor3_4<X>& operator-= (const tensor3_4<X> &B)
{
  // 81 is a multiple of 3: a stride-3 loop touches each index exactly once
  for ( size_t i = 0 ; i < 81 ; i += 3 ) {
    m_data[i  ] -= B[i  ];
    m_data[i+1] -= B[i+1];
    m_data[i+2] -= B[i+2];
  }

  return *this;
}
Expand All @@ -191,32 +206,44 @@ template<class X> class tensor3_4

// In-place multiplication of every component by a scalar:
// m_data[i] *= B for every i in [0, 81).
//
// Every component is scaled exactly once; a second pass over the data
// would scale each entry by B squared.
//
// B      : scalar factor
// return : reference to this object, so that expressions can be chained
tensor3_4<X>& operator*= (const X &B)
{
  // 81 is a multiple of 3: a stride-3 loop touches each index exactly once
  for ( size_t i = 0 ; i < 81 ; i += 3 ) {
    m_data[i  ] *= B;
    m_data[i+1] *= B;
    m_data[i+2] *= B;
  }

  return *this;
}

// In-place division of every component by a scalar:
// m_data[i] /= B for every i in [0, 81).
//
// Every component is divided exactly once; a second pass over the data
// would divide each entry by B a second time. No check is made for B == 0.
//
// B      : scalar divisor
// return : reference to this object, so that expressions can be chained
tensor3_4<X>& operator/= (const X &B)
{
  // 81 is a multiple of 3: a stride-3 loop touches each index exactly once
  for ( size_t i = 0 ; i < 81 ; i += 3 ) {
    m_data[i  ] /= B;
    m_data[i+1] /= B;
    m_data[i+2] /= B;
  }

  return *this;
}

// In-place addition of a scalar to every component:
// m_data[i] += B for every i in [0, 81).
//
// Every component is updated exactly once; a second pass over the data
// would add B twice.
//
// B      : scalar offset
// return : reference to this object, so that expressions can be chained
tensor3_4<X>& operator+= (const X &B)
{
  // 81 is a multiple of 3: a stride-3 loop touches each index exactly once
  for ( size_t i = 0 ; i < 81 ; i += 3 ) {
    m_data[i  ] += B;
    m_data[i+1] += B;
    m_data[i+2] += B;
  }

  return *this;
}

// In-place subtraction of a scalar from every component:
// m_data[i] -= B for every i in [0, 81).
//
// Every component is updated exactly once; a second pass over the data
// would subtract B twice.
//
// B      : scalar offset
// return : reference to this object, so that expressions can be chained
tensor3_4<X>& operator-= (const X &B)
{
  // 81 is a multiple of 3: a stride-3 loop touches each index exactly once
  for ( size_t i = 0 ; i < 81 ; i += 3 ) {
    m_data[i  ] -= B;
    m_data[i+1] -= B;
    m_data[i+2] -= B;
  }

  return *this;
}
Expand All @@ -242,8 +269,11 @@ template <class X> tensor3_4<X> operator* (const tensor3_4<X> &A, const tensor3_
{
tensor3_4<X> C;

for ( size_t i=0; i<81; ++i )
C[i] = A[i] * B[i];
for ( size_t i = 0 ; i < 81 ; i += 3 ) {
C[i ] = A[i ] * B[i ];
C[i+1] = A[i+1] * B[i+1];
C[i+2] = A[i+2] * B[i+2];
}

return C;
}
Expand All @@ -252,8 +282,11 @@ template <class X> tensor3_4<X> operator/ (const tensor3_4<X> &A, const tensor3_
{
tensor3_4<X> C;

for ( size_t i=0; i<81; ++i )
C[i] = A[i] / B[i];
for ( size_t i = 0 ; i < 81 ; i += 3 ) {
C[i ] = A[i ] / B[i ];
C[i+1] = A[i+1] / B[i+1];
C[i+2] = A[i+2] / B[i+2];
}

return C;
}
Expand All @@ -262,8 +295,11 @@ template <class X> tensor3_4<X> operator+ (const tensor3_4<X> &A, const tensor3_
{
tensor3_4<X> C;

for ( size_t i=0; i<81; ++i )
C[i] = A[i] + B[i];
for ( size_t i = 0 ; i < 81 ; i += 3 ) {
C[i ] = A[i ] + B[i ];
C[i+1] = A[i+1] + B[i+1];
C[i+2] = A[i+2] + B[i+2];
}

return C;
}
Expand All @@ -272,8 +308,11 @@ template <class X> tensor3_4<X> operator- (const tensor3_4<X> &A, const tensor3_
{
tensor3_4<X> C;

for ( size_t i=0; i<81; ++i )
C[i] = A[i] - B[i];
for ( size_t i = 0 ; i < 81 ; i += 3 ) {
C[i ] = A[i ] - B[i ];
C[i+1] = A[i+1] - B[i+1];
C[i+2] = A[i+2] - B[i+2];
}

return C;
}
Expand All @@ -285,35 +324,47 @@ template <class X> tensor3_4<X> operator* (const tensor3_4<X> &A, const X &B)
{
tensor3_4<X> C;

for ( size_t i=0; i<81; ++i )
C[i] = A[i] * B;
for ( size_t i = 0 ; i < 81 ; i += 3 ) {
C[i ] = A[i ] * B;
C[i+1] = A[i+1] * B;
C[i+2] = A[i+2] * B;
}

return C; }

// Division of every component of a tensor by a scalar: C[i] = A[i] / B
// for every i in [0, 81). The result is filled in a single pass; no check
// is made for B == 0.
//
// A      : tensor providing the numerators, read through A[i]
// B      : scalar divisor
// return : new tensor holding the quotients
template <class X> tensor3_4<X> operator/ (const tensor3_4<X> &A, const X &B)
{
  tensor3_4<X> C;

  // 81 is a multiple of 3: a stride-3 loop touches each index exactly once
  for ( size_t i = 0 ; i < 81 ; i += 3 ) {
    C[i  ] = A[i  ] / B;
    C[i+1] = A[i+1] / B;
    C[i+2] = A[i+2] / B;
  }

  return C;
}

// Addition of a scalar to every component of a tensor: C[i] = A[i] + B
// for every i in [0, 81). The result is filled in a single pass.
//
// A      : tensor operand, read through A[i]
// B      : scalar added to each component
// return : new tensor holding the sums
template <class X> tensor3_4<X> operator+ (const tensor3_4<X> &A, const X &B)
{
  tensor3_4<X> C;

  // 81 is a multiple of 3: a stride-3 loop touches each index exactly once
  for ( size_t i = 0 ; i < 81 ; i += 3 ) {
    C[i  ] = A[i  ] + B;
    C[i+1] = A[i+1] + B;
    C[i+2] = A[i+2] + B;
  }

  return C;
}

// Subtraction of a scalar from every component of a tensor:
// C[i] = A[i] - B for every i in [0, 81). The result is filled in a
// single pass.
//
// A      : tensor operand, read through A[i]
// B      : scalar subtracted from each component
// return : new tensor holding the differences
template <class X> tensor3_4<X> operator- (const tensor3_4<X> &A, const X &B)
{
  tensor3_4<X> C;

  // 81 is a multiple of 3: a stride-3 loop touches each index exactly once
  for ( size_t i = 0 ; i < 81 ; i += 3 ) {
    C[i  ] = A[i  ] - B;
    C[i+1] = A[i+1] - B;
    C[i+2] = A[i+2] - B;
  }

  return C;
}

Expand All @@ -324,8 +375,11 @@ template <class X> tensor3_4<X> operator* (const X &A, const tensor3_4<X> &B)
{
tensor3_4<X> C;

for ( size_t i=0; i<81; ++i )
C[i] = A * B[i];
for ( size_t i = 0 ; i < 81 ; i += 3 ) {
C[i ] = A * B[i ];
C[i+1] = A * B[i+1];
C[i+2] = A * B[i+2];
}

return C;
}
Expand All @@ -334,8 +388,11 @@ template <class X> tensor3_4<X> operator/ (const X &A, const tensor3_4<X> &B)
{
tensor3_4<X> C;

for ( size_t i=0; i<81; ++i )
C[i] = A / B[i];
for ( size_t i = 0 ; i < 81 ; i += 3 ) {
C[i ] = A / B[i ];
C[i+1] = A / B[i+1];
C[i+2] = A / B[i+2];
}

return C;
}
Expand All @@ -344,8 +401,11 @@ template <class X> tensor3_4<X> operator+ (const X &A, const tensor3_4<X> &B)
{
tensor3_4<X> C;

for ( size_t i=0; i<81; ++i )
C[i] = A + B[i];
for ( size_t i = 0 ; i < 81 ; i += 3 ) {
C[i ] = A + B[i ];
C[i+1] = A + B[i+1];
C[i+2] = A + B[i+2];
}

return C;
}
Expand All @@ -354,8 +414,11 @@ template <class X> tensor3_4<X> operator- (const X &A, const tensor3_4<X> &B)
{
tensor3_4<X> C;

for ( size_t i=0; i<81; ++i )
C[i] = A - B[i];
for ( size_t i = 0 ; i < 81 ; i += 3 ) {
C[i ] = A - B[i ];
C[i+1] = A - B[i+1];
C[i+2] = A - B[i+2];
}

return C;
}
Expand Down Expand Up @@ -1408,12 +1471,12 @@ template <class X> tensor3_2s<X> operator+ (const tensor3_2s<X> &A, const tensor
{
tensor3_2s<X> C;

for ( size_t i = 0 ; i < 3 ; ++i ) {
for ( size_t j = i ; j < 3 ; ++j ) {
if ( i == j ) C[ i*3 - (i-1)*i/2 ] = A[ i*3 - (i-1)*i/2 ] + B[ i ];
else C[ i*3 - (i-1)*i/2 + j - i ] = A[ i*3 - (i-1)*i/2 + j - i ];
}
}
C[0] = A[0]+B[0];
C[1] = A[1];
C[2] = A[2];
C[3] = A[3]+B[1];
C[4] = A[4];
C[5] = A[5]+B[2];

return C;
}
Expand All @@ -1422,12 +1485,12 @@ template <class X> tensor3_2s<X> operator- (const tensor3_2s<X> &A, const tensor
{
tensor3_2s<X> C;

for ( size_t i = 0 ; i < 3 ; ++i ) {
for ( size_t j = i ; j < 3 ; ++j ) {
if ( i == j ) C[ i*3 - (i-1)*i/2 ] = A[ i*3 - (i-1)*i/2 ] - B[ i ];
else C[ i*3 - (i-1)*i/2 + j - i ] = A[ i*3 - (i-1)*i/2 + j - i ];
}
}
C[0] = A[0]-B[0];
C[1] = A[1];
C[2] = A[2];
C[3] = A[3]-B[1];
C[4] = A[4];
C[5] = A[5]-B[2];

return C;
}
Expand Down Expand Up @@ -3251,36 +3314,21 @@ template<class X> tensor3_2d<X> inline tensor3_2d<X>::T() const

// Trace: sum of the three diagonal components, read directly from storage.
//
// NOTE(review): assumes the (i,i) components live at m_data[0], m_data[4]
// and m_data[8] (row-major 3x3 layout) -- confirm against operator(i,j).
//
// return : m_data[0] + m_data[4] + m_data[8]
template<class X> X inline tensor3_2<X>::trace() const
{
  return m_data[0]+m_data[4]+m_data[8];
}

// -------------------------------------------------------------------------------------------------

// Trace: sum of the three diagonal components, read directly from storage.
//
// NOTE(review): assumes the (i,i) components live at m_data[0], m_data[3]
// and m_data[5] (packed symmetric storage) -- confirm against operator(i,j).
//
// return : m_data[0] + m_data[3] + m_data[5]
template<class X> X inline tensor3_2s<X>::trace() const
{
  return m_data[0]+m_data[3]+m_data[5];
}

// -------------------------------------------------------------------------------------------------

// Trace: sum of the first three stored entries.
//
// NOTE(review): assumes operator[](i) maps onto m_data[i] and that these
// three entries are the diagonal components -- confirm against the class.
//
// return : m_data[0] + m_data[1] + m_data[2]
template<class X> X inline tensor3_2d<X>::trace() const
{
  return m_data[0]+m_data[1]+m_data[2];
}

// -------------------------------------------------------------------------------------------------
Expand Down

0 comments on commit fd9e545

Please sign in to comment.