Skip to content
Permalink
Browse files
TransformedAABBoxSSE move mCumulativeMatrix to the stack too
  • Loading branch information
rygorous committed Feb 24, 2013
1 parent 0fad7d4 commit 028a108
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 15 deletions.
@@ -189,6 +189,8 @@ void AABBoxRasterizerSSE::Render(CPUTAssetSet **pAssetSet,
BoxTestSetup setup;
setup.Init(mViewMatrix, mProjMatrix, mpCamera, mOccludeeSizeThreshold);

__m128 cumulativeMatrix[4];

for(UINT assetId = 0, modelId = 0; assetId < numAssetSets; assetId++)
{
for(UINT nodeId = 0; nodeId < pAssetSet[assetId]->GetAssetCount(); nodeId++)
@@ -198,7 +200,7 @@ void AABBoxRasterizerSSE::Render(CPUTAssetSet **pAssetSet,
ASSERT((CPUT_SUCCESS == result), _L ("Failed getting asset by index"));
if(pRenderNode->IsModel())
{
if(!mpTransformedAABBox[modelId].IsTooSmall(setup))
if(!mpTransformedAABBox[modelId].IsTooSmall(setup, cumulativeMatrix))
{
CPUTModelDX11* model = (CPUTModelDX11*)pRenderNode;
model = (CPUTModelDX11*)pRenderNode;
@@ -94,6 +94,7 @@ void AABBoxRasterizerSSEMT::TransformAABBoxAndDepthTest(UINT taskId)
setup.Init(mViewMatrix, mProjMatrix, mpCamera, mOccludeeSizeThreshold);

__m128 xformedPos[AABB_VERTICES];
__m128 cumulativeMatrix[4];

static const UINT kChunkSize = 64;
for(UINT base = taskId*kChunkSize; base < mNumModels; base += mNumDepthTestTasks * kChunkSize)
@@ -103,9 +104,9 @@ void AABBoxRasterizerSSEMT::TransformAABBoxAndDepthTest(UINT taskId)
{
mpVisible[i] = false;

if(mpBBoxVisible[i] && !mpTransformedAABBox[i].IsTooSmall(setup))
if(mpBBoxVisible[i] && !mpTransformedAABBox[i].IsTooSmall(setup, cumulativeMatrix))
{
if(mpTransformedAABBox[i].TransformAABBox(xformedPos))
if(mpTransformedAABBox[i].TransformAABBox(xformedPos, cumulativeMatrix))
mpVisible[i] = mpTransformedAABBox[i].RasterizeAndDepthTestAABBox(mpRenderTargetPixels, xformedPos);
else
mpVisible[i] = true;
@@ -51,14 +51,15 @@ void AABBoxRasterizerSSEST::TransformAABBoxAndDepthTest()
setup.Init(mViewMatrix, mProjMatrix, mpCamera, mOccludeeSizeThreshold);

__m128 xformedPos[AABB_VERTICES];
__m128 cumulativeMatrix[4];

for(UINT i = 0; i < mNumModels; i++)
{
mpVisible[i] = false;

if(mpBBoxVisible[i] && !mpTransformedAABBox[i].IsTooSmall(setup))
if(mpBBoxVisible[i] && !mpTransformedAABBox[i].IsTooSmall(setup, cumulativeMatrix))
{
if(mpTransformedAABBox[i].TransformAABBox(xformedPos))
if(mpTransformedAABBox[i].TransformAABBox(xformedPos, cumulativeMatrix))
mpVisible[i] = mpTransformedAABBox[i].RasterizeAndDepthTestAABBox(mpRenderTargetPixels, xformedPos);
else
mpVisible[i] = true;
@@ -65,14 +65,12 @@ TransformedAABBoxSSE::TransformedAABBoxSSE()
{
mWorldMatrix = (__m128*)_aligned_malloc(sizeof(float) * 4 * 4, 16);
mpBBVertexList = (__m128*)_aligned_malloc(sizeof(float) * 4 * AABB_VERTICES, 16);
mCumulativeMatrix = (__m128*)_aligned_malloc(sizeof(float) * 4 * 4, 16);
}

TransformedAABBoxSSE::~TransformedAABBoxSSE()
{
_aligned_free(mWorldMatrix);
_aligned_free(mpBBVertexList);
_aligned_free(mCumulativeMatrix);
}

//--------------------------------------------------------------------------
@@ -111,12 +109,12 @@ void TransformedAABBoxSSE::CreateAABBVertexIndexList(CPUTModelDX11 *pModel)
//----------------------------------------------------------------------------
// Determine if the occludee size is too small and, if so, avoid drawing it
//----------------------------------------------------------------------------
bool TransformedAABBoxSSE::IsTooSmall(const BoxTestSetup &setup)
bool TransformedAABBoxSSE::IsTooSmall(const BoxTestSetup &setup, __m128 cumulativeMatrix[4])
{
MatrixMultiply(mWorldMatrix, setup.mViewProjViewport, mCumulativeMatrix);
MatrixMultiply(mWorldMatrix, setup.mViewProjViewport, cumulativeMatrix);

__m128 center = _mm_set_ps(1.0f, mBBCenter.z, mBBCenter.y, mBBCenter.x);
__m128 mBBCenterOSxForm = TransformCoords(&center, mCumulativeMatrix);
__m128 mBBCenterOSxForm = TransformCoords(&center, cumulativeMatrix);
float w = mBBCenterOSxForm.m128_f32[3];
if( w > 1.0f )
{
@@ -129,13 +127,13 @@ bool TransformedAABBoxSSE::IsTooSmall(const BoxTestSetup &setup)
//----------------------------------------------------------------
// Transforms the AABB vertices to screen space once every frame
//----------------------------------------------------------------
bool TransformedAABBoxSSE::TransformAABBox(__m128 xformedPos[])
bool TransformedAABBoxSSE::TransformAABBox(__m128 xformedPos[], const __m128 cumulativeMatrix[4])
{
__m128 zAllIn = _mm_castsi128_ps(_mm_set1_epi32(~0));

for(UINT i = 0; i < AABB_VERTICES; i++)
{
__m128 vert = TransformCoords(&mpBBVertexList[i], mCumulativeMatrix);
__m128 vert = TransformCoords(&mpBBVertexList[i], cumulativeMatrix);

// We have inverted z; z is in front of near plane iff z <= w.
__m128 vertZ = _mm_shuffle_ps(vert, vert, 0xaa); // vert.zzzz
@@ -39,17 +39,16 @@ class TransformedAABBoxSSE : public HelperSSE
void CreateAABBVertexIndexList(CPUTModelDX11 *pModel);
void TransformAABBoxAndDepthTest();

bool IsTooSmall(const BoxTestSetup &setup);
bool IsTooSmall(const BoxTestSetup &setup, __m128 cumulativeMatrix[4]);

bool TransformAABBox(__m128 xformedPos[]);
bool TransformAABBox(__m128 xformedPos[], const __m128 cumulativeMatrix[4]);

bool RasterizeAndDepthTestAABBox(UINT *pRenderTargetPixels, const __m128 xformedPos[]);

private:
CPUTModelDX11 *mpCPUTModel;
__m128 *mWorldMatrix;
__m128 *mpBBVertexList;
__m128 *mCumulativeMatrix;

float3 mBBCenter;
float mRadiusSq;

0 comments on commit 028a108

Please sign in to comment.