Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

out of place transform not working #270

Closed
jglaser opened this issue Nov 18, 2019 · 3 comments
Closed

out of place transform not working #270

jglaser opened this issue Nov 18, 2019 · 3 comments
Assignees

Comments

@jglaser
Copy link

jglaser commented Nov 18, 2019

What is the expected behavior

complex-to-complex in-place transforms using hipfft with single precision produce incorrect results when performed out of place

What actually happens

I modified the sample script hipfft_3d_z2z.cpp to do the FFT out-of-place.

I get

output:
(-23.8431,-23.8431) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 

(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 

(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 

(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 

with out-of-place transform, but I expect the same result as for inplace, which is

output:
(1056,96) (-320,320) (-320,0) (-320,-320) 
(-32,-32) (0,0) (0,0) (0,0) 
(0,-32) (0,0) (0,0) (0,0) 
(32,-32) (0,0) (0,0) (0,0) 

(-32,32) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 

(-32,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 

(-32,-32) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 
(0,0) (0,0) (0,0) (0,0) 

How to reproduce

// Kernel for initializing the real-valued input data on the GPU.
__global__ void initdata(hipfftComplex* x, const int Nx, const int Ny, const int Nz)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    const int idy = blockIdx.y * blockDim.y + threadIdx.y;
    const int idz = blockIdx.z * blockDim.z + threadIdx.z;
    if(idx < Nx && idy < Ny && idz < Nz)
    {
        const int pos = (idx * Ny + idy) * Nz + idz;
        x[pos].x      = idx + 10 * idz;
        x[pos].y      = idy;
    }
}

// Helper function for determining grid dimensions
template <typename Tint1, typename Tint2>
Tint1 ceildiv(const Tint1 nominator, const Tint2 denominator)
{
    return (nominator + denominator - 1) / denominator;
}

int main()
{
    std::cout << "hipfft 3D single-precision complex-to-complex transform (out of place)\n";

    const int Nx        = 4;
    const int Ny        = 4;
    const int Nz        = 4;
    int       direction = HIPFFT_FORWARD; // forward=-1, backward=1

    std::vector<std::complex<float>> cdata(Nx * Ny * Nz);
    size_t complex_bytes = sizeof(decltype(cdata)::value_type) * cdata.size();

    // Create HIP device object and copy data to device:
    // hipfftComplex for single-precision
    hipfftComplex* x;
    hipMalloc(&x, complex_bytes);

    hipfftComplex* y;
    hipMalloc(&y, complex_bytes);

    // Inititalize the data on the device
    hipError_t rt;
    const dim3 blockdim(8, 8, 8);
    const dim3 griddim(ceildiv(Nx, blockdim.x), ceildiv(Ny, blockdim.y), ceildiv(Nz, blockdim.z));
    hipLaunchKernelGGL(initdata, blockdim, griddim, 0, 0, x, Nx, Ny, Nz);
    hipDeviceSynchronize();
    rt = hipGetLastError();
    assert(rt == hipSuccess);

    std::cout << "input:\n";
    hipMemcpy(cdata.data(), x, complex_bytes, hipMemcpyDefault);
    for(int i = 0; i < Nx; i++)
    {
        for(int j = 0; j < Ny; j++)
        {
            for(int k = 0; k < Nz; k++)
            {
                int pos = (i * Ny + j) * Nz + k;
                std::cout << cdata[pos] << " ";
            }
            std::cout << "\n";
        }
        std::cout << "\n";
    }
    std::cout << std::endl;

    // Create plan
    hipfftResult rc   = HIPFFT_SUCCESS;
    hipfftHandle plan = NULL;
    rc                = hipfftCreate(&plan);
    assert(rc == HIPFFT_SUCCESS);
    rc = hipfftPlan3d(&plan, // plan handle
                      Nx, // transform length
                      Ny, // transform length
                      Nz, // transform length
                      HIPFFT_C2C); // transform type (HIPFFT_C2C for single-precision)
    assert(rc == HIPFFT_SUCCESS);

    // Execute plan
    // hipfftExecZ2Z: single precision, hipfftExecC2C: for single-precision
    rc = hipfftExecC2C(plan, x, y, direction);
    assert(rc == HIPFFT_SUCCESS);

    std::cout << "output:\n";
    hipMemcpy(cdata.data(), y, complex_bytes, hipMemcpyDeviceToHost);
    for(int i = 0; i < Nx; i++)
    {
        for(int j = 0; j < Ny; j++)
        {
            for(int k = 0; k < Nz; k++)
            {
                int pos = (i * Ny + j) * Nz + k;
                std::cout << cdata[pos] << " ";
            }
            std::cout << "\n";
        }
        std::cout << "\n";
    }
    std::cout << std::endl;

    hipfftDestroy(plan);
    hipFree(x);
    hipFree(y);
    return 0;
}

Environment

Hardware description
GPU Vega 10 [Radeon Instinct MI25]
CPU AMD EPYC 7601 32-Core
Software version
HCC 10.0
Library 0.9.7.722-rocm-rel-2.9-6-e3055e1
@malcolmroberts
Copy link
Contributor

Thank for bringing up this issue. It seems to be isolatd to the 3D complex-to-complex out-of-place transform.

@malcolmroberts
Copy link
Contributor

I think that this is fixed in develop now; would you mind giving it a try?

@malcolmroberts
Copy link
Contributor

I'm going to close this issue; if the problem presists, please re-open.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants