diff --git a/.github/workflows/test_linux_cuda.yaml b/.github/workflows/test_linux_cuda.yaml
new file mode 100644
index 000000000..527448bec
--- /dev/null
+++ b/.github/workflows/test_linux_cuda.yaml
@@ -0,0 +1,75 @@
+# CI workflow: builds torchcodec against a CUDA-enabled FFmpeg compiled from
+# source, then runs the unit tests and the GPU benchmark on a Linux GPU runner.
+name: Unit-tests on Linux GPU
+
+on:
+  pull_request:
+  push:
+    branches:
+      - nightly
+      - main
+      - release/*
+  workflow_dispatch:
+
+jobs:
+  tests:
+    strategy:
+      matrix:
+        # Versions are quoted so YAML keeps them as strings rather than floats.
+        python_version: ["3.9"]
+        # TODO: Add more cuda versions.
+        cuda_arch_version: ["12.4"]
+      fail-fast: false
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    with:
+      runner: linux.g5.4xlarge.nvidia.gpu
+      repository: pytorch/torchcodec
+      gpu-arch-type: cuda
+      gpu-arch-version: ${{ matrix.cuda_arch_version }}
+      timeout: 120
+
+      script: |
+        nvidia-smi
+        # Pin the interpreter to the matrix entry; without an explicit python=
+        # spec the matrix's python_version would have no effect on the job.
+        conda create --yes --name test python=${{ matrix.python_version }}
+        conda activate test
+        conda install --quiet --yes pip cmake pkg-config nasm
+
+        # Derive the PyTorch wheel index tag (e.g. 12.4 -> cu124) from the matrix
+        # so the torch build stays in sync with gpu-arch-version.
+        cuda_tag="cu$(echo "${{ matrix.cuda_arch_version }}" | tr -d '.')"
+        pip install --quiet --pre torch torchvision --index-url "https://download.pytorch.org/whl/nightly/${cuda_tag}"
+        conda install --quiet --yes nvidia::libnpp
+
+        # Build and install FFMPEG from source with CUDA enabled.
+        # The one on conda doesn't have CUDA enabled.
+        # Sub-step: install nvidia headers. Reference this link for details:
+        # https://docs.nvidia.com/video-technologies/video-codec-sdk/12.1/ffmpeg-with-nvidia-gpu/index.html
+        git clone --quiet https://git.videolan.org/git/ffmpeg/nv-codec-headers.git
+
+        pushd nv-codec-headers
+        # Cap parallelism at the core count: a bare `-j` places no limit on the
+        # number of simultaneous jobs.
+        make --silent PREFIX="$CONDA_PREFIX" -j"$(nproc)" install
+        popd
+
+        # Now build FFMPEG from source with CUDA enabled.
+        # Only the tip of release/6.1 is built, so a shallow single-branch clone
+        # avoids downloading the full history.
+        git clone --quiet --depth 1 --branch release/6.1 https://git.ffmpeg.org/ffmpeg.git ffmpeg/
+        pushd ffmpeg
+        # Diagnostics: show which pkg-config is in use and what it can find.
+        which pkg-config
+        pkg-config --list-all
+        ./configure --prefix="$CONDA_PREFIX" --enable-nonfree --enable-cuda-nvcc --disable-static --enable-shared --optflags=-fno-omit-frame-pointer --disable-stripping --enable-cuvid
+        make --silent -j"$(nproc)" install
+        popd
+
+        CMAKE_BUILD_PARALLEL_LEVEL=8 CXXFLAGS="" LDFLAGS="-Wl,--allow-shlib-undefined -Wl,-rpath,$CONDA_PREFIX/lib -Wl,-rpath-link,$CONDA_PREFIX/lib -L$CONDA_PREFIX/lib" CMAKE_BUILD_TYPE=Release ENABLE_CUDA=1 ENABLE_NVTX=1 pip install -e ".[dev]" --no-build-isolation -vv --debug
+
+        # We skip certain tests because they are not relevant to GPU decoding and they always fail with
+        # a custom FFMPEG build.
+        pytest -k "not (test_get_metadata or get_ffmpeg_version)"
+        python benchmarks/decoders/gpu_benchmark.py
+        conda deactivate
diff --git a/src/torchcodec/decoders/_core/CMakeLists.txt b/src/torchcodec/decoders/_core/CMakeLists.txt
index d8554bdaa..ed8e8ef36 100644
--- a/src/torchcodec/decoders/_core/CMakeLists.txt
+++ b/src/torchcodec/decoders/_core/CMakeLists.txt
@@ -58,7 +58,7 @@ function(make_torchcodec_library library_name ffmpeg_target)
 
     set(NEEDED_LIBRARIES ${ffmpeg_target} ${TORCH_LIBRARIES} ${Python3_LIBRARIES})
     if(ENABLE_CUDA)
-        list(APPEND NEEDED_LIBRARIES ${CUDA_CUDA_LIBRARY})
+        list(APPEND NEEDED_LIBRARIES ${CUDA_CUDA_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_nppicc_LIBRARY} )
     endif()
     if(ENABLE_NVTX)
         list(APPEND NEEDED_LIBRARIES nvtx3-cpp)