diff --git a/flang/docs/GettingStarted.md b/flang/docs/GettingStarted.md
index 927d56cd29111..f470067e07f60 100644
--- a/flang/docs/GettingStarted.md
+++ b/flang/docs/GettingStarted.md
@@ -192,7 +192,7 @@ Clang with NVPTX backend and NVCC compilers are supported.
 
 ```bash
 cd llvm-project/flang
-mkdir -rf build_flang_runtime
+rm -rf build_flang_runtime
 mkdir build_flang_runtime
 cd build_flang_runtime
 
@@ -202,13 +202,19 @@ cmake \
   -DCMAKE_C_COMPILER=clang \
   -DCMAKE_CXX_COMPILER=clang++ \
   -DCMAKE_CUDA_COMPILER=clang \
+  -DCMAKE_CUDA_HOST_COMPILER=clang++ \
   ../runtime/
 make -j FortranRuntime
 ```
 
+Note that the version of `clang` being used must [support](https://releases.llvm.org/16.0.0/tools/clang/docs/ReleaseNotes.html#cuda-support)
+the CUDA toolkit version installed on the build machine.  If there are multiple
+CUDA toolkit installations, please use `-DCUDAToolkit_ROOT=/some/path`
+to specify the compatible version.
+
 ```bash
 cd llvm-project/flang
-mkdir -rf build_flang_runtime
+rm -rf build_flang_runtime
 mkdir build_flang_runtime
 cd build_flang_runtime
 
@@ -218,21 +224,53 @@ cmake \
   -DCMAKE_C_COMPILER=clang \
   -DCMAKE_CXX_COMPILER=clang++ \
   -DCMAKE_CUDA_COMPILER=nvcc \
+  -DCMAKE_CUDA_HOST_COMPILER=clang++ \
   ../runtime/
 make -j FortranRuntime
 ```
 
+Note that `nvcc` might limit support to certain
+[versions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#host-compiler-support-policy) of `CMAKE_CUDA_HOST_COMPILER`,
+so please use compatible versions.
+
 The result of the build is a "fat" library with the host and device
 code.  Note that the packaging of the libraries is different
 between [Clang](https://clang.llvm.org/docs/OffloadingDesign.html#linking-target-device-code) and NVCC, so the library must be linked using
 compatible compiler drivers.
 
+### Building in-tree
+One may build the Flang runtime library along with Flang itself
+by providing additional CMake variables on top of the Flang in-tree
+build config.
+
+For example:
+```bash
+  -DFLANG_EXPERIMENTAL_CUDA_RUNTIME=ON \
+  -DCMAKE_CUDA_ARCHITECTURES=80 \
+  -DCMAKE_C_COMPILER=clang \
+  -DCMAKE_CXX_COMPILER=clang++ \
+  -DCMAKE_CUDA_COMPILER=clang \
+  -DCMAKE_CUDA_HOST_COMPILER=clang++ \
+```
+
+Or:
+```bash
+  -DFLANG_EXPERIMENTAL_CUDA_RUNTIME=ON \
+  -DCMAKE_CUDA_ARCHITECTURES=80 \
+  -DCMAKE_C_COMPILER=gcc \
+  -DCMAKE_CXX_COMPILER=g++ \
+  -DCMAKE_CUDA_COMPILER=nvcc \
+  -DCMAKE_CUDA_HOST_COMPILER=g++ \
+```
+
+The usual `make -j check-flang` will work with such a CMake configuration.
+
 ##### OpenMP target offload build
 Only Clang compiler is currently supported.
 
 ```
 cd llvm-project/flang
-mkdir -rf build_flang_runtime
+rm -rf build_flang_runtime
 mkdir build_flang_runtime
 cd build_flang_runtime
 
@@ -250,6 +288,8 @@ part of the library is just a container for the device code.
 The resulting library may be linked to user programs using
 Clang-like device linking pipeline.
 
+The same set of CMake variables works for the Flang in-tree build.
+
 ## Supported C++ compilers
 
 Flang is written in C++17.
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index ee1214b791ee9..2fa4c800e0c90 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -154,24 +154,30 @@ set(supported_files
   )
 
 if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
-  enable_language(CUDA)
+  if (BUILD_SHARED_LIBS)
+    message(FATAL_ERROR
+      "BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime"
+      )
+  endif()
 
-  # Add the unsupported files to LLVM_OPTIONAL_SOURCES.
-  set(todo_files ${sources})
-  list(REMOVE_ITEM todo_files ${supported_files})
-  list(APPEND LLVM_OPTIONAL_SOURCES ${todo_files})
+  enable_language(CUDA)
 
   # TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION
   # work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION.
   set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
 
-  # Treat all sources as CUDA files.
-  set(sources ${supported_files})
-  set_source_files_properties(${sources} PROPERTIES LANGUAGE CUDA)
+  # Treat all supported sources as CUDA files.
+  set_source_files_properties(${supported_files} PROPERTIES LANGUAGE CUDA)
+  set(CUDA_COMPILE_OPTIONS)
   if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang")
     # Allow varargs.
-    add_compile_options(-Xclang -fcuda-allow-variadic-functions)
+    set(CUDA_COMPILE_OPTIONS
+      -Xclang -fcuda-allow-variadic-functions
+      )
   endif()
+  set_source_files_properties(${supported_files} PROPERTIES COMPILE_OPTIONS
+    "${CUDA_COMPILE_OPTIONS}"
+    )
 endif()
 
 set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING
@@ -198,12 +204,11 @@ if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off")
   if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "host_device")
     message(FATAL_ERROR "Unsupported OpenMP offload build of Flang runtime")
   endif()
-
-  # Add the unsupported files to LLVM_OPTIONAL_SOURCES.
-  set(todo_files ${sources})
-  list(REMOVE_ITEM todo_files ${supported_files})
-  list(APPEND LLVM_OPTIONAL_SOURCES ${todo_files})
-  set(sources ${supported_files})
+  if (BUILD_SHARED_LIBS)
+    message(FATAL_ERROR
+      "BUILD_SHARED_LIBS is not supported for OpenMP offload build of Fortran runtime"
+      )
+  endif()
 
   if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND
       "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
@@ -231,19 +236,28 @@ if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off")
       "${FLANG_OMP_DEVICE_ARCHITECTURES}"
       )
 
-    add_compile_options(-fopenmp -fvisibility=hidden -fopenmp-cuda-mode)
-    add_compile_options(--offload-arch=${compile_for_architectures})
-    # Force LTO for the device part.
-    add_compile_options(-foffload-lto)
+    set(OMP_COMPILE_OPTIONS
+      -fopenmp
+      -fvisibility=hidden
+      -fopenmp-cuda-mode
+      --offload-arch=${compile_for_architectures}
+      # Force LTO for the device part.
+      -foffload-lto
+      )
+    set_source_files_properties(${supported_files} PROPERTIES COMPILE_OPTIONS
+      "${OMP_COMPILE_OPTIONS}"
+      )
+
+    # Enable "declare target" in the source code.
+    set_source_files_properties(${supported_files}
+      PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD
+      )
   else()
     message(FATAL_ERROR
       "Flang runtime build is not supported for these compilers:\n"
       "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}\n"
       "CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
   endif()
-
-  # Enable "declare target" in the source code.
-  add_compile_definitions(OMP_OFFLOAD_BUILD)
 endif()
 
 add_flang_library(FortranRuntime
diff --git a/flang/unittests/CMakeLists.txt b/flang/unittests/CMakeLists.txt
index 1b4fb986f6e39..72d37ebeb853c 100644
--- a/flang/unittests/CMakeLists.txt
+++ b/flang/unittests/CMakeLists.txt
@@ -1,8 +1,40 @@
+if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
+  # If Fortran runtime is built as CUDA library, the linking
+  # of targets that link FortranRuntime must be done
+  # with CUDA_RESOLVE_DEVICE_SYMBOLS.
+  # CUDA language must be enabled for CUDA_RESOLVE_DEVICE_SYMBOLS
+  # to take effect.
+  enable_language(CUDA)
+endif()
+
 add_custom_target(FlangUnitTests)
 set_target_properties(FlangUnitTests PROPERTIES FOLDER "Flang Unit Tests")
 
+function(add_flang_unittest_offload_properties target)
+  # Apply offload-specific link settings to the given unittest target.
+  # Set CUDA_RESOLVE_DEVICE_SYMBOLS.
+  if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
+    set_target_properties(${target}
+      PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON
+      )
+  endif()
+  # Enable OpenMP offload during linking; compile options may be needed
+  # too once there are OpenMP offload unittests. The options are appended
+  # so that link options already set on the target are not overwritten.
+  #
+  # FIXME: replace 'native' in --offload-arch option with the list
+  #        of targets that Fortran Runtime was built for.
+  #        Common code must be moved from flang/runtime/CMakeLists.txt.
+  if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off")
+    target_link_options(${target}
+      PRIVATE -fopenmp --offload-arch=native
+      )
+  endif()
+endfunction()
+
 function(add_flang_unittest test_dirname)
   add_unittest(FlangUnitTests ${test_dirname} ${ARGN})
+  add_flang_unittest_offload_properties(${test_dirname})
 endfunction()
 
 if (CXX_SUPPORTS_SUGGEST_OVERRIDE_FLAG)
@@ -34,6 +66,8 @@ function(add_flang_nongtest_unittest test_name)
   if(NOT ARG_SLOW_TEST)
     add_dependencies(FlangUnitTests ${test_name}${suffix})
   endif()
+
+  add_flang_unittest_offload_properties(${test_name}${suffix})
 endfunction()
 
 add_subdirectory(Optimizer)