diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index 88fedabba27..9f183528719 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -156,8 +156,7 @@ cmake_install_executorch_libraries() {
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
         -DEXECUTORCH_BUILD_QNN="$QNN" \
-        -DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
-        -Bcmake-out .
+        -DQNN_SDK_ROOT="$QNN_SDK_ROOT"
     cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
 }
 
diff --git a/examples/models/llama/README.md b/examples/models/llama/README.md
index 38eb195fcd4..c6f0350fff7 100644
--- a/examples/models/llama/README.md
+++ b/examples/models/llama/README.md
@@ -269,18 +269,7 @@ You can export and run the original Llama 3 8B instruct model.
 
 1. Build executorch with optimized CPU performance as follows. Build options available [here](https://github.com/pytorch/executorch/blob/main/CMakeLists.txt#L59).
     ```
-    cmake -DPYTHON_EXECUTABLE=python \
-        -DCMAKE_INSTALL_PREFIX=cmake-out \
-        -DEXECUTORCH_ENABLE_LOGGING=1 \
-        -DCMAKE_BUILD_TYPE=Release \
-        -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-        -DEXECUTORCH_BUILD_XNNPACK=ON \
-        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-        -Bcmake-out .
+    cmake --preset llm -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out
 
     cmake --build cmake-out -j16 --target install --config Release
     ```
@@ -288,14 +277,9 @@ Note for Mac users: There's a known linking issue with Xcode 15.1. Refer to the
 2. Build llama runner.
     ```
-    cmake -DPYTHON_EXECUTABLE=python \
-        -DCMAKE_INSTALL_PREFIX=cmake-out \
+    cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
+        -DBUILD_TESTING=OFF \
         -DCMAKE_BUILD_TYPE=Release \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-        -DEXECUTORCH_BUILD_XNNPACK=ON \
-        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-        -DSUPPORT_REGEX_LOOKAHEAD=ON
         -Bcmake-out/examples/models/llama \
         examples/models/llama
     ```
 
diff --git a/extension/llm/runner/CMakeLists.txt b/extension/llm/runner/CMakeLists.txt
index 05f47e55c74..7f3f8ad1519 100644
--- a/extension/llm/runner/CMakeLists.txt
+++ b/extension/llm/runner/CMakeLists.txt
@@ -44,6 +44,10 @@ target_include_directories(
 add_library(extension_llm_runner STATIC ${_extension_llm_runner__srcs})
 
 # add tokenizers
+set(SUPPORT_REGEX_LOOKAHEAD ON)
+# llama/runner/CMakeLists.txt builds a shared library libllama_runner.so that
+# transitively depends on tokenizers. Need to build tokenizers with -fPIC.
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 add_subdirectory(
   ${EXECUTORCH_ROOT}/extension/llm/tokenizers
   ${CMAKE_CURRENT_BINARY_DIR}/../../../extension/llm/tokenizers
@@ -54,8 +58,9 @@ set(runner_deps executorch_core extension_module extension_tensor tokenizers)
 
 target_link_libraries(extension_llm_runner PUBLIC ${runner_deps})
 
 target_include_directories(
-  extension_llm_runner INTERFACE ${_common_include_directories}
-  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
+  extension_llm_runner
+  INTERFACE ${_common_include_directories}
+            ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
 if(BUILD_TESTING)
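
Note on the `extension/llm/runner/CMakeLists.txt` change: `CMAKE_POSITION_INDEPENDENT_CODE` must be set *before* the `add_subdirectory` call because the variable only initializes the `POSITION_INDEPENDENT_CODE` property of targets defined after it, and static-library objects are not compiled with `-fPIC` by default, so folding them into a shared library fails at link time on ELF platforms. Below is a minimal standalone sketch of that constraint; the `toy_tokenizer`/`toy_runner` targets and source files are hypothetical stand-ins, not part of this PR.

```cmake
cmake_minimum_required(VERSION 3.10)
project(pic_demo C)

# Stand-in for the tokenizers dependency: a static archive. Without
# POSITION_INDEPENDENT_CODE its objects are built as non-PIC, and linking
# them into a shared library typically fails on Linux with "relocation
# R_X86_64_PC32 ... can not be used when making a shared object;
# recompile with -fPIC".
add_library(toy_tokenizer STATIC tokenizer.c)

# Equivalent in effect to the diff's set(CMAKE_POSITION_INDEPENDENT_CODE ON),
# but scoped to one target instead of every target defined after the set().
set_target_properties(toy_tokenizer PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Stand-in for libllama_runner.so: a shared library that links the static
# archive and therefore needs all of its objects to be relocatable.
add_library(toy_runner SHARED runner.c)
target_link_libraries(toy_runner PRIVATE toy_tokenizer)
```

The same scoping logic applies to `set(SUPPORT_REGEX_LOOKAHEAD ON)`: a plain `set()` in the parent directory is visible inside the `add_subdirectory` call, so the tokenizers subproject picks it up without callers passing `-DSUPPORT_REGEX_LOOKAHEAD=ON` themselves, which is why the README's runner invocation can drop that flag.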