- No phoning home is the main goal here: an LLM with retrieval augmentation and a local database, with zero outside communication.
- First successful run below: loading a local `codegemma:7b-code-fp16` (~16 GB) into desktop RAM with default parameters. Her first response was quite endearing. (A minimal sketch of the load path follows this list.)
- Currently working on CUDA integration (see bottom of README).
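A minimal sketch of the zero-network load path using the llama.cpp C API. This is a sketch, not the project's actual code: it assumes a recent llama.cpp checkout (these entry points have been renamed across versions), and the model path is hypothetical.

```cpp
#include "llama.h"
#include <cstdio>

int main() {
    // No networking anywhere: the weights are read straight off local disk.
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params(); // default parameters
    llama_model * model = llama_model_load_from_file(
        "models/codegemma-7b-code-fp16.gguf", mparams);        // hypothetical local path
    if (!model) { std::fprintf(stderr, "failed to load model\n"); return 1; }

    llama_context_params cparams = llama_context_default_params();
    llama_context * ctx = llama_init_from_model(model, cparams);

    // ... tokenize the prompt, llama_decode(), sample ...

    llama_free(ctx);
    llama_model_free(model);
    llama_backend_free();
    return 0;
}
```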

- CUDA
- Local database with retrieval augmentation (a brute-force retrieval sketch follows the project snippet below).
- Attempt to utilize Qt Networking and Network Authorization components to ensure the application is inaccessible via internet and Bluetooth (a network-blocking sketch also follows below).
- See `LocalLLM.vcxproj`:
```xml
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
  <ClCompile>
    <WarningLevel>Level3</WarningLevel>
    <FunctionLevelLinking>true</FunctionLevelLinking>
    <IntrinsicFunctions>true</IntrinsicFunctions>
    <SDLCheck>true</SDLCheck>
    <PreprocessorDefinitions>NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    <ConformanceMode>true</ConformanceMode>
    <LanguageStandard>stdcpp20</LanguageStandard>
    <MultiProcessorCompilation>true</MultiProcessorCompilation>
    <LanguageStandard_C>stdc17</LanguageStandard_C>
    <AdditionalIncludeDirectories>$(SolutionDir)llama.cpp\include;$(SolutionDir)llama.cpp\ggml\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  </ClCompile>
  <Link>
    <SubSystem>Windows</SubSystem>
    <GenerateDebugInformation>true</GenerateDebugInformation>
    <AdditionalLibraryDirectories>$(SolutionDir)llama.cpp\build\src;$(SolutionDir)llama.cpp\build\ggml\src;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
    <AdditionalDependencies>llama.lib;ggml.lib;ggml-base.lib;ggml-cpu.lib;%(AdditionalDependencies)</AdditionalDependencies>
  </Link>
</ItemDefinitionGroup>
```
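The retrieval-augmentation item above is still in progress. As a sketch of the intended shape (every name here is hypothetical illustration, not the project's actual code), retrieval over a local store can start as brute-force cosine similarity over embeddings kept entirely on disk or in RAM:

```cpp
#include <cmath>
#include <cstddef>
#include <string>
#include <vector>

// Hypothetical record: a text chunk plus an embedding produced locally
// (e.g. by the same llama.cpp model running in embedding mode).
struct Chunk {
    std::string        text;
    std::vector<float> emb;
};

static float cosine(const std::vector<float> & a, const std::vector<float> & b) {
    float dot = 0.f, na = 0.f, nb = 0.f;
    for (std::size_t i = 0; i < a.size(); ++i) {
        dot += a[i] * b[i];
        na  += a[i] * a[i];
        nb  += b[i] * b[i];
    }
    return dot / (std::sqrt(na) * std::sqrt(nb) + 1e-8f); // guard divide-by-zero
}

// Return the stored chunk most similar to the query embedding; prepending its
// text to the prompt is all "retrieval augmentation" means at this scale.
static const Chunk * retrieve(const std::vector<Chunk> & db,
                              const std::vector<float> & query) {
    const Chunk * best = nullptr;
    float bestScore = -1.f;
    for (const Chunk & c : db) {
        const float s = cosine(c.emb, query);
        if (s > bestScore) { bestScore = s; best = &c; }
    }
    return best;
}
```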
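For the Qt Networking item, one possible approach (a sketch, not the project's confirmed design): point the application-wide proxy at an unroutable address so every Qt-managed socket fails fast instead of reaching the internet. This covers only Qt's network stack; an OS-level firewall rule is still needed for a hard guarantee, and Bluetooth would be addressed separately via Qt Connectivity.

```cpp
#include <QNetworkProxy>

// Sketch: route all Qt-managed sockets through a dead-end SOCKS5 proxy.
// Any accidental network request then fails immediately instead of going online.
static void blockQtNetworking() {
    QNetworkProxy deadEnd(QNetworkProxy::Socks5Proxy,
                          QStringLiteral("0.0.0.0"), 1); // unroutable on purpose
    QNetworkProxy::setApplicationProxy(deadEnd);
}
```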
---
- Building with Visual Studio
```text
Build started at 1:17 AM...
1>------ Build started: Project: LocalLLM, Configuration: Release x64 ------
1> Qt/MSBuild: 3.4.1.0
1> Reading Qt configuration (C:/Qt/6.10.1/msvc2022_64/bin/qmake)
1> Qt: 6.10.1
1> LocalLLM.cpp
1> Generating code
1> Previous IPDB not found, fall back to full compilation.
1> All 172 functions were compiled because no usable IPDB/IOBJ from previous compilation was found.
1> Finished generating code
1> LocalLLM.vcxproj -> C:\...\LocalLLM\x64\Release\LocalLLM.exe
1> 'pwsh.exe' is not recognized as an internal or external command,
1> operable program or batch file.
========== Build: 1 succeeded, 0 failed, 0 up-to-date, 0 skipped ==========
========== Build completed at 1:17 AM and took 01.635 seconds ==========
```
- The community/open-source version of Qt is included; see `LocalLLM.vcxproj`:
```xml
<QtInstall>msvc2022_64</QtInstall>
<QtPathBinaries>C:\Qt\6.10.1\msvc2022_64\bin</QtPathBinaries>
<QtPathLibraryExecutables>C:\Qt\6.10.1\msvc2022_64\modules</QtPathLibraryExecutables>
```
- Strategy: if something fails, fix it, then delete the `build-cuda` directory before trying again.
- For VS Developer PowerShell (x64), set:
```powershell
-NoExit -Command "& { Import-Module """$env:VSAPPIDDIR\..\Tools\Microsoft.VisualStudio.DevShell.dll"""; Enter-VsDevShell -SkipAutomaticLocation -SetDefaultWindowTitle -DevCmdArguments '-arch=x64 -host_arch=x64' -InstallPath $env:VSAPPIDDIR\..\..\}"
```
- In Developer PowerShell (x64), run `cl`:
```text
Microsoft (R) C/C++ Optimizing Compiler Version 19.50.35723 for x64
Copyright (C) Microsoft Corporation. All rights reserved.
usage: cl [ option... ] filename... [ /link linkoption... ]
```
- Important, set:
```powershell
$env:VCToolsVersion = "14.44.35207"
```
- Then:
```powershell
git clone https://github.com/ggml-org/llama.cpp
```
- Once the clone completes, move the resulting `llama.cpp` directory into this repository as a subdirectory, e.g. `C:/../repos/LocalLLM/llama.cpp`:
```text
cd .\llama.cpp
PS C:\...\LocalLLM\llama.cpp> $env:VCToolsVersion = "14.44.35207"
PS C:\...\LocalLLM\llama.cpp> cl
Microsoft (R) C/C++ Optimizing Compiler Version 19.50.35723 for x64
Copyright (C) Microsoft Corporation. All rights reserved.
usage: cl [ option... ] filename... [ /link linkoption... ]
PS C:\...\LocalLLM\llama.cpp> nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2025 NVIDIA Corporation
Built on Tue_Dec_16_19:27:18_Pacific_Standard_Time_2025
Cuda compilation tools, release 13.1, V13.1.115
Build cuda_13.1.r13.1/compiler.37061995_0
```
- Build with Ninja via x64 MSVC 2022, and set `$env:VCToolsVersion = "14.44.35207"`.
- If you don't have Ninja, install it, e.g. `winget install -e --id Ninja-build.Ninja`.
- This ensures you're able to build with CUDA locally. The warning suppressions below keep the output readable so real errors stand out instead of a hundred thousand warnings:
```powershell
cmake -S . -B build-cuda -G Ninja `
  -DCMAKE_BUILD_TYPE=Release `
  -DGGML_CUDA=ON `
  -DCMAKE_C_COMPILER=cl `
  -DCMAKE_CXX_COMPILER=cl `
  -DCMAKE_C_FLAGS="-w" `
  -DCMAKE_CXX_FLAGS="/W0" `
  -DCMAKE_ASM_FLAGS="/W0" `
  -DCMAKE_CUDA_FLAGS="--allow-unsupported-compiler -w -Xcompiler /W0"
```
```text
-- The C compiler identification is MSVC 19.50.35723.0
-- The CXX compiler identification is MSVC 19.50.35723.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: C:/Program Files/Microsoft Visual Studio/18/Community/VC/Tools/MSVC/14.50.35717/bin/Hostx64/x64/cl.exe - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: C:/Program Files/Microsoft Visual Studio/18/Community/VC/Tools/MSVC/14.50.35717/bin/Hostx64/x64/cl.exe - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
CMAKE_BUILD_TYPE=Release
-- Found Git: C:/Program Files/Git/cmd/git.exe (found version "2.52.0.windows.1")
-- The ASM compiler identification is MSVC
CMake Warning (dev) at C:/Program Files/CMake/share/cmake-4.2/Modules/CMakeDetermineASMCompiler.cmake:234 (message):
Policy CMP194 is not set: MSVC is not an assembler for language ASM. Run
"cmake --help-policy CMP194" for policy details. Use the cmake_policy
command to set the policy and suppress this warning.
Call Stack (most recent call first):
ggml/CMakeLists.txt:2 (project)
This warning is for project developers. Use -Wno-dev to suppress it.
-- Found assembler: C:/Program Files/Microsoft Visual Studio/18/Community/VC/Tools/MSVC/14.50.35717/bin/Hostx64/x64/cl.exe
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed
-- Looking for pthread_create in pthreads
-- Looking for pthread_create in pthreads - not found
-- Looking for pthread_create in pthread
-- Looking for pthread_create in pthread - not found
-- Found Threads: TRUE
-- Warning: ccache not found - consider installing it for faster compilation or disable this warning with GGML_CCACHE=OFF
-- CMAKE_SYSTEM_PROCESSOR: AMD64
-- CMAKE_GENERATOR_PLATFORM:
-- GGML_SYSTEM_ARCH: x86
-- Including CPU backend
-- Found OpenMP_C: -openmp (found version "2.0")
-- Found OpenMP_CXX: -openmp (found version "2.0")
-- Found OpenMP: TRUE (found version "2.0")
-- x86 detected
-- Performing Test HAS_AVX_1
-- Performing Test HAS_AVX_1 - Success
-- Performing Test HAS_AVX2_1
-- Performing Test HAS_AVX2_1 - Success
-- Performing Test HAS_FMA_1
-- Performing Test HAS_FMA_1 - Success
-- Performing Test HAS_AVX512_1
-- Performing Test HAS_AVX512_1 - Success
-- Adding CPU backend variant ggml-cpu: /arch:AVX512 GGML_AVX512
-- Found CUDAToolkit: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.1/include;C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.1/include/cccl (found version "13.1.115")
-- CUDA Toolkit found
-- The CUDA compiler identification is NVIDIA 13.1.115 with host compiler MSVC 19.50.35723.0
-- Detecting CUDA compiler ABI info
-- Detecting CUDA compiler ABI info - done
-- Check for working CUDA compiler: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v13.1/bin/nvcc.exe - skipped
-- Detecting CUDA compile features
-- Detecting CUDA compile features - done
-- Using CMAKE_CUDA_ARCHITECTURES=native CMAKE_CUDA_ARCHITECTURES_NATIVE=No CUDA devices found.-real
-- Including CUDA backend
-- ggml version: 0.9.5
-- ggml commit: a3e812811-dirty
-- Could NOT find OpenSSL, try to set the path to OpenSSL root folder in the system variable OPENSSL_ROOT_DIR (missing: OPENSSL_CRYPTO_LIBRARY OPENSSL_INCLUDE_DIR)
CMake Warning at vendor/cpp-httplib/CMakeLists.txt:145 (message):
OpenSSL not found, HTTPS support disabled
-- Generating embedded license file for target: common
-- Configuring done (12.0s)
-- Generating done (0.4s)
-- Build files have been written to: C:/.../LocalLLM/llama.cpp/build-cuda
```
- Navigate to `C:\...\LocalLLM\llama.cpp\ggml\src\ggml-cuda\CMakeLists.txt`.
- If you're using an RTX 5090, that means you're on Blackwell; try replacing the relevant section in that file with:
```cmake
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
  # For RTX 5090, targeting 120a is MANDATORY for block-scaled MMA/MXFP4.
  # We use 'set' instead of 'list(APPEND)' to prevent 'sm_120' from leaking in.
  set(CMAKE_CUDA_ARCHITECTURES "120a")
endif()
```
- Open it and find this line near the bottom:
```cmake
target_compile_options(ggml-cuda PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
```
- Directly beneath it, add this:
```cmake
target_compile_options(ggml-cuda PRIVATE
  $<$<COMPILE_LANGUAGE:CUDA>:--diag-suppress=221>
)
```
- Troubleshooting in progress. Build and capture the full log with:
```powershell
cmake --build build-cuda --verbose *> cmake-build.log
```
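Once the CUDA build links, offload on the application side is just a model parameter. A minimal sketch, assuming a recent llama.cpp API, a hypothetical model path, and the CUDA backend library linked in addition to the libraries already listed in `LocalLLM.vcxproj`:

```cpp
#include "llama.h"

// With llama.cpp rebuilt with -DGGML_CUDA=ON and the CUDA backend linked,
// GPU offload is requested per model load; everything else stays identical
// to the CPU-only sketch near the top of this README.
llama_model * load_with_offload() {
    llama_model_params mparams = llama_model_default_params();
    mparams.n_gpu_layers = 99; // offload as many layers as fit in VRAM
    return llama_model_load_from_file(
        "models/codegemma-7b-code-fp16.gguf", mparams); // hypothetical local path
}
```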