diff --git a/ChangeLog b/ChangeLog
index c55cb871e9..0de579ecf8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+Tue Sep 16 2025 Łukasz Stolarczuk <lukasz.stolarczuk@intel.com>
+
+	* Version 1.0.3
+
+	This patch release contains the following change:
+	- load libcuda.so.1 instead of libcuda.so on Linux (#1518)
+
+Wed Sep 03 2025 Łukasz Stolarczuk <lukasz.stolarczuk@intel.com>
+
+	* Version 1.0.2
+
+	This patch release contains the following change:
+	- initialize hwloc topology only before first fork, not always (#1509)
+
 Fri Aug 08 2025 Łukasz Stolarczuk <lukasz.stolarczuk@intel.com>
 
 	* Version 1.0.1
diff --git a/src/libumf.c b/src/libumf.c
index 9df7ee29cd..4ccde9bb4f 100644
--- a/src/libumf.c
+++ b/src/libumf.c
@@ -40,6 +40,13 @@ static umf_ctl_node_t CTL_NODE(umf)[] = {CTL_CHILD(provider), CTL_CHILD(pool),
 
 void initialize_global_ctl(void) { CTL_REGISTER_MODULE(NULL, umf); }
 
+// Prepare handler for pthread_atfork(). Benchmarks may fork multiple times and
+// topology init is slow, so it is done before fork (if not already done) and
+// children don't repeat it. TODO: This is a hack. Better solution is needed.
+#if !defined(_WIN32) && !defined(UMF_NO_HWLOC)
+static void atfork_prepare(void) { umfGetTopologyReduced(); }
+#endif
+
 umf_result_t umfInit(void) {
     utils_init_once(&initMutexOnce, initialize_init_mutex);
 
@@ -74,11 +81,11 @@ umf_result_t umfInit(void) {
     if (TRACKER) {
         LOG_DEBUG("UMF library initialized");
     }
-#if !defined(UMF_NO_HWLOC)
-    // some benchmarks uses multiple forks, and topology initialization is very slow
-    // so if we initialize topology before the first fork, we can get significant performance gain.
-    umfGetTopologyReduced();
+
+#if !defined(_WIN32) && !defined(UMF_NO_HWLOC)
+    pthread_atfork(atfork_prepare, NULL, NULL);
 #endif
+
     return UMF_RESULT_SUCCESS;
 }
 
diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c
index 983be6b55a..953876fc12 100644
--- a/src/provider/provider_cuda.c
+++ b/src/provider/provider_cuda.c
@@ -149,7 +149,7 @@ static void init_cu_global_state(void) {
 #ifdef _WIN32
     const char *lib_name = "nvcuda.dll";
 #else
-    const char *lib_name = "libcuda.so";
+    const char *lib_name = "libcuda.so.1";
 #endif
     // The CUDA shared library should be already loaded by the user
     // of the CUDA provider. UMF just want to reuse it
diff --git a/test/providers/cuda_helpers.cpp b/test/providers/cuda_helpers.cpp
index 3e81c184ff..4678ea4acd 100644
--- a/test/providers/cuda_helpers.cpp
+++ b/test/providers/cuda_helpers.cpp
@@ -110,7 +110,7 @@ int InitCUDAOps() {
 #ifdef _WIN32
     const char *lib_name = "nvcuda.dll";
 #else
-    const char *lib_name = "libcuda.so";
+    const char *lib_name = "libcuda.so.1";
 #endif
     // CUDA symbols
 #if OPEN_CU_LIBRARY_GLOBAL