<a href="https://colab.research.google.com/github/tianshenbupa/BOOKRecommendationSystem/blob/master/Untitled3_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pycuda.autoinit  # importing this module initializes CUDA and creates a context
import pycuda.driver as cuda
import numpy as np
from pycuda.compiler import SourceModule

# Define the CUDA kernel: element-wise addition of two float vectors.
# The launch grid is rounded up to a whole number of blocks, so the final block
# may contain threads whose index is >= n. The kernel therefore takes the vector
# length and skips those threads instead of touching memory past the buffers.
mod = SourceModule("""
    __global__ void add_vectors(float *dest, const float *a, const float *b, int n) {
        int idx = threadIdx.x + blockDim.x * blockIdx.x;
        if (idx < n) {
            dest[idx] = a[idx] + b[idx];
        }
    }
""")

# Prepare the input data on the host (CPU side).
n = 1000000  # vector length
a = np.random.randn(n).astype(np.float32)
b = np.random.randn(n).astype(np.float32)
result = np.empty_like(a)

# Allocate device (GPU) buffers, one per vector, sized in bytes.
a_gpu = cuda.mem_alloc(a.nbytes)
b_gpu = cuda.mem_alloc(b.nbytes)
dest_gpu = cuda.mem_alloc(a.nbytes)

# Copy the inputs from host to device.
cuda.memcpy_htod(a_gpu, a)
cuda.memcpy_htod(b_gpu, b)

# Launch configuration.
block_size = 256  # threads per block
grid_size = (n + block_size - 1) // block_size  # ceil(n / block_size) blocks

# Launch the kernel. Scalar arguments must be passed as sized numpy scalars so
# PyCUDA knows how to marshal them; np.int32 matches the kernel's `int n`.
func = mod.get_function("add_vectors")
func(
    dest_gpu,
    a_gpu,
    b_gpu,
    np.int32(n),
    block=(block_size, 1, 1),
    grid=(grid_size, 1),
)

# Copy the result from device back to host.
cuda.memcpy_dtoh(result, dest_gpu)

# Verify the GPU result against the same computation on the CPU.
assert np.allclose(result, a + b), "GPU 计算结果与 CPU 不一致！"


ModuleNotFoundError: No module named 'pycuda'

In [None]:
pip install pycuda

Collecting pycuda
  Downloading pycuda-2025.1.tar.gz (1.7 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.6/1.7 MB[0m [31m17.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2025.1.1-py3-none-any.whl.metadata (3.0 kB)
Collecting mako (from pycuda)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Downloading pytools-2025.1.1-py3-none-any.whl (92 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.8/92.8 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Mako-