In [None]:
!nvidia-smi

Fri Jun  6 11:20:05 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   40C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Jun__6_02:18:23_PDT_2024
Cuda compilation tools, release 12.5, V12.5.82
Build cuda_12.5.r12.5/compiler.34385749_0


In [None]:
!pip install nvcc4jupyter

Collecting nvcc4jupyter
  Downloading nvcc4jupyter-1.2.1-py3-none-any.whl.metadata (5.1 kB)
Downloading nvcc4jupyter-1.2.1-py3-none-any.whl (10 kB)
Installing collected packages: nvcc4jupyter
Successfully installed nvcc4jupyter-1.2.1


In [None]:
%load_ext nvcc4jupyter

Detected platform "Colab". Running its setup...
Source files will be saved in "/tmp/tmpriegoths".


In [None]:
%%cuda

#include <chrono>
#include <iostream>

#define X 0.1
#define Y 7.1
#define a 3.0
#define N 3

// Wrap a CUDA runtime call (or any expression yielding a cudaError_t):
// the status is forwarded to gpuAssert together with the call site.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

// Report a failed CUDA status on stderr and optionally terminate.
//   code  - status value to inspect
//   file  - source file of the checked call (supplied by gpuErrchk)
//   line  - source line of the checked call (supplied by gpuErrchk)
//   abort - when true (the default) the process exits with `code` as its
//           exit status after the message has been printed
// Nothing happens when `code` is cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
   // success is the common case: leave immediately
   if (code == cudaSuccess) return;

   fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);

   if (abort)
   {
      exit(code);
   }
}

// DAXPY kernel: computes vecZ[i] = a*vecX[i] + vecY[i] for every i in [0, N),
// one element per thread. `a` and `N` are the file-level macros.
//   vecX, vecY - input vectors, each holding at least N doubles
//   vecZ       - output vector, at least N doubles
// All three pointers are dereferenced in device code, so they must refer to
// memory that is accessible from the GPU.
__global__ void daxpy(double* vecX,double* vecY,double* vecZ){

	// Global thread index: identical to threadIdx.x for a single-block launch,
	// but also correct when the grid consists of several blocks.
	int i = blockIdx.x*blockDim.x + threadIdx.x;

	// Threads beyond the end of the vectors (launches rounded up to a multiple
	// of the block size) must not touch memory.
	if(i < N){
		vecZ[i] = a*vecX[i] + vecY[i];
	}

}

// everything is written in the main function
// in order to maintain an overview over the OpenMP implementation
// Runs one DAXPY (vecZ = a*vecX + vecY) of length N on the GPU, measures the
// wall-clock time of device allocation + transfers + kernel, and prints the
// first element of the result. Every CUDA call is checked with gpuErrchk,
// which prints the error and exits the process on failure.
int main(){

	// size of one vector in bytes, shared by every allocation and copy
	const auto bytes = N*sizeof(double);

	// allocate memory for the host vectors and fill the two inputs
	double* h_vecX = new double[N];
	double* h_vecY = new double[N];
	double* h_vecZ = new double[N];

	for(int i=0;i<N;i++){
		h_vecX[i] = X;
		h_vecY[i] = Y;
	}

	// timer variables; steady_clock is monotonic, so the measured interval
	// cannot be distorted by adjustments of the system time
	double dt_cuda;
	std::chrono::time_point<std::chrono::steady_clock> start;
	std::chrono::time_point<std::chrono::steady_clock> end;

	start = std::chrono::steady_clock::now();
	// allocate the device vectors
	double* d_vecX;
	gpuErrchk( cudaMalloc(&d_vecX,bytes) );
	double* d_vecY;
	gpuErrchk( cudaMalloc(&d_vecY,bytes) );
	double* d_vecZ;
	gpuErrchk( cudaMalloc(&d_vecZ,bytes) );

	// upload the inputs only: d_vecZ is written by the kernel, and h_vecZ
	// holds no defined values yet, so there is nothing meaningful to copy
	gpuErrchk( cudaMemcpy(d_vecX,h_vecX,bytes,cudaMemcpyHostToDevice) );
	gpuErrchk( cudaMemcpy(d_vecY,h_vecY,bytes,cudaMemcpyHostToDevice) );

	// one block of N threads, one thread per element
	daxpy<<<1,N>>>(d_vecX,d_vecY,d_vecZ);

	// NOTE: "the provided PTX was compiled with an unsupported toolchain" is
	// reported here when the binary only carries PTX produced by an nvcc that
	// is newer than the CUDA version supported by the installed driver, so the
	// driver cannot JIT-compile the kernel. Building native code for the
	// actual GPU (e.g. passing -arch=sm_75 to nvcc for a Tesla T4) avoids
	// the JIT step.
	gpuErrchk( cudaPeekAtLastError() );   // launch / configuration errors
	gpuErrchk( cudaDeviceSynchronize() ); // errors raised while the kernel ran

	// fetch the result vector
	gpuErrchk( cudaMemcpy(h_vecZ,d_vecZ,bytes,cudaMemcpyDeviceToHost) );

	end = std::chrono::steady_clock::now();

	dt_cuda = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();

	gpuErrchk( cudaFree(d_vecX) );
	gpuErrchk( cudaFree(d_vecY) );
	gpuErrchk( cudaFree(d_vecZ) );

	// print the outcome; the sum lives in vecZ, not in the input vecX
	std::cout << "daxpy vector sum with CUDA" << std::endl;
	std::cout << "vectors of size " << N << std::endl;
	std::cout << "CUDA runtime is: " << dt_cuda << " ns" << std::endl;
	std::cout << "final result of the sum is: " << h_vecZ[0] << std::endl;

	// free the host memory
	delete[] h_vecX;
	delete[] h_vecY;
	delete[] h_vecZ;

	return 0;
}

GPUassert: the provided PTX was compiled with an unsupported toolchain. /tmp/tmpriegoths/f5e08d5c-e918-4026-ad05-f3c8449706a8/single_file.cu 61

