In [None]:
%%writefile addb.cu
#include<stdio.h>
// Element-wise integer vector addition, one element per thread block:
//     c[i] = a[i] + b[i]   with i = blockIdx.x
//
// Launch layout : 1D grid with one block per element. Only blockIdx.x is used;
//                 threadIdx is ignored, so add<<<count, 1>>> is the intended
//                 configuration.
// Preconditions : a, b and c are device pointers to at least gridDim.x ints.
//                 There is no bounds guard - the grid size IS the element count.
//                 c may be the same buffer as a or b (both inputs are loaded
//                 before the store).
// Shared memory : none.
__global__ void add( int* a, int* b, int* c ) {
    const unsigned int elem = blockIdx.x;   // the single element owned by this block
    const int lhs = a[elem];
    const int rhs = b[elem];
    c[elem] = lhs + rhs;
}
// Number of vector elements. add() handles one element per thread block, so N
// is both the element count and the number of blocks launched.
#define N 512

// Runs one CUDA runtime call. On failure it prints file, line and the
// runtime's error string to stderr, sets the enclosing function's `status`
// to 1 and jumps to its `cleanup` label (both must be in scope at the call).
#define CUDA_CHECK(call) \
    do { \
        cudaError_t err_ = (call); \
        if (err_ != cudaSuccess) { \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
                cudaGetErrorString(err_)); \
            status = 1; \
            goto cleanup; \
        } \
    } while (0)

// Adds two N-element integer vectors on the GPU and prints the first sum.
// Returns 0 on success, 1 if any host allocation or CUDA call fails (the
// reason is written to stderr). All host and device buffers are released on
// every path.
int main( void ) {
    // Every local is declared before the first CUDA_CHECK: that macro may
    // `goto cleanup`, and C++ forbids jumping over an initialised declaration.
    int *a = NULL, *b = NULL, *c = NULL;             // host copies of a, b, c
    int *dev_a = NULL, *dev_b = NULL, *dev_c = NULL; // device copies of a, b, c
    const size_t size = N * sizeof( int );           // bytes needed for N integers
    int status = 0;                                  // exit code; 1 after any failure
    int i;

    // allocate device copies of a, b, c
    CUDA_CHECK( cudaMalloc( (void**)&dev_a, size ) );
    CUDA_CHECK( cudaMalloc( (void**)&dev_b, size ) );
    CUDA_CHECK( cudaMalloc( (void**)&dev_c, size ) );

    // allocate host copies of a, b, c
    a = (int*)malloc( size );
    b = (int*)malloc( size );
    c = (int*)malloc( size );
    if ( a == NULL || b == NULL || c == NULL ) {
        fprintf( stderr, "host allocation failed (%zu bytes per buffer)\n", size );
        status = 1;
        goto cleanup;
    }

    // All N elements are copied to the device and added, so all N inputs must
    // be initialised - not just element 0.
    for ( i = 0; i < N; ++i ) {
        a[i] = 2;
        b[i] = 7;
    }

    // copy inputs to device
    CUDA_CHECK( cudaMemcpy( dev_a, a, size, cudaMemcpyHostToDevice ) );
    CUDA_CHECK( cudaMemcpy( dev_b, b, size, cudaMemcpyHostToDevice ) );

    // launch add() kernel with N parallel blocks (one element per block)
    add<<< N, 1 >>>( dev_a, dev_b, dev_c );
    // The launch expression itself returns nothing; an invalid launch is
    // reported through cudaGetLastError().
    CUDA_CHECK( cudaGetLastError() );

    // copy device result back to host copy of c; this blocking copy runs after
    // the kernel, so a fault inside the kernel is reported by this check
    CUDA_CHECK( cudaMemcpy( c, dev_c, size, cudaMemcpyDeviceToHost ) );

    printf( "Sum=%d\n", *c );

cleanup:
    // Reached on success and from every failure. Pointers that are still NULL
    // were never allocated; free(NULL) and cudaFree(NULL) do nothing. Return
    // codes are not checked here: any real failure was already reported.
    free( a ); free( b ); free( c );
    cudaFree( dev_a );
    cudaFree( dev_b );
    cudaFree( dev_c );
    return status;
}

Overwriting addb.cu


In [None]:
!nvcc addb.cu -o addb

In [None]:
!./addb

Sum=9


In [None]:
%%writefile vecmul.cu
#include<stdio.h>
#include<cuda.h>
// Element-wise product of two float vectors: C[i] = A[i] * B[i], 0 <= i < N.
//
// Launch layout : 1D grid of 1D blocks; only the .x dimensions are used.
//                 Any grid and block size >= 1 is valid: each thread starts at
//                 its global index and advances by the total number of
//                 launched threads, so the result does not depend on the
//                 launch covering N. A launch of ceil(N / threads) blocks
//                 gives one element per thread.
// Parameters    : A, B - device pointers, only read, at least N floats each.
//                 C    - device pointer, written, at least N floats. C may be
//                        the same buffer as A or B: each element is read and
//                        then written by the same thread, which is why no
//                        __restrict__ is applied.
//                 N    - element count; N <= 0 writes nothing.
// Shared memory : none. No synchronisation is used.
__global__ void VecMul(float* A, float* B, float* C, int N)
{
	// Index math is done in size_t: the 32-bit product blockDim.x * blockIdx.x
	// wraps on very large launches, and narrowing it to int could produce a
	// negative index that still passes an `i < N` test.
	const size_t count = (N > 0) ? (size_t)N : 0;
	const size_t stride = (size_t)gridDim.x * blockDim.x;
	size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

	for (; i < count; i += stride)
		C[i] = A[i] * B[i];
}
// Runs one CUDA runtime call. On failure it prints file, line and the
// runtime's error string to stderr, sets the enclosing function's `status`
// to 1 and jumps to its `cleanup` label (both must be in scope at the call).
#define CUDA_CHECK(call) \
	do { \
		cudaError_t err_ = (call); \
		if (err_ != cudaSuccess) { \
			fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
				cudaGetErrorString(err_)); \
			status = 1; \
			goto cleanup; \
		} \
	} while (0)

// Squares the numbers 0..N-1 on the GPU (A and B hold the same values) and
// prints one result per line. Returns 0 on success, 1 if any CUDA call fails
// (the reason is written to stderr). Device buffers are released on every path.
int main()
{
	// Every local is declared before the first CUDA_CHECK: that macro may
	// `goto cleanup`, and C++ forbids jumping over an initialised declaration.
	const int N = 10;	// compile-time constant, so A, B and C are ordinary arrays
	const size_t size = N * sizeof(float);
	// Block size is a multiple of the 32-thread warp; the grid is rounded up
	// so that blocksPerGrid * threadsPerBlock >= N.
	const int threadsPerBlock = 256;
	const int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
	float A[N], B[N], C[N];
	float *d_A = NULL, *d_B = NULL, *d_C = NULL;
	int status = 0;	// exit code; 1 after any failure
	int i;

	// Initializing host inputs
	for (i = 0; i < N; i++) {
		A[i] = B[i] = (float)i;
	}

	// Allocating device and copying to device
	CUDA_CHECK(cudaMalloc((void **)&d_A, size));
	CUDA_CHECK(cudaMalloc((void **)&d_B, size));
	CUDA_CHECK(cudaMalloc((void **)&d_C, size));

	CUDA_CHECK(cudaMemcpy(d_A, A, size, cudaMemcpyHostToDevice));
	CUDA_CHECK(cudaMemcpy(d_B, B, size, cudaMemcpyHostToDevice));

	// Invoking kernel
	VecMul<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, N);
	// The launch expression itself returns nothing; an invalid launch is
	// reported through cudaGetLastError().
	CUDA_CHECK(cudaGetLastError());

	// Copy result from device to host; this blocking copy runs after the
	// kernel, so a fault inside the kernel is reported by this check
	CUDA_CHECK(cudaMemcpy(C, d_C, size, cudaMemcpyDeviceToHost));

	for (i = 0; i < N; i++)
		printf("%f\n", C[i]);

cleanup:
	// Reached on success and from every failure. Pointers that are still NULL
	// were never allocated and cudaFree(NULL) does nothing. Return codes are
	// not checked here: any real failure was already reported.
	cudaFree(d_A);
	cudaFree(d_B);
	cudaFree(d_C);
	return status;
}

Writing vecmul.cu


In [None]:
!nvcc vecmul.cu -o vecmul

In [None]:
!./vecmul

0.000000
1.000000
4.000000
9.000000
16.000000
25.000000
36.000000
49.000000
64.000000
81.000000
