<a href="https://colab.research.google.com/github/semenovi/cuda-practice/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [36]:
%%writefile cudabasic.cu
#include <stdio.h>
#include <cuda_runtime.h>
__global__ void vectorAdd(const float *A, const float *B, float *C, int numElements)
{
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < numElements)
  {
    C[i] = A[i] + B[i];
  }
}


__global__ void isMatrixDiagonal ( float * A, int numElements, int * r )
{
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < numElements)
  {
    if (sqrt( (float) i) !=  (int) sqrt( (float) i))
    {
      if (A[i] != 0)
      {
        r[0] = 0;
      }
    }
  }
    
}




int main(void)
{
  // Error code to check return values for CUDA calls

  cudaError_t err = cudaSuccess;

  // Print the vector length to be used, and compute its size
  //Выведите длину вектора, который будет использоваться, и вычислите его размер

  int numElements = 62500;
  size_t size = numElements * sizeof(float);
  printf("[Vector addition of %d elements]\n", numElements);

  // Allocate the host input vector A
  //Выделите входной вектор хоста A

  float *h_A = (float *)malloc(size);

  int *h_r = (int *)malloc(sizeof(int));

  // Verify that allocations succeeded
  //Убедитесь, что распределение прошло успешно

  if (h_A == NULL)
  {
    fprintf(stderr, "Failed to allocate host vectors!\n");
    exit(EXIT_FAILURE);
  }
  // Initialize the host input vectors
  for (int i = 0; i < numElements; ++i)
  {
    h_A[i] = rand()/(float)RAND_MAX;
  }

  h_r[0] = 1;

  // Allocate the device input vector A
  //Выделите входной вектор устройства A

  float *d_A = NULL;
  err = cudaMalloc((void **)&d_A, size);
  if (err != cudaSuccess)
  {
    fprintf(stderr, "Failed to allocate device vector A (error code %s)!\n", cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  int *d_r = NULL;
  err = cudaMalloc((void **)&d_r, sizeof(int));
  if (err != cudaSuccess)
  {
    fprintf(stderr, "Failed to allocate device r (error code %s)!\n", cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  // Copy the host input vector A in host memory to the device input vectors in device memory
  // Скопируйте входной вектор хоста A из памяти хоста в входной вектор устройства в памяти устройства

  printf("Copy input data from the host memory to the CUDA device\n");
  err = cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice);
  if (err != cudaSuccess)
  {
    fprintf(stderr, "Failed to copy vector A from host to device (error code %s)!\n", cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  printf("Copy input data from the host memory to the CUDA device\n");
  err = cudaMemcpy(d_r, h_r, size, cudaMemcpyHostToDevice);
  if (err != cudaSuccess)
  {
    fprintf(stderr, "Failed to copy r from host to device (error code %s)!\n", cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }
  
  // Launch the Vector Add CUDA Kernel
  // Запустите Векторное ядро Add CUDA
  int threadsPerBlock = 256;
  int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
  printf("CUDA kernel launch with %d blocks of %d threads\n", blocksPerGrid, threadsPerBlock);
  isMatrixDiagonal<<<blocksPerGrid, threadsPerBlock>>>(d_A, numElements, d_r);
  err = cudaGetLastError();
  if (err != cudaSuccess)
  {
    fprintf(stderr, "Failed to launch isMatrixDiagonal kernel (error code %s)!\n", cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  // Copy the device result vector in device memory to the host result vector in host memory.
  // Скопируйте вектор результатов устройства из памяти устройства в вектор результатов хоста в памяти хоста.

  printf("Copy output data from the CUDA device to the host memory\n");
  err = cudaMemcpy(h_r, d_r, size, cudaMemcpyDeviceToHost);
  if (err != cudaSuccess)
  {
    fprintf(stderr, "Failed to copy int r from device to host (error code %s)!\n", cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }
  // Verify that the result vector is correct
  int *ch_r = (int *)malloc(sizeof(int));
  int ii = 0;
  for (; ii < numElements; ++ii)
  {
    if (sqrt( (float) ii) !=  (int) sqrt( (float) ii))
    {
      if (h_A[ii] != 0)
      {
        ch_r[0] = 0;
        break;
      }
    }
  }
  if (ch_r[0] != h_r[0])
  {
    fprintf(stderr, "Result verification failed at element %d!\n", ii);
    exit(EXIT_FAILURE);
  }
  printf("Test PASSED\n");

  // Free device global memory
  // Свободная глобальная память устройства

  err = cudaFree(d_A);
  if (err != cudaSuccess)
  {
    fprintf(stderr, "Failed to free device vector A (error code %s)!\n", cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }
  err = cudaFree(d_r);
  if (err != cudaSuccess)
  {
    fprintf(stderr, "Failed to free device r (error code %s)!\n", cudaGetErrorString(err));
  exit(EXIT_FAILURE);
  // Free host memory
  // Свободная память хоста
  free(h_A);
  free(h_r);
  printf("Done\n");
  return 0;
}
}

Overwriting cudabasic.cu


In [37]:
!nvcc -o cudabasic cudabasic.cu

In [38]:
!./cudabasic

[Vector addition of 62500 elements]
Copy input data from the host memory to the CUDA device
Copy input data from the host memory to the CUDA device
Failed to copy r from host to device (error code invalid argument)!


/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////