Переустановка плагинов (для повторных запусков)

In [None]:
!apt-get --purge remove cuda nvidia* libnvidia-*
!dpkg -l | grep cuda- | awk '{print $2}' | xargs -n1 dpkg --purge
!apt-get remove cuda-*
!apt autoremove
!apt-get update

!wget https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64 -O cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb
!dpkg -i cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb
!apt-key add /var/cuda-repo-9-2-local/7fa2af80.pub
!apt-get update
!apt-get install cuda-9.2
!pip install git+git://github.com/andreinechaev/nvcc4jupyter.git
%load_ext nvcc_plugin

In [8]:
!nvidia-smi

Wed Jan 12 20:50:43 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P8    29W / 149W |      3MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.device(0))
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))

# Output would be True if Pytorch is using GPU otherwise it would be False.

True
0
<torch.cuda.device object at 0x7f791481ee10>
1
Tesla K80


Напишем Hello World, чтобы проверить, что плагин работает

In [9]:
%%cu
#include <iostream>
    int
    main()
{
    std::cout << "Hello World\n";
    return 0;
}

Hello World



# Лабораторная работа №3

 Вариант 13. 
Методом Ньютона с точностью  найти корни уравнения 


In [18]:
%%cu
#include <stdio.h>
#include <ctime>

#include <math.h>
#include <cmath>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>

static void HandleError( cudaError_t err,
                         const char *file,
                         int line ) {
  if (err != cudaSuccess) {
      printf( "%s in %s at line %d\n", cudaGetErrorString( err ),
              file, line );
      exit( EXIT_FAILURE );
  }
}

#define E 10e-4
#define BLOCKS 32
#define THREADS 128
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))


__device__ __host__ float func(float x){
  return pow(0.7*M_E,-0.59*x) - x;
}


__device__ __host__ float func_derivative(float x){
  return -1-pow(0.413*M_E,-0.59*x);
}


__device__ __host__ float func_derivative_second(float x){
  return pow(0.24367*M_E,-0.59*x);
}


__global__ void newton_method(float *c, double step, float A){   
  float a = A + blockIdx.x * threadIdx.x * step;
  float b = A + (blockIdx.x * threadIdx.x + 1) * step;
  if (func(a) * func(b) > 0)
      return;
  double calc;
  do{
      calc = calc - func(calc) / func_derivative(calc);
  }while (fabs(func(calc)) >= E);
  c[blockIdx.x * threadIdx.x] = calc;
}


void cpu_execute(){
  int start = clock(), time;
  double c=2; 
  int n=0;
  while (fabs(func(c))>=E)
  {
      c = c-func(c) / func_derivative(c);
      n++;
  }
  time = clock() - start;
  printf("Equation root = %lf\n",c);
 	printf("\nCPU compute time: %.5f microseconds\n\n", time*1000);
}

void gpu_execute(){
    float B = 10, A = 0.01; // common borders

    cudaEvent_t start, stop;
	float gpuTime = 0.0f;
	HANDLE_ERROR(cudaEventCreate(&start));
	HANDLE_ERROR(cudaEventCreate(&stop));
  HANDLE_ERROR(cudaEventRecord(start, 0));

  const int n = THREADS * BLOCKS;
  float step = fabs(A - B) / n;
  thrust::host_vector<float> c (n);
  thrust::device_vector<float> dev_c (n);


  newton_method <<<BLOCKS, THREADS >>> (thrust::raw_pointer_cast(dev_c.data()), step, A);
  c = dev_c;

  HANDLE_ERROR(cudaEventRecord(stop, 0));
	HANDLE_ERROR(cudaEventSynchronize(stop));
  HANDLE_ERROR(cudaEventElapsedTime(&gpuTime, start, stop));
 	printf("\nGPU compute time: %.5f microseconds\n\n", gpuTime);

  for (unsigned int i = 0; i < c.size(); i++)
  {
      if (c[i] > E)
      {
          printf("GPU root %f \n", c[i]);
      }
  }
}

int main(void){
    gpu_execute();
    cpu_execute();
    return 0;
}


GPU compute time: 0.47267 microseconds

GPU root 0.751759 
threads 0 Equation root = 0.752440

CPU compute time: 0.75244 microseconds


