# **Monta Google drive**

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# assignment sources stored on Drive are reachable from the cells below.
from google.colab import drive
drive.mount('/content/drive')

**Attenzione:** create un collegamento alla directory condivisa nel vostro Google Drive.

In [None]:
# Use the absolute path (as the later cells do) so this cell can be re-run from
# any working directory; a relative path only resolves while the kernel is
# still in /content.
%cd /content/drive/MyDrive/STUDI/UNISA/CommonAssignment3/

# **Set Up CUDA**

Version Ubuntu, GPU version, GPU type, Plugin

In [None]:
# Report the OS release, the CUDA compiler version and the visible GPU(s).
!lsb_release --all
!nvcc --version
!nvidia-smi
# Install the plugin that provides the %%cu cell magic. GitHub no longer serves
# the unauthenticated git:// protocol, so the repository is fetched over HTTPS.
# NOTE(review): this installs the repository's default branch; if a newer
# release no longer ships the `nvcc_plugin` extension, pin a revision that does.
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git
%load_ext nvcc_plugin


## Librerie per la K80


In [None]:
# Replace the preinstalled CUDA/NVIDIA packages with CUDA 9.2 from NVIDIA's
# local repository package.
# -y answers apt's confirmation prompts, which would otherwise abort the
# command in a non-interactive notebook cell.
# The package globs are quoted so the shell hands them to apt verbatim instead
# of expanding them against files in the current directory.
!apt-get --purge remove -y cuda 'nvidia*' 'libnvidia-*'
# xargs -r: do not invoke dpkg at all when no cuda-* package is left to purge.
!dpkg -l | grep cuda- | awk '{print $2}' | xargs -r -n1 dpkg --purge
!apt-get remove -y 'cuda-*'
!apt autoremove -y
!apt-get update
!wget https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda-repo-ubuntu1710-9-2-local_9.2.88-1_amd64 -O cuda-repo-ubuntu1710-9-2-local_9.2.88-1_amd64.deb
!dpkg -i cuda-repo-ubuntu1710-9-2-local_9.2.88-1_amd64.deb
!apt-key add /var/cuda-repo-9-2-local/7fa2af80.pub
!apt-get update
!apt-get install -y cuda-9.2
# Confirm which nvcc is now on the PATH.
!nvcc --version

## Codice di test, risultato atteso = 8

In [None]:
%%cu
#include <stdio.h>
#include <stdlib.h>
// Kernel: stores the sum of the single ints behind `a` and `b` into the int
// behind `c`. Every launched thread performs the same store.
__global__ void add(int *a, int *b, int *c) {
  c[0] = a[0] + b[0];
}
// Prints a failed CUDA runtime call to stdout.
// Returns 1 when `status` is cudaSuccess, 0 otherwise.
static int cudaOk(cudaError_t status, const char *action) {
  if (status == cudaSuccess) {
    return 1;
  }
  printf("CUDA error %s: %s\n", action, cudaGetErrorString(status));
  return 0;
}

// Smoke test for the CUDA toolchain: computes 3 + 5 on the GPU with the `add`
// kernel and prints the result (expected: 8).
// Every runtime call is checked, so a broken setup (no device, no matching
// kernel image, ...) is reported instead of silently printing a stale value.
// Returns 0 on success and 1 if any CUDA call failed.
int main() {
  // host copies of variables a, b & c
  int a = 3, b = 5, c = 0;
  // device copies of variables a, b & c; NULL-initialised so the cudaFree
  // calls below are harmless no-ops when an allocation never happened
  int *d_a = NULL, *d_b = NULL, *d_c = NULL;
  const size_t size = sizeof(int);

  // Allocate space for the device copies, then copy the inputs to the device.
  // The && chain stops at the first failing call.
  int ok = cudaOk(cudaMalloc((void **)&d_a, size), "allocating d_a")
        && cudaOk(cudaMalloc((void **)&d_b, size), "allocating d_b")
        && cudaOk(cudaMalloc((void **)&d_c, size), "allocating d_c")
        && cudaOk(cudaMemcpy(d_a, &a, size, cudaMemcpyHostToDevice), "copying a to Device")
        && cudaOk(cudaMemcpy(d_b, &b, size, cudaMemcpyHostToDevice), "copying b to Device");

  if (ok) {
    // Launch add() kernel on GPU
    add<<<1,1>>>(d_a, d_b, d_c);
    // A kernel launch returns no status itself; launch failures are fetched
    // with cudaGetLastError(). The blocking copy back to the host then
    // reports any error raised while the kernel was running.
    ok = cudaOk(cudaGetLastError(), "launching add kernel")
      && cudaOk(cudaMemcpy(&c, d_c, size, cudaMemcpyDeviceToHost), "copying to Host");
  }

  if (ok) {
    printf("result is %d\n", c);
  }

  // Cleanup
  cudaFree(d_a);
  cudaFree(d_b);
  cudaFree(d_c);
  return ok ? 0 : 1;
}

# **Caratteristiche Macchina**


## Caratteristiche scheda

In [None]:
# Show the GPU board details reported by nvidia-smi.
!nvidia-smi 

## Caratteristiche GPU

In [None]:
%%cu

#include <stdio.h>
#include <stdlib.h>

// Prints the hardware properties of every CUDA device visible to the runtime:
// identification, per-SM resources, memory sizes and clocks, and the block/grid
// launch limits.
// A failed runtime call is reported on stdout; a device whose properties cannot
// be read is skipped, so no uninitialised values are ever printed.
void deviceQuery()
{
  cudaDeviceProp prop;
  int nDevices = 0;
  cudaError_t ierr;

  ierr = cudaGetDeviceCount(&nDevices);
  if (ierr != cudaSuccess) {
    printf("cudaGetDeviceCount error: %s\n", cudaGetErrorString(ierr));
    return;
  }

  for (int i = 0; i < nDevices; ++i)
  {
    ierr = cudaGetDeviceProperties(&prop, i);
    printf("Device number: %d\n", i);
    if (ierr != cudaSuccess) {
      printf("  cudaGetDeviceProperties error: %s\n\n", cudaGetErrorString(ierr));
      continue;
    }
    printf("  Device name: %s\n", prop.name);
    printf("  Compute capability: %d.%d\n\n", prop.major, prop.minor);

    // Per-device and per-SM resources. The size_t fields are printed with %zu,
    // the conversion that matches size_t on every platform.
    printf("  Clock Rate: %d kHz\n", prop.clockRate);
    printf("  Total SMs: %d \n", prop.multiProcessorCount);
    printf("  Shared Memory Per SM: %zu bytes\n", prop.sharedMemPerMultiprocessor);
    printf("  Registers Per SM: %d 32-bit\n", prop.regsPerMultiprocessor);
    printf("  Max threads per SM: %d\n", prop.maxThreadsPerMultiProcessor);
    printf("  L2 Cache Size: %d bytes\n", prop.l2CacheSize);
    printf("  Total Global Memory: %zu bytes\n", prop.totalGlobalMem);
    printf("  Memory Clock Rate: %d kHz\n\n", prop.memoryClockRate);

    // Block limits.
    printf("  Max threads per block: %d\n", prop.maxThreadsPerBlock);
    printf("  Max threads in X-dimension of block: %d\n", prop.maxThreadsDim[0]);
    printf("  Max threads in Y-dimension of block: %d\n", prop.maxThreadsDim[1]);
    printf("  Max threads in Z-dimension of block: %d\n\n", prop.maxThreadsDim[2]);

    // Grid limits.
    printf("  Max blocks in X-dimension of grid: %d\n", prop.maxGridSize[0]);
    printf("  Max blocks in Y-dimension of grid: %d\n", prop.maxGridSize[1]);
    printf("  Max blocks in Z-dimension of grid: %d\n\n", prop.maxGridSize[2]);

    // Per-block resources.
    printf("  Shared Memory Per Block: %zu bytes\n", prop.sharedMemPerBlock);
    printf("  Registers Per Block: %d 32-bit\n", prop.regsPerBlock);
    printf("  Warp size: %d\n\n", prop.warpSize);
  }
}

// Entry point: dump the properties of all visible CUDA devices and exit.
int main()
{
  deviceQuery();
  return 0;
}

In [None]:
# Build and run the bandwidthTest sample that ships with the CUDA toolkit.
# A single absolute %cd keeps the cell re-runnable from any working directory.
%cd /usr/local/cuda/samples/1_Utilities/bandwidthTest/
!make
# Range mode is selected with the documented `--mode=range` spelling; the
# start/end/increment values are passed through unchanged.
!./bandwidthTest --mode=range --start=1000 --end=1000000 --increment=100000

## Caratteristiche CPU

In [None]:
# Dump the kernel's description of the host CPUs from /proc/cpuinfo.
!cat /proc/cpuinfo


## Caratteristiche Memoria


In [None]:
# Dump the kernel's memory statistics from /proc/meminfo.
!cat /proc/meminfo

## Disk Info




In [None]:
# Show filesystem usage in human-readable units (-h).
!df -h

# **Common Assignment 3**

## Global


In [None]:
# Move into the assignment directory on the mounted Drive; the cells below
# compile global.cu and global_custom.cu from here.
%cd /content/drive/MyDrive/STUDI/UNISA/CommonAssignment3/

Esecuzione con test, senza creazione del csv

In [None]:
# Compile global.cu once, then run it with first argument 10000 and each value
# of the second argument in turn.
# (The second argument is presumably the threads-per-block count; confirm in global.cu.)
!nvcc -o global global.cu
for block_size in (128, 256, 512, 1024):
    !./global 10000 {block_size}

In [None]:
# Profile ./global under nvprof with a GPU trace.
# NOTE(review): --benchmark comes after the executable, so it is presumably
# passed to ./global rather than to nvprof; confirm global.cu handles it.
!nvprof --print-gpu-trace ./global 100000000 1024 --benchmark 

In [None]:
# Collect the inst_integer metric for ./global with nvprof.
!nvprof --metrics inst_integer ./global 100000000 1024

Esecuzione con creazione del csv

In [None]:
# Build and run the global_custom variant (the heading above says it produces a csv).
# The two arguments follow the same positional layout as ./global; their exact
# meaning is defined in global_custom.cu (TODO confirm).
!nvcc -o global_custom global_custom.cu
!./global_custom 10000000 1024

## Shared

In [None]:
# Move into the directory holding the shared-memory sources.
# NOTE(review): this is CommonAssignment3-Team02/src/, while every other cell
# uses CommonAssignment3/ (which the analysis cells also compile
# shared_custom.cu from). Confirm this path exists in each user's Drive.
%cd /content/drive/MyDrive/STUDI/UNISA/CommonAssignment3-Team02/src/

In [None]:
# Quick check: build shared.cu and run it once with arguments 100000 and 1024.
# The next cell repeats this build and run and adds further values of the
# second argument.
!nvcc -o shared shared.cu
!./shared 100000 1024

In [None]:
# Compile shared.cu once, then run it with first argument 100000 and each value
# of the second argument, largest first.
# (The second argument is presumably the threads-per-block count; confirm in shared.cu.)
!nvcc -o shared shared.cu
for block_size in (1024, 512, 256, 128):
    !./shared 100000 {block_size}

In [None]:
# Profile ./shared under nvprof with a GPU trace.
# NOTE(review): --benchmark comes after the executable, so it is presumably
# passed to ./shared rather than to nvprof; confirm shared.cu handles it.
!nvprof --print-gpu-trace ./shared 100000000 1024 --benchmark 

In [None]:
# Collect the inst_integer metric for ./shared with nvprof.
!nvprof --metrics inst_integer ./shared 100000000 1024

Esecuzione con creazione del csv

In [None]:
# Build and run the shared_custom variant (the heading above says it produces a csv).
# The two arguments follow the same positional layout as ./shared; their exact
# meaning is defined in shared_custom.cu (TODO confirm).
!nvcc -o shared_custom shared_custom.cu
!./shared_custom 10000000 1024

## Texture

In [None]:
# Return to the assignment directory; the cells below compile texture.cu and
# texture_custom.cu from here.
%cd /content/drive/MyDrive/STUDI/UNISA/CommonAssignment3/

In [None]:
# Quick check: build texture.cu for the sm_37 architecture and run it once with
# arguments 100000 and 1024. The next cell repeats this build and run and adds
# further values of the second argument.
!nvcc -arch=sm_37 -o texture texture.cu
!./texture 100000 1024

In [None]:
# Compile texture.cu for sm_37 once, then run it with first argument 100000 and
# each value of the second argument, largest first.
# (The second argument is presumably the threads-per-block count; confirm in texture.cu.)
!nvcc -arch=sm_37 -o texture texture.cu
for block_size in (1024, 512, 256, 128):
    !./texture 100000 {block_size}

In [None]:
# Profile ./texture under nvprof with a GPU trace.
# NOTE(review): --benchmark comes after the executable, so it is presumably
# passed to ./texture rather than to nvprof; confirm texture.cu handles it.
!nvprof --print-gpu-trace ./texture 100000000 1024 --benchmark 

In [None]:
# Collect the inst_integer metric for ./texture with nvprof.
!nvprof --metrics inst_integer ./texture 100000000 1024

Esecuzione con creazione del csv

In [None]:
# Build (for sm_37) and run the texture_custom variant (the heading above says
# it produces a csv). The two arguments follow the same positional layout as
# ./texture; their exact meaning is defined in texture_custom.cu (TODO confirm).
!nvcc -arch=sm_37 -o texture_custom texture_custom.cu
!./texture_custom 10000000 1024

# ***Analisi***

In [None]:
# Run the analysis from the assignment directory, where the *_custom.cu sources
# compiled by the cells below are located.
%cd /content/drive/MyDrive/STUDI/UNISA/CommonAssignment3/

In [None]:
# Single pass over the three implementations for every value of the second
# argument (presumably the threads-per-block count; confirm in the .cu sources).
# The sources do not change between runs, so each binary is built once up front
# instead of being recompiled before every group of runs.
!nvcc -o ./global_custom ./global_custom.cu
!nvcc -o ./shared_custom ./shared_custom.cu
!nvcc -arch=sm_37 -o ./texture_custom ./texture_custom.cu

# Same run order as before: global, shared, texture for 1024, then 512, 256, 128.
for block_size in (1024, 512, 256, 128):
    !./global_custom 102400000 {block_size}
    !./shared_custom 102400000 {block_size}
    !./texture_custom 102400000 {block_size}

In [None]:
# Build the three *_custom binaries once.
!nvcc -o ./global_custom ./global_custom.cu
!nvcc -o ./shared_custom ./shared_custom.cu
!nvcc -arch=sm_37 -o ./texture_custom ./texture_custom.cu

# For each value of the second argument (largest first), run the
# global/shared/texture triple 100 times back to back.
for block_size in (1024, 512, 256, 128):
    for _ in range(100):
        !./global_custom 102400000 {block_size}
        !./shared_custom 102400000 {block_size}
        !./texture_custom 102400000 {block_size}

In [None]:
# Build the three *_custom binaries once.
!nvcc -o global_custom global_custom.cu
!nvcc -o shared_custom shared_custom.cu
!nvcc -arch=sm_37 -o texture_custom texture_custom.cu

# For each implementation: print a banner, then collect the inst_integer metric
# with nvprof for every value of the second argument, largest first.
!echo "==============================================================GLOBAL====================================================================================="
for block_size in (1024, 512, 256, 128):
    !nvprof --metrics inst_integer ./global_custom 102400000 {block_size}
!echo "==============================================================SHARED====================================================================================="
for block_size in (1024, 512, 256, 128):
    !nvprof --metrics inst_integer ./shared_custom 102400000 {block_size}
!echo "==============================================================TEXTURE====================================================================================="
for block_size in (1024, 512, 256, 128):
    !nvprof --metrics inst_integer ./texture_custom 102400000 {block_size}