In [None]:
# Mount Google Drive at /content/drive (requires the google.colab package, i.e. a Colab runtime).
# NOTE(review): no cell visible here reads from the mounted drive — confirm it is still needed.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Show the attached GPU, driver/CUDA versions and current memory/utilisation.
!nvidia-smi

Fri Jan 24 16:15:50 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
# Report the version of the installed CUDA compiler driver (nvcc).
!nvcc --version


nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [None]:
# Install nvcc4jupyter pinned to a fixed commit so a fresh runtime reproduces the same build.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q git+https://github.com/andreinechaev/nvcc4jupyter.git@28f872a2f99a1b201bcd0db14fdbc5a496b9bfd7

Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-b9l7yequ
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-b9l7yequ
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit 28f872a2f99a1b201bcd0db14fdbc5a496b9bfd7
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: nvcc4jupyter
  Building wheel for nvcc4jupyter (pyproject.toml) ... [?25l[?25hdone
  Created wheel for nvcc4jupyter: filename=nvcc4jupyter-1.2.1-py3-none-any.whl size=10742 sha256=fdb30b442a03a2b978d94f242f4f3839c3b517111d4c5bcc31496f40b5aacbba
  Stored in directory: /tmp/pip-ephem-wheel-cache-zpgbkc0b/wheels/ef/1d/c6/f7e47f1aa1bc9d05c4120d94f90a79cf28603ef343b0dd43ff
Successfully bu

In [None]:
# Load the nvcc4jupyter IPython extension installed by the previous cell.
%load_ext nvcc4jupyter

Detected platform "Colab". Running its setup...
Source files will be saved in "/tmp/tmpreh7t1et".


In [None]:
%%writefile concurrent.cu
#include <cuda_runtime.h>
#include <iostream>

#define N 1  // number of distance readings held in the host and device buffers
const float alertThreshold = 30.0f;    // upper limit (m): readings above it are reported as safe
const float brakingThreshold = 15.0f;  // lower limit (m): readings at or below it raise the close-obstacle warning
// Decrements each distance reading by 1 and prints the new value from the
// device (judging by the name, this stands in for receiving a new sensor
// sample).
//
// data: device buffer of distance readings. The buffers in this file are
//       allocated with N floats, so threads whose index falls outside
//       [0, N) return without touching memory.
__global__ void receiveData(float *data) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= N) {
        return;
    }

    // Compute once so the value stored and the value printed are the same.
    const float distance = data[idx] - 1.0f;
    data[idx] = distance;
    printf("Distance: %f m\n", distance);
}

// Classifies each distance reading against the two thresholds and prints the
// verdict from the device:
//   reading >  alertThreshold                      -> "Safe"
//   brakingThreshold < reading <= alertThreshold   -> "Obstacle detected ..."
//   otherwise                                      -> "Warning: Obstacle close ..."
//
// data: device buffer of distance readings. The buffers in this file are
//       allocated with N floats, so threads whose index falls outside
//       [0, N) return without touching memory.
__global__ void monitorDistance(float *data) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= N) {
        return;
    }

    // Read the reading once so the branch taken and the value printed agree.
    const float distance = data[idx];

    if (distance > alertThreshold) {
        printf("Safe\n");
    } else if (distance > brakingThreshold) {
        // The first branch already excluded readings above alertThreshold,
        // so only the lower bound has to be tested here.
        printf("Obstacle detected with distance: %f m\n", distance);
    } else {
        printf("Warning: Obstacle close with distance: %f m\n", distance);
    }
}

// Evaluates a CUDA runtime call inside main(); on failure it prints the CUDA
// error string to stderr and makes main() return 1. The early return skips the
// explicit cleanup at the end of main(), which is acceptable because the
// process is terminating anyway.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        const cudaError_t cudaStatus_ = (call);                            \
        if (cudaStatus_ != cudaSuccess) {                                  \
            std::cerr << "CUDA error at line " << __LINE__ << ": "         \
                      << cudaGetErrorString(cudaStatus_) << std::endl;     \
            return 1;                                                      \
        }                                                                  \
    } while (0)

// Runs the distance simulation: for each step, receiveData (stream1) updates
// the readings and monitorDistance (stream2) classifies them.
//
// Both kernels work on the same device buffer, so the two streams are ordered
// with events: stream2 waits until the update of the current step is done, and
// stream1 waits until that step has been classified before it overwrites the
// buffer again. Without this the kernels race on the buffer and status lines
// can be duplicated or skipped.
//
// Returns 0 on success, 1 if any CUDA call fails.
int main() {
    const int kSteps = 40;                 // number of simulated readings
    const float kInitialDistance = 40.0f;  // starting distance in m

    float *h_data = new float[N];
    for (int i = 0; i < N; ++i) {
        h_data[i] = kInitialDistance;
    }

    float *d_data1;
    CUDA_CHECK(cudaMalloc(&d_data1, N * sizeof(float)));

    cudaStream_t stream1, stream2;
    CUDA_CHECK(cudaStreamCreate(&stream1));
    CUDA_CHECK(cudaStreamCreate(&stream2));

    // Events are used for ordering only, so timing is disabled.
    cudaEvent_t dataReady, dataConsumed;
    CUDA_CHECK(cudaEventCreateWithFlags(&dataReady, cudaEventDisableTiming));
    CUDA_CHECK(cudaEventCreateWithFlags(&dataConsumed, cudaEventDisableTiming));

    // The upload is issued on stream1, so the first receiveData launch is
    // ordered after it by normal in-stream ordering.
    CUDA_CHECK(cudaMemcpyAsync(d_data1, h_data, N * sizeof(float),
                               cudaMemcpyHostToDevice, stream1));

    // One thread per block keeps the launch at exactly N threads, one per
    // reading, so no thread indexes past the end of the buffer.
    const int threadsPerBlock = 1;
    const int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;

    for (int step = 0; step < kSteps; ++step) {
        receiveData<<<blocksPerGrid, threadsPerBlock, 0, stream1>>>(d_data1);
        CUDA_CHECK(cudaGetLastError());  // catches launch-configuration errors

        // stream2 must not read the buffer before this step's update is done.
        CUDA_CHECK(cudaEventRecord(dataReady, stream1));
        CUDA_CHECK(cudaStreamWaitEvent(stream2, dataReady, 0));

        monitorDistance<<<blocksPerGrid, threadsPerBlock, 0, stream2>>>(d_data1);
        CUDA_CHECK(cudaGetLastError());

        // stream1 must not overwrite the buffer before it has been classified.
        CUDA_CHECK(cudaEventRecord(dataConsumed, stream2));
        CUDA_CHECK(cudaStreamWaitEvent(stream1, dataConsumed, 0));
    }

    // Copy the final readings back to the host (not used further here).
    CUDA_CHECK(cudaMemcpyAsync(h_data, d_data1, N * sizeof(float),
                               cudaMemcpyDeviceToHost, stream1));

    // Synchronizing also flushes the device-side printf output and surfaces
    // any error raised while the kernels were executing.
    CUDA_CHECK(cudaStreamSynchronize(stream1));
    CUDA_CHECK(cudaStreamSynchronize(stream2));

    CUDA_CHECK(cudaEventDestroy(dataReady));
    CUDA_CHECK(cudaEventDestroy(dataConsumed));
    CUDA_CHECK(cudaFree(d_data1));
    CUDA_CHECK(cudaStreamDestroy(stream1));
    CUDA_CHECK(cudaStreamDestroy(stream2));
    delete[] h_data;

    return 0;
}


Overwriting concurrent.cu


In [None]:
# Compile the concurrent.cu file written by the %%writefile cell, then run the binary.
# NOTE(review): --default-stream per-thread only changes the behaviour of the default stream;
# concurrent.cu issues its work on explicitly created streams, so the flag looks unnecessary — confirm.
!nvcc --default-stream per-thread ./concurrent.cu -o concurrent
!./concurrent

Distance: 39.000000 m
Safe
Distance: 38.000000 m
Safe
Distance: 37.000000 m
Safe
Distance: 36.000000 m
Safe
Distance: 35.000000 m
Safe
Distance: 34.000000 m
Safe
Safe
Distance: 33.000000 m
Safe
Distance: 32.000000 m
Safe
Distance: 31.000000 m
Safe
Distance: 30.000000 m
Obstacle detected with distance: 30.000000 m
Distance: 29.000000 m
Obstacle detected with distance: 29.000000 m
Distance: 28.000000 m
Distance: 27.000000 m
Obstacle detected with distance: 27.000000 m
Distance: 26.000000 m
Obstacle detected with distance: 26.000000 m
Distance: 25.000000 m
Obstacle detected with distance: 25.000000 m
Distance: 24.000000 m
Distance: 23.000000 m
Obstacle detected with distance: 23.000000 m
Distance: 22.000000 m
Obstacle detected with distance: 22.000000 m
Distance: 21.000000 m
Obstacle detected with distance: 21.000000 m
Distance: 20.000000 m
Distance: 19.000000 m
Obstacle detected with distance: 19.000000 m
Distance: 18.000000 m
Obstacle detected with distance: 18.000000 m
Distance: 17.000

In [None]:
# Profile the ./concurrent binary built in the previous cell with nvprof.
!nvprof ./concurrent

==8037== NVPROF is profiling process 8037, command: ./concurrent
Distance: 39.000000 m
Safe
Safe
Distance: 38.000000 m
Safe
Distance: 37.000000 m
Safe
Distance: 36.000000 m
Safe
Distance: 35.000000 m
Safe
Distance: 34.000000 m
Safe
Safe
Distance: 33.000000 m
Safe
Distance: 32.000000 m
Safe
Distance: 31.000000 m
Safe
Distance: 30.000000 m
Obstacle detected with distance: 30.000000 m
Distance: 29.000000 m
Obstacle detected with distance: 29.000000 m
Distance: 28.000000 m
Distance: 27.000000 m
Obstacle detected with distance: 27.000000 m
Distance: 26.000000 m
Obstacle detected with distance: 26.000000 m
Distance: 25.000000 m
Obstacle detected with distance: 25.000000 m
Distance: 24.000000 m
Distance: 23.000000 m
Obstacle detected with distance: 23.000000 m
Distance: 22.000000 m
Obstacle detected with distance: 22.000000 m
Distance: 21.000000 m
Obstacle detected with distance: 21.000000 m
Distance: 20.000000 m
Distance: 19.000000 m
Obstacle detected with distance: 19.000000 m
Distance: 18.

In [None]:
# Build DPS.cu with OpenMP enabled for the host compiler and run it with 4 OpenMP threads.
# NOTE(review): DPS.cu is not written by any cell visible here; it must already exist in the
# working directory or the compile step fails with "No such file or directory".
# %env sets the variable in the kernel's environment, so the `!./DPS` subprocess inherits it;
# a bare `!OMP_NUM_THREADS=4` only affects its own short-lived subshell and is lost.
%env OMP_NUM_THREADS=4
!nvcc -Xcompiler="-fopenmp" -arch=sm_75 -o DPS DPS.cu
!./DPS

[01m[Kcc1plus:[m[K [01;31m[Kfatal error: [m[KDPS.cu: No such file or directory
compilation terminated.
/bin/bash: line 1: ./DPS: No such file or directory


In [None]:
# Compile concurrent_tasks.cu and run the resulting binary.
# NOTE(review): concurrent_tasks.cu is not written by any cell visible here — presumably it
# already exists in the working directory; confirm so the notebook survives a fresh runtime.
!nvcc concurrent_tasks.cu -o concurrent_tasks
!./concurrent_tasks

Distance is: 0
Distance is: 1
Distance is: 2
Distance is: 3
Distance is: 4
Distance is: 5
Distance is: 6
Distance is: 7
Distance is: 8
Distance is: 9
Distance is: 10
Distance is: 11
Distance is: 12
Distance is: 13
Distance is: 14
Distance is: 15
Distance is: 16
Distance is: 17
Distance is: 18
Distance is: 19
Distance is: 20
Distance is: 21
Distance is: 22
Distance is: 23
Distance is: 24
Distance is: 25
Distance is: 26
Distance is: 27
Distance is: 28
Distance is: 29
Distance is: 30
Distance is: 31
Distance is: 32
Distance is: 33
Distance is: 34
Distance is: 35
Distance is: 36
Distance is: 37
Distance is: 38
Distance is: 39
Distance is: 40
Distance is: 41
Distance is: 42
Distance is: 43
Distance is: 44
Distance is: 45
Distance is: 46
Distance is: 47
Distance is: 48
Distance is: 49
Distance is: 50
Distance is: 51
Distance is: 52
Distance is: 53
Distance is: 54
Distance is: 55
Distance is: 56
Distance is: 57
Distance is: 58
Distance is: 59
Distance is: 60
Distance is: 61
Distance is: 62
Di