<a href="https://colab.research.google.com/github/tonystz/cuda/blob/main/poc/gpu-test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Get code

In [None]:
!git clone https://github.com/tonystz/cuda.git

In [3]:
# Mount Google Drive inside the Colab VM at /content/drive.
# NOTE(review): raw.py prints "use google drive raw log", so the raw log
# presumably lives on Drive — confirm the expected path in raw.py.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# Step 0: preprocess the raw access log. The script reports the total and skipped
# line counts, followed by what appears to be the elapsed time in seconds.
# NOTE(review): presumably produces pre.log, which a later cell greps — confirm in raw.py.
!python cuda/poc/raw.py

use google drive raw log
[step0]preprocess raw log: total=1153965 skip=12507 2.2242274284362793



### CPU

In [None]:
# CPU baseline. Per its output the script reloads the preprocessed entries (step1),
# filters the 404 entries (step2) and prints (ip, count) pairs (step3); the float
# after each step label appears to be that step's elapsed time in seconds.
!python cuda/poc/cpu.py

[step1]reload data: 1.5039465427398682 (1141458,) |S44 1141458 [b'::1-200' b'::1-200' b'::1-404' ... b'180.163.28.55-200'
 b'180.163.28.55-200' b'180.163.28.55-200']
[step2]404 data filter: 0.3014388084411621 202448
[step3]check result: 0.09853696823120117 [('216.244.66.200', 1081), ('217.174.192.132', 632), ('66.249.79.234', 638), ('66.249.79.238', 637), ('66.249.79.236', 645), ('23.96.16.211', 1188), ('47.101.154.149', 816), ('47.101.149.21', 809), ('38.54.24.118', 1987)]


In [None]:
!grep '23.96.16.211' pre.log|grep 404 -c

1188


### GPU initialize

In [None]:
!pip install pycuda # install cuda
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np

### GPU

In [8]:
# Refresh the checkout, then build and run the GPU version through the Makefile in
# cuda/poc. Per the output below, make runs `nvcc -ptx -o kernel.ptx kernel.cu`
# followed by `python gpu.py`.
!cd cuda && git pull
!cd cuda/poc && make

Already up to date.
nvcc -ptx -o kernel.ptx kernel.cu
python gpu.py
8388608
62914560
shape: (1141457,) 1141457 |S44
[b'::1-200' b'::1-404' b'::1-200' ... b'180.163.28.55-200'
 b'180.163.28.55-200' b'180.163.28.55-200'] <memory at 0x7f852b9f7340>
Thread 0 got pointer: 0x205c00020 heapsize=54789936
Thread:0 Total:66969
[dedupStackShow]Thread 0 addr: 216.244.66.200=1081
[dedupStackShow]Thread 0 addr: 217.174.192.132=632
[dedupStackShow]Thread 0 addr: 66.249.79.234=638
[dedupStackShow]Thread 0 addr: 66.249.79.238=637
[dedupStackShow]Thread 0 addr: 66.249.79.236=645
[dedupStackShow]Thread 0 addr: 23.96.16.211=1188
[dedupStackShow]Thread 0 addr: 47.101.154.149=816
[dedupStackShow]Thread 0 addr: 47.101.149.21=809
[dedupStackShow]Thread 0 addr: 38.54.24.118=1987


## Try the Python buffer interface (memoryview)


In [None]:
%%writefile buff.py
import pycuda.driver as cuda
import pycuda.autoinit  # noqa
from pycuda.compiler import SourceModule
import numpy as np


mod = SourceModule("""
    __global__ void bufferInterface(char *a, int len)
    {
      int idx = threadIdx.x + threadIdx.y*4;
      printf("[%d]From raw buffer interface:%s  len=%d\\n",idx,a,len);
      for(int i=0;i<len;i++){
        if(a[i]==0){
          printf("[%d]From raw buffer interface:%s \\n",idx,a+i+1);
        }
        //printf("[%d]From raw buffer interface:%s \\n",idx,a);
      }
    }
    """)

bufferInterface = mod.get_function("bufferInterface")
mv=memoryview(b'hellworld3\0this is a nother string')
mv_gpu=cuda.to_device(mv)
bufferInterface(mv_gpu, np.int32(mv.shape[0]),block=(2, 1, 1), grid=(1, 1), shared=0)

Overwriting buff.py


In [None]:
!python buff.py

[0]From raw buffer interface:hellworld3  len=34
[1]From raw buffer interface:hellworld3  len=34
[0]From raw buffer interface:this is a nother string 
[1]From raw buffer interface:this is a nother string 


## Try cuda-python usage

In [None]:
!pip install cuda-python

In [None]:
# NOTE(review): this rebinds the name `cuda`, which the "GPU initialize" cell bound
# to pycuda.driver; cells run after this one see the cuda-python module instead.
from cuda import cuda

# Device-side malloc heap limit to request, in bytes (20 MiB).
HEAP_LIMIT_BYTES = 20 * 1024 * 1024
HEAP_LIMIT_KIND = cuda.CUlimit.CU_LIMIT_MALLOC_HEAP_SIZE

# cuda-python calls return a tuple whose first element is the CUresult status.
# NOTE(review): these calls need a current CUDA context; presumably the one
# created by pycuda.autoinit earlier in the notebook — confirm on a fresh kernel.
status, pvalue = cuda.cuCtxGetLimit(HEAP_LIMIT_KIND)
print("heap size:", status, pvalue)

# Check the status instead of discarding it, so a rejected limit is not silently ignored.
set_status = cuda.cuCtxSetLimit(HEAP_LIMIT_KIND, HEAP_LIMIT_BYTES)[0]
if set_status != cuda.CUresult.CUDA_SUCCESS:
    raise RuntimeError(f"cuCtxSetLimit failed: {set_status}")

print("heap size:", cuda.cuCtxGetLimit(HEAP_LIMIT_KIND))

heap size: CUresult.CUDA_SUCCESS 20971520
heap size: (<CUresult.CUDA_SUCCESS: 0>, 20971520)


## Try CUDA C usage

In [None]:
%%writefile a.cu
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>

// Query one device limit and print it as "<name>: <bytes>".
// Returns 0 on success; on failure reports the CUDA error on stderr and returns 1.
static int printLimit(const char *name, cudaLimit which) {
    size_t limit = 0;
    cudaError_t err = cudaDeviceGetLimit(&limit, which);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaDeviceGetLimit(%s) failed: %s\n", name, cudaGetErrorString(err));
        return 1;
    }
    // %zu prints the full size_t; casting to unsigned would truncate large limits.
    printf("%s: %zu\n", name, limit);
    return 0;
}

int main(int argc, char const *argv[]) {
    (void)argc;
    (void)argv;

    // There is no explicit initialization call: the first runtime API call below
    // is the first thing this program asks of CUDA.
    int failed = 0;
    failed |= printLimit("cudaLimitMallocHeapSize", cudaLimitMallocHeapSize);
    failed |= printLimit("cudaLimitPrintfFifoSize", cudaLimitPrintfFifoSize);
    // Read the heap limit a second time to compare it with the first reading.
    failed |= printLimit("cudaLimitMallocHeapSize", cudaLimitMallocHeapSize);

    return failed;
}

Overwriting a.cu


In [None]:
!nvcc a.cu -o query
!nvprof ./query

==45382== NVPROF is profiling process 45382, command: ./query
cudaLimitMallocHeapSize: 8388608
cudaLimitPrintfFifoSize: 1310720
cudaLimitMallocHeapSize: 8388608
==45382== Profiling application: ./query
==45382== Profiling result:
No kernels were profiled.
            Type  Time(%)      Time     Calls       Avg       Min       Max  Name
      API calls:   99.86%  111.61ms         3  37.202ms     780ns  111.60ms  cudaDeviceGetLimit
                    0.11%  120.35us       101  1.1910us     135ns  50.351us  cuDeviceGetAttribute
                    0.02%  25.205us         1  25.205us  25.205us  25.205us  cuDeviceGetName
                    0.01%  7.0620us         1  7.0620us  7.0620us  7.0620us  cuDeviceGetPCIBusId
                    0.00%  2.0930us         3     697ns     216ns  1.6050us  cuDeviceGetCount
                    0.00%     975ns         2     487ns     267ns     708ns  cuDeviceGet
                    0.00%     463ns         1     463ns     463ns     463ns  cuDeviceTotalMem
 