<a href="https://colab.research.google.com/github/tonystz/cuda/blob/main/poc/gpu-test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Get code

In [None]:
!git clone https://github.com/tonystz/cuda.git

In [2]:
# Mount Google Drive at /content/drive; the data-clean cell further down loads
# /content/drive/MyDrive/pre.npy from this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Run the repository's raw.py script. In the recorded run it prints a
# "[step0]preprocess raw log" summary with total/skip counts and an elapsed time;
# what it writes to disk is not visible from this notebook.
!python cuda/poc/raw.py

use google drive raw log
[step0]preprocess raw log: total=1153965 skip=12507 1.0102648735046387



### CPU

In [None]:
# Run the repository's cpu.py script. In the recorded run it prints three timed steps:
# "reload data", "404 data filter" and "check result" (a list of (ip, count) pairs).
!python cuda/poc/cpu.py

[step1]reload data: 1.5039465427398682 (1141458,) |S44 1141458 [b'::1-200' b'::1-200' b'::1-404' ... b'180.163.28.55-200'
 b'180.163.28.55-200' b'180.163.28.55-200']
[step2]404 data filter: 0.3014388084411621 202448
[step3]check result: 0.09853696823120117 [('216.244.66.200', 1081), ('217.174.192.132', 632), ('66.249.79.234', 638), ('66.249.79.238', 637), ('66.249.79.236', 645), ('23.96.16.211', 1188), ('47.101.154.149', 816), ('47.101.149.21', 809), ('38.54.24.118', 1987)]


In [None]:
!grep '23.96.16.211' pre.log|grep 404 -c

1188


### GPU initialize

In [None]:
!pip install pycuda # install cuda
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import numpy as np

### GPU

In [38]:
# Disabled, kept for reference: list the 404 lines of the log, and a one-off nvcc
# cubin build (sm_75) of a kernel file under a temporary directory.
#!grep '404' pre.log
#!nvcc --cubin -arch sm_75 /tmp/tmp91fglzn7/kernel.cu
# Refresh the checkout, then run the default make target in cuda/poc. In the recorded
# run that target compiles kernel.cu to PTX with nvcc and then executes gpu.py.
!cd cuda && git pull
!cd cuda/poc && make

Already up to date.
nvcc -ptx -o kernel.ptx kernel.cu
python gpu.py
8388608
62914560
shape: (1141457,) 1141457 |S44
[b'::1-200' b'::1-404' b'::1-200' ... b'180.163.28.55-200'
 b'180.163.28.55-200' b'180.163.28.55-200'] <memory at 0x7f823b763f40>
(1141458, 1)


### Data cleaning

In [59]:
# Imports first: the cell previously called np.load before importing numpy and only
# worked when an earlier cell had already imported it.
import numpy as np
import pandas as pd

# Target grid for the padded array: a fixed row count and at least MIN_COLS columns.
ROWS, MIN_COLS = 1024, 138
PAD_VALUE = '0.0.0.0:0'  # filler entry for the unused tail of the grid

a = np.load('/content/drive/MyDrive/pre.npy')

# Drop empty entries: turn b'' into NaN so dropna() can remove them. replace() is used
# without inplace=True so re-running the cell never mutates an already built frame.
p = pd.DataFrame(a).replace(b'', np.nan)
# np.bytes_ is the supported spelling; the np.string_ alias was removed in NumPy 2.0.
n = p.dropna().to_numpy(dtype=np.bytes_).flatten()
print(n, n.shape)

# Reshape with padding if there are not enough elements. The column count grows when
# the data no longer fits ROWS x MIN_COLS; with a fixed 138 the pad width would become
# negative and np.pad would raise.
r = ROWS
c = max(MIN_COLS, -(-n.size // r))  # ceil(n.size / r), never below MIN_COLS
np.pad(n, (0, r*c - n.size),
       mode='constant', constant_values=PAD_VALUE).reshape(r, c)

[b'::1#1' b'113.116.36.60#12' b'66.240.219.146#3' ... b'49.7.21.99#1'
 b'49.7.20.28#2' b'82.157.8.248#1'] (141067,)


array([[b'::1#1', b'113.116.36.60#12', b'66.240.219.146#3', ...,
        b'114.119.135.224#1', b'110.249.201.140#1', b'93.123.158.33#1'],
       [b'176.107.188.172#1', b'66.249.65.238#2', b'75.4.195.166#3', ...,
        b'45.143.144.187#1', b'222.186.46.200#1', b'110.249.202.46#1'],
       [b'115.218.184.105#1', b'119.123.73.225#1', b'86.98.88.57#6', ...,
        b'207.46.13.160#1', b'13.229.89.192#9', b'51.222.253.18#1'],
       ...,
       [b'110.249.202.105#1', b'217.174.192.132#13', b'92.118.39.82#1',
        ..., b'112.195.153.228#1', b'140.250.153.55#1',
        b'27.158.124.121#1'],
       [b'119.109.82.120#1', b'115.208.46.83#1', b'103.230.15.181#2',
        ..., b'0.0.0.0:0', b'0.0.0.0:0', b'0.0.0.0:0'],
       [b'0.0.0.0:0', b'0.0.0.0:0', b'0.0.0.0:0', ..., b'0.0.0.0:0',
        b'0.0.0.0:0', b'0.0.0.0:0']], dtype='|S19')

## Try Python buffer interface (memoryview)


In [None]:
%%writefile buff.py
import pycuda.driver as cuda
import pycuda.autoinit  # noqa
from pycuda.compiler import SourceModule
import numpy as np


mod = SourceModule("""
    __global__ void bufferInterface(char *a, int len)
    {
      int idx = threadIdx.x + threadIdx.y*4;
      printf("[%d]From raw buffer interface:%s  len=%d\\n",idx,a,len);
      for(int i=0;i<len;i++){
        if(a[i]==0){
          printf("[%d]From raw buffer interface:%s \\n",idx,a+i+1);
        }
        //printf("[%d]From raw buffer interface:%s \\n",idx,a);
      }
    }
    """)

bufferInterface = mod.get_function("bufferInterface")
mv=memoryview(b'hellworld3\0this is a nother string')
mv_gpu=cuda.to_device(mv)
bufferInterface(mv_gpu, np.int32(mv.shape[0]),block=(2, 1, 1), grid=(1, 1), shared=0)

Overwriting buff.py


In [None]:
# Run the buff.py script written by the %%writefile cell above in a separate Python process.
!python buff.py

[0]From raw buffer interface:hellworld3  len=34
[1]From raw buffer interface:hellworld3  len=34
[0]From raw buffer interface:this is a nother string 
[1]From raw buffer interface:this is a nother string 


## Try cuda-python usage

In [None]:
!pip install cuda-python

In [None]:
# Import the driver bindings under their own name: an earlier cell binds `cuda` to
# pycuda.driver, and rebinding it here would break any pycuda cell that is re-run later.
try:
    # Module path exposed by newer cuda-python releases.
    from cuda.bindings import driver as cu_driver
except ImportError:
    # Legacy module path, as originally used in this notebook.
    from cuda import cuda as cu_driver

HEAP_LIMIT = cu_driver.CUlimit.CU_LIMIT_MALLOC_HEAP_SIZE
NEW_HEAP_BYTES = 20*1024*1024  # 20 MiB

# NOTE(review): no context is created in this cell, so these calls presumably rely on
# a context created by an earlier cell (pycuda.autoinit) -- confirm on a fresh runtime.
status, pvalue=cu_driver.cuCtxGetLimit(HEAP_LIMIT)
print("heap size:",status,pvalue)

# The bindings report failures through the returned status instead of raising, so the
# result of the set call has to be checked explicitly.
(set_status,) = cu_driver.cuCtxSetLimit(HEAP_LIMIT, NEW_HEAP_BYTES)
if set_status != cu_driver.CUresult.CUDA_SUCCESS:
    raise RuntimeError(f"cuCtxSetLimit failed: {set_status}")

print("heap size:",cu_driver.cuCtxGetLimit(HEAP_LIMIT))

heap size: CUresult.CUDA_SUCCESS 20971520
heap size: (<CUresult.CUDA_SUCCESS: 0>, 20971520)


## Try CUDA C usage

In [None]:
%%writefile a.cu
// Query and print CUDA runtime device limits.
// cudaDeviceGetLimit belongs to the runtime API, so include its header explicitly
// instead of the driver-API header <cuda.h>.
#include <cuda_runtime.h>
#include <stdio.h>

// Query one device limit and print it as "<label>: <bytes>".
// Returns 0 on success, 1 if the runtime call failed (the error is written to stderr).
static int printLimit(const char *label, cudaLimit which) {
    size_t limit = 0;
    cudaError_t err = cudaDeviceGetLimit(&limit, which);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaDeviceGetLimit(%s) failed: %s\n", label, cudaGetErrorString(err));
        return 1;
    }
    // %zu prints the full size_t; the previous cast to unsigned would truncate
    // any limit of 4 GiB or more.
    printf("%s: %zu\n", label, limit);
    return 0;
}

int main(int argc, char const *argv[]) {
    (void)argc;
    (void)argv;
    int failed = 0;
    // The heap-size limit is queried twice, as before. In the recorded nvprof run one
    // of the three calls takes ~111 ms and the others under 1 us -- presumably the
    // first call pays for CUDA initialization.
    failed |= printLimit("cudaLimitMallocHeapSize", cudaLimitMallocHeapSize);
    failed |= printLimit("cudaLimitPrintfFifoSize", cudaLimitPrintfFifoSize);
    failed |= printLimit("cudaLimitMallocHeapSize", cudaLimitMallocHeapSize);
    return failed;
}

Overwriting a.cu


In [None]:
!nvcc a.cu -o query
!nvprof ./query

==45382== NVPROF is profiling process 45382, command: ./query
cudaLimitMallocHeapSize: 8388608
cudaLimitPrintfFifoSize: 1310720
cudaLimitMallocHeapSize: 8388608
==45382== Profiling application: ./query
==45382== Profiling result:
No kernels were profiled.
            Type  Time(%)      Time     Calls       Avg       Min       Max  Name
      API calls:   99.86%  111.61ms         3  37.202ms     780ns  111.60ms  cudaDeviceGetLimit
                    0.11%  120.35us       101  1.1910us     135ns  50.351us  cuDeviceGetAttribute
                    0.02%  25.205us         1  25.205us  25.205us  25.205us  cuDeviceGetName
                    0.01%  7.0620us         1  7.0620us  7.0620us  7.0620us  cuDeviceGetPCIBusId
                    0.00%  2.0930us         3     697ns     216ns  1.6050us  cuDeviceGetCount
                    0.00%     975ns         2     487ns     267ns     708ns  cuDeviceGet
                    0.00%     463ns         1     463ns     463ns     463ns  cuDeviceTotalMem
 