In [1]:
import warnings
import pandas as pd
import numpy as np
import os
import sys # error msg
import operator # sorting
from math import *

from read_trace import *
from avgblkmodel import *

warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

# gpu info

In [2]:
gtx950 = DeviceInfo()
gtx950.sm_num = 6
gtx950.sharedmem_per_sm = 49152
gtx950.reg_per_sm = 65536
gtx950.maxthreads_per_sm = 2048

# single stream info

In [3]:
data_size = 23000
trace_file = './1cke/trace_' + str(data_size) + '.csv'
df_trace = trace2dataframe(trace_file) # read the trace to the dataframe

In [4]:
df_trace

Unnamed: 0,Start,Duration,Grid X,Grid Y,Grid Z,Block X,Block Y,Block Z,Registers Per Thread,Static SMem,Dynamic SMem,Size,Throughput,Device,Context,Stream,Name
0,ms,us,,,,,,,,B,B,KB,GB/s,,,,
1,526.961828,16.672000,,,,,,,,,,89.843750,5.139256,GeForce GTX 950 (0),1.0,13.0,[CUDA memcpy HtoD]
2,526.979716,16.224000,,,,,,,,,,89.843750,5.281168,GeForce GTX 950 (0),1.0,13.0,[CUDA memcpy HtoD]
3,527.157829,61.056000,90.0,1.0,1.0,256.0,1.0,1.0,28.0,0,0,,,GeForce GTX 950 (0),1.0,13.0,"kernel_vectorAdd(float const *, float const *,..."
4,527.221349,15.904000,,,,,,,,,,89.843750,5.387429,GeForce GTX 950 (0),1.0,13.0,[CUDA memcpy DtoH]


In [5]:
df_single_stream = model_param_from_trace(df_trace)
df_single_stream.head(20)

Unnamed: 0,seq,api_type,start,end,duration
0,0.0,h2d,526.961828,526.9785,0.016672
1,1.0,h2d_h2d_ovhd,526.9785,526.979716,0.001216
2,2.0,h2d,526.979716,526.99594,0.016224
3,3.0,h2d_kern_ovhd,526.99594,527.157829,0.161889
4,4.0,kern,527.157829,527.218885,0.061056
5,5.0,kern_d2h_ovhd,527.218885,527.221349,0.002464
6,6.0,d2h,527.221349,527.237253,0.015904


# model 2 stream case

we need a trace table to track the timing for each kernel

In [7]:
df_single_stream_update = df_single_stream.copy(deep=True)
#df_cke['stream'] = 0 # add stream column with label 0

In [8]:
df_single_stream_update

Unnamed: 0,seq,api_type,start,end,duration
0,0.0,h2d,526.961828,526.9785,0.016672
1,1.0,h2d_h2d_ovhd,526.9785,526.979716,0.001216
2,2.0,h2d,526.979716,526.99594,0.016224
3,3.0,h2d_kern_ovhd,526.99594,527.157829,0.161889
4,4.0,kern,527.157829,527.218885,0.061056
5,5.0,kern_d2h_ovhd,527.218885,527.221349,0.002464
6,6.0,d2h,527.221349,527.237253,0.015904


In [9]:
# deduct the starting timing
offset = df_single_stream_update.start[0]
#print offset
df_single_stream_update.start = df_single_stream_update.start - offset
df_single_stream_update.end = df_single_stream_update.end - offset
print df_single_stream_update

526.961828
   seq       api_type     start       end  duration
0  0.0            h2d  0.000000  0.016672  0.016672
1  1.0   h2d_h2d_ovhd  0.016672  0.017888  0.001216
2  2.0            h2d  0.017888  0.034112  0.016224
3  3.0  h2d_kern_ovhd  0.034112  0.196001  0.161889
4  4.0           kern  0.196001  0.257057  0.061056
5  5.0  kern_d2h_ovhd  0.257057  0.259521  0.002464
6  6.0            d2h  0.259521  0.275425  0.015904


In [10]:
stream_num = 2

df_cke_list = []
for x in range(stream_num):
    df_cke_list.append(df_single_stream_update.copy(deep=True))

In [11]:
# 1st stream strace


In [12]:
df_cke_list[0]

Unnamed: 0,seq,api_type,start,end,duration
0,0.0,h2d,0.0,0.016672,0.016672
1,1.0,h2d_h2d_ovhd,0.016672,0.017888,0.001216
2,2.0,h2d,0.017888,0.034112,0.016224
3,3.0,h2d_kern_ovhd,0.034112,0.196001,0.161889
4,4.0,kern,0.196001,0.257057,0.061056
5,5.0,kern_d2h_ovhd,0.257057,0.259521,0.002464
6,6.0,d2h,0.259521,0.275425,0.015904


In [13]:
# 2nd stream trace
df_cke_list[1]

Unnamed: 0,seq,api_type,start,end,duration
0,0.0,h2d,0.0,0.016672,0.016672
1,1.0,h2d_h2d_ovhd,0.016672,0.017888,0.001216
2,2.0,h2d,0.017888,0.034112,0.016224
3,3.0,h2d_kern_ovhd,0.034112,0.196001,0.161889
4,4.0,kern,0.196001,0.257057,0.061056
5,5.0,kern_d2h_ovhd,0.257057,0.259521,0.002464
6,6.0,d2h,0.259521,0.275425,0.015904


In [14]:
df_new_stream = df_single_stream.copy()
df_new_stream['stream'] = 1

In [15]:
df_cke.append(df_new_stream)

NameError: name 'df_cke' is not defined

In [None]:
# compute the time for the init data transfer
H2D_H2D_OVLP_TH = 3.158431

# we need to know the data transfer time before the 1st kernel call
h2d_first_ind = -1
h2d_last_ind = -1
for index, row in df_single_stream.iterrows():
    if row['api_type'] == 'h2d':
        h2d_first_ind = 0
        
    if row['api_type'] == 'h2d_kern_ovhd':
        h2d_last_ind = index -1
        break;
        
h2d_start = df_single_stream.loc[h2d_first_ind]['start']
h2d_finish = df_single_stream.loc[h2d_last_ind]['end']  
h2d_duration = h2d_finish - h2d_start
print("h2d : {} - {} = {}".format(h2d_start, h2d_finish, h2d_duration))

# check whether we need to start second stream data transfer till the previous one finishes
start_stream_after_prev_h2d = 1
if h2d_duration < H2D_H2D_OVLP_TH:
    start_stream_after_prev_h2d = 0
    
if start_stream_after_prev_h2d:
    current_stream_start = h2d_finish
    
    
#df_single_stream.loc[0]['api_type']