In [7]:
import cudf
import time
from tqdm import trange

In [8]:
dtypes = {
    "Stkcd" : "int32",
    "Opnprc": "float32",
    "Hiprc": "float32",
    "Loprc": "float32",
    "Clsprc": "float32"
}
df_list = []
for i in range(6):
    df = cudf.read_csv(f"../../data/TRD_Dalyr{i}.csv", dtype=dtypes)
    df_list.append(df)

df = cudf.concat(df_list, axis=0, ignore_index=True)
df

Unnamed: 0,Stkcd,Trddt,Opnprc,Hiprc,Loprc,Clsprc
0,1,2020-02-19,15.100000,15.370000,15.080000,15.240000
1,1,2020-02-20,15.270000,15.620001,15.100000,15.590000
2,1,2020-02-21,15.490000,15.720000,15.450000,15.580000
3,1,2020-02-24,15.460000,15.460000,15.150001,15.230000
4,1,2020-02-25,15.000000,15.130000,14.780000,15.040000
...,...,...,...,...,...,...
5897412,920128,2025-02-12,30.049999,30.920000,29.379999,30.650000
5897413,920128,2025-02-13,30.299999,30.680000,29.400000,29.400000
5897414,920128,2025-02-14,29.640001,29.640001,28.920000,29.100000
5897415,920128,2025-02-17,29.100000,29.650000,28.900000,29.290001


In [9]:

where_results = {}
# Where on Stkcd (int32)
start_time = time.time()
for i in trange(1000, desc="Where on Stkcd"):
    t = df[df["Stkcd"] <= 20000]
where_results["Stkcd"] = (time.time() - start_time) * 1000

start_time = time.time()
for i in trange(1000, desc="Where on Clsprc"):
    t = df[df["Clsprc"] > 15.0]
where_results["Clsprc"] = (time.time() - start_time) * 1000

print(where_results)

Where on Stkcd: 100%|██████████| 1000/1000 [00:03<00:00, 273.47it/s]
Where on Clsprc: 100%|██████████| 1000/1000 [00:04<00:00, 233.73it/s]

{'Stkcd': 3657.761573791504, 'Clsprc': 4279.566049575806}





In [12]:
def double_value(x):
    return x * 2

apply_results = {}
# Apply on Stkcd (int32)
start_time = time.time()
for i in trange(1, desc="Apply on Stkcd"):
    t = df["Stkcd"].apply(double_value)
    print(t)
apply_results["Stkcd"] = (time.time() - start_time) * 1000

# Apply on Clsprc (float32)
start_time = time.time()
for i in trange(1000, desc="Apply on Opnprc"):
    t = df["Opnprc"].apply(double_value)
apply_results["Opnprc"] = (time.time() - start_time) * 1000

print(apply_results)

Apply on Stkcd: 100%|██████████| 1/1 [00:00<00:00, 90.26it/s]


0                2
1                2
2                2
3                2
4                2
            ...   
5897412    1840256
5897413    1840256
5897414    1840256
5897415    1840256
5897416    1840256
Name: Stkcd, Length: 5897417, dtype: int64


Apply on Opnprc: 100%|██████████| 1000/1000 [00:02<00:00, 465.86it/s]

{'Stkcd': 13.38958740234375, 'Opnprc': 2148.7505435943604}



