In [1]:
import os
import time
import shutil
from fxqu4nt.utils.common import get_tmp_dir
from fxqu4nt.utils.csv_tick_file import *

# Paths are resolved relative to the kernel's working directory: the data
# folder is looked up as a sibling of the directory the notebook runs from.
CURRENT_DIR = os.getcwd()
DATA_DIR = os.path.join(os.path.dirname(CURRENT_DIR), "data")
# Pass each path component separately so os.path.join chooses the separator,
# instead of embedding a "/" inside a single component.
csv_file = os.path.join(DATA_DIR, "EURUSD", "201001.csv")
# Scratch directory that the benchmark cells below create and delete.
tmp_dir = os.path.join(DATA_DIR, "tmp")

## Test splitting on a single process

In [2]:
# Time one split_by_month() call on csv_file, passing tmp_dir as the
# destination argument.
# exist_ok=True creates the scratch directory without a separate existence
# check, so re-running the cell after an interrupted run does not fail.
os.makedirs(tmp_dir, exist_ok=True)
try:
    # perf_counter() is a monotonic clock meant for measuring intervals;
    # time.time() can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    split_by_month(csv_file, tmp_dir, verbose=True)
    end = time.perf_counter()
    print(f"Single process take time: {end - start: .4f}")
finally:
    # Remove the scratch directory even when the call above raises, so the
    # following cells never see leftovers from a failed run.
    shutil.rmtree(tmp_dir)

2020-05-16T20:55:30 - CsvTickFile - INFO - wid0/split(): Splitting tick data for 201001
2020-05-16T20:55:42 - CsvTickFile - INFO - wid0/split(): Splitting process token 11.9139 seconds


Single process take time:  11.9147


## Test splitting on multiple processes

In [3]:
# Time one parallel_split_by_month() call on csv_file with nworkers=8,
# passing tmp_dir as the destination argument.
# exist_ok=True creates the scratch directory without a separate existence
# check, so re-running the cell after an interrupted run does not fail.
os.makedirs(tmp_dir, exist_ok=True)
try:
    # perf_counter() is a monotonic clock meant for measuring intervals;
    # time.time() can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    parallel_split_by_month(csv_file, tmp_dir, nworkers=8)
    end = time.perf_counter()
    print(f"Multiple processes take time: {end - start: .4f}")
finally:
    # Remove the scratch directory even when the call above raises, so the
    # following cells never see leftovers from a failed run.
    shutil.rmtree(tmp_dir)

Parallel process take time:  1.9328


## Test fixing date on a single process

In [3]:
# Time one fix_date() call that reads csv_file and is given out_file (inside
# the scratch directory) as its output path argument.
# exist_ok=True creates the scratch directory without a separate existence
# check, so re-running the cell after an interrupted run does not fail.
os.makedirs(tmp_dir, exist_ok=True)
try:
    out_file = os.path.join(tmp_dir, "201001.csv")
    # perf_counter() is a monotonic clock meant for measuring intervals;
    # time.time() can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    fix_date(csv_file, out_file, verbose=True)
    end = time.perf_counter()
    print(f"Single process take time: {end - start: .4f}")
finally:
    # Remove the scratch directory even when the call above raises, so the
    # following cells never see leftovers from a failed run.
    shutil.rmtree(tmp_dir)

2020-05-17T18:19:06 - CsvTickFile - INFO - wid0/split(): Splitting process token 15.9139 seconds, number of lines:597068


Single process take time:  15.9263


## Test fixing date on multiple processes

In [4]:
# Time one parallel_fix_date() call with nworkers=8 that reads csv_file and
# is given out_file (inside the scratch directory) as its output path argument.
# exist_ok=True creates the scratch directory without a separate existence
# check, so re-running the cell after an interrupted run does not fail.
os.makedirs(tmp_dir, exist_ok=True)
try:
    out_file = os.path.join(tmp_dir, "201001.csv")
    # perf_counter() is a monotonic clock meant for measuring intervals;
    # time.time() can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    parallel_fix_date(csv_file, out_file, nworkers=8)
    end = time.perf_counter()
    print(f"Multiple processes take time: {end - start: .4f}")
finally:
    # Remove the scratch directory even when the call above raises, so a
    # failed run does not leave partial output behind.
    shutil.rmtree(tmp_dir)

2020-05-17T18:19:22 - CsvTickFile - INFO - wid2/split(): Splitting process token 2.3510 seconds, number of lines:74632
2020-05-17T18:19:22 - CsvTickFile - INFO - wid6/split(): Splitting process token 2.3539 seconds, number of lines:74632
2020-05-17T18:19:22 - CsvTickFile - INFO - wid5/split(): Splitting process token 2.3628 seconds, number of lines:74632
2020-05-17T18:19:22 - CsvTickFile - INFO - wid7/split(): Splitting process token 2.3763 seconds, number of lines:74637
2020-05-17T18:19:22 - CsvTickFile - INFO - wid3/split(): Splitting process token 2.3850 seconds, number of lines:74632
2020-05-17T18:19:22 - CsvTickFile - INFO - wid0/split(): Splitting process token 2.4001 seconds, number of lines:74632
2020-05-17T18:19:22 - CsvTickFile - INFO - wid4/split(): Splitting process token 2.4026 seconds, number of lines:74633
2020-05-17T18:19:22 - CsvTickFile - INFO - wid1/split(): Splitting process token 2.4202 seconds, number of lines:74631


Multiple processes take time:  2.5465
