# df_op_benchmark.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import argparse
import os
import pandas as pd
from utils import time_logger
import numpy as np
# ---------------------------------------------------------------------------
# Command-line interface and benchmark fixtures.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='arithmetic benchmark')
# --path is mandatory: without it, os.path.getsize(None) below fails with an
# opaque TypeError instead of a usage message from argparse.
parser.add_argument('--path', dest='path', required=True,
                    help='path to the csv data file')
parser.add_argument('--logfile', dest='logfile', help='path to the log file')
args = parser.parse_args()

file = args.path
# Size of the input on disk, in bytes; embedded in every timing label below.
file_size = os.path.getsize(file)

logging.basicConfig(filename=args.logfile, level=logging.INFO)

# The frame every benchmark operates on, and its dimensions.
df = pd.read_csv(file)
num_rows, num_cols = df.shape

# Random integer payloads in [0, 100): one value per column for a full row,
# one value per row for a full column.
new_row = np.random.randint(0, 100, size=num_cols)
new_col = np.random.randint(0, 100, size=num_rows)
def rand_row_loc():
    """Return a random integer row position in ``[0, num_rows)``."""
    row_pos = np.random.randint(low=0, high=num_rows)
    return row_pos
def rand_col_loc():
    """Return a random integer column position in ``[0, num_cols)``."""
    col_pos = np.random.randint(low=0, high=num_cols)
    return col_pos
# Whole-row / whole-column reads and writes at a random position.
# NOTE(review): time_logger comes from the project's utils module; presumably
# it times the body of the `with` and logs it under the given label - confirm.
# The random position is drawn inside each timed region.
with time_logger(
        "read a column: {}; Size: {} bytes".format(file, file_size)):
    col_pos = rand_col_loc()
    df.iloc[:, col_pos]  # result intentionally discarded; only the access matters

with time_logger(
        "read a row: {}; Size: {} bytes".format(file, file_size)):
    row_pos = rand_row_loc()
    df.iloc[row_pos, :]  # result intentionally discarded

with time_logger(
        "write a column: {}; Size: {} bytes".format(file, file_size)):
    col_pos = rand_col_loc()
    df.iloc[:, col_pos] = new_col

with time_logger(
        "write a row: {}; Size: {} bytes".format(file, file_size)):
    row_pos = rand_row_loc()
    df.iloc[row_pos, :] = new_row
# Single-cell read and write at a random (row, column) position.
with time_logger(
        "read an element: {}; Size: {} bytes".format(file, file_size)):
    row_pos = rand_row_loc()
    col_pos = rand_col_loc()
    df.iloc[row_pos, col_pos]  # result intentionally discarded

with time_logger(
        "write an element: {}; Size: {} bytes".format(file, file_size)):
    # Draw the value first, then the row, then the column: this is the order
    # in which a single assignment statement evaluates them.
    cell_value = np.random.randint(0, 100)
    row_pos = rand_row_loc()
    col_pos = rand_col_loc()
    df.iloc[row_pos, col_pos] = cell_value
# Appending a row and a column.
with time_logger("append a row: {}; Size: {} bytes".format(file, file_size)):
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # equivalent and also exists in older pandas releases.
    # The new row is labelled with df.columns so it lines up with the existing
    # columns instead of introducing new integer-named ones.
    # The result is deliberately discarded: df must keep num_rows rows so that
    # new_col still fits in the column append below.
    pd.concat([df, pd.DataFrame([new_row], columns=df.columns)],
              ignore_index=True)

with time_logger("append a column: {}; Size: {} bytes".format(file,
                                                              file_size)):
    # In-place: adds (or overwrites) the column named 'new'.
    df['new'] = new_col