In [1]:
import numpy as np
import time
import pathlib as pl
import pandas as pd

# Data Logging Tests

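This notebook times several strategies for logging a block of numeric rows: writing into a pre-allocated pandas DataFrame (value by value and row by row), filling a NumPy array, collecting a list of dicts, and writing a plain text file.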

In [2]:
# Test data shared by every benchmark cell below.
# A seeded generator makes the data identical on every "Restart & Run All",
# so timing differences between runs are not caused by different inputs.
SEED = 0
rng = np.random.default_rng(SEED)
# 20,000 rows x 10 columns of floats drawn uniformly from [0, 4).
numbers = rng.uniform(low=0, high=4, size=(20_000, 10))
# One label per column of `numbers` ('col_0', 'col_1', ...); derived from the
# array width so the two cannot drift apart if the shape is changed.
columns = [f'col_{i}' for i in range(numbers.shape[1])]

In [3]:
# Strategy: build the DataFrame in a single constructor call from the
# complete `numbers` array, with no per-row or per-value writes.
start = time.perf_counter()
dataframe = pd.DataFrame(numbers, columns=columns)
stop = time.perf_counter()
# Elapsed seconds (time.perf_counter difference) for the constructor call.
print(stop-start)

0.0004094000000804954


In [4]:
# Strategy: pre-allocate a zero-filled DataFrame with the shape of `numbers`,
# then write every value individually with label-based indexing (.loc).
start = time.perf_counter()
dataframe = pd.DataFrame(np.zeros(shape=numbers.shape), columns=columns)
for j, row in enumerate(numbers):
    for i, value in enumerate(row):
        # One scalar assignment per (row label, column label) pair; the
        # column label is rebuilt from i on every iteration.
        dataframe.loc[j, f'col_{i}'] = value
stop = time.perf_counter()
# The timed region covers both the DataFrame allocation and the nested loop.
print(stop-start)

8.755635399999846


In [5]:
# Strategy: pre-allocate a zero-filled DataFrame with the shape of `numbers`,
# then write every value individually with positional indexing (.iloc).
start = time.perf_counter()
dataframe = pd.DataFrame(np.zeros(shape=numbers.shape), columns=columns)
for j, row in enumerate(numbers):
    for i, value in enumerate(row):
        # One scalar assignment per (row position, column position) pair.
        dataframe.iloc[j, i] = value
stop = time.perf_counter()
# The timed region covers both the DataFrame allocation and the nested loop.
print(stop-start)

6.613807299999962


In [6]:
# Strategy: log into a pre-allocated NumPy array value by value and convert
# to a DataFrame only once, after all values have been written.
start = time.perf_counter()
array = np.zeros(shape=numbers.shape)
for j, row in enumerate(numbers):
    for i, value in enumerate(row):
        # Plain ndarray element assignment; pandas is not involved in the loop.
        array[j, i] = value
# Single DataFrame construction from the filled array (inside the timed region).
dataframe = pd.DataFrame(array, columns=columns)
stop = time.perf_counter()
print(stop-start)

0.0633476000000428


In [7]:
# Reference measurement: same DataFrame allocation and the same nested
# iteration over `numbers`, but the loop body only increments a counter.
# Nothing is written to `dataframe`, so the timing reflects allocation plus
# the cost of the Python-level iteration itself.
start = time.perf_counter()
counter = 0
dataframe = pd.DataFrame(np.zeros(shape=numbers.shape), columns=columns)
for j, row in enumerate(numbers):
    for i, value in enumerate(row):
        # Ends up equal to the total number of values visited.
        counter += 1
stop = time.perf_counter()
print(stop-start)

0.0505671999999322


In [8]:
# Strategy: pre-allocate a zero-filled DataFrame, then assign one whole row
# at a time with .loc, passing the row of `numbers` as an array.
start = time.perf_counter()
dataframe = pd.DataFrame(np.zeros(shape=numbers.shape), columns=columns)
for j, row in enumerate(numbers):
    # One assignment per row instead of one per value.
    dataframe.loc[j] = row
stop = time.perf_counter()
# The timed region covers both the DataFrame allocation and the row loop.
print(stop-start)

0.48541100000011284


In [9]:
# Strategy: pre-allocate a zero-filled DataFrame, then assign one whole row
# at a time with .loc, passing each row as a dict keyed by column label.
start = time.perf_counter()
dataframe = pd.DataFrame(np.zeros(shape=numbers.shape), columns=columns)
for j, row in enumerate(numbers):
    # Rebinds `row` from the array row to a {'col_<i>': value} dict; building
    # this dict is part of the timed work.
    row = {f'col_{i}': value for i, value in enumerate(row)}
    # NOTE(review): assigning a dict to an existing row relies on pandas
    # mapping dict keys to column labels -- confirm for the pandas version in use.
    dataframe.loc[j] = row
stop = time.perf_counter()
print(stop-start)

2.8023470000000543


In [10]:
# Strategy: no pre-allocated DataFrame. Collect each row as a dict keyed by
# column label in a Python list, then build the DataFrame once at the end.
start = time.perf_counter()
rows = []
for j, row in enumerate(numbers):
    # j is not used here; only the row contents are logged.
    rows.append({f'col_{i}': value for i, value in enumerate(row)})
# Single DataFrame construction from the list of dicts (inside the timed region).
dataframe = pd.DataFrame(rows)
stop = time.perf_counter()
print(stop-start)

0.1100582999999915


In [11]:
# Strategy: collect each row as a dict keyed by column label and build the
# DataFrame once at the end, calling .copy() on every value before storing it.
start = time.perf_counter()
rows = []
for j, row in enumerate(numbers):
    # NOTE(review): the per-value .copy() presumably measures the extra cost
    # of copying logged values before they are stored -- confirm intent.
    rows.append({f'col_{i}': value.copy() for i, value in enumerate(row)})
# Single DataFrame construction from the list of dicts (inside the timed region).
dataframe = pd.DataFrame(rows)
stop = time.perf_counter()
print(stop-start)

0.22888130000001183


In [12]:
# Strategy: log straight to a plain text file, one space-separated line per
# row of `numbers`, without using pandas at all.
# The scratch file lives in the current working directory; the path is named
# once so the write and the cleanup always refer to the same file.
temp_file = pl.Path('temp.txt')
start = time.perf_counter()
try:
    with open(temp_file, 'w') as file:
        for row in numbers:
            file.write(' '.join(map(str, row)) + '\n')
    # Taken after the `with` block, so closing the file is part of the timing.
    stop = time.perf_counter()
finally:
    # Remove the scratch file even if the write fails or is interrupted;
    # the existence check avoids masking the original error if the file
    # was never created.
    if temp_file.exists():
        temp_file.unlink()
print(stop-start)

0.18382210000004306
