In [1]:
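# Set up shared test data for comparing the timing of sklearn's
# ``LinearRegression`` against ``RunningLinearRegression``.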
import time
import numpy as np
from sklearn.linear_model import LinearRegression
from running_linear_regression import RunningLinearRegression

# Coefficients of the synthetic linear model ``y = b0 * x + b1 + noise``.
# Note that ``b0`` is the slope and ``b1`` is the intercept here.
b0, b1 = 2.0, 3.0

# Number of random (x, y) samples generated for the timing cells.
TEST_SIZE = 10000

# Seed NumPy's global generator so the benchmark data is identical on every
# Restart & Run All; without it each run times a different data set.
SEED = 0
np.random.seed(SEED)

xs_add = np.random.standard_normal(TEST_SIZE)
ys_add = b0 * xs_add + b1 + np.random.standard_normal(TEST_SIZE)


In [2]:
# Timing sklearn's ``LinearRegression``.  The model is refit from scratch
# after every change to the data.
#
# ``time.perf_counter`` is used instead of ``time.time`` because it is a
# monotonic, high-resolution clock meant for measuring elapsed durations;
# the wall clock can be adjusted while the benchmark is running.
start = time.perf_counter()

# Initial fit on ten noise-free points of the line ``y = b0 * x + b1``.
xs = np.arange(0, 10, 1)
ys = b0 * xs + b1
sk_reg = LinearRegression().fit(xs.reshape(-1, 1), ys)

# Push: append one new sample, then refit on the grown data set.
for x, y in zip(xs_add, ys_add):
    xs = np.append(xs, x)
    ys = np.append(ys, y)
    sk_reg = LinearRegression().fit(xs.reshape(-1, 1), ys)

# Pushpop: drop the oldest sample, append a new one, then refit.
for x, y in zip(xs_add, ys_add):
    xs = np.append(xs[1:], x)
    ys = np.append(ys[1:], y)
    sk_reg = LinearRegression().fit(xs.reshape(-1, 1), ys)

# Pop: drop the oldest sample, then refit on the shrunken data set.
for _ in range(TEST_SIZE // 2):
    xs = xs[1:]
    ys = ys[1:]
    sk_reg = LinearRegression().fit(xs.reshape(-1, 1), ys)

end = time.perf_counter()
sk_time = end - start  # elapsed seconds for the whole sklearn run
print(sk_time)


5.633713722229004


In [3]:
# Timing ``RunningLinearRegression``.  Note that we mimic the code used for
# timing ``LinearRegression`` (growing/shrinking ``xs`` and ``ys`` and
# reshaping ``xs``) even though these operations are unnecessary here.  This
# is so that we get an honest timing comparison: only the regression update
# itself differs between the two runs.
#
# ``time.perf_counter`` is used instead of ``time.time`` because it is a
# monotonic, high-resolution clock meant for measuring elapsed durations.
start = time.perf_counter()

# Initial fit on ten noise-free points of the line ``y = b0 * x + b1``.
xs = np.arange(0, 10, 1)
ys = b0 * xs + b1
my_reg = RunningLinearRegression().fit(xs, ys)

# Push: append one new sample and update the running regression.
for x, y in zip(xs_add, ys_add):
    xs = np.append(xs, x)
    ys = np.append(ys, y)
    xs.reshape(-1, 1)  # result intentionally discarded; mirrors the sklearn loop
    my_reg.push(x, y)

# Pushpop: drop the oldest sample and add a new one in a single update.
for x, y in zip(xs_add, ys_add):
    xs = np.append(xs[1:], x)
    ys = np.append(ys[1:], y)
    xs.reshape(-1, 1)  # result intentionally discarded; mirrors the sklearn loop
    my_reg.pushpop(x, y)

# Pop: drop the oldest sample and update the running regression.
for _ in range(TEST_SIZE // 2):
    xs = xs[1:]
    ys = ys[1:]
    xs.reshape(-1, 1)  # result intentionally discarded; mirrors the sklearn loop
    my_reg.pop()

end = time.perf_counter()
my_time = end - start  # elapsed seconds for the whole running-regression run
print(my_time)


0.27932286262512207


In [4]:
# Speed-up factor: elapsed sklearn time divided by elapsed
# ``RunningLinearRegression`` time for the same sequence of operations.
sk_time / my_time


20.169182247677252

In [5]:
from running_simple_stats import RunningSimpleStats

# Initial window of 100 points handed to ``RunningSimpleStats`` below.
xs = np.arange(0, 100, 1)
ys = np.arange(0, 100, 1)

# Use a dedicated constant rather than rebinding ``TEST_SIZE``: the regression
# timing cells read ``TEST_SIZE`` for their pop loops, so overwriting it here
# would silently change (and break) them if they were re-run afterwards.
STATS_TEST_SIZE = 10 ** 6

# Seed the generator so the streamed samples are reproducible across runs.
STATS_SEED = 1
np.random.seed(STATS_SEED)

xs_new = np.random.standard_normal(STATS_TEST_SIZE)
ys_new = np.random.standard_normal(STATS_TEST_SIZE)


In [6]:
# Timing pushpop as a single operation: each new sample is added with one
# ``pushpop`` call.
#
# ``time.perf_counter`` is used instead of ``time.time`` because it is a
# monotonic, high-resolution clock meant for measuring elapsed durations.
start = time.perf_counter()
rss = RunningSimpleStats(xs, ys)
for x, y in zip(xs_new, ys_new):
    rss.pushpop(x, y)
end = time.perf_counter()
pushpop_time = end - start  # elapsed seconds, construction included
print(pushpop_time)


1.2645580768585205


In [7]:
# Timing pushing and popping separately: each new sample is added with a
# ``push`` call followed by a separate ``pop`` call.
#
# ``time.perf_counter`` is used instead of ``time.time`` because it is a
# monotonic, high-resolution clock meant for measuring elapsed durations.
start = time.perf_counter()
rss = RunningSimpleStats(xs, ys)
for x, y in zip(xs_new, ys_new):
    rss.push(x, y)
    rss.pop()
end = time.perf_counter()
push_pop_time = end - start  # elapsed seconds, construction included
print(push_pop_time)


2.629517078399658


In [8]:
# Ratio of the separate push-then-pop timing to the combined pushpop timing;
# a value above 1 means the single ``pushpop`` call was faster.
push_pop_time / pushpop_time


2.0793960566303435