In [1]:
from timeit import default_timer as timer

import joblib

import matplotlib.pyplot as plt

import numpy as np

import feets
from feets.extractors import DATAS

import pandas as pd

In [2]:
def periodic_light_curve(*, length=100, random=None, period=10):
    """Simulate one evenly sampled light curve with periodic correlations.

    Magnitudes are a single draw from a zero-mean multivariate normal whose
    covariance between samples ``i`` and ``j`` is
    ``exp(-sin(pi * (i - j) / period) ** 2)``. Errors are drawn uniformly
    from ``[0, 0.08)``.

    Parameters
    ----------
    length : int
        Number of samples; times are ``0, 1, ..., length - 1``.
    random : seed or generator, optional
        Forwarded unchanged to ``numpy.random.default_rng``.
    period : int or float
        Period of the covariance kernel, in units of the sample index.

    Returns
    -------
    dict
        Keys ``time``, ``magnitude``, ``error``, ``magnitude2``, ``error2``
        and the same five names prefixed with ``aligned_``. All magnitude
        keys refer to one array object, and likewise for time and error.
    """
    rng = np.random.default_rng(random)

    steps = np.arange(length)
    lag = np.subtract.outer(steps, steps)
    kernel = np.exp(-np.sin((np.pi / period) * lag) ** 2)

    # Draw order is part of the seeded behaviour: errors first, magnitudes
    # second.
    sigma = rng.uniform(0, 0.08, length)
    mags = rng.multivariate_normal(mean=np.zeros(length), cov=kernel)

    curve = {
        "time": steps,
        "magnitude": mags,
        "error": sigma,
        "magnitude2": mags,
        "error2": sigma,
    }
    # The "aligned" variants reuse the very same arrays, in the same order.
    curve.update(
        {f"aligned_{name}": values for name, values in list(curve.items())}
    )
    return curve


def periodic_light_curve_group(
    *, group_size, lc_length, random=None, period=10
):
    """Build a list of ``group_size`` simulated periodic light curves.

    Parameters
    ----------
    group_size : int
        Number of light curves in the returned list.
    lc_length : int
        Number of samples in each light curve.
    random : seed or generator, optional
        Turned into one ``numpy.random.Generator`` that every curve draws
        from in turn. A fixed seed therefore reproduces the whole group
        while the individual curves still differ from each other. The
        default (``None``) keeps the group unseeded.
    period : int or float, optional
        Kernel period forwarded to ``periodic_light_curve``; the default
        of 10 matches that function's own default.

    Returns
    -------
    list of dict
        One ``periodic_light_curve`` result per group member.
    """
    # A single shared generator: handing the same integer seed to every
    # curve would make all curves in the group identical.
    rng = np.random.default_rng(random)
    return [
        periodic_light_curve(length=lc_length, random=rng, period=period)
        for _ in range(group_size)
    ]

In [None]:
# One seeded example curve, so the figure is the same on every run.
periodic_lc = periodic_light_curve(length=100, period=20, random=42)

fig, ax = plt.subplots()

# Magnitude against time, with the per-sample errors drawn as red bars.
ax.errorbar(
    periodic_lc["time"],
    periodic_lc["magnitude"],
    yerr=periodic_lc["error"],
    fmt=".-",
    ecolor="red",
)
ax.set(
    title="Periodic light curve of size 100",
    ylabel="Magnitude",
    xlabel="Time",
)

fig.tight_layout()

In [4]:
def run(*, lc_group, scheduler):
    """Time a single feature extraction over a group of light curves.

    Parameters
    ----------
    lc_group : sequence
        Light curves; each element is passed to ``FeatureSpace.extract`` as
        its own positional argument.
    scheduler : str
        Value stored under the ``"scheduler"`` key of the ``dask_options``
        given to ``feets.FeatureSpace``.

    Returns
    -------
    tuple
        ``(elapsed, features)``: the wall-clock duration of the ``extract``
        call as measured by ``timeit.default_timer``, and whatever that
        call returned.
    """
    # The feature space is built before the clock starts, so only the
    # extraction itself is measured.
    space = feets.FeatureSpace(dask_options={"scheduler": scheduler})

    t0 = timer()
    extracted = space.extract(*lc_group)
    elapsed = timer() - t0

    return elapsed, extracted


# Benchmark grid: the timing loop visits every combination of the three
# lists below, `iters` times each.
lc_lengths = [30, 100, 300, 1000]  # samples per simulated light curve
group_sizes = [1, 50, 100, 500, 1000]  # light curves per extraction call
# Values handed to `run`, which passes them as the dask "scheduler" option.
schedulers = ["synchronous", "threads", "processes"]
iters = 1000  # timed repetitions of each grid combination

In [None]:
# Time feature extraction over the whole benchmark grid.
#
# Each light-curve group is simulated once per (group_size, lc_length,
# iteration) and then handed to every scheduler. The schedulers are thus
# compared on identical input, and the simulation is not repeated for each
# of them.
# NOTE(review): this assumes `run` does not modify `lc_group` - confirm.
results = []
for group_size in group_sizes:
    for lc_length in lc_lengths:
        # Rows are buffered per scheduler so the final table keeps the
        # (group_size, lc_length, scheduler, it) row order.
        rows_by_scheduler = {scheduler: [] for scheduler in schedulers}

        for it in range(iters):
            lc_group = periodic_light_curve_group(
                group_size=group_size, lc_length=lc_length
            )
            for scheduler in schedulers:
                # Only the elapsed time is kept; the extracted features
                # are discarded.
                elapsed, _ = run(lc_group=lc_group, scheduler=scheduler)
                rows_by_scheduler[scheduler].append(
                    {
                        "group_size": group_size,
                        "lc_length": lc_length,
                        "scheduler": scheduler,
                        "it": it,
                        "time": elapsed,
                    }
                )

        for scheduler in schedulers:
            results.extend(rows_by_scheduler[scheduler])

# One row per timed extraction, persisted for later analysis.
df = pd.DataFrame(results)
df.to_pickle("00_times.pkl")