# 1. Get the dataset

In [None]:
import numpy as np
import pandas as pd

# Number of leading rows of the CSV used in this example.
N_ROWS = 1000

# Load only the rows we need, rename the closing price to "target" and drop
# the identifier columns that are not used as features.
df = pd.read_csv("../examples_2/BTC-Hourly.csv", nrows=N_ROWS)
df = df.rename(columns={"close": "target"}).drop(columns=["unix", "symbol"])
df["date"] = pd.to_datetime(df["date"])

x, close = df.drop(columns="target"), df["target"]

# Binary label: 1 when a row's close is strictly greater than the previous
# row's close, else 0. `shift(1)` leaves the first row without a predecessor
# (NaN), so it is labelled 0; `np.roll` would instead wrap around and compare
# the first row against the last one.
y = np.where(close > close.shift(1), 1, 0)

# 2. Define transformations

In [None]:
from beaverfe import BeaverPipeline
from beaverfe.transformations import CyclicalFeaturesTransformer, DateTimeTransformer

# First step: datetime handling configured for the "date" column.
# NOTE(review): presumably this derives columns such as "date_minute" and
# "date_hour" that the next step refers to -- confirm against beaverfe docs.
datetime_step = DateTimeTransformer(datetime_columns=["date"])

# Second step: cyclical encoding, given each column's period
# (60 for minutes, 24 for hours).
cyclical_step = CyclicalFeaturesTransformer(
    columns_periods={"date_minute": 60, "date_hour": 24}
)

# Steps are applied in list order.
transformer = BeaverPipeline([datetime_step, cyclical_step])

# 3. Define the pipeline

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

# Seed the forest so that re-running the notebook reproduces the same scores;
# an unseeded RandomForestClassifier gives different results on every run.
model = RandomForestClassifier(random_state=1)

# Feature transformations ("t") followed by the classifier ("m").
pipe = Pipeline(steps=[("t", transformer), ("m", model)])

# 4. Evaluate

In [None]:
from numpy import mean, std
from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score

# 10 stratified folds repeated 3 times, with a fixed seed for the splits.
# NOTE(review): these folds are drawn without regard to row order; if the rows
# are time-ordered, a time-aware splitter may be more appropriate -- confirm.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

# Accuracy of the full pipeline on every fold, using all available cores.
scores = cross_val_score(
    pipe,
    x,
    y,
    scoring="accuracy",
    cv=cv,
    n_jobs=-1,
)

# Report mean accuracy with its standard deviation in parentheses.
print(f"{mean(scores):.3f} ({std(scores):.3f})")

# 5. Test the transformation on held-out data

In [None]:
# Positional hold-out split: the first 66% of the rows are used for fitting,
# the remaining rows for checking the transformer's output.
point = int(len(x) * 0.66)

x_train, x_test = x.iloc[:point], x.iloc[point:]
y_train, y_test = y[:point], y[point:]

# Show the held-out features before the transformation...
print(x_test.head())
print("\n", "-" * 50, "\n")

# ...then fit on the training part only and show the same rows transformed.
transformer.fit(x_train, y_train)
x_test = transformer.transform(x_test, y_test)
print(x_test.head())