In [1]:
import numpy as np
import pandas as pd
import wavy

# Start with any time series dataframe. A seeded generator keeps the synthetic
# series identical across kernel restarts, so every downstream plot and metric
# can be reproduced with Restart & Run All.
rng = np.random.default_rng(42)
df = pd.DataFrame({'price': rng.standard_normal(1000)}, index=range(1000))

# Create panels. Each panel is composed of a list of frames.
x, y = wavy.create_panels(df, lookback=10, horizon=1)

# x and y contain the past and the corresponding future data.
# lookback and horizon are the number of timesteps.
print("Lookback:", len(x[0]), "Horizon:", len(y[0]))

# Plot the target.
y.plot()

  from .autonotebook import tqdm as notebook_tqdm


Lookback: 10 Horizon: 1


In [2]:
# Relabel the feature column of the input panel from "price" to "past".
x = x.rename_columns({"price": "past"})

In [3]:
# Display the first frame of the input panel (ten rows in the saved output).
x[0]

Unnamed: 0,past
0,0.255255
1,-0.71082
2,0.958621
3,1.412795
4,0.721565
5,-2.013416
6,-0.311711
7,-0.091251
8,-0.08026
9,-1.354015


In [None]:
# Display the result of `pct_changew`; `x` itself is not reassigned here.
# NOTE(review): presumably a percentage-change transform — confirm the method
# name against the installed wavy version.
x.pct_changew()

In [None]:
# Replace the target panel with the result of comparing it against zero.
# NOTE(review): presumably element-wise booleans (True where the future value
# is positive) — confirm how wavy panels implement `>`.
y = y > 0

In [None]:
# Plot the target again, now that it has been reassigned to `y > 0`.
y.plot()

In [None]:
from sklearn.dummy import DummyRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error
from sklearn.multiclass import OneVsRestClassifier
from sklearn.tree import DecisionTreeClassifier

# Regression variants kept for reference:
# model = wavy.ShallowModel(x, y, model=RandomForestRegressor, metrics=[mean_absolute_error], strategy="mean")
# model = wavy.ShallowModel(x, y, model=LinearRegression, metrics=[mean_absolute_error])

# Logistic-regression model on the x/y panels, reporting F1 and accuracy.
model = wavy.ShallowModel(
    x,
    y,
    model=LogisticRegression,
    metrics=[f1_score, accuracy_score],
)

In [None]:
# Fit the model built in the previous cell. No arguments are passed here.
# NOTE(review): the training data presumably comes from the x/y panels given
# at construction — confirm.
model.fit()

In [None]:
# Predictions from the fitted model, plotted straight away.
predictions = model.predict()
predictions.plot()

In [None]:
# Output of `predict_proba` from the fitted model, plotted straight away.
probabilities = model.predict_proba()
probabilities.plot()

In [None]:
import numpy as np
from sklearn import metrics

# y = model.y_test.squeeze().array
y = [int(i) for i in model.y_test.squeeze()]
# pred = model.model.predict(model.x_test).squeeze()
pred = [int(i) for i in model.model.predict(model.x_test).squeeze()]

fpr, tpr, thresholds = metrics.roc_curve(y, pred, pos_label=1)
metrics.auc(fpr, tpr)

In [None]:
optimal_idx = np.argmax(tpr - fpr)

In [None]:
thresholds[optimal_idx]

In [None]:
y = [int(i) for i in model.y_test.squeeze()]

In [None]:
pred = [int(i) for i in model.model.predict(model.x_test).squeeze()]

In [None]:
# True positive
tp = [i for i, j in zip(y, pred) if i == 1 and j == 1]
# False positive
fp = [i for i, j in zip(y, pred) if i == 0 and j == 1]
# False negative
fn = [i for i, j in zip(y, pred) if i == 1 and j == 0]
# True negative
tn = [i for i, j in zip(y, pred) if i == 0 and j == 0]

print(f"tf: {len(tp)}, fp: {len(fp)}, fn: {len(fn)}, tn: {len(tn)}")

recall = len(tp) / (len(tp) + len(fn))
print(f"recall: {recall}")

In [None]:
fpr, tpr, thresholds = metrics.roc_curve(y, pred, pos_label=1)

In [None]:
thresholds

In [None]:
# Plot a sample of size 100 drawn from the input panel.
# NOTE(review): `how='random'` presumably picks frames at random — confirm
# against the wavy API.
x.sample(100, how='random').plot()

In [None]:
# Report how many frames each split of the input panel holds.
for split_name in ("train", "val", "test"):
    print(f"{split_name}: {len(getattr(x, split_name))}")

In [None]:
# Plot the test split of the input panel.
x.test.plot()

In [None]:
import plotly.express as px

# Display Plotly's default qualitative colour sequence.
px.colors.qualitative.Plotly

In [None]:
# Plot the input panel with `split_sets=True`.
# NOTE(review): presumably draws the train/val/test splits separately — confirm.
x.plot(split_sets=True)

In [None]:
# Plot the training split of the input panel.
x.train.plot()

In [None]:
# Pull the train and test splits out of both panels as numpy arrays.
# Panels contain a train-val-test split by default.
x_train = x.train.values
y_train = y.train.values
x_test = x.test.values
y_test = y.test.values
print(x_train.shape, y_train.shape)

# Or just instantiate a model.
# model = wavy.LinearRegression(x, y)
# model.score()

In [None]:
# Build a linear-regression model on the x/y panels. This rebinds `model`,
# replacing the ShallowModel created earlier in the notebook.
# NOTE(review): `y` was reassigned to `y > 0` earlier, so this regresses on a
# thresholded target — confirm that is intended.
model = wavy.LinearRegression(x, y)

In [None]:
# Plot the input panel held by the model.
model.x.plot()

In [None]:
# Keep a handle on the training split of the model's input panel.
# `a` is a scratch name; a later cell rebinds it to the model's predictions.
a = model.x.train

In [None]:
# Plot whatever `a` is currently bound to (the training split, if the cells
# are run in order).
a.plot()

In [None]:
# Display the model's score as the cell output.
# NOTE(review): which metric and which split are used is not visible here — confirm.
model.score()

In [None]:
# Rebind `a` to the model's predictions.
a = model.predict()

In [None]:
# Plot whatever `a` is currently bound to (the predictions, if the cells are
# run in order).
a.plot()

In [None]:
# Store the model's residuals for inspection in the following cells.
residuals = model.residuals()

In [None]:
# Display the residuals object as the cell output.
residuals

In [None]:
# Display the first element of the residuals.
residuals[0]

In [None]:
# Plot the residuals.
residuals.plot()

In [None]:
# Recompute the predictions into `a`.
# NOTE(review): this repeats an earlier cell, and `a` is not read by the cells
# that follow in this view — likely safe to drop.
a = model.predict()

In [None]:
# Plot the target panel held by the model.
model.y.plot()

In [None]:
# Display the raw values of the first element of the residuals.
residuals[0].values