In [None]:
import os
from getpass import getpass

import pandas as pd
from quartic_sdk import APIClient
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression

In [None]:
# Initialize a Quartic Platform API client.
# Credentials come from the environment (falling back to an interactive
# prompt) so that no username or password is stored in the notebook itself.
QUARTIC_HOST = os.environ.get("QUARTIC_HOST", "https://stag.quartic.ai/")
QUARTIC_USERNAME = os.environ.get("QUARTIC_USERNAME") or input("Quartic username: ")
QUARTIC_PASSWORD = os.environ.get("QUARTIC_PASSWORD") or getpass("Quartic password: ")

client = APIClient(QUARTIC_HOST,
                   username=QUARTIC_USERNAME,
                   password=QUARTIC_PASSWORD)

In [None]:
# Retrieve the assets collection through the Quartic client; the next cell
# looks up one specific asset in it.
assets = client.assets()

In [None]:
# Pick the asset to model (presumably matched on its `name` field -- confirm
# against the SDK), then request the tags for that asset's id.
asset = assets.get("name", "Spectral Asset Canada Team")
asset_tags = client.tags(asset_id=asset.id)

In [None]:
from datetime import datetime
from pytz import timezone

# Query window in epoch milliseconds: the 3 hours ending at 2021-03-01 00:00 UTC.
stop_time = int(datetime(2021, 3, 1, 0, 0, 0, 0, timezone('UTC')).timestamp() * 1000)
start_time = stop_time - 3 * 60 * 60 * 1000
asset_data_itr = asset.data(start_time=start_time, stop_time=stop_time)

# Collect every batch first and concatenate once; calling pd.concat inside the
# loop re-copies all previously accumulated rows on every iteration.
batches = list(asset_data_itr)
# pd.concat raises ValueError on an empty list, so keep the original outcome
# (an empty DataFrame) when the iterator yields nothing.
df = pd.concat(batches) if batches else pd.DataFrame()


In [None]:
# Drop every column (axis=1) that contains at least one missing value, so the
# remaining columns are fully populated over the queried window.
df = df.dropna(axis=1)

In [None]:
# Show the cleaned frame as the cell output for a quick visual check.
df

In [None]:
# Column labels in `df`; the same values are later passed to the model's
# save() call as `target_tag` / `feature_tags`.
target_col = 6825  # column the model is trained to predict
feature_cols = [6819, 6820, 6822, 6823, 6824]  # columns used as model inputs

In [None]:
def max_value_of_last_hour(row, frame=None):
    """Return the column-wise maximum over the hour ending at ``row``'s label.

    Parameters
    ----------
    row : pd.Series
        A row of a time-indexed DataFrame. Only its label (``row.name``) is
        used, as the end of the look-back window.
    frame : pd.DataFrame, optional
        Frame the window is taken from. Defaults to the notebook-level
        ``df_X`` so existing ``df_X.apply(max_value_of_last_hour, axis=1)``
        calls keep working unchanged.

    Returns
    -------
    pd.Series
        Per-column maximum of all ``frame`` rows whose index lies in
        ``[row.name - 1 hour, row.name]`` (both ends inclusive), named
        ``row.name``.
    """
    if frame is None:
        # Looked up at call time: whatever `df_X` is bound to when the row is processed.
        frame = df_X
    end_ts = row.name
    start_ts = end_ts - pd.Timedelta(hours=1)
    in_window = (frame.index >= start_ts) & (frame.index <= end_ts)
    return frame[in_window].max().rename(end_ts)

In [None]:
# Build the training set.

# Target: kept as a single-column DataFrame (note the list inside the indexer).
df_y = df.loc[:, [target_col]]

# Features: start from the raw feature columns ...
df_X = df.loc[:, feature_cols]
# ... then replace each row with the per-column max over its trailing hour.
# max_value_of_last_hour reads the global `df_X`, so the raw selection above
# has to be bound to that name before apply() runs.
df_X = df_X.apply(max_value_of_last_hour, axis=1)

In [None]:
import xgboost as xgb

# Wrap the engineered features (df_X) and the target (df_y) in an xgboost
# DMatrix, the container that xgb.train takes as its training input.
dtrain = xgb.DMatrix(df_X, label=df_y)

In [None]:
# Training configuration: number of boosting rounds and booster parameters.
num_round = 100
param = {
    'max_depth': 100,
    'eta': 0.2,
    'objective': 'reg:squarederror',
    'nthread': 4,
    'eval_metric': 'mae',
}

# Fit a throwaway booster; this notebook only needs some model to wrap.
bst = xgb.train(params=param, dtrain=dtrain, num_boost_round=num_round)

# Smoke test: confirm the booster produces predictions for the training features.
bst.predict(xgb.DMatrix(df_X))

In [None]:
from quartic_sdk.model import BaseQuarticModel

class XGBRegressor(BaseQuarticModel):
    """Quartic model wrapper around a trained xgboost booster.

    ``predict`` derives "max over the trailing hour" features from the frame
    it is given and then delegates to the wrapped booster.
    """

    def __init__(self, name, description, log_level, model):
        """Store the trained booster and initialise the Quartic base model.

        Parameters
        ----------
        name, description, log_level
            Forwarded unchanged to ``BaseQuarticModel.__init__``.
        model
            Trained model whose ``predict`` accepts an ``xgb.DMatrix``.
        """
        # Assigned before the base initialiser runs, as in the original code.
        self.model = model
        super().__init__(name=name, description=description, log_level=log_level)

    def predict(self, input_df: pd.DataFrame):
        """Predict the target for every row of ``input_df``.

        Parameters
        ----------
        input_df : pd.DataFrame
            Raw feature values. The index labels must support subtracting a
            ``pd.Timedelta`` (assumed to be timestamps -- confirm against the
            platform's input format).

        Returns
        -------
        pd.Series
            One prediction per input row, with a default integer index.
        """
        def max_value_of_last_hour(row):
            # Window is taken from the frame being scored. The previous
            # version read the notebook-global `df_X` here, so predictions
            # did not depend on the values in `input_df`.
            end_ts = row.name
            start_ts = end_ts - pd.Timedelta(hours=1)
            # Rows with index in [t - 1 hour, t], both ends inclusive.
            in_window = (input_df.index >= start_ts) & (input_df.index <= end_ts)
            return input_df[in_window].max().rename(end_ts)

        self.log.info("Predicting...")
        features = input_df.apply(max_value_of_last_hour, axis=1)
        y_preds = self.model.predict(xgb.DMatrix(features))
        return pd.Series(y_preds)

In [None]:
xgb_reg_model = XGBRegressor(name="xgb_reg",
                             description="",
                             log_level="INFO",
                             model=bst)

# Validate with the *raw* feature columns: XGBRegressor.predict is written to
# take raw values and derive the trailing-hour max features itself, so the
# test frame should not be the already-engineered df_X.
xgb_reg_model.save(
    client=client,
    output_tag_name='xgb_reg',
    feature_tags=feature_cols,
    target_tag=target_col,
    test_df=df[feature_cols],
)