In [None]:
import os

import pandas as pd
from quartic_sdk import APIClient
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression

In [None]:
# Initialize a Quartic Platform API client.
# The host and credentials can be overridden through environment variables so that
# real secrets never have to be committed with the notebook. When a variable is not
# set, the value that was previously hard-coded here is used, so behaviour is unchanged.
client = APIClient(os.environ.get("QUARTIC_HOST", "https://stag.quartic.ai/"),
                   username=os.environ.get("QUARTIC_USERNAME", "doe@quartic.ai"),
                   password=os.environ.get("QUARTIC_PASSWORD", "doe@quartic.ai"))

In [None]:
# Fetch the asset collection through the API client; the next cell looks up one asset in it by name.
assets = client.assets()

In [None]:
# Select the asset whose "name" field equals the given value ...
asset = assets.get("name", "Spectral Asset Canada Team")
# ... and request the tags for that asset's id.
asset_tags = client.tags(asset_id=asset.id)

# Train and deploy an XGBoost regressor

In [None]:
from datetime import datetime
from pytz import timezone

# Query window expressed in epoch milliseconds: it ends at 2021-03-01 00:00:00 UTC
# and starts three hours earlier.
stop_time = int(datetime(2021, 3, 1, 0, 0, 0, 0, timezone('UTC')).timestamp() * 1000)
start_time = stop_time - 3 * 60 * 60 * 1000 # load 3 hours of data

# asset.data(...) is iterated to obtain DataFrame chunks. Collect every chunk first
# and concatenate once: calling pd.concat inside the loop re-copies all previously
# loaded rows on each iteration (quadratic work) and starts from an empty frame.
asset_data_itr = asset.data(start_time=start_time, stop_time=stop_time)
chunks = list(asset_data_itr)

# pd.concat raises on an empty list, so fall back to an empty frame when no data came back
df = pd.concat(chunks) if chunks else pd.DataFrame()


In [None]:
# Column labels in `df`: the single target column and the columns used as model inputs.
# The same values are passed as `target_tag` / `feature_tags` when the models are saved below.
target_col = 6825
feature_cols = [6819, 6820, 6822, 6823, 6824]

# Print the loaded frame for a quick visual check
print(df)

In [None]:
def max_value_of_last_hour(row, frame=None):
    """Return the column-wise maximum over the hour ending at ``row``'s timestamp.

    Intended for ``DataFrame.apply(..., axis=1)``: ``row.name`` is the row's index
    label, and the window covers every row of ``frame`` whose index lies in
    ``[row.name - 1 hour, row.name]`` (both ends inclusive).

    Args:
        row: A row of the frame as a ``pd.Series``; only its ``name`` (the index
            label) is used. Assumes the label supports subtracting a
            ``pd.Timedelta`` (e.g. a timestamp) -- TODO confirm for the real data.
        frame: The DataFrame to take the window from. When omitted, the notebook
            global ``df_X`` is used, which is what the original implementation
            always did.

    Returns:
        A ``pd.Series`` holding the per-column maximum of the window, named
        after the row's index label.
    """
    if frame is None:
        # Backward-compatible default: fall back to the notebook-level feature frame
        frame = df_X

    end_ts = row.name
    start_ts = end_ts - pd.Timedelta(hours=1)

    # Take the max values between row timestamp t and t - 1 hour
    in_window = (frame.index >= start_ts) & (frame.index <= end_ts)
    return frame[in_window].max().rename(end_ts)

In [None]:
# Preparing features for training regressor
# Preparing features for training regressor.
# NOTE: keep the next two statements separate. `max_value_of_last_hour` reads the
# global `df_X` while `apply` is running, so `df_X` must already hold the raw
# feature columns before the second statement is evaluated.
df_X = df[feature_cols]
df_X = df_X.apply(max_value_of_last_hour, axis=1) # simple feature engineering step

# target for training (single column dataframe)
df_y = df[[target_col]]

In [None]:
import xgboost as xgb

# Convert training data to the format that XGBoost can ingest:
# the engineered features are the data matrix and the target column supplies the labels.
dtrain = xgb.DMatrix(df_X, label=df_y)

In [None]:
# Parameters for regression model training
# Booster configuration for the regression demo, declared in one place
param = {
    'max_depth': 100,
    'eta': 0.2,
    'objective': 'reg:squarederror',
    'nthread': 4,
    'eval_metric': 'mae',
}
num_round = 100  # number of boosting rounds handed to xgb.train

# Fit a throwaway ("dummy") model on the training matrix
regressor = xgb.train(param, dtrain, num_round)

# Smoke test: predict on the training features just to confirm predictions come back
print(regressor.predict(xgb.DMatrix(df_X)))

In [None]:
from quartic_sdk.model import BaseQuarticModel

class XGBRegressor(BaseQuarticModel):
    """Quartic model wrapper around a trained XGBoost regression booster.

    The wrapper repeats the training-time feature engineering (rolling one-hour
    maximum per column) inside ``predict`` and then delegates to the booster.
    """

    def __init__(self, name, description, log_level, model):
        """Store the trained booster and initialise the Quartic base model.

        Args:
            name: Model name forwarded to ``BaseQuarticModel``.
            description: Model description forwarded to ``BaseQuarticModel``.
            log_level: Log level forwarded to ``BaseQuarticModel``.
            model: Trained booster exposing ``predict`` on an ``xgb.DMatrix``.
        """
        self.model = model
        super().__init__(name=name, description=description, log_level=log_level)

    def predict(self, input_df: pd.DataFrame):
        """Predict the target for every row of ``input_df``.

        Args:
            input_df: Feature frame. Its index labels must support subtracting
                a ``pd.Timedelta`` (assumed to be timestamps -- TODO confirm
                against what the platform passes in).

        Returns:
            ``pd.Series`` with one prediction per input row.
        """
        def max_value_of_last_hour(row):
            end_ts = row.name
            start_ts = end_ts - pd.Timedelta(hours=1)
            # Take the max values between row timestamp t and t - 1 hour.
            # The window is taken from the frame being scored; the earlier version
            # read the notebook global `df_X` (the training data) here, so the
            # prediction input was ignored and the global had to exist at runtime.
            in_window = (input_df.index >= start_ts) & (input_df.index <= end_ts)
            return input_df[in_window].max().rename(end_ts)

        self.log.info("Predicting...")
        # `input_df` is left untouched so the helper above keeps seeing the raw rows
        features = input_df.apply(max_value_of_last_hour, axis=1)
        y_preds = self.model.predict(xgb.DMatrix(features))
        return pd.Series(y_preds)

In [None]:
# Wrap the trained booster so it can be deployed through the Quartic SDK
xgb_reg_model = XGBRegressor(
    name="xgb_reg",
    description="",
    log_level="INFO",
    model=regressor,
)

# Save the wrapped model via the client, using the engineered training features as test input
xgb_reg_model.save(
    client=client,
    output_tag_name='xgb_reg',
    feature_tags=feature_cols,
    target_tag=target_col,
    test_df=df_X,
)

## Train and deploy an XGBoost classifier

In [None]:
# Reuse the training set we prepared for the regressor
# Modify the target for classifier trainining. 
# Start from a copy of the regression target so `df_y` itself stays untouched
df_y_binary = df_y.copy()

# The target carries two class labels, -2 and 2; map them to 0 and 1 respectively.
# Both masks come from the original target, never from the frame being rewritten.
is_non_positive = df_y[target_col] <= 0
is_positive = df_y[target_col] > 0
df_y_binary.loc[is_non_positive, target_col] = 0
df_y_binary.loc[is_positive, target_col] = 1

# Build the XGBoost training matrix from the shared features and the binary labels
dtrain = xgb.DMatrix(df_X, label=df_y_binary)

# Booster configuration for the classification demo
param = {
    'max_depth': 100,
    'eta': 0.2,
    'objective': 'binary:logistic',
    'nthread': 4,
    'eval_metric': 'mae',
}
num_round = 100

# Fit a throwaway ("dummy") model and predict on the training features as a smoke test
classifier = xgb.train(param, dtrain, num_round)
preds = classifier.predict(xgb.DMatrix(df_X))

# Convert probabilities back to the original class labels.
# Both masks are computed up front, before any value is overwritten.
at_or_above_half = preds >= 0.5
below_half = preds < 0.5
preds[at_or_above_half] = 2
preds[below_half] = -2
print(preds)

In [None]:
from quartic_sdk.model import BaseQuarticModel

class XGBClassifer(BaseQuarticModel):
    """Quartic model wrapper around a trained XGBoost binary classifier.

    ``predict`` repeats the training-time feature engineering (rolling one-hour
    maximum per column), scores the rows with the booster and maps the resulting
    probabilities back to the original class labels ``2`` and ``-2``.
    """

    def __init__(self, name, description, log_level, model):
        """Store the trained booster and initialise the Quartic base model.

        Args:
            name: Model name forwarded to ``BaseQuarticModel``.
            description: Model description forwarded to ``BaseQuarticModel``.
            log_level: Log level forwarded to ``BaseQuarticModel``.
            model: Trained booster exposing ``predict`` on an ``xgb.DMatrix``.
        """
        self.model = model
        super().__init__(name=name, description=description, log_level=log_level)

    def predict(self, input_df: pd.DataFrame):
        """Predict a class label (``2`` or ``-2``) for every row of ``input_df``.

        Args:
            input_df: Feature frame. Its index labels must support subtracting
                a ``pd.Timedelta`` (assumed to be timestamps -- TODO confirm
                against what the platform passes in).

        Returns:
            ``pd.Series`` with one label per input row: ``2`` where the predicted
            probability is at least 0.5, ``-2`` otherwise.
        """
        def max_value_of_last_hour(row):
            end_ts = row.name
            start_ts = end_ts - pd.Timedelta(hours=1)
            # Take the max values between row timestamp t and t - 1 hour.
            # The window is taken from the frame being scored; the earlier version
            # read the notebook global `df_X` (the training data) here.
            in_window = (input_df.index >= start_ts) & (input_df.index <= end_ts)
            return input_df[in_window].max().rename(end_ts)

        self.log.info("Predicting...")
        # `input_df` is left untouched so the helper above keeps seeing the raw rows
        features = input_df.apply(max_value_of_last_hour, axis=1)
        y_preds = self.model.predict(xgb.DMatrix(features))

        # Convert probabilities back to the original class labels.
        # The labels are written into `y_preds` itself; the earlier version wrote
        # them into the unrelated notebook global `preds` and returned the raw
        # probabilities. The mask is computed once, before any value is overwritten.
        is_positive = y_preds >= 0.5
        y_preds[is_positive] = 2
        y_preds[~is_positive] = -2
        return pd.Series(y_preds)

# Wrap the trained *classifier* for deployment. This cell previously instantiated
# XGBRegressor with `model=regressor`, so the "xgb_classifier" deployment actually
# shipped the regression model.
# NOTE(review): the variable name `xgb_reg_model` is kept unchanged in case other
# cells refer to it, even though it now holds the classifier wrapper.
xgb_reg_model = XGBClassifer(name="xgb_classifier",
                             description="",
                             log_level="INFO",
                             model=classifier)

xgb_reg_model.save(client=client, output_tag_name='xgb_classifier', feature_tags=feature_cols, target_tag=target_col, test_df=df_X)