In [None]:
from quartic_sdk import APIClient
import pandas as pd
from sklearn.decomposition import PCA

In [None]:
# Initialize a Quartic Platform API client.
# Credentials are taken from the environment (QUARTIC_USERNAME /
# QUARTIC_PASSWORD) and only prompted for interactively when unset, so that
# no secret is ever stored in the notebook or its version history.
import os
from getpass import getpass

client = APIClient(host=os.environ.get('QUARTIC_HOST', 'https://qa.quartic.ai'),
                   username=os.environ.get('QUARTIC_USERNAME') or input('Quartic username: '),
                   password=os.environ.get('QUARTIC_PASSWORD') or getpass('Quartic password: '))

# Query the available assets and pick the one to work with by its name.
assets = client.assets()
asset = assets.get("name", "Asset Cont replay 01")

In [None]:
from datetime import datetime
from pytz import timezone

# Query window: from 30 minutes ago until now, both expressed as Unix
# timestamps in milliseconds.
stop_time = int(datetime.now().timestamp() * 1000)
start_time = stop_time - 30 * 60 * 1000

# asset.data() is consumed as an iterator of DataFrame chunks. Collect every
# chunk first and concatenate once: calling pd.concat inside the loop would
# re-copy the accumulated frame on each iteration.
asset_data_itr = asset.data(start_time=start_time, stop_time=stop_time)
chunks = list(asset_data_itr)
# pd.concat raises on an empty list, so keep the original "empty frame"
# result when the iterator yields nothing.
df = pd.concat(chunks) if chunks else pd.DataFrame()

df

In [None]:
# Column labels of `df` used for modelling: a single target column and the
# six feature columns.
target_col = "39543"
feature_cols = [
    "39553", "39554", "39555",
    "39556", "39557", "39558",
]

# Split the fetched frame into the feature matrix and the target frame.
df_X = df.loc[:, feature_cols]
df_y = df.loc[:, [target_col]]

# NumPy view of the target; df_y is a one-column DataFrame, so this array is
# two-dimensional.
y = df_y.to_numpy()

In [None]:
class DataframeFunctionTransformer():
    """Pipeline step that replaces each row by a trailing one-hour maximum.

    For every row of the input frame, the output row holds the column-wise
    maximum over all rows of that same frame whose index label lies in the
    closed interval ``[t - 1 hour, t]``, where ``t`` is the row's own index
    label. The arithmetic treats index labels as numbers of milliseconds.

    The object exposes ``fit`` / ``transform`` so it can be placed inside a
    scikit-learn ``Pipeline``; it learns nothing during ``fit``.
    """

    def __init__(self):
        # Function applied by `transform`; kept as an attribute as before.
        self.func = self.process_df

    def transform(self, input_df, **transform_params):
        """Return the one-hour rolling maximum of ``input_df``.

        Extra keyword arguments are accepted and ignored.
        """
        return self.func(input_df)

    def fit(self, X, y=None, **fit_params):
        """No-op fit; returns ``self`` so calls can be chained."""
        return self

    @staticmethod
    def max_value_of_last_hour(row, frame=None):
        """Column-wise max of ``frame`` over the hour ending at ``row.name``.

        Parameters
        ----------
        row : pandas.Series
            A row of the frame; only its ``name`` (the index label) is used.
        frame : pandas.DataFrame, optional
            Frame to search for the rows in the window. When omitted, the
            notebook-level ``df_X`` is used, which is what this method
            always did previously; that fallback raises ``NameError`` if
            no such global exists.

        Returns
        -------
        pandas.Series
            The per-column maxima, named after ``row.name``.
        """
        if frame is None:
            # Legacy behaviour, kept only for callers that pass a bare row.
            frame = df_X
        end_ts = row.name
        start_ts = end_ts - 60 * 60 * 1000  # one hour, in milliseconds
        # Both ends of the window are inclusive.
        in_window = (frame.index >= start_ts) & (frame.index <= end_ts)
        return frame[in_window].max().rename(end_ts)

    @classmethod
    def process_df(cls, df):
        """Apply `max_value_of_last_hour` to every row of ``df``.

        The frame being transformed is passed along explicitly so that the
        windows are computed from the data actually given to `transform`,
        not from whatever global frame happens to exist.
        """
        return df.apply(cls.max_value_of_last_hour, axis=1, args=(df,))

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Preprocessing pipeline with three stages, run in this order:
#   1. 'max_of_last_hour' - the custom DataframeFunctionTransformer step
#   2. 'scaler'           - StandardScaler
#   3. 'pca'              - PCA down to two components (fixed random_state)
pipe = Pipeline(steps=[
    ('max_of_last_hour', DataframeFunctionTransformer()),
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=2, random_state=123)),
])

# Fit on the feature frame, then project that same frame through the
# fitted pipeline.
pipe.fit(df_X)
X_pca = pipe.transform(df_X)

In [None]:
# Fit a linear regression on the pipeline output and plot its predictions
# for that same data.
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

reg = LinearRegression()
reg.fit(X_pca, y)

preds = reg.predict(X_pca)
res = preds.flatten()  # one-dimensional copy of the predictions

plt.plot(res)

In [None]:
from quartic_sdk.model import BaseQuarticModel

class LinearRegressionModel(BaseQuarticModel):
    """Quartic model wrapper around a fitted pipeline and a fitted regressor.

    Parameters
    ----------
    name, description, log_level
        Forwarded unchanged to ``BaseQuarticModel.__init__``.
    model
        Object whose ``predict`` is called on the transformed features.
    pipe
        Object whose ``transform`` is called on the raw input frame.
    """

    def __init__(self, name, description, log_level, model, pipe):
        # The two collaborators are stored before the base initializer runs,
        # exactly as in the original ordering.
        self.model = model
        self.pipe = pipe
        super().__init__(
            name=name,
            description=description,
            log_level=log_level,
        )

    def predict(self, input_df: pd.DataFrame):
        """Transform ``input_df`` with the pipeline and predict from the result.

        Returns the flattened predictions as a ``pandas.Series`` with a
        default index.
        """
        self.log.info("Predicting...")
        projected = self.pipe.transform(input_df)
        raw_predictions = self.model.predict(projected)
        flat_predictions = raw_predictions.flatten()
        return pd.Series(flat_predictions)

# Wrap the fitted regressor together with the fitted preprocessing pipeline.
lr_pca_model = LinearRegressionModel(
    name="lr_by_pipe",
    description="model trained with pipeline",
    log_level="INFO",
    model=reg,
    pipe=pipe,
)

# Save the wrapped model through the API client, passing the feature frame
# as the test input.
lr_pca_model.save(
    client=client,
    output_tag_name='lr_by_pipe',
    feature_tags=feature_cols,
    target_tag=target_col,
    test_df=df_X,
)