In [0]:
!pip3 install -U pip
!pip3 install -U setuptools wheel

# CPU version of pytorch has smaller footprint - see installation instructions in
# pytorch documentation - https://pytorch.org/get-started/locally/
!pip3 install torch==1.12+cpu torchvision==0.13.0+cpu torchtext==0.13.0 -f https://download.pytorch.org/whl/cpu/torch_stable.html

!pip3 install autogluon
!pip install yfinance sklearn
!pip install pandas_datareader

Collecting pip
  Downloading pip-22.3.1-py3-none-any.whl (2.1 MB)
[?25l[K     |▏                               | 10 kB 21.0 MB/s eta 0:00:01[K     |▎                               | 20 kB 6.4 MB/s eta 0:00:01[K     |▌                               | 30 kB 9.1 MB/s eta 0:00:01[K     |▋                               | 40 kB 4.5 MB/s eta 0:00:01[K     |▉                               | 51 kB 4.9 MB/s eta 0:00:01[K     |█                               | 61 kB 5.9 MB/s eta 0:00:01[K     |█▏                              | 71 kB 6.1 MB/s eta 0:00:01[K     |█▎                              | 81 kB 6.8 MB/s eta 0:00:01[K     |█▍                              | 92 kB 5.3 MB/s eta 0:00:01[K     |█▋                              | 102 kB 5.2 MB/s eta 0:00:01[K     |█▊                              | 112 kB 5.2 MB/s eta 0:00:01[K     |██                              | 122 kB 5.2 MB/s eta 0:00:01[K     |██                              | 133 kB 5.2 MB/s eta 0:00:01[K     |█

In [0]:
import warnings
from autogluon.tabular import TabularDataset, TabularPredictor 
import numpy as np
import datetime
from pandas_datareader import data as pdr
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
import pandas as pd
from platform import python_version
import mlflow.sklearn


def acquire_training_data(symbol="BTC-USD", start=None, end=None):
    """Download daily price history for one ticker from Yahoo Finance.

    Parameters
    ----------
    symbol : str, optional
        Yahoo Finance ticker to download. Defaults to ``"BTC-USD"``.
    start : datetime.datetime, optional
        First day of the requested range. Defaults to 2022-01-01.
    end : datetime.datetime, optional
        End of the requested range. Defaults to 2022-12-31.

    Returns
    -------
    The object returned by ``pdr.get_data_yahoo`` for the requested range.
    Calling the function without arguments reproduces the original
    hard-coded download (BTC-USD over calendar year 2022).
    """
    # Route pandas_datareader's Yahoo reader through yfinance.
    # NOTE(review): newer yfinance releases may no longer ship pdr_override -
    # confirm against the pinned yfinance version.
    yf.pdr_override()

    if start is None:
        start = datetime.datetime(2022, 1, 1)
    if end is None:
        end = datetime.datetime(2022, 12, 31)

    # The ticker is passed as a one-element list, exactly as before.
    return pdr.get_data_yahoo([symbol], start=start, end=end)


def digitize(n):
    """Map a number to a binary label: 1 if it is strictly positive, else 0."""
    return 1 if n > 0 else 0


def rolling_window(a, window):
    """
    Build every consecutive run of length 'window' along the last axis of 'a'.

    Parameters:
        a      -- np.array to slide over
        window -- number of elements in each run

    The result has shape a.shape[:-1] + (a.shape[-1] - window + 1, window).
    e.g. Input: ( np.array([1, 2, 3, 4, 5, 6]), 4 )
         Output:
                 array([[1, 2, 3, 4],
                       [2, 3, 4, 5],
                       [3, 4, 5, 6]])

    The result is produced with as_strided, i.e. it is a strided view onto the
    memory of 'a' rather than a copy, and its rows overlap in memory.
    """
    n_windows = a.shape[-1] - window + 1
    # Moving one window forward and moving one element forward inside a window
    # are the same step in memory, so the last stride is simply repeated.
    step = a.strides[-1]
    return np.lib.stride_tricks.as_strided(
        a,
        shape=(*a.shape[:-1], n_windows, window),
        strides=(*a.strides, step),
    )

class AutogluonModel(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper that serves predictions from an AutoGluon TabularPredictor."""

    def load_context(self, context):
        """Load the predictor stored under the "predictor_path" artifact of `context`."""
        predictor_dir = context.artifacts.get("predictor_path")
        self.predictor = TabularPredictor.load(predictor_dir)

    def predict(self, context, model_input):
        """Return the loaded predictor's predictions for `model_input`; `context` is unused."""
        predictions = self.predictor.predict(model_input)
        return predictions
def prepare_training_data(data):
    """
    Add the derived training columns to a price DataFrame.

    input  : pandas DataFrame with at least the columns "Open" and "Close"
    output : the same DataFrame object, with two columns added or overwritten:
             "Delta"      - Close minus Open
             "to_predict" - 1 where Delta is strictly positive, otherwise 0

    Note: `data` is modified in place and also returned.
    """
    delta = data["Close"] - data["Open"]
    data["Delta"] = delta
    # Vectorised form of the rule implemented by digitize(): 1 if > 0 else 0.
    data["to_predict"] = delta.gt(0).astype("int64")
    return data

def prepare_data(X, Y, test_size=0.25, random_state=4284):
    """
    Combine a feature matrix and a label vector into one frame and split it.

    X            : 2-D array-like; column i becomes the feature 'day_<i>'
    Y            : 1-D array-like of labels; becomes the column 'to_predict'
    test_size    : fraction of rows placed in the test split (default 0.25)
    random_state : seed forwarded to train_test_split (default 4284)

    Returns (train_data, test_data), two DataFrames holding the feature
    columns followed by 'to_predict'.
    """
    features = pd.DataFrame(X)
    # Name the columns from the actual width of X instead of assuming 14, so
    # the function keeps working when the window size changes.
    features.columns = ['day_' + str(i) for i in range(features.shape[1])]

    labels = pd.DataFrame(Y)
    labels.columns = ['to_predict']

    df = pd.concat([features, labels], axis=1)

    train_data, test_data = train_test_split(
        df, test_size=test_size, random_state=random_state
    )
    return train_data, test_data

def log_model(predictor=None):
    """
    Log an AutoGluon predictor to the active MLflow run as a pyfunc model.

    predictor : fitted TabularPredictor to log. When omitted, the module-level
                variable `predictor` is used, so existing `log_model()` calls
                keep working.

    The model is stored under the artifact path "model" and wrapped in
    AutogluonModel.
    """
    if predictor is None:
        # Backward-compatible fallback for callers that relied on the global.
        predictor = globals()["predictor"]

    # AutogluonModel.load_context passes this artifact to TabularPredictor.load,
    # so it has to be the predictor's own directory rather than the sub-folder
    # of a single model. The saved predictor still knows which model is best.
    artifacts = {"predictor_path": predictor.path}

    # pip requirements must be nested inside 'dependencies' as {'pip': [...]};
    # a top-level 'pip' key is not part of the conda environment format.
    # NOTE(review): autogluon itself is not listed here - confirm whether the
    # serving environment needs it pinned as well.
    conda_env = {
        'name': 'mlflow-env',
        'channels': ['conda-forge'],
        'dependencies': [
            f'python={python_version()}',
            'pip',
            {
                'pip': [
                    f'mlflow=={mlflow.__version__}',
                    'cloudpickle==2.2.0',
                ],
            },
        ],
    }

    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=AutogluonModel(),
        artifacts=artifacts,
        conda_env=conda_env,
    )


def create_autogluon_experiment(train_df):
    """
    Fit an AutoGluon TabularPredictor on `train_df` and return it.

    The label column is 'to_predict', the evaluation metric is accuracy and
    the 'medium_quality' preset is used.
    """
    predictor = TabularPredictor(label='to_predict', eval_metric='accuracy')
    return predictor.fit(train_data=train_df, verbosity=2, presets='medium_quality')


def log_experiments(predictor):
    """
    Create one MLflow run per model on the predictor's leaderboard.

    Each run is named after the model and records its hyper-parameters (plus
    'model_type') as params and its validation score as the metric 'acc'.
    The pyfunc model is logged once, inside the run of the first leaderboard
    entry.
    """
    model_names = list(predictor.leaderboard(silent=True)['model'])
    # Fetch the model info once instead of on every loop iteration.
    model_info = predictor.info()['model_info']

    for i, model_name in enumerate(model_names):
        with mlflow.start_run(run_name=model_name):
            if i == 0:
                # Pass the predictor explicitly instead of relying on a global.
                log_model(predictor)
            info = model_info[model_name]
            # Copy so that the dict returned by the predictor is not modified.
            hyper_params = dict(info['hyperparameters'])
            hyper_params['model_type'] = info['model_type']
            mlflow.log_params(hyper_params)
            mlflow.log_metric('acc', info['val_score'])





In [0]:
# Data preparation
training_data = acquire_training_data()
prepared_training_data_df = prepare_training_data(training_data)
btc_mat = prepared_training_data_df.to_numpy()
WINDOW_SIZE = 14

# Features: the up/down labels of the previous WINDOW_SIZE days.
# The label series is selected by name rather than by the positional index 7,
# which only pointed at "to_predict" while the download returned exactly six
# columns. NOTE(review): confirm column 7 of btc_mat was "to_predict".
# The float cast keeps the dtype the features had when sliced out of btc_mat.
direction = prepared_training_data_df["to_predict"].to_numpy(dtype="float64")
# The last window is dropped because no following day exists to label it.
X = rolling_window(direction, WINDOW_SIZE)[:-1, :]
# Target: the label of the day right after each window.
Y = prepared_training_data_df["to_predict"].to_numpy()[WINDOW_SIZE:]
train_data,test_data=prepare_data(X,Y)

# AutoML model selection
predictor=create_autogluon_experiment(train_data)

# Logging of AutoML experiments
log_experiments(predictor)

[*********************100%***********************]  1 of 1 completedNo path specified. Models will be saved in: "AutogluonModels/ag-20230106_161632/"
Presets specified: ['medium_quality']
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20230106_161632/"
AutoGluon Version:  0.6.1
Python Version:     3.9.5
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Sep 16 14:52:22 UTC 2022
Train Data Rows:    262
Train Data Columns: 14
Label Column: to_predict
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [1, 0]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fittin

In [0]:
# silent=True stops leaderboard() from also printing the table, so the result
# appears once, as the cell's rich DataFrame output.
predictor.leaderboard(silent=True)

                  model  score_val  pred_time_val  fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0        NeuralNetTorch   0.566038       0.003399  0.579196                0.003399           0.579196            1       True         10
1   WeightedEnsemble_L2   0.566038       0.004007  0.937660                0.000608           0.358464            2       True         12
2              CatBoost   0.547170       0.001951  0.527738                0.001951           0.527738            1       True          5
3       NeuralNetFastAI   0.547170       0.015572  2.043978                0.015572           2.043978            1       True          8
4              LightGBM   0.528302       0.002355  0.355362                0.002355           0.355362            1       True          2
5         LightGBMLarge   0.528302       0.002640  0.457797                0.002640           0.457797            1       True         11
6            LightGBMXT   0.528302

Unnamed: 0,model,score_val,pred_time_val,fit_time,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,NeuralNetTorch,0.566038,0.003399,0.579196,0.003399,0.579196,1,True,10
1,WeightedEnsemble_L2,0.566038,0.004007,0.93766,0.000608,0.358464,2,True,12
2,CatBoost,0.54717,0.001951,0.527738,0.001951,0.527738,1,True,5
3,NeuralNetFastAI,0.54717,0.015572,2.043978,0.015572,2.043978,1,True,8
4,LightGBM,0.528302,0.002355,0.355362,0.002355,0.355362,1,True,2
5,LightGBMLarge,0.528302,0.00264,0.457797,0.00264,0.457797,1,True,11
6,LightGBMXT,0.528302,0.003163,1.88842,0.003163,1.88842,1,True,1
7,ExtraTreesEntr,0.471698,0.078136,0.676687,0.078136,0.676687,1,True,7
8,RandomForestGini,0.45283,0.07214,0.673203,0.07214,0.673203,1,True,3
9,RandomForestEntr,0.415094,0.070553,0.673339,0.070553,0.673339,1,True,4
