In [5]:
import numpy as np
import pandas as pd
from abc import ABC, abstractmethod
import matplotlib.pyplot as plt
# Apply matplotlib's built-in 'dark_background' style sheet to figures drawn after this point.
plt.style.use('dark_background')

In [6]:
def linear(n=1000, a=0, b=0.1, start_date='2000-01-01', seed=None, x_std=0.01, noise_std=0.01):
    """Simulate a noisy linear relation ``y1 = a + b * x1 + noise`` on a daily index.

    Parameters
    ----------
    n : int
        Number of observations (rows) to generate.
    a : float
        Intercept of the linear relation.
    b : float
        Slope applied to the regressor ``x1``.
    start_date : str or datetime-like
        First date of the daily ``DatetimeIndex``.
    seed : int, optional
        When given, draws come from a private ``np.random.RandomState(seed)``
        so repeated calls return identical data. When ``None`` (default) the
        global ``np.random`` stream is used, exactly as before.
    x_std : float
        Standard deviation of the zero-mean Gaussian regressor.
    noise_std : float
        Standard deviation of the zero-mean Gaussian noise added to ``y1``.

    Returns
    -------
    pandas.DataFrame
        ``n`` rows, columns ``['y1', 'x1']`` (in that order), indexed by
        consecutive calendar days starting at ``start_date``.
    """
    # Both the numpy module and RandomState expose the same ``normal`` API, so the
    # unseeded path keeps consuming the global stream (backward compatible).
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Draw order matters for reproducibility: regressor first, then the noise.
    x = rng.normal(0, x_std, n)
    noise = rng.normal(0, noise_std, n)
    y = a + b * x + noise

    dates = pd.date_range(start_date, periods=n, freq='D')
    return pd.DataFrame({'y1': y, 'x1': x}, index=dates)
# Two independent draws from the same generator settings; the generator expression
# is consumed left to right, so df1 is simulated before df2.
df1, df2 = (
    linear(n=1000, a=0, b=0.1, start_date='2000-01-01')
    for _ in range(2)
)

# A single print with a newline separator emits the same text as printing each frame in turn.
print(df1, df2, sep='\n')

                  y1        x1
2000-01-01 -0.002909 -0.000349
2000-01-02 -0.014925  0.003193
2000-01-03 -0.001564  0.003032
2000-01-04 -0.001951 -0.014094
2000-01-05  0.009174 -0.005012
...              ...       ...
2002-09-22 -0.008811  0.001785
2002-09-23  0.022967  0.008881
2002-09-24 -0.006981  0.000270
2002-09-25  0.010244  0.013733
2002-09-26 -0.015992 -0.001439

[1000 rows x 2 columns]
                  y1        x1
2000-01-01  0.003965 -0.010379
2000-01-02 -0.006044  0.015378
2000-01-03 -0.007262  0.014449
2000-01-04  0.003556 -0.001425
2000-01-05 -0.005290 -0.002867
...              ...       ...
2002-09-22 -0.014068 -0.017610
2002-09-23 -0.008269 -0.002185
2002-09-24  0.005935  0.008227
2002-09-25  0.013204  0.013438
2002-09-26 -0.011300 -0.012476

[1000 rows x 2 columns]


In [7]:
# abstract classes

class PredictiveDistribution:
    """Holds the ``mean`` and ``cov`` that describe a predictive distribution.

    Both values are stored exactly as passed in; no validation or copying is done.
    """

    def __init__(self, mean, cov):
        self.mean, self.cov = mean, cov

    def get_weight(self):
        """Placeholder: performs no computation and returns ``None``."""
        return None
    

class Weight:
    """Thin wrapper that carries a weight array in its ``w`` attribute."""

    def __init__(self, w: np.ndarray):
        # Stored by reference: no copy and no shape/dtype check.
        self.w = w

class Model(ABC):
    """Abstract interface for a model that can be estimated and queried for a weight.

    Concrete subclasses have to override both ``estimate`` and ``get_weight``;
    until they do, the class cannot be instantiated.
    """

    @abstractmethod
    def estimate(self, y: np.ndarray, **kwargs):
        """Estimate the model from ``y``. Must be overridden by subclasses."""
        return None

    @abstractmethod
    def get_weight(self, **kwargs) -> Weight:
        """Produce a ``Weight``. Must be overridden by subclasses."""
        return None

# Portfolio Model class template
class PortfolioModel(ABC):
    """Abstract template for portfolio models.

    Subclasses must provide ``view`` and ``estimate`` before they can be instantiated.
    """

    @abstractmethod
    def view(self):
        """Abstract hook with no base behaviour (returns ``None``)."""
        return None

    @abstractmethod
    def estimate(self, **kwargs):
        """Abstract estimation hook taking keyword arguments only; must be overridden."""
        return None

# Data Transform template
class Transform(ABC):
    """Abstract template for data transforms.

    A concrete transform has to implement all four hooks below
    (``view``, ``estimate``, ``transform``, ``inverse_transform``)
    before it can be instantiated.
    """

    @abstractmethod
    def view(self):
        """Abstract hook with no base behaviour (returns ``None``)."""
        return None

    @abstractmethod
    def estimate(self, **kwargs):
        """Abstract estimation hook taking keyword arguments only; must be overridden."""
        return None

    @abstractmethod
    def transform(self, **kwargs):
        """Abstract forward-transform hook taking keyword arguments only; must be overridden."""
        return None

    @abstractmethod
    def inverse_transform(self, **kwargs):
        """Abstract inverse-transform hook taking keyword arguments only; must be overridden."""
        return None



In [None]:
# Design sketch: intended usage for a single dataset.
# NOTE(review): this cell is not runnable as written -- `Dataset`, `df`, `model`,
# `transforms` and `dataset_test` are not defined in any cell above it.

# Wrap the raw frame in a Dataset object.
dataset = Dataset(df)



# NOTE(review): `ModelPipe` is only defined in a later cell; confirm that its
# constructor accepts (model, transforms) before running this.
model_pipe = ModelPipe(model, transforms)
model_pipe.estimate(dataset)

# Evaluation is meant to run on a separate dataset from the one used in estimate().
model_pipe.evaluate(dataset_test)
# Design notes:
# best to store it all on model_pipe!
# model_pipe acts on data...
# more similar to first version!



In [None]:

# how should a workflow operate?
# -----
# what does a model pipeline have?
# - it can act on different datasets
# - if not specified, just apply the same model to all datasets; otherwise it needs to be compatible
# with the data
# - if specified, join all data to train it as a "master" model
# - data can undergo transformations
# - it has a portfolio model that specifies how the models should be joined!
# this portfolio model can also take strategy performance statistics into account
# when deciding how to allocate
# ALSO
# must specify how the models are trained and evaluated!

# what do we have?
# - dataset
# - portfolio model
# - transforms
# - model

    

    
    
    
    
    
    
    
class ModelPipe:
    """Couples a model with its data transforms and remembers the training dataset.

    This is still a skeleton: fitting and scoring are not implemented. What the
    class does today is keep track of the dataset it was estimated on, so that
    ``evaluate`` can reject datasets that are not compatible with it.
    """

    def __init__(self, model=None, transforms=None):
        """
        Parameters
        ----------
        model : optional
            Model driven by the pipe. Stored as-is.
        transforms : optional
            Transforms to apply to the data. Stored as-is; an empty list is
            used when omitted.
        """
        self.model = model
        # Fresh list per instance so pipes never share a mutable default.
        self.transforms = [] if transforms is None else transforms
        # Filled in by estimate(); None means "not estimated yet".
        self.estimate_dataset = None

    # The annotations below are strings so the class can be defined even when
    # ``Dataset`` has not been declared yet (annotations are otherwise evaluated
    # at definition time).
    def estimate(self, dataset: 'Dataset'):
        '''
        After estimate, the model pipe is configured
        to work on data that has the same format.

        Only the dataset reference is recorded for now.
        TODO: fit the transforms and the model on ``dataset``.
        '''
        self.estimate_dataset = dataset

    def evaluate(self, dataset: 'Dataset'):
        '''
        Check that the input dataset is compatible with the one
        the pipe was estimated on, then evaluate on it.

        Raises
        ------
        RuntimeError
            If ``estimate`` has not been called yet.
        AssertionError
            If ``dataset`` is not compatible with the estimation dataset.
        NotImplementedError
            Always, once the checks pass: evaluation itself is not written yet.
        '''
        if self.estimate_dataset is None:
            raise RuntimeError("estimate must be called before evaluate")

        # AssertionError is kept as the exception type for backward compatibility,
        # but it is raised explicitly so the check is not stripped under `python -O`.
        if not self.estimate_dataset.is_compatible(dataset):
            raise AssertionError("can only evaluate in compatible datasets")

        # Fail loudly instead of returning an undefined result.
        raise NotImplementedError("ModelPipe.evaluate is not implemented yet")

    def get_weight(self):
        """Placeholder: performs no computation and returns ``None``."""
        return None
    


# Design sketch of the intended workflow API.
# NOTE(review): not runnable as written -- `model`, `load_model` and `Dataset`
# are not defined in the visible cells, and within this cell `dataset` is used
# before the assignment at the bottom.

# create model pipe

# when we run estimate we can do
model.estimate(dataset)
# and the model gets trained

# when we run evaluate we can do
out = model.evaluate(dataset)
# and we get the output of an estimation

# so, when we do cvbt, the model can make many calls to estimate,
# but internally it builds the splits and makes the necessary calls
# to estimate and evaluate
out = model.cvbt(dataset)

# WHAT DO WE NEED?
# dataset must make sense for the model that was defined...
# add checks for this?





# for live we have a pd.DataFrame and a trained model and we
# need to call something like
# NOTE(review): presumably `load_model` unpickles the file -- if so, only load
# trusted files; confirm against its implementation.
model = load_model('filemodel.pkl')
model.get_weight(dataset) 
# or should it be 
model.live(dataset)
# ?






# create dataset from input dataframes
# (keys name each dataset; df1 and df2 are the simulated frames built earlier)
dataset = Dataset({'dataset1':df1, 'dataset2':df2})



