In [12]:
import copy
from abc import ABC, abstractmethod

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
plt.style.use('dark_background')

In [13]:
def linear(n=1000, a=0, b=0.1, start_date='2000-01-01', seed=None):
    """Simulate a noisy linear relation ``y1 = a + b * x1 + noise`` on a daily index.

    Parameters
    ----------
    n : int
        Number of observations (rows).
    a, b : float
        Intercept and slope of the linear relation.
    start_date : str
        First date of the daily ``DatetimeIndex``.
    seed : int or None
        When given, draws come from a dedicated ``np.random.default_rng(seed)``
        so the output is reproducible. When None, the global ``np.random``
        state is used, exactly as before this parameter existed.

    Returns
    -------
    pd.DataFrame
        ``n`` rows indexed by date with columns ``['y1', 'x1']``.
    """
    # Both the legacy module and a Generator expose normal(loc, scale, size).
    rng = np.random if seed is None else np.random.default_rng(seed)
    x = rng.normal(0, 0.01, n)
    # the noise term has the same scale as the regressor itself
    y = a + b * x + rng.normal(0, 0.01, n)
    dates = pd.date_range(start_date, periods=n, freq='D')
    return pd.DataFrame({'y1': y, 'x1': x}, index=dates)
# Two independent draws from the same data-generating process (df1 is drawn first).
df1, df2 = (
    linear(n=1000, a=0, b=0.1, start_date='2000-01-01')
    for _ in range(2)
)

print(df1, df2, sep='\n')

                  y1        x1
2000-01-01  0.001233  0.002310
2000-01-02  0.010105 -0.002970
2000-01-03  0.003227  0.021693
2000-01-04 -0.000184 -0.009007
2000-01-05 -0.006589 -0.008609
...              ...       ...
2002-09-22 -0.011448  0.000793
2002-09-23  0.014882  0.008922
2002-09-24  0.002278  0.012777
2002-09-25  0.004196  0.012272
2002-09-26  0.001667  0.016200

[1000 rows x 2 columns]
                  y1        x1
2000-01-01 -0.003928  0.011351
2000-01-02  0.006271 -0.004165
2000-01-03 -0.016271  0.002832
2000-01-04  0.007524  0.011465
2000-01-05 -0.004329  0.010114
...              ...       ...
2002-09-22  0.001471  0.002168
2002-09-23  0.000751  0.014955
2002-09-24  0.016390 -0.006712
2002-09-25  0.000866  0.017879
2002-09-26  0.006694  0.005148

[1000 rows x 2 columns]


In [19]:
class ModelPipe:
    """Bundle a model with per-variable transforms and optional named sub-pipes.

    Attributes
    ----------
    key : str
        Name of this pipe ('Master' when no key is given).
    model : object
        Deep copy of the model passed to the constructor.
    transforms : dict
        Deep copy of the ``variable name -> transform`` mapping.
    model_pipes : dict
        Child pipes registered through ``add``, keyed by their name.
    """

    def __init__(self, key: str = 'Master', model=None, transforms=None):
        """Store private copies of ``model`` and ``transforms``.

        ``transforms`` defaults to None (instead of a shared ``{}``) and any
        falsy value becomes a fresh empty dict. Requires the ``copy`` module
        to be imported at the top of the notebook.
        """
        self.key = key or 'Master'
        # deep copies: the caller's objects are never mutated by this pipe
        self.model = copy.deepcopy(model)
        self.transforms = copy.deepcopy(transforms) if transforms else {}
        self.model_pipes = {}

    # add pipes
    def add(self, key, model, transforms):
        """Register a child pipe under ``key`` and return ``self`` for chaining."""
        # TODO: check types
        self.model_pipes[key] = ModelPipe(key, model, transforms)
        return self

    # estimate model
    def estimate(self, data):
        """Estimate the transforms, apply them, then estimate the model on ``data``.

        Returns ``self`` so calls can be chained.
        """
        self.estimate_transforms(data)
        self.apply_transforms(data)
        self.estimate_model(data)
        return self

    def estimate_transforms(self, data):
        """Fit every registered transform on the attribute of ``data`` it is keyed by."""
        for variable, transform in self.transforms.items():
            transform.estimate(getattr(data, variable))

    def apply_transforms(self, data):
        """Placeholder: currently a no-op, ``data`` is left untouched.

        TODO: apply each fitted transform to its variable (the previous
        commented-out draft called ``estimate`` again instead of transforming).
        """

    def estimate_model(self, data):
        """Fit the model on ``data`` and keep a copy of the estimation data.

        The copy is kept so evaluation data can later be checked against what
        the model was estimated on; storing only metadata (e.g. the columns)
        may be enough.
        """
        self._estimate_data = data.copy()
        # the model receives every field of the data object as a keyword argument
        self.model.estimate(**data.as_dict())
        return self

    def _transform(self, variable, value):
        """Apply the transform registered for ``variable``; pass through when none is.

        NOTE(review): the original code called ``self.transform_x/_y/_t(value, True)``,
        which are not defined on this class. This helper assumes transforms expose
        ``transform(value)`` (mirroring the positional ``estimate(value)`` call in
        ``estimate_transforms``) and that the dropped ``True`` flag is not needed -
        TODO confirm.
        """
        transform = self.transforms.get(variable)
        if transform is None:
            return value
        return transform.transform(value)

    # get weight
    def get_weight(self, xq=None, x=None, y=None, z=None, t=None,
                   apply_transform_x=True, apply_transform_t=True, apply_transform_y=True):
        """Transform the inputs as requested and ask the model for a weight.

        ``xq`` shares the transform registered for ``'x'``. ``z`` is forwarded
        unchanged. Inputs that are None are never transformed. All five inputs
        are forwarded to ``self.model.get_weight`` as keyword arguments.
        """
        if apply_transform_y and y is not None:
            y = self._transform('y', y)
        if apply_transform_x and x is not None:
            x = self._transform('x', x)
        if apply_transform_t and t is not None:
            t = self._transform('t', t)
        if apply_transform_x and xq is not None:
            xq = self._transform('x', xq)
        return self.model.get_weight(y=y, x=x, xq=xq, z=z, t=t)

    def live(self, data, idx=None):
        """Return the model weight for observation ``idx`` of ``data``.

        Has the same functional form as one step of ``evaluate``.
        """
        # assumes data.model_input(idx) returns a mapping whose keys are a subset
        # of {'xq', 'x', 'y', 'z', 't'} - TODO confirm against the Dataset class
        inputs = data.model_input(idx)
        return self.get_weight(**inputs)

    def evaluate(self, data):
        """Evaluate the model using the test data and return performance metrics.

        Fills ``data.w`` row by row with the live weights, then sets ``data.s``
        to the row-wise dot product of ``data.w`` and ``data.y``. ``data`` is
        modified in place and returned.
        """
        for i in range(data.n):
            data.w[i] = self.live(data, i)
        # compute performance
        data.s = np.einsum('ij,ij->i', data.w, data.y)
        return data

    def do_something(self, factor=1):
        """Describe this pipe and, recursively, every child pipe.

        Returns ``{"self": <description>, "children": {key: <child result>}}``.

        NOTE(review): the original read ``self.value`` / ``self.children``, which
        this class never sets; the pipe's ``key`` and ``model_pipes`` are used
        instead - TODO confirm this is the intended mapping.
        """
        result = f"Value: {self.key} (x{factor})"
        # Recursively call do_something on all children and collect their results.
        children_results = {
            key: child.do_something(factor)
            for key, child in self.model_pipes.items()
        }
        return {"self": result, "children": children_results}

# Usage:
# NOTE(review): `A` is not defined in any visible cell of this notebook, so this
# demo (and the recorded output below it) presumably comes from an earlier
# version of the cell - TODO confirm and restore the definition, otherwise
# this fails with NameError on a fresh kernel.
root = A("root")
root.add_child("child1", A(10))
root.add_child("child2", A(20))
# Further nesting:
root.children["child1"].add_child("grandchild", A(5))

# Calling do_something on the root propagates through the tree.
import pprint
pprint.pprint(root.do_something(factor=2))


{'children': {'child1': {'children': {'grandchild': {'children': {},
                                                     'self': 'Value: 5 (x2)'}},
                         'self': 'Value: 10 (x2)'},
              'child2': {'children': {}, 'self': 'Value: 20 (x2)'}},
 'self': 'Value: root (x2)'}


In [20]:
# Scratch check of the built-in slice constructor: start=0, stop=5, step left as None.
slice(0,5)

slice(0, 5, None)

In [7]:
# abstract classes

class PredictiveDistribution:
    """Holds the ``mean`` and ``cov`` of a predictive distribution."""

    def __init__(self, mean, cov):
        # stored exactly as given: no validation, no copy
        self.mean, self.cov = mean, cov

    def get_weight(self):
        """Placeholder, not implemented yet: always returns None."""
        return None
    

class Weight:
    """Thin wrapper around a weight array, exposed as the ``w`` attribute."""

    def __init__(self, w: np.ndarray):
        # keeps a reference to the caller's array (no copy is made)
        self.w = w

class Model(ABC):
    """Model template: concrete models must provide ``estimate`` and ``get_weight``."""

    @abstractmethod
    def estimate(self, y: np.ndarray, **kwargs):
        """Fit the model on ``y`` plus any extra keyword inputs (subclass hook)."""
        return None

    @abstractmethod
    def get_weight(self, **kwargs) -> Weight:
        """Produce a ``Weight`` from keyword inputs (subclass hook)."""
        return None

# Portfolio Model class template
class PortfolioModel(ABC):
    """Portfolio model template: subclasses must provide ``view`` and ``estimate``."""

    @abstractmethod
    def view(self):
        """Subclass hook; the base implementation does nothing."""
        return None

    @abstractmethod
    def estimate(self, **kwargs):
        """Subclass hook taking keyword inputs; the base implementation does nothing."""
        return None

# Data Transform template
class Transform(ABC):
    """Data transform template.

    Subclasses must provide ``view``, ``estimate``, ``transform`` and
    ``inverse_transform``; the base implementations do nothing.
    """

    @abstractmethod
    def view(self):
        """Subclass hook; the base implementation does nothing."""
        return None

    @abstractmethod
    def estimate(self, **kwargs):
        """Subclass hook taking keyword inputs."""
        return None

    @abstractmethod
    def transform(self, **kwargs):
        """Subclass hook taking keyword inputs."""
        return None

    @abstractmethod
    def inverse_transform(self, **kwargs):
        """Subclass hook taking keyword inputs."""
        return None



In [18]:
class MyClass:
    """Dict-backed tree node whose missing keys are created on first read.

    Reading ``node[key]`` for an unknown key stores and returns a new, empty
    ``MyClass``, so nested assignment such as ``root['a']['b'] = 42`` works
    without creating the intermediate levels by hand.
    """

    def __init__(self):
        self._data = dict()

    def __getitem__(self, key):
        try:
            return self._data[key]
        except KeyError:
            # first access to this key: create and remember an empty sub-tree
            node = MyClass()
            self._data[key] = node
            return node

    def __setitem__(self, key, value):
        self._data[key] = value

    def __delitem__(self, key):
        # raises KeyError for a missing key, like deleting from a dict
        self._data.pop(key)

    def __iter__(self):
        return iter(self._data.keys())

    def __len__(self):
        return len(self._data)

    def keys(self):
        """Live view of the stored keys."""
        return self._data.keys()

    def values(self):
        """Live view of the stored values."""
        return self._data.values()

    def items(self):
        """Live view of the stored (key, value) pairs."""
        return self._data.items()

    def do_something(self, *args, **kwargs):
        """Print a line for this node, then recurse into every ``MyClass`` child.

        Non-``MyClass`` values (leaves) are skipped; the arguments are passed
        down unchanged.
        """
        print("Doing something on", self)
        subtrees = (child for child in self._data.values() if isinstance(child, MyClass))
        for subtree in subtrees:
            subtree.do_something(*args, **kwargs)

    def __repr__(self):
        return f"{self._data!r}"

# Usage example: reading root['a'] creates the intermediate MyClass node on the fly,
# so the nested assignment below needs no setup.
root = MyClass()
root['a']['b'] = 42
print(f"Tree structure: {root}")
root.do_something()

Tree structure: {'a': {'b': 42}}
Doing something on {'a': {'b': 42}}
Doing something on {'b': 42}


In [None]:
# for a single dataset..
# NOTE(review): `Dataset`, `df`, `model`, `transforms` and `dataset_test` are not
# defined in the visible cells - presumably this is a usage sketch; TODO confirm.

dataset = Dataset(df)

# Pass model/transforms by keyword: the first positional parameter of
# ModelPipe.__init__ is `key`, so ModelPipe(model, transforms) would store the
# model as the key and the transforms as the model.
model_pipe = ModelPipe(model=model, transforms=transforms)
model_pipe.estimate(dataset)

model_pipe.evaluate(dataset_test)
# best to store it all on model_pipe!
# model_pipe acts on data...
# more similar to first version!





In [None]:

# how should a workflow operate?
# -----
# what does a model pipeline have?
# - can act on different datasets
# - if not specified, just apply the same model to all datasets; otherwise it needs to be
# compatible with the data
# - if specified, join all data to train it as a "master" model
# - data can undergo transformations
# - has a portfolio model that specifies how the models should be joined!
# this portfolio model can also take strategy performance statistics into account
# to make the decision on how to allocate
# ALSO
# must specify how the models are trained and evaluated!

# what do we have?
# - dataset
# - portfolio model
# - transforms
# - model

    

    
    
    
    
    
    
    
class ModelPipe:
    """Interface sketch of a model pipe that is estimated and evaluated on datasets.

    NOTE(review): this redefines the ``ModelPipe`` class declared earlier in
    this notebook; running this cell shadows that implementation.
    """

    def __init__(self):
        # dataset passed to the last `estimate` call; None until estimated
        self.estimate_dataset = None

    def estimate(self, dataset: 'Dataset'):
        '''
        After estimate the model pipe is configured
        to work on data that has the same format.

        The dataset is remembered so `evaluate` can check compatibility.
        '''
        self.estimate_dataset = dataset

    def evaluate(self, dataset: 'Dataset'):
        '''
        Need to check that the input dataset
        is compatible with the one the pipe was trained on.

        Raises AssertionError when `estimate` has not been called yet or when
        the dataset is not compatible with the estimation dataset.
        '''
        assert self.estimate_dataset is not None, "estimate must be called before evaluate"
        assert self.estimate_dataset.is_compatible(dataset), "can only evaluate in compatible datasets"
        # TODO: compute the evaluation output. Until then the dataset itself is
        # returned (the original returned an undefined name `res`), mirroring
        # the earlier ModelPipe.evaluate which returns its data argument.
        res = dataset
        return res

    def get_weight(self):
        """Placeholder, not implemented yet: returns None."""
        pass
    


# create model pipe
# NOTE(review): `model`, `load_model` and `Dataset` are not defined in the
# visible cells - presumably this cell is a design sketch rather than runnable
# code; TODO confirm.

# when we run estimate we can do
model.estimate(dataset)
# and the model gets trained

# when we run evaluate we can do
out = model.evaluate(dataset)
# and we get the output of an estimation

# so, when we do cvbt, the model can make many calls to estimate,
# but internally it builds the splits and the necessary calls to
# estimate and evaluate
out = model.cvbt(dataset)

# WHAT DO WE NEED?
# the dataset must make sense for the model that was defined...
# add checks for this?





# for live we have a pd.DataFrame and a trained model, and we
# need to call something like
model = load_model('filemodel.pkl')
model.get_weight(dataset) 
# or should it be
model.live(dataset)
# ?






# create dataset from input dataframes
dataset = Dataset({'dataset1':df1, 'dataset2':df2})



