In [1]:
import torch
import gp_math_func as MF
import gp_factor_func as FF
# Synthetic panel used to exercise the combiner: n_time dates x n_stock stocks x n_factor factors.
n_time = 200
n_stock = 100
n_factor = 10

# Draw from a dedicated, seeded generator so that "Restart & Run All" reproduces the same
# data, without touching the global torch RNG state.
seed = 42
rng = torch.Generator().manual_seed(seed)
x = torch.rand(n_time , n_stock , n_factor , generator = rng)
y = torch.rand(n_time , n_stock , 1 , generator = rng)

# Blank out the top and bottom 10% of the uniform labels to mimic missing data.
nan = (y > 0.9) | (y < 0.1)
y[nan] = torch.nan

gp_factor = x
labels = y
# (n_time , n_stock) mask that is True where the label was NOT blanked out.
universe = ~nan.all(-1)
# The first 160 dates form the in-sample period.
insample = torch.arange(n_time) < 160
ir_window = 10
roll_window = 10
halflife  = 5


In [2]:
# Combiner settings; the options listed mirror the asserts in MultiFactor.__init__.
weight_scheme = 'ir'        # one of 'ew' , 'ic' , 'ir'
window_type   = 'rolling'   # one of 'rolling' , 'insample'
weight_decay  = 'exp'       # one of 'constant' , 'linear' , 'exp'

class MultiFactor:
    """Combine several single factors into one composite factor.

    Factor weights are derived from a per-factor metric history (``ic`` or ``ir``), either
    once over the in-sample rows (``window_type = 'insample'``) or on a rolling window of
    past rows (``window_type = 'rolling'``).

    Parameters
    ----------
    weight_scheme : {'ew' , 'ic' , 'ir'}
        'ew' uses only the sign of each factor's mean metric; 'ic' / 'ir' use the
        decay-weighted mean of the corresponding metric.
    window_type : {'rolling' , 'insample'}
    weight_decay : {'constant' , 'linear' , 'exp'}
        Time-decay kind, forwarded to ``FF.decay_weight``.
    ir_window : int
        Window passed to ``MF.ma`` / ``MF.ts_stddev`` when computing the IR.
    roll_window : int
        Number of past rows used for each rolling weight estimate.
    halflife : int
        Forwarded to ``FF.decay_weight`` as ``exp_halflife``.
    insample : index or mask, optional
        Applied along dim 0 of the metric for in-sample weighting.
    universe , min_coverage :
        Forwarded unchanged to ``MF.rankic_2d``.
    """
    def __init__(self , weight_scheme = 'ir', window_type = 'rolling', weight_decay= 'exp' , 
                 ir_window = 10 , roll_window = 10 , halflife  = 5 , 
                 insample = None , universe = None , min_coverage = 0.1) -> None:
        assert weight_scheme in ['ew' , 'ic' , 'ir']
        assert window_type   in ['rolling' , 'insample']
        assert weight_decay  in ['constant' , 'linear' , 'exp']
        self.weight_scheme = weight_scheme
        self.window_type   = window_type
        self.weight_decay  = weight_decay
        self.ir_window     = ir_window
        self.roll_window   = roll_window
        self.halflife      = halflife
        self.insample      = insample
        self.universe      = universe
        self.min_coverage  = min_coverage

    def ts_decay(self , max_len , weight_decay = None , halflife = None):
        """Return ``FF.decay_weight`` for ``max_len`` rows; falsy overrides fall back to the instance settings."""
        weight_decay = weight_decay if weight_decay else self.weight_decay
        halflife     = halflife     if halflife     else self.halflife
        return FF.decay_weight(weight_decay , max_len , exp_halflife=halflife)

    @staticmethod
    def _normalize_and_cap(w , relative_weight_cap):
        """Scale ``w`` in place to unit L1 norm along the last dim, cap it, and rescale.

        The cap is ``relative_weight_cap / n_factor``.
        NOTE(review): the cap is one-sided -- only weights above the positive cap are
        clipped; large negative weights are left as they are.
        """
        w /= w.abs().sum(-1,keepdim=True)
        cap = relative_weight_cap / w.shape[-1]
        w[w > cap] = cap
        w /= w.abs().sum(-1,keepdim=True)
        return w

    @staticmethod
    def static_decorator(func , relative_weight_cap = 5.):
        """Wrap ``func`` so that it yields a single normalised weight vector of shape (1 , 1 , n_factor)."""
        def wrapper(data , time_slice = None):
            if time_slice is not None:
                if data is not None: data = data[time_slice]
            # +/-inf are mapped to NaN so that a broken estimate never becomes a huge weight
            w = func(data).nan_to_num(torch.nan,torch.nan,torch.nan).reshape(1,1,-1)
            return MultiFactor._normalize_and_cap(w , relative_weight_cap)
        return wrapper
    
    @staticmethod
    def dynamic_decorator(func , relative_weight_cap = 5. , method = 0):
        """Wrap ``func`` so that it yields one normalised weight vector per row, shape (n_time , 1 , n_factor).

        ``method = 0`` loops over rows and feeds ``func`` the rows ``[i - roll_window , i)``,
        i.e. strictly before row ``i``. ``method = 1`` builds all windows at once with
        pad + unfold.
        NOTE(review): with ``method = 1`` each window ends AT row ``i`` (rows
        ``i - roll_window + 1 .. i``), so the two methods are offset by one row -- confirm
        which one is intended before switching.
        """
        def wrapper(data , roll_window = 10):
            if method == 1:
                data = torch.nn.functional.pad(data,[0,0,roll_window-1,0],value=torch.nan).unfold(0,roll_window,1).permute(2,0,1)
                w = func(data).nan_to_num(torch.nan,torch.nan,torch.nan).permute(1,0,2)
            else:
                w = data * 0.
                for i in range(len(w)):
                    # For i < roll_window the negative start wraps around; when
                    # len(data) >= roll_window that makes the slice empty, so the warm-up
                    # rows receive whatever ``func`` returns for empty input.
                    w[i] = func(data[i-roll_window:i]).nan_to_num(torch.nan,torch.nan,torch.nan)
                w = w.unsqueeze(1)
            return MultiFactor._normalize_and_cap(w , relative_weight_cap)
        return wrapper
    
    def multi_factor_weight(self , window_type = None , **kwargs):
        """Dispatch to static or rolling weighting; ``kwargs`` must contain the metric tensors (``ic`` and/or ``ir``)."""
        window_type = window_type if window_type is not None else self.window_type
        if window_type == 'insample':
            weight_tensor = self.static_factor_weight(**kwargs)
        else:
            weight_tensor = self.dynamic_factor_weight(**kwargs)
        return weight_tensor

    def _icir_func(self , weight_decay = None , halflife = None):
        """Bind per-call decay overrides to ``weight_icir`` so the decorators can call it with data only."""
        def func(data):
            return self.weight_icir(data , weight_decay = weight_decay , halflife = halflife)
        return func

    def static_factor_weight(self , weight_scheme = None , weight_decay = None , insample = None , **kwargs):
        """Estimate one weight vector from the ``insample`` rows of the metric.

        The metric is read from ``kwargs[weight_scheme]``; for 'ew' it is ``kwargs['ic']``
        when present, else ``kwargs['ir']``. An optional ``halflife`` in ``kwargs``
        overrides the instance half-life. Other keyword arguments are ignored.
        """
        weight_scheme = weight_scheme  if weight_scheme is not None else self.weight_scheme
        weight_decay  = weight_decay   if weight_decay  is not None else self.weight_decay
        insample      = insample       if insample      is not None else self.insample
        assert weight_scheme in ['ew' , 'ic' , 'ir']
        assert weight_decay  in ['constant' , 'linear' , 'exp']

        if weight_scheme == 'ew': 
            func = self.weight_ew
            data = kwargs['ic'] if 'ic' in kwargs else kwargs['ir']
        else:
            # forward the per-call overrides; previously they were accepted but never used
            func = self._icir_func(weight_decay , kwargs.get('halflife'))
            data = kwargs[weight_scheme]
        func = self.static_decorator(func)
        return func(data , time_slice = insample)
    
    def dynamic_factor_weight(self , weight_scheme = None , weight_decay = None , roll_window = None , **kwargs):
        """Estimate one weight vector per row from the trailing ``roll_window`` rows of the metric.

        'ew' has no time dependence, so it falls back to ``static_factor_weight``.
        """
        weight_scheme = weight_scheme  if weight_scheme is not None else self.weight_scheme
        weight_decay  = weight_decay   if weight_decay  is not None else self.weight_decay
        roll_window   = roll_window    if roll_window   is not None else self.roll_window
        assert weight_scheme in ['ew' , 'ic' , 'ir']
        assert weight_decay  in ['constant' , 'linear' , 'exp']

        if weight_scheme == 'ew': 
            # static_factor_weight resolves ``insample`` itself (from kwargs or the instance)
            return self.static_factor_weight(weight_scheme , weight_decay , **kwargs)
        else:
            func = self.dynamic_decorator(self._icir_func(weight_decay , kwargs.get('halflife')))
            return func(kwargs[weight_scheme] , roll_window = roll_window)

    def weight_ew(self , data):
        """Sign of the NaN-ignoring mean over dim 0 (+1 / -1 / 0 per factor)."""
        return data.nanmean(0).sign()

    def weight_icir(self, data , weight_decay = None , halflife = None):
        """Decay-weighted mean of ``data`` over dim 0, ignoring non-finite entries.

        ``data`` must be 2-D or 3-D with time on dim 0. The result keeps a leading dim of
        size 1. ``weight_decay`` / ``halflife`` override the instance settings when given.
        """
        ts_w = self.ts_decay(len(data) , weight_decay , halflife).reshape(1,-1)
        fini = data.isfinite() * 1.
        data = data.nan_to_num(0,0,0)
        # numerator: weighted sum of finite values; denominator: total weight of finite entries
        if data.dim() == 2:
            return (ts_w @ data) / (ts_w @ fini)
        elif data.dim() == 3:
            return torch.einsum('ij,jkl->ikl' , ts_w , data) / torch.einsum('ij,jkl->ikl' , ts_w , fini)
        else:
            raise ValueError(f'weight_icir expects a 2-D or 3-D tensor, got {data.dim()}-D')

    def weighted_multi(self , singles , weight):
        """Weighted sum of ``singles`` over the last (factor) dim, then ``MF.zscore_inplace`` along dim -1.

        ``weight`` must have the same shape as ``singles`` or be broadcastable to it with
        size-1 dims, which covers the (n_time , 1 , n_factor) and (1 , 1 , n_factor)
        tensors produced by the decorators in this class.
        """
        assert weight.dim() == singles.dim() and all(
            w_size in (1 , s_size) for w_size , s_size in zip(weight.shape , singles.shape)
        ) , (singles.shape , weight.shape)
        weight = singles.isfinite() * weight
        wsum = torch.nansum(singles * weight , dim = -1) 
        return MF.zscore_inplace(wsum,-1)
    
    def calculate_icir(self , factors , labels , ir_window = None , universe = None , min_coverage = None , **kwargs):
        """Return ``(rankic , rankir)``, each of shape (len(factors) , n_factor).

        ``rankic[: , k]`` is ``MF.rankic_2d`` of factor ``k`` against ``labels`` (a trailing
        singleton dim on ``labels`` is squeezed). ``rankir`` is
        ``MF.ma(rankic) / MF.ts_stddev(rankic)`` over ``ir_window``.
        """
        ir_window    = ir_window    if ir_window    is not None else self.ir_window
        universe     = universe     if universe     is not None else self.universe
        min_coverage = min_coverage if min_coverage is not None else self.min_coverage
        if labels.dim() == factors.dim():
            labels = labels.squeeze(-1)
        rankic = torch.full((len(factors) , factors.shape[-1]) , fill_value=torch.nan).to(labels)
        for i_factor in range(factors.shape[-1]):
            rankic[:,i_factor] = MF.rankic_2d(factors[...,i_factor] , labels , dim = 1 , 
                                              universe = universe , min_coverage = min_coverage)
        rankir = MF.ma(rankic , ir_window) / MF.ts_stddev(rankic , ir_window)
        return rankic , rankir

# Build the combiner from the notebook-level settings.
mfs = MultiFactor(
    weight_scheme = weight_scheme ,
    window_type   = window_type ,
    weight_decay  = weight_decay ,
    ir_window     = ir_window ,
    roll_window   = roll_window ,
    halflife      = halflife ,
    insample      = insample ,
    universe      = universe ,
)

# Per-date IC of every factor against the labels, and the IR derived from it.
rankic , rankir = mfs.calculate_icir(gp_factor , labels , universe = universe)

# Turn the metric history into factor weights (broadcastable against gp_factor).
weight_tensor = mfs.multi_factor_weight(
    ic = rankic ,
    ir = rankir ,
    weight_scheme = weight_scheme ,
    window_type   = window_type ,
    weight_decay  = weight_decay ,
    ir_window     = ir_window ,
    roll_window   = roll_window ,
    halflife      = halflife ,
    insample      = insample ,
    universe      = universe ,
)

# Weight the factors, average over the factor axis ignoring NaNs, then apply MF.zscore along the last dim.
multi = MF.zscore((gp_factor * weight_tensor).nanmean(-1) , -1)
weight_tensor , multi

(tensor([[[    nan,     nan,     nan,  ...,     nan,     nan,     nan]],
 
         [[    nan,     nan,     nan,  ...,     nan,     nan,     nan]],
 
         [[    nan,     nan,     nan,  ...,     nan,     nan,     nan]],
 
         ...,
 
         [[ 0.1213,  0.1548,  0.0121,  ..., -0.1517,  0.0401,  0.1213]],
 
         [[ 0.1111,  0.1502,  0.0764,  ..., -0.1110,  0.0440,  0.1088]],
 
         [[ 0.1029,  0.1398,  0.0812,  ..., -0.0784,  0.0374,  0.1011]]]),
 tensor([[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         ...,
         [ 0.1011, -0.0541, -0.0419,  ...,  0.0592, -0.0906,  0.0121],
         [ 0.0549, -0.0317,  0.1113,  ...,  0.0283, -0.0458, -0.1149],
         [-0.0988, -0.1708, -0.1378,  ...,  0.1534, -0.0181,  0.0223]]))

In [1]:
# Given factor expressions, compute the factor values over the full sample period
import torch
from gp_main import gp_factor_generator
import gp_factor_func as FF

# Load the job's data package, then compile the two factor expressions in order.
gp_space = gp_factor_generator(job_id = 'bendi')

a , b = (
    gp_space.compile(expression , 'inf')
    for expression in ('ts_rng_dif(CP,10,1)' , 'ts_rank(turn,10)')
)



**Job Directory is : "./pop/bendi"
-------------------- Data --------------------
  --> Directly load "./data/package/gp_data_package_test.pt"
**Load Data Done, Cost 0.03 Secs
  --> 2 factors, 2 raw data loaded!


In [2]:
# Compute a multi-factor from several single factors
multi_factor = FF.MultiFactor(
    weight_scheme = 'ir' ,
    window_type   = 'rolling' ,
    weight_decay  = 'exp' ,
    universe      = gp_space.tensors.universe ,
    insample      = gp_space.tensors.insample ,
    ir_window     = 40 ,
    roll_window   = 40 ,
    halflife      = 20 ,
)

# Stack the single factors along a new trailing factor axis.
factor = torch.stack([a , b] , dim = -1)
labels = gp_space.tensors.labels_raw

# metrics is a namespace holding ic and ir
metrics = multi_factor.calculate_icir(factor , labels , universe = gp_space.tensors.universe)
multi = multi_factor.multi_factor(factor , window_type = 'rolling' , **metrics)

# The result object has three attributes (multi , weight , inputs); multi.multi works as well.
multi.get('multi')

tensor([[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
        [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
        [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
        ...,
        [-0.0033, -0.0188,  0.0001,  ...,  0.0016, -0.0011,  0.0013],
        [-0.0102, -0.0191,  0.0004,  ...,  0.0016, -0.0009,  0.0013],
        [-0.0108, -0.0155,  0.0003,  ...,  0.0017, -0.0008,  0.0013]])

In [4]:
# Shape of the stacked single-factor inputs kept on the result object.
multi.inputs.shape

torch.Size([484, 5210, 2])

In [7]:
# Debug inspection: dump every attribute stored on the result object (the printed output is large).
multi.__dict__

{'multi': tensor([[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         ...,
         [-0.0033, -0.0188,  0.0001,  ...,  0.0016, -0.0011,  0.0013],
         [-0.0102, -0.0191,  0.0004,  ...,  0.0016, -0.0009,  0.0013],
         [-0.0108, -0.0155,  0.0003,  ...,  0.0017, -0.0008,  0.0013]]),
 'weight': tensor([[[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[    nan,     nan]],
 
         [[

In [21]:
from argparse import Namespace

class gpContainer(Namespace):
    """Attribute namespace with a small dict-like API.

    Items are stored as instance attributes. ``keys`` / ``values`` / ``items`` and
    ``__getitem__`` expose them as a mapping, so an instance can be unpacked with ``**``.
    Names listed in ``__reserved_names__`` cannot be used as item keys.
    """
    __reserved_names__ = ['get' , 'set' , 'update' , 'delete' , 'subset' , 'keys' , 'values' , 'items' , 'copy' , 'apply' , 'map']
    def __init__(self , inherit_from = None , **kwargs) -> None:
        """Copy the items of ``inherit_from`` (anything with ``.items()``), then apply ``kwargs`` on top."""
        if inherit_from is not None: 
            for k , v in inherit_from.items(): self.set(k , v)
        for k , v in kwargs.items(): self.set(k , v)
    def get(self , key , default = None , require = False):
        """Return the item ``key``.

        When the item is missing, return ``default``, or raise ``AttributeError`` if
        ``require`` is true (the flag used to be accepted but ignored).
        """
        if require: return getattr(self , key)
        return getattr(self , key , default)
    def update(self , **kwargs):
        """Set several items at once; returns ``self`` for chaining."""
        for k , v in kwargs.items(): self.set(k , v)
        return self
    def set(self , key  , v):
        """Set one item; reserved names are rejected. Returns ``self``."""
        assert key not in type(self).__reserved_names__ , key
        setattr(self , key , v)
        return self
    def delete(self , key):
        """Remove one item if present; reserved names are rejected. Returns ``self``."""
        assert key not in type(self).__reserved_names__ , key
        if hasattr(self , key): delattr(self, key)
        return self
    def subset(self , keys , require = False):
        """Return a plain dict of the requested keys; missing ones map to None unless ``require`` is true."""
        return {key:self.get(key , require = require) for key in keys}
    def keys(self): 
        return self.__dict__.keys()
    def values(self): 
        return self.__dict__.values()
    def items(self): 
        return self.__dict__.items()
    def __repr__(self) -> str:
        return self.__dict__.__repr__()
    def __add__(self , another):
        """Return a new container with the items of ``self`` overlaid by those of ``another``."""
        new = type(self)()
        for k , v in self.items(): new.set(k , v)
        for k , v in another.items(): new.set(k , v)
        return new
    def __getitem__(self , key):
        # together with keys(), this is what makes ``f(**container)`` work
        return self.__dict__.__getitem__(key)


    
# Demo: a gpContainer can be unpacked into a call as keyword arguments.
def f(a , b):
    """Return the product of the two arguments."""
    return a * b

a = gpContainer(a = 1 , b = 2)
f(**a)

TypeError: 'gpContainer' object is not subscriptable