# Import

In [None]:
# Run on CPU only: hide every CUDA device from this process.
# This is set before TensorFlow is imported in the next cell so the setting
# is already in place when TensorFlow looks for devices.
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [None]:
import tensorflow as tf
import tensorflow.keras.backend as K
import numpy as np
import importlib

In [None]:
# models
import models.sdf as SDF
import models.conditional as Conditional

# loss
import models.loss as Loss

# training
import models.training as Training

# data
import models.data as Data

In [None]:
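# GPU availability check, kept disabled: CUDA_VISIBLE_DEVICES is set to "-1"
# at the top of this notebook, so no GPU is visible and this check would raise.
# gpu_lst = tf.config.list_physical_devices("GPU")
# if not gpu_lst:
#     raise Exception("No GPU")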

# Data

In [None]:
# Root folder of the datasets, relative to the notebook's working directory.
path = "../../datasets"

In [None]:
# List the contents of the dataset root. Uses `path` rather than repeating the
# literal, and plain Python rather than the IPython `ls` shell alias, so the
# cell also works outside IPython automagic and on non-POSIX systems.
sorted(os.listdir(path))

## Macro data

In [None]:
# Location of the macro training archive under the dataset root.
macro_path = f"{path}/macro/macro_train.npz"

In [None]:
# Open the macro .npz archive; its arrays are accessed by key in the cells below.
macro_data = np.load(macro_path)

In [None]:
# Names of the arrays stored in the macro archive.
macro_data.files

In [None]:
# Shape of the macro array; the notebook's notes describe it as 240 time
# entries with 178 macro features each.
macro_data["data"].shape

1. 178 macro features
2. 240 time entries

In [None]:
# Shape of a single date entry (index 0 on the first axis): each date entry
# has 178 macro features.
macro_data["data"][0].shape

## Firm data

In [None]:
# Location of the firm-characteristics training archive under the dataset root.
firm_path = f"{path}/char/Char_train.npz"

In [None]:
# Open the firm .npz archive; its arrays are accessed by key in the cells below.
firm_data = np.load(firm_path)

In [None]:
# Names of the arrays stored in the firm archive.
firm_data.files

In [None]:
# Shape of the firm array; the notebook's notes describe it as 240 time
# entries, 3686 firms, and 46 characteristics plus 1 excess return per firm.
firm_data["data"].shape

1. 46 firm-specific characteristics
2. 1 excess return - to be used for SDF construction
3. 240 time entries
4. 3686 firms

In [None]:
# Shape of a single date entry (index 0 on the first axis): each date entry
# covers 3686 firms, with 46 firm-specific characteristics + 1 return per firm.
firm_data["data"][0].shape

## Data cleaning

In [None]:
# Reload the data module so edits to it are picked up without restarting the
# kernel, then clean the raw macro and firm arrays. `clean` returns four
# objects, unpacked here as macro features, firm features, returns and mask.
importlib.reload(Data)
(
    macro_train,
    firm_train,
    return_train,
    mask_train,
) = Data.Data().clean(macro_data["data"], firm_data["data"])

Total time period: 240

Total firms: 3686

Total macro, firm features: 178, 46

In [None]:
# Shapes of the four cleaned arrays, displayed together as one tuple.
macro_train.shape, firm_train.shape, return_train.shape, mask_train.shape

In [None]:
# Small slice of the cleaned data for quick test runs of the models below.
# The macro array is taken whole; the other three arrays are cut down to their
# first `n` entries along axis 1 (presumably the firm axis - confirm against
# the shape notes in the Training section).
n = 5
sample = [
    macro_train[:, :, :],   # all three axes kept in full
    firm_train[:, :n, :],   # first n entries on axis 1, all features
    return_train[:, :n],    # first n entries on axis 1
    mask_train[:, :n],      # first n entries on axis 1
]

In [None]:
# Print the shape of every array in the test-run sample (the sample keeps
# n = 5 entries on axis 1).
# A named loop variable is used instead of `_`: IPython reserves `_` for the
# last cell output and stops updating it once user code assigns to it.
for part in sample:
    print(part.shape)

# Models

## SDF

In [None]:
# Reload the SDF module so code edits are picked up, then build a fresh model.
importlib.reload(SDF)
sdf = SDF.SDFModel()
# Run the model once on the sample and display the shape of its output.
sdf(sample).shape

In [None]:
# Show the model summary (presumably the Keras layer/parameter overview - confirm).
sdf.summary()

In [None]:
# Reload the conditional module so code edits are picked up, then build a
# fresh conditional model.
importlib.reload(Conditional)
conditional = Conditional.ConditionalModel()
# Run the model once on the sample and display the shape of its output.
conditional(sample).shape

In [None]:
# Show the model summary (presumably the Keras layer/parameter overview - confirm).
conditional.summary()

# Loss

In [None]:
# Reload the loss module so code edits are picked up, then evaluate the
# pricing loss once on the sample: it receives the outputs of both models
# together with the sample's returns (index 2) and mask (index 3).
importlib.reload(Loss)
pricing_loss = Loss.PricingLoss()
pricing_loss(
    SDF=sdf(sample),
    moment=conditional(sample),
    return_data=sample[2],
    mask=sample[3],
    weighted_loss=True,
)

In [None]:
# Sharpe loss computed directly from the SDF model's output on the sample.
pricing_loss.sharpe_loss(sdf=sdf(sample))

In [None]:
# Sharpe loss with normalize=True, passing the second-to-last trainable
# variable of the SDF model as the weights plus the sample's returns and mask.
# NOTE(review): residual_loss below passes sdf(sample) as `sdf_weight`, while
# this call passes a model variable - confirm which one sharpe_loss expects.
pricing_loss.sharpe_loss(
    normalize=True,
    sdf_weight=sdf.trainable_weights[-2],
    return_data=sample[2],
    mask=sample[3],
)

In [None]:
# Residual loss on the sample's returns and mask, using the SDF model's
# output on the sample as the weights.
pricing_loss.residual_loss(
    return_data=sample[2],
    mask=sample[3],
    sdf_weight=sdf(sample),
)

In [None]:
# Count the set entries of the mask along axis 1: the mask is cast to int32
# and summed, giving one count per index of axis 0.
Ni = tf.reduce_sum(tf.cast(sample[3], "int32"), axis=1)

In [None]:
# Shape of the per-row counts.
Ni.shape

In [None]:
# Keep only the returns at positions where the mask is set.
masked_return = tf.boolean_mask(sample[2], mask=sample[3])
# Shape of the selected returns.
masked_return.shape

In [None]:
# Shape of the mask.
sample[3].shape

In [None]:
# Display the full mask.
sample[3]

In [None]:
# Display the mask entries at index 0 of the first axis.
sample[3][0]

In [None]:
# Shape of the mask (same check as earlier in this section).
sample[3].shape

In [None]:
# Recompute the masked returns, passing the mask positionally this time.
masked_return = tf.boolean_mask(sample[2], sample[3])

In [None]:
# Split the masked returns into consecutive pieces whose sizes are given by Ni.
masked_return_lst = tf.split(masked_return, Ni)

In [None]:
# Output shape of the SDF model on the sample.
sdf(sample).shape

In [None]:
# Shape of Ni, the split sizes used in the next cell.
Ni.shape

In [None]:
# Split the SDF model's output using the sizes in Ni.
# NOTE(review): this requires the split dimension of sdf(sample) to have
# length sum(Ni) - confirm against the shapes displayed above.
tf.split(sdf(sample), Ni)

# Training

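Input shapes (`?` is the firm dimension, whose size depends on how many firms are kept):

- Macro features: 240 x 178
- Firm features: 240 x ? x 46
- Returns: 240 x ?
- Mask: 240 x ?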

In [None]:
# Reload the training module so code edits are picked up, then create the
# trainer (with a fresh pricing loss) and an Adam optimizer.
importlib.reload(Training)
trainer = Training.Trainer(loss=Loss.PricingLoss())
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

In [None]:
# Run the training routine on the small sample, using the SDF model,
# conditional model and optimizer created in the cells above.
# NOTE(review): the three *_epoches arguments look like per-phase epoch counts
# (unconditional SDF, moment model, conditional SDF) and `steps` like the
# number of outer iterations - confirm against Trainer.train.
trainer.train(
    sdf_model=sdf,
    conditional_model=conditional,
    optimizer=optimizer,
    inputs=sample,
    sdf_epoches_unc=1,
    moment_epoches=1,
    sdf_epoches_cond=1,
    steps=100
)