# Generating synthetic data

In [7]:
import numpy as np
from scipy.stats import truncnorm
from sklearn.preprocessing import StandardScaler

In [6]:
def model_parameter(n_samples, model_name='heston'):
    """Draw random parameter samples for a stochastic-volatility model.

    Parameters:
    -----------
        n_samples: integer, number of values drawn for every parameter
        model_name: str, 'heston' or 'rbergomi'; any other name yields an
            empty dict

    Returns:
    --------
        dict mapping each parameter name to an array of n_samples draws
    """
    if model_name == 'heston':
        # Each parameter is a uniform draw on [0, 1) times a fixed factor.
        factors = (
            ('eta', 5),
            ('rho', -1),
            ('lambda', 10),
            ('var_avg', 1),
            ('var_init', 1),
        )
        return {name: factor * np.random.rand(n_samples) for name, factor in factors}

    if model_name == 'rbergomi':
        # Truncated normal draws. truncnorm takes standardised clip bounds:
        #   a, b = (clip_low - mean) / std, (clip_high - mean) / std
        # Columns: name, a, b, mean, std  -- trailing comment: [low, high, mean, std]
        specs = (
            ('eta', -3, 3, 2.5, 0.5),            # [1, 4, 2.5, 0.5]
            ('rho', -0.25, 2.25, -0.95, 0.2),    # [-1, -0.5, -0.95, 0.2]
            ('H', -1.2, 8.6, 0.07, 0.05),        # [0.01, 0.5, 0.07, 0.05]
            ('var_init', -2.5, 7, 0.3, 0.1),     # [0.05, 1, 0.3, 0.1]
            # TODO: what does the squared mean
        )
        return {
            name: truncnorm.rvs(low, high, loc=mean, scale=std, size=n_samples)
            for name, low, high, mean, std in specs
        }

    # Unrecognised model name: nothing to sample.
    return {}

def param_initializer(model_name='heston'):
    """Draw a single random value for every parameter of the chosen model.

    Parameters:
    -----------
        model_name: str, 'heston' or 'rbergomi'; any other name yields an
            empty dict

    Returns:
    --------
        dict mapping each parameter name to one scalar draw
    """
    if model_name == 'heston':
        uniform = np.random.rand  # one draw on [0, 1) per call
        return {
            'eta': uniform() * 5,
            'rho': uniform() * -1,
            'lambda': uniform() * 10,
            'var_avg': uniform(),
            'var_init': uniform(),
        }

    if model_name == 'rbergomi':
        # Truncated normal draws. truncnorm takes standardised clip bounds:
        #   a, b = (clip_low - mean) / std, (clip_high - mean) / std
        # Trailing comments list [clip_low, clip_high, mean, std].
        return {
            'eta': truncnorm.rvs(-3, 3, loc=2.5, scale=0.5),  # [1, 4, 2.5, 0.5]
            'rho': truncnorm.rvs(-0.25, 2.25, loc=-0.95, scale=0.2),  # [-1, -0.5, -0.95, 0.2]
            'H': truncnorm.rvs(-1.2, 8.6, loc=0.07, scale=0.05),  # [0.01, 0.5, 0.07, 0.05]
            'var_init': truncnorm.rvs(-2.5, 7, loc=0.3, scale=0.1),  # [0.05, 1, 0.3, 0.1]
            # TODO: what does the squared mean
        }

    # Unrecognised model name: nothing to initialise.
    return {}

In [None]:
def market_information(n_samples):
    """Placeholder for market-information generation (not implemented yet).

    The n_samples argument is accepted but currently ignored; the function
    returns None.
    """
    return None

In [None]:
def data_generation(n_samples, model='heston'):
    """
    Placeholder for synthetic data generation (not implemented yet).

    Parameters:
    -----------
        n_samples: integer, number of samples to generate
        model: str, the name of the model, 'heston' or 'rbergomi'

    Returns:
    --------
        X: input data (currently always None)
        Y: labels (currently always None)
    """
    # Neither argument is used yet; both outputs are placeholders.
    return None, None

In [4]:
def feature_scaling(X_train, *X_others):
    """Scale datasets with a StandardScaler fitted on the training set.

    Parameters:
    -----------
        X_train: data the scaler is fitted on (and which is transformed too)
        *X_others: any further datasets, transformed with the same fitted
            scaler

    Returns:
    --------
        list whose first item is the scaled X_train, followed by the scaled
        X_others in the order given
    """
    standardizer = StandardScaler()
    # Fit happens here, before any of the other datasets are transformed.
    results = [standardizer.fit_transform(X_train)]
    results.extend(standardizer.transform(other) for other in X_others)
    return results

# Neural Network

In [11]:
import keras as K
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# Width of the hidden layer. Still a placeholder: it must be replaced by an
# integer before this cell can run.
n1 = ...
# Sequential expects its layers as ONE list argument; passing the layers as
# separate positional arguments does not build a three-layer stack.
model = K.models.Sequential([
    K.layers.Dense(n1, activation='relu'),
    K.layers.BatchNormalization(),
    K.layers.Dense(1, activation='relu'),
])

In [None]:
# Checkpointing is currently disabled; the call is kept for reference:
# check_point = ModelCheckpoint('./models/', monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)
# Early-stopping callback monitoring 'val_loss' with a patience of 3,
# configured to restore the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    verbose=2,
    mode='auto',
    restore_best_weights=True,
)

In [None]:
# Configure training: Adam optimizer with a mean-squared-error loss.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

In [None]:
# Train for up to 50 epochs in batches of 128 with no progress output,
# validating on (X_val, Y_val) and using the early-stopping callback.
model.fit(
    X_train,
    Y_train,
    batch_size=128,
    epochs=50,
    verbose=0,
    validation_data=(X_val, Y_val),
    callbacks=[early_stopping],
)

# Deep Calibration

In [8]:
def deep_calibration(nn, jacobian, weights, market_quotes, market_info, model_name, lambd_init, max_iter, tol, beta0, beta1):
    """Calibrate model parameters with a damped least-squares iteration.

    NOTE: this function is unfinished. The network evaluation
    (`nn.predict(...)`) and the Jacobian `J` are still TODO placeholders, so
    it cannot run end-to-end yet. `jacobian` and `market_info` are accepted
    but not used so far.

    Parameters:
    -----------
        nn: object with a `predict` method; its output minus the market
            quotes forms the residual R
        jacobian: currently unused -- presumably meant to supply J
            (TODO confirm)
        weights: 1-D sequence placed on the diagonal of the weight matrix W
        market_quotes: values subtracted from the prediction to form R
        market_info: currently unused
        model_name: str, forwarded to param_initializer for the start values
        lambd_init: initial damping factor lambd
        max_iter: maximum number of iterations
        tol: iteration stops once the Euclidean norm of the step is <= tol
        beta0: a step is rejected (and lambd doubled) when the relative
            improvement c_mu is <= beta0
        beta1: lambd is halved when c_mu is >= beta1

    Returns:
    --------
        dict mapping each parameter name to its final value
    """
    # initialize
    initial = param_initializer(model_name)
    param_names = list(initial.keys())
    # Keep mu as an ndarray: a dict_values view supports neither
    # `mu + delta_mu` nor `mu += delta_mu`.
    mu = np.array(list(initial.values()), dtype=float)
    # Q was previously an undefined name; market_quotes is the quantity the
    # prediction is compared against.
    # NOTE(review): confirm its shape matches the output of nn.predict.
    Q = np.asarray(market_quotes)
    lambd = lambd_init
    m = len(mu)
    W = np.diag(weights)
    I = np.eye(m)
    n = 0

    def solve_step(J, J_W, R, lambd):
        # Solve (J^T W J + lambd * I) delta = J^T W R via the pseudo-inverse.
        # NOTE(review): with R = prediction - Q, a descent step would normally
        # be the negative of this solution -- confirm the sign convention
        # once J is implemented.
        return np.linalg.pinv(J_W.dot(J) + lambd * I).dot(J_W.dot(R))  # vector size: [m, ]

    # predict
    R = nn.predict(...) - Q  # TODO: build the real network input from mu
    J = None  # TODO: Jacobian of the residual at mu
    J_W = J.T.dot(W)
    delta_mu = solve_step(J, J_W, R, lambd)
    while n < max_iter and np.linalg.norm(delta_mu) > tol:
        mu_new = mu + delta_mu
        R_new = nn.predict(...) - Q  # TODO: evaluate the network at mu_new
        R_norm = np.linalg.norm(R)
        # Actual reduction of the residual norm relative to the reduction
        # predicted by the linearised model R + J * delta_mu.
        c_mu = (R_norm - np.linalg.norm(R_new)) / (R_norm - np.linalg.norm(R + J.dot(delta_mu)))
        if c_mu <= beta0:
            # reject delta_mu
            lambd *= 2  # too slow, use greater lambd
        else:
            # accept delta_mu
            mu = mu_new
            R = R_new
            J = None  # TODO: Jacobian of the residual at the new mu
            J_W = J.T.dot(W)
        if c_mu >= beta1:
            lambd /= 2  # too quick, use smaller lambd
        delta_mu = solve_step(J, J_W, R, lambd)
        n += 1
    return dict(zip(param_names, mu))

### 第一篇论文里没看懂的问题

1. 式（2）里的 model parameters $\mu$ 指的是模型的参数，也就是说，对 Heston 模型，$\mu$ 就是 Table 1 里的 $(\eta, \rho, \lambda, \bar{v}, v_0)$；对 rBergomi 模型，$\mu$ 就是 $(\eta, \rho, H, v_0)$。是这样吗？
2. Market information $\xi$ 怎么得到？是跟 $\mu$ 一样按照某个分布随机生成吗？同理，$(M, T)$ 是怎么得到的？论文第11页上面那一部分讲的是参数生成，但我看不太懂。
3. 第11页第二段说"increase the number of samples in option parameter regions with high liquidity ..."， 这一段我也看不懂。
4. 你们谁有时间能不能研究一下QuantLib这个package，看看用来生成Heston模型数据应该调用哪个函数？我研究了一下，但没有Finance的知识背景很难理解那些函数和函数的参数是什么意义。