In [2]:
import numpy as np
import datetime as dt
import tensorflow as tf
from pailab import MLRepo, MeasureConfiguration, MLObjectType
import logging

# Raise the root logging threshold to FATAL so that library log messages do not
# flood the notebook output.
logging.basicConfig(level=logging.FATAL)
# Show the TensorFlow version this notebook runs against. `tf.__version__` is
# used instead of `tf.VERSION` because the latter alias is no longer available
# at the top level in TensorFlow 2.x, while `__version__` exists in 1.x and 2.x.
tf.__version__

'1.10.0'

In [3]:
# Directory backing the repository used throughout this notebook.
repo_dir = 'c:/ml_repos/sc'
# 'test_user' is MLRepo's first positional argument (presumably the user name
# under which changes are recorded -- confirm against the pailab docs).
ml_repo = MLRepo('test_user', repo_dir=repo_dir)

### Setup repo

In [None]:
if True:
    # Register the pre-computed sample files as raw data 'sc', define the
    # training/test splits on top of it and add the error measures.
    #
    # NOTE(review): this directory is specific to the original author's
    # machine; point it at your own copy of the .npy files before running.
    data_dir = 'C:\\Users\\Anwender\\development\\RIVACON\\iqual\\experiments\\sc\\'
    filename_X = data_dir + 'sc_params_600015.npy'
    filename_Y = data_dir + 'prices_600015.npy'

    # Names of the seven input columns, in the order in which the simulation
    # cell of this notebook fills the columns of its X_result array.
    x_coord_names = ['initial_variance', 'speed_of_mean_reversion',
                     'long_run_variance', 'vol_of_variance', 'correlation',
                     'strike', 'expiry']
    ml_repo.raw_data.add_from_numpy_file('sc', filename_X, x_coord_names, filename_Y, ['price'])

    ml_repo.training_data.add('training_small', 'sc', 0, 30000)
    # The test split gets its own name. It used to be registered as
    # 'training_small' as well, i.e. under the same name as the training split.
    # NOTE(review): if the end index is exclusive, row 30000 belongs to neither
    # split -- confirm against the pailab DataSet documentation.
    ml_repo.test_data.add('test_small', 'sc', 30001, 60000)

    # Measures registered for this repository.
    for measure in (MeasureConfiguration.MSE, MeasureConfiguration.R2, MeasureConfiguration.MAX):
        ml_repo.add_measure(measure)
    
    
    
# The following only demonstrates how the data set can be generated by simulation.
if False: #create new data
    # pyvacon is imported here (not at the top of the notebook) because it is
    # only needed when the data is regenerated.
    import pyvacon.analytics as analytics
    import pyvacon.tools.converter as converter
    import pyvacon.models.converter as model_converter
    import pyvacon.models.tools as model_tools
    import random
    import math

    # NOTE(review): machine-specific output directory; adapt before running.
    output_dir = 'C:\\Users\\Anwender\\development\\RIVACON\\iqual\\experiments\\sc\\'

    # --- simulation settings -------------------------------------------------
    refdate = analytics.ptime(2017, 1, 1, 0, 0, 0)
    num_threads = 2
    nsims = 10000
    ntimesteps_per_year = 365
    spot = 1.0
    n_strikes_per_sample = 5
    # One expiry (in days after refdate) is drawn from each of these intervals.
    expiry_intervals = [[10,365],[366, 2*365], [2*365+1,3*365]]
    n_expiries_per_sample = len(expiry_intervals)
    n_samples = 40000
    n_overall_samples = n_samples * n_strikes_per_sample * n_expiries_per_sample

    # Result buffers: 5 model parameters + strike + expiry per row, one price
    # per row. np.empty leaves rows uninitialised until they are written below.
    X_result = np.empty((n_overall_samples, 5 + 2))
    Y_result = np.empty((n_overall_samples, 1))

    # Sampling range for initial_variance and long_run_variance.
    # NOTE(review): the bounds are logarithms, so sampled values can be
    # negative -- presumably the model expects log-scale inputs; confirm.
    variance_min = math.log(0.05)
    variance_max = math.log(1.2)

    counter = 0  # number of rows of X_result / Y_result written so far
    for i in range(n_samples):
        if i % 5000 == 0:
            print('sample ' + str(i))

        # Draw one random parameter set and build the model for it.
        initial_variance = random.uniform(variance_min, variance_max)
        speed_of_meanreversion = random.uniform(0.1, 5.0)
        long_run_variance = random.uniform(variance_min, variance_max)
        vol_of_variance = random.uniform(0.01, 1.2)
        correlation = random.uniform(-0.9,0.9)
        sc_model = analytics.ScottChesneyModel('t', refdate, spot, initial_variance, speed_of_meanreversion, long_run_variance, vol_of_variance, correlation)

        # One random expiry per interval.
        # NOTE(review): np.random.randint excludes the upper bound, so the last
        # day of each interval is never drawn -- confirm this is intended.
        expiry_days = [int(np.random.randint(lower, upper)) for lower, upper in expiry_intervals]
        simtimes = converter.createPTimeList(refdate, expiry_days)

        model_lab = analytics.ModelLab(sc_model, refdate)
        seed = np.random.randint(1,1024)
        model_lab.simulate(simtimes, nsims, ntimesteps_per_year, num_threads, seed)

        for t, expiry_day in enumerate(expiry_days):
            # NOTE(review): the +1 shifts the expiry by one day relative to the
            # simulated day offset -- confirm against createPTimeList semantics.
            expiry_yf = (expiry_day + 1)/365.0
            strikes = np.random.uniform(0.5,1.5, n_strikes_per_sample)
            # Payoff max(x - y, 0) evaluated for each strike at simulation time index t.
            prices = model_tools.compute_statistics(model_lab, strikes, t, 0, lambda x, y: max(x-y,0.0))
            for k in range(n_strikes_per_sample):
                X_result[counter, :] = (initial_variance, speed_of_meanreversion,
                                        long_run_variance, vol_of_variance,
                                        correlation, strikes[k], expiry_yf)
                Y_result[counter, 0] = prices[k]
                counter = counter + 1

        if i % 5000 == 0:
            # Checkpoint: store only the rows written so far. Saving the full
            # buffers would also persist the still-uninitialised np.empty rows.
            np.save(output_dir + 'sc_params_' + str(counter) + '.npy', X_result[:counter])
            np.save(output_dir + 'prices_' + str(counter) + '.npy', Y_result[:counter])

    # Final result: at this point every row of both buffers has been written.
    np.save(output_dir + 'sc_params.npy', X_result)
    np.save(output_dir + 'prices.npy', Y_result)


### Add model and train NN

In [None]:
if True:
    from tensorflow.keras import layers
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Activation

    # Small fully connected network: 7 inputs -> 2 -> 5 -> 1 output, with a
    # ReLU activation after every dense layer (including the output layer).
    model = Sequential([
        Dense(2, input_dim=7),
        Activation('relu'),
        Dense(5),
        Activation('relu'),
        Dense(1),
        Activation('relu'),
    ])

    # Register the network in the repository under the name 'simple_dense'
    # together with its training settings, then start the training.
    import externals.tensorflow_keras_interface as tf_interface
    tf_interface.add_model(
        ml_repo,
        model,
        'simple_dense',
        loss='MSE',
        epochs=1000,
        batch_size=1000,
    )
    ml_repo.run_training(run_descendants=True)

In [None]:
import pailab.plot as plot

In [None]:
# Call pailab's histogram_model_error for the 'simple_dense' model on the
# 'training_small' training data.
model_ref = ml_repo.models.simple_dense()
training_data_ref = ml_repo.training_data.training_small()
plot.histogram_model_error(ml_repo, model_ref, training_data_ref)

In [None]:
# Print every object name known to the repository, one per line, followed by
# the value of the MLObjectType category it was listed under.
# (Optional, currently disabled: trigger the evaluation first.)
#ml_repo.run_evaluation()
for category in MLObjectType:
    for obj_name in ml_repo.get_names(category.value):
        print('\t  '.join((obj_name, category.value)))

In [None]:
# Load the training job of 'simple_dense' and print the error message that the
# job runner has recorded for it.
training_job = ml_repo.models.simple_dense.jobs.training
training_job.load()
job_name = training_job()
job_version = training_job.obj.repo_info.version
job_info = ml_repo._job_runner.get_info(job_name, job_version)
print(job_info.error_message)
