Final CNN1 model for Amgen Inc.

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import initializers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.layers import Flatten, Input, BatchNormalization, Conv2D, MaxPooling2D
from keras.layers.core import Activation, Dropout, Dense

Importing the dataset

In [None]:
# Read the raw comma-separated input file from the working directory.
# low_memory=False makes pandas parse the file in one pass.
df = pd.read_csv("AMGN_NN_60sec_ret.csv", sep=",", low_memory=False)

In [None]:
# Index the observations by their timestamp, built from the trading-day and
# intraday-time text columns. The index is named "datetime".
df.index = pd.DatetimeIndex(df["TDay"] + " " + df["IntraT"], name="datetime")

# The spread columns are not used as features, and the two text columns are
# no longer needed once the timestamp index exists.
df = df.drop(columns=['Spread_Q', 'ret.Spread_Q', 'lag.Spread_Q', 'TDay', 'IntraT'])

# Polynomial features (powers 2, 3 and 4) of the mid quote and of its return.
# Whole-column arithmetic replaces the former row-by-row `apply`, which called
# a Python lambda once per row for each of the six new columns.
for power, suffix in ((2, "sq"), (3, "sqsq"), (4, "sqsqsq")):
    df["MidQ" + suffix] = df["MidQ"] ** power
    df["ret.MidQ" + suffix] = df["ret.MidQ"] ** power

df.head()

Creating the input X

In [None]:
# Scale every feature column onto the range 0-255. The one scaler object is
# re-fitted for each column in turn, so the columns are scaled independently.
# NOTE(review): every fit uses the complete sample, including the days that
# are later held out for testing -- confirm this look-ahead is acceptable.
sc1 = MinMaxScaler(feature_range=(0, 255))

feature_sources = ['MidQ', 'ret.MidQ', 'lag.MidQ',
                   'MidQsq', 'ret.MidQsq',
                   'MidQsqsq', 'ret.MidQsqsq',
                   'MidQsqsqsq', 'ret.MidQsqsqsq']

# Column k of `feature_sources` (1-based) becomes the scaled column 'X<k>sc'.
for position, source in enumerate(feature_sources, start=1):
    df['X%dsc' % position] = sc1.fit_transform(df[source].values.reshape(-1, 1))


def _daily_arrays(column):
    """Collect the values of ``column`` into one NumPy array per calendar date.

    Returns a DataFrame with a ``datetime`` column (the date) and a column
    named ``column`` whose cells are the per-date arrays.
    """
    per_day = df.groupby(df.index.date)[column].apply(np.array).reset_index()
    # Grouping by a plain array leaves the index unnamed, so reset_index()
    # calls the new column "index"; rename it to the merge key used later.
    return per_day.rename(columns={"index": "datetime"})


X1, X2, X3, X4, X5, X6, X7, X8, X9 = [
    _daily_arrays('X%dsc' % position) for position in range(1, 10)
]

df.head()

Creating the labels y

In [None]:
# Label: the mean of 'ret.MidQ' per calendar date.
df_avgm = df.groupby(df.index.date)['ret.MidQ'].mean().reset_index()
df_avgm.columns = ['datetime', 'y']

# Scale the label onto [0, 1]; sc2 is kept so predictions can be mapped back.
sc2 = MinMaxScaler(feature_range=(0, 1))
df_avgm['ysc'] = sc2.fit_transform(df_avgm['y'].values.reshape(-1, 1))

# Join the nine per-day feature frames and the label frame on the date,
# one after another in the order X1 ... X9, df_avgm.
df_avgc = X1
for next_frame in (X2, X3, X4, X5, X6, X7, X8, X9, df_avgm):
    df_avgc = pd.merge(df_avgc, next_frame, on="datetime")

# Only the scaled features and the scaled label are needed from here on.
df_avgc = df_avgc.drop(columns=['datetime', 'y'])

df_avgc.head()

Connecting the right X and y

In [None]:
# Move every label up by one row so that a day's features sit next to the
# following day's scaled mean return; rows left without a value are dropped.
df_avgc = df_avgc.assign(ysc=df_avgc['ysc'].shift(-1))
df_avgc = df_avgc.dropna(axis='rows', how='any')

df_avgc.head()

Shaping the input X and splitting it into training and testing sets

In [None]:
def _train_channel(column, n_train=100, obs_per_day=393):
    """Stack the training days of one scaled feature into a 3-D array.

    Parameters
    ----------
    column : str
        Name of a `df_avgc` column whose cells are per-day NumPy arrays.
    n_train : int
        Number of leading rows (days) that form the training set.
    obs_per_day : int
        Number of observations every daily array must contain.

    Returns
    -------
    numpy.ndarray of shape ``(n_train, obs_per_day, 1)``.

    Raises
    ------
    ValueError
        If the daily arrays differ in length or do not match the
        requested shape.
    """
    training_days = df_avgc[column].iloc[:n_train]
    return np.stack(list(training_days)).reshape(n_train, obs_per_day, 1)


# One channel per scaled feature; the names are consumed by the next cell.
X1t, X2t, X3t, X4t, X5t, X6t, X7t, X8t, X9t = [
    _train_channel('X%dsc' % position) for position in range(1, 10)
]

In [None]:
# Put the nine feature channels side by side along the last axis, then add a
# trailing axis of length 1 so each sample has shape (393, 9, 1).
train_channels = [X1t, X2t, X3t, X4t, X5t, X6t, X7t, X8t, X9t]
X_train = np.concatenate(train_channels, axis=-1)
X_train = X_train.reshape(len(X_train), 393, 9, 1).astype('float32')

X_train.shape

In [None]:
def _test_channel(column, n_train=100, n_test=23, obs_per_day=393):
    """Stack the held-out days of one scaled feature into a 3-D array.

    Parameters
    ----------
    column : str
        Name of a `df_avgc` column whose cells are per-day NumPy arrays.
    n_train : int
        Number of leading rows (days) reserved for training and skipped here.
    n_test : int
        Number of rows (days) following the training rows that form the
        test set.
    obs_per_day : int
        Number of observations every daily array must contain.

    Returns
    -------
    numpy.ndarray of shape ``(n_test, obs_per_day, 1)``.

    Raises
    ------
    ValueError
        If the daily arrays differ in length or do not match the
        requested shape.
    """
    held_out_days = df_avgc[column].iloc[n_train:n_train + n_test]
    return np.stack(list(held_out_days)).reshape(n_test, obs_per_day, 1)


# One channel per scaled feature; the names are consumed by the next cell.
X1p, X2p, X3p, X4p, X5p, X6p, X7p, X8p, X9p = [
    _test_channel('X%dsc' % position) for position in range(1, 10)
]

In [None]:
# Put the nine feature channels side by side along the last axis, then add a
# trailing axis of length 1 so each sample has shape (393, 9, 1).
test_channels = [X1p, X2p, X3p, X4p, X5p, X6p, X7p, X8p, X9p]
X_test = np.concatenate(test_channels, axis=-1)
X_test = X_test.reshape(len(X_test), 393, 9, 1).astype('float32')

X_test.shape

Shaping the labels y and splitting them into training and testing sets

In [None]:
# Fix the column order (nine scaled features, then the scaled label) and pull
# the label out as a two-dimensional (n_days, 1) array.
scaled_feature_cols = ['X%dsc' % position for position in range(1, 10)]
df_avgc = df_avgc[scaled_feature_cols + ['ysc']]
ym = df_avgc.iloc[:, -1:].values

In [None]:
# Labels of rows 0-99 (the training days), reshaped to a column vector.
ymt = np.array([ym[day, 0] for day in range(100)])
ym_train = ymt.reshape(ymt.shape[0], 1)

ym_train.shape

In [None]:
# Labels of rows 100-122 (the held-out days), reshaped to a column vector.
ymp = np.array([ym[day, 0] for day in range(100, 123)])
ym_test = ymp.reshape(ymp.shape[0], 1)

ym_test.shape

Actual Model

In [None]:
# Input "image" size: 393 rows by 9 columns, matching the (393, 9, 1) shape
# given to each sample of X_train / X_test.
IM_HEIGHT, IM_WIDTH = 393, 9

In [None]:
class MultiOutputModel():
    
    def make_default_hidden_layers(self, inputs):
    
        x = Conv2D(16, (3, 3), padding="same",
                   kernel_initializer="HeUniform")(inputs)
        x = Activation("elu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0)(x)        
        
        x = Conv2D(32, (3, 3), padding="same",
                   kernel_initializer="HeUniform")(x)
        x = Activation("elu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0)(x)
                
        x = Conv2D(64, (3, 3), padding="same",
                   kernel_initializer="HeUniform")(x)
        x = Activation("elu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0)(x)
        
        x = Conv2D(128, (3, 3), padding="same",
                   kernel_initializer="HeUniform")(x)
        x = Activation("elu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(1, 1))(x)
        x = Dropout(0)(x)
        
        x = Conv2D(256, (3, 3), padding="same",
                   kernel_initializer="HeUniform")(x)
        x = Activation("elu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(1, 1))(x)
        x = Dropout(0)(x)
        
        return x    
    
    def build_retmean_branch(self, inputs):

        x = self.make_default_hidden_layers(inputs)        
        x = Flatten()(x)
        x = Dense(64)(x)
        x = Activation("elu")(x)
        x = Dropout(0.3)(x)
        x = Dense(1)(x)
        x = Activation("sigmoid", name="retmean_output")(x)        
        
        return x
    
    def assemble_full_model(self, width, height):

        input_shape = (height, width, 1)        
        inputs = Input(shape=input_shape)
        
        retmean_branch = self.build_retmean_branch(inputs)       
        
        model = Model(inputs=inputs,
                     outputs = [retmean_branch],
                     name="CNN_Model_4")        
        
        return model
    
# Build the network for inputs of IM_HEIGHT x IM_WIDTH with one channel.
model_builder = MultiOutputModel()
model = model_builder.assemble_full_model(width=IM_WIDTH, height=IM_HEIGHT)

Training parameters 

In [None]:
# Hyper-parameters; `epochs` and `batch_size` are read again by the training cell.
init_lr = 0.0001
epochs = 750
batch_size = 10

# The decay rate is the initial learning rate divided by the epoch count.
lr_decay = init_lr / epochs
opt = RMSprop(learning_rate=init_lr, decay=lr_decay)

# Loss, loss weight and metric are keyed by the name of the output layer.
model.compile(
    optimizer=opt,
    loss={'retmean_output': 'mse'},
    loss_weights={'retmean_output': 0.1},
    metrics={'retmean_output': 'mae'},
)

Restarting the random weight initialization after each run

In [None]:
def reset_weights(model):
    """Re-draw, in place, every variable of ``model`` that has an initialiser.

    For each layer, every attribute whose name contains ``"initializer"`` is
    treated as an initialiser; the matching variable is looked up on the same
    object and overwritten with a fresh draw of its own shape and dtype.

    Parameters
    ----------
    model : object with a ``layers`` attribute
        Nested ``tf.keras.Model`` instances are processed recursively.

    Raises
    ------
    AttributeError
        If no attribute holding the variable of an initialiser can be found
        on the layer.
    """
    for layer in model.layers:
        # A model used as a layer: descend into its own layers instead.
        if isinstance(layer, tf.keras.Model):
            reset_weights(layer)
            continue

        # Layers that expose a `cell` are handled through that cell: the
        # initialisers and variables are looked up there, not on the wrapper.
        init_container = layer.cell if hasattr(layer, 'cell') else layer

        for key, initializer in init_container.__dict__.items():
            if "initializer" not in key:
                continue
            # An initialiser set to None (or any non-callable attribute whose
            # name merely contains "initializer") cannot produce new values.
            if not callable(initializer):
                continue

            # Work out which attribute holds the variable for this initialiser.
            if key == 'recurrent_initializer':
                candidates = ('recurrent_kernel',)
            else:
                base = key.replace("_initializer", "")
                # e.g. kernel_initializer -> kernel, and as a fallback
                # depthwise_initializer -> depthwise_kernel.
                candidates = (base, base + "_kernel")

            present = [name for name in candidates if hasattr(init_container, name)]
            if not present:
                raise AttributeError(
                    f"cannot find the variable initialised by {key!r} "
                    f"(tried {', '.join(candidates)})"
                )

            var = getattr(init_container, present[0])
            # The attribute exists but holds no variable, e.g. the bias of a
            # layer created with use_bias=False: nothing to reset.
            if var is None:
                continue

            var.assign(initializer(var.shape, var.dtype))

Training process

In [None]:
predres = []   # one array of test-set predictions per run
predstd = []   # np.std over all predictions collected so far, per run

N_RUNS = 100

for run in range(N_RUNS):

    # Start every run from freshly drawn weights. The reset happens *before*
    # fitting rather than after it, so the model still holds the weights of
    # the last run when the loop ends and `model.save(...)` further down
    # stores a trained network instead of a re-initialised one.
    # NOTE(review): the optimizer object is shared by all runs, so its
    # internal state is presumably carried from run to run -- confirm
    # whether that is intended.
    model.reset_states()
    reset_weights(model)

    history = model.fit(x=X_train, y={"retmean_output": ym_train},
                        batch_size=batch_size,
                        epochs=epochs)

    res = model.predict(X_test)
    predres.append(res)
    predstd.append(np.std(predres))

Standard deviation of all accumulated predictions after each training-and-forecasting run

In [None]:
# How the standard deviation of the collected predictions evolves over the runs.
fig, ax = plt.subplots(figsize=(7, 2), dpi=80)
ax.plot(predstd, color='black')
ax.set_title('Amgen Inc.')
ax.set_xlabel('Number of iterations')
ax.set_ylabel('Standard deviation')
plt.show()

Shaping the predictions

In [None]:
# One row per run, one column per test day; average over the runs.
df_res = pd.DataFrame([np.ravel(run_prediction) for run_prediction in predres])
meanres = df_res.mean(axis=0).to_numpy().reshape(-1, 1)

# Keep only the first 20 test days of both predictions and targets.
meanres20d = meanres[:20]
ym_test20d = ym_test[:20]

df_meanres20d = pd.DataFrame({'predictionsc': meanres20d[:, 0]})
df_ym_test20d = pd.DataFrame({'realsc': ym_test20d[:, 0]})

# Map the scaled values back with the scaler that was fitted on the labels.
scaled_predictions = df_meanres20d['predictionsc'].values.reshape(-1, 1)
scaled_targets = df_ym_test20d['realsc'].values.reshape(-1, 1)
df_meanres20d['prediction'] = sc2.inverse_transform(scaled_predictions)
df_ym_test20d['real'] = sc2.inverse_transform(scaled_targets)

print(df_meanres20d)
print(df_ym_test20d)

Visualization of the results

In [None]:
# Realised versus predicted values (both on the original scale).
fig, ax = plt.subplots(figsize=(7, 2), dpi=80)
ax.plot(df_ym_test20d['real'], color='black', label='real')
ax.plot(df_meanres20d['prediction'], color='green', label='prediction')
ax.set_xticks(np.arange(0, 20+1, 2.0))
ax.set_title('Amgen Inc.')
ax.set_xlabel('Day')
ax.set_ylabel('Simple mean return')
ax.legend()
plt.show()

Mean squared error of the results

In [None]:
# Mean squared error between real and predicted values on the original scale.
diff_res = df_ym_test20d['real'] - df_meanres20d['prediction']
sq_res = diff_res * diff_res
mseres = sq_res.mean()

print(mseres)

In [None]:
# Mean squared error between real and predicted values on the scaled range.
diff_res = df_ym_test20d['realsc'] - df_meanres20d['predictionsc']
sq_res = diff_res * diff_res
mseres_sc = sq_res.mean()

print(mseres_sc)

Saving the results and the model

In [None]:
# Write the predictions next to the notebook. A relative path keeps the
# notebook runnable on any machine, in line with how the input CSV and the
# model directory are addressed.
df_meanres20d.to_csv('CNN1_res_AMGN.csv', index=False)

In [None]:
# Persist the model under a path relative to the working directory.
model_path = 'models/CNN1_AMGN'
model.save(model_path)