In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras import *
from keras.callbacks import *
import os
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report
from commons import mean_absolute_percentage_error
from keras.layers import *
from sklearn.pipeline import Pipeline
from keras.utils import to_categorical
from tensorflow.keras.models import load_model
from keras.optimizers import *
from scikeras.wrappers import KerasClassifier

In [9]:
!pip install scikeras

Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.13.0


In [15]:
np.random.seed(7)

In [17]:
# load dataset
dataframe = pd.read_csv("./datasets/pca_95_cls.csv", sep=',')

In [19]:
dataframe.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,priceUSD
0,0.074162,0.015329,-0.048046,0.042709,0.007321,-0.014251,0.001355,-0.044263,-0.014403,-0.036199,...,0.017701,-0.0206,-0.021125,-0.001148,-0.004502,-0.01236,-0.032049,0.007081,0.006557,1
1,0.094841,0.072671,-0.07784,-0.014523,0.027039,-0.053013,0.056817,-0.00906,0.047423,-0.009912,...,-0.047544,0.013065,0.06567,0.006482,0.020321,0.00713,0.01632,0.013705,-0.042491,1
2,0.06488,0.028643,-0.038454,0.019065,0.028725,-0.014173,-0.002313,-0.031474,-0.009467,-0.034115,...,0.020285,0.006481,-0.012896,0.008115,-0.02212,-0.021993,0.012241,0.021045,-0.03373,1


In [21]:
dataframe.shape

(735, 51)

In [23]:
length=dataframe.shape[1]-1

In [25]:
length

50

In [27]:
# split into input (X) and output (Y) variables
X = dataframe.iloc[:,0:length]
y = dataframe['priceUSD']

In [29]:
X.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0.074162,0.015329,-0.048046,0.042709,0.007321,-0.014251,0.001355,-0.044263,-0.014403,-0.036199,...,-0.004087,0.017701,-0.0206,-0.021125,-0.001148,-0.004502,-0.01236,-0.032049,0.007081,0.006557
1,0.094841,0.072671,-0.07784,-0.014523,0.027039,-0.053013,0.056817,-0.00906,0.047423,-0.009912,...,0.003421,-0.047544,0.013065,0.06567,0.006482,0.020321,0.00713,0.01632,0.013705,-0.042491
2,0.06488,0.028643,-0.038454,0.019065,0.028725,-0.014173,-0.002313,-0.031474,-0.009467,-0.034115,...,0.014521,0.020285,0.006481,-0.012896,0.008115,-0.02212,-0.021993,0.012241,0.021045,-0.03373


In [31]:
y=np.ravel(y)

In [81]:
y

array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
       1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,

In [83]:
shape=X.shape[1]

In [86]:
X_train,X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, train_size=0.8, shuffle=False, random_state=7)

In [88]:
estimators=[]

In [90]:
estimators.append(['robust',RobustScaler()])

In [92]:
estimators.append(['mixmax',MinMaxScaler()])

In [94]:
scale=Pipeline(estimators,verbose=True)

In [96]:
scale.fit(X_train)

[Pipeline] ............ (step 1 of 2) Processing robust, total=   0.0s
[Pipeline] ............ (step 2 of 2) Processing mixmax, total=   0.0s


In [98]:
X_train=scale.transform(X_train)

In [101]:
X_test=scale.transform(X_test)

In [105]:
def lr_schedule(epoch):
    """Learning Rate Schedule

    Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
    Called automatically every epoch as part of callbacks during training.

    # Arguments
        epoch (int): The number of epochs

    # Returns
        lr (float32): learning rate
    """
    lr = 1e-2
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr

In [129]:
def sequential_model(initializer='normal', activation='relu', neurons=300, NUM_FEATURES=shape, **kwargs):
    # Create model
    model = Sequential()
    model.add(Input(shape=(NUM_FEATURES,)))  # Set the correct input shape
    model.add(Dense(400, kernel_initializer=initializer, activation=activation))
    model.add(Dense(500, activation=activation))
    model.add(Dense(100, activation=activation))
    model.add(Dense(1, activation='sigmoid', kernel_initializer=initializer))  # Single output neuron with sigmoid
    
    # Define and compile optimizer
    adam = Adam(learning_rate=lr_schedule(0), amsgrad=True)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    
    return model


In [132]:
mcp_save = ModelCheckpoint('trained_models/ANN_cls_interval3_pca.keras', save_best_only=True, monitor='val_loss', mode='max')
earlyStopping = EarlyStopping(monitor='val_loss', patience=100,verbose=1, mode='max')

In [134]:
classifier=KerasClassifier(
    build_fn=sequential_model,batch_size=32, epochs=1000,validation_split=0.1,validation_freq=1, shuffle=True,use_multiprocessing=True, callbacks=[mcp_save,earlyStopping])

In [137]:
classifier.fit(X_train,y_train)

Learning rate:  0.01
Epoch 1/1000


  X, y = self._initialize(X, y)


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.4637 - loss: 0.7409 - val_accuracy: 0.4576 - val_loss: 0.7005
Epoch 2/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5318 - loss: 0.6969 - val_accuracy: 0.4576 - val_loss: 0.7027
Epoch 3/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5393 - loss: 0.6930 - val_accuracy: 0.4576 - val_loss: 0.6955
Epoch 4/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5158 - loss: 0.6931 - val_accuracy: 0.4576 - val_loss: 0.6950
Epoch 5/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5263 - loss: 0.6921 - val_accuracy: 0.4576 - val_loss: 0.6982
Epoch 6/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5160 - loss: 0.6928 - val_accuracy: 0.4576 - val_loss: 0.6976
Epoch 7/1000
[1m17/17[0m [32m━━━━━━━━

In [141]:
prediction_model = load_model('trained_models/ANN_cls_interval3_pca.keras',compile=False)

In [147]:
y_pred = (prediction_model.predict(X_test) > 0.5).astype("int32")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


In [149]:
acc=accuracy_score(y_test,y_pred)
acc

0.48299319727891155

In [151]:
f1=f1_score(y_test,y_pred,average='weighted')
f1

0.3146102477688323

In [153]:
auc=roc_auc_score(y_test,y_pred)
auc

0.5

In [155]:
y_prob=[prediction_model.predict(X_test).max() for i in range(len(y_test))]

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step  
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6m

In [157]:
print(classification_report(y_test,y_pred,labels=[0,1], target_names=['decrease','increase']))

              precision    recall  f1-score   support

    decrease       0.00      0.00      0.00        76
    increase       0.48      1.00      0.65        71

    accuracy                           0.48       147
   macro avg       0.24      0.50      0.33       147
weighted avg       0.23      0.48      0.31       147



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [159]:
predictions=pd.DataFrame(zip(np.ravel(y_test),np.ravel(y_pred)),columns=['y_test','y_pred'])

In [161]:
predictions

Unnamed: 0,y_test,y_pred
0,1,1
1,1,1
2,1,1
3,0,1
4,0,1
...,...,...
142,1,1
143,1,1
144,0,1
145,1,1
