In [38]:
# Import necessary libraries
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Input, BatchNormalization, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.models import load_model

In [40]:
np.random.seed(7)

In [42]:
# Load dataset
dataframe = pd.read_csv("./datasets/pca_95_cls.csv", sep=',')
X = dataframe.iloc[:, :-1]  # Selecting all columns except the last one as input features
y = dataframe['priceUSD']   # Target variable

In [44]:
dataframe.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,priceUSD
0,0.074162,0.015329,-0.048046,0.042709,0.007321,-0.014251,0.001355,-0.044263,-0.014403,-0.036199,...,0.017701,-0.0206,-0.021125,-0.001148,-0.004502,-0.01236,-0.032049,0.007081,0.006557,1
1,0.094841,0.072671,-0.07784,-0.014523,0.027039,-0.053013,0.056817,-0.00906,0.047423,-0.009912,...,-0.047544,0.013065,0.06567,0.006482,0.020321,0.00713,0.01632,0.013705,-0.042491,1
2,0.06488,0.028643,-0.038454,0.019065,0.028725,-0.014173,-0.002313,-0.031474,-0.009467,-0.034115,...,0.020285,0.006481,-0.012896,0.008115,-0.02212,-0.021993,0.012241,0.021045,-0.03373,1


In [46]:
dataframe.shape

(735, 51)

In [48]:
length=dataframe.shape[1]-1

In [50]:
length

50

In [52]:
# split into input (X) and output (Y) variables
X = dataframe.iloc[:,0:length]
y = dataframe['priceUSD']

In [54]:
X.head(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,0.074162,0.015329,-0.048046,0.042709,0.007321,-0.014251,0.001355,-0.044263,-0.014403,-0.036199,...,-0.004087,0.017701,-0.0206,-0.021125,-0.001148,-0.004502,-0.01236,-0.032049,0.007081,0.006557
1,0.094841,0.072671,-0.07784,-0.014523,0.027039,-0.053013,0.056817,-0.00906,0.047423,-0.009912,...,0.003421,-0.047544,0.013065,0.06567,0.006482,0.020321,0.00713,0.01632,0.013705,-0.042491
2,0.06488,0.028643,-0.038454,0.019065,0.028725,-0.014173,-0.002313,-0.031474,-0.009467,-0.034115,...,0.014521,0.020285,0.006481,-0.012896,0.008115,-0.02212,-0.021993,0.012241,0.021045,-0.03373


In [56]:
y=np.ravel(y)

In [58]:
y

array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
       1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,

In [60]:
shape=X.shape[1]

In [63]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)

In [65]:
estimators=[]

In [67]:
estimators.append(('robust', RobustScaler()))

In [69]:
estimators.append(('minmax', MinMaxScaler()))

In [71]:
scale = Pipeline(estimators, verbose=True)

In [73]:
scale.fit(X_train)

[Pipeline] ............ (step 1 of 2) Processing robust, total=   0.0s
[Pipeline] ............ (step 2 of 2) Processing minmax, total=   0.0s


In [75]:
X_train = scale.transform(X_train)

In [78]:
X_test = scale.transform(X_test)

In [80]:
# Learning Rate Scheduler
def lr_schedule(epoch):
    """Learning Rate Schedule with updates at specific epoch milestones"""
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    print('Learning rate:', lr)
    return lr

In [82]:
# Define the upgraded model architecture
def sequential_model(initializer='he_normal', activation='relu', neurons=300, NUM_FEATURES=X_train.shape[1]):
    model = Sequential()
    model.add(Input(shape=(NUM_FEATURES,)))  # Input layer
    model.add(Dense(512, kernel_initializer=initializer, activation=activation))
    model.add(BatchNormalization())            # Batch normalization for stability
    model.add(Dropout(0.3))                    # Dropout layer for regularization
    
    model.add(Dense(256, kernel_initializer=initializer, activation=activation))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    
    model.add(Dense(128, kernel_initializer=initializer, activation=activation))
    model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification
    
    # Compile the model with Adam optimizer and dynamic learning rate
    adam = Adam(learning_rate=lr_schedule(0), amsgrad=True)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model

In [85]:
# Configure Model Checkpoint and Early Stopping callbacks
mcp_save = ModelCheckpoint('trained_models/ANN_cls_interval3_pca_upgraded.keras', 
                           save_best_only=True, monitor='val_loss', mode='min')
early_stopping = EarlyStopping(monitor='val_loss', patience=100, verbose=1, mode='min')


In [92]:
# Initialize the KerasClassifier without `use_multiprocessing`
classifier = KerasClassifier(
    build_fn=sequential_model,
    batch_size=32,
    epochs=1000, 
    validation_split=0.1,
    shuffle=True,
    callbacks=[mcp_save, early_stopping]
)

In [95]:
# Train the model
classifier.fit(X_train, y_train)

  X, y = self._initialize(X, y)


Learning rate: 0.001
Epoch 1/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step - accuracy: 0.5112 - loss: 0.8765 - val_accuracy: 0.4915 - val_loss: 0.6877
Epoch 2/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5653 - loss: 0.7640 - val_accuracy: 0.5254 - val_loss: 0.6848
Epoch 3/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5932 - loss: 0.6575 - val_accuracy: 0.5424 - val_loss: 0.6873
Epoch 4/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6440 - loss: 0.6326 - val_accuracy: 0.5932 - val_loss: 0.6926
Epoch 5/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6938 - loss: 0.6086 - val_accuracy: 0.5424 - val_loss: 0.7002
Epoch 6/1000
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6890 - loss: 0.5742 - val_accuracy: 0.5763 - val_loss: 0.6940
Epoch

In [97]:
# Load the best model for evaluation
prediction_model = load_model('trained_models/ANN_cls_interval3_pca_upgraded.keras', compile=False)

In [105]:
# Predict and evaluate the model
y_pred = (prediction_model.predict(X_test) > 0.5).astype("int32")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
print("ROC AUC Score:", roc_auc_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=['Class 0', 'Class 1']))

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Accuracy: 0.5510204081632653
F1 Score: 0.5465662455458373
ROC AUC Score: 0.5433730454207
              precision    recall  f1-score   support

     Class 0       0.52      0.44      0.48        68
     Class 1       0.57      0.65      0.61        79

    accuracy                           0.55       147
   macro avg       0.55      0.54      0.54       147
weighted avg       0.55      0.55      0.55       147



In [107]:
y_prob=[prediction_model.predict(X_test).max() for i in range(len(y_test))]

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 629us/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step  
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step  
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4

In [119]:
# Optional: Print out the first few predictions alongside actual values for verification
predictions_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred.flatten()})

In [123]:
predictions_df

Unnamed: 0,Actual,Predicted
0,1,0
1,0,1
2,1,0
3,0,1
4,1,1
...,...,...
142,1,1
143,0,0
144,1,0
145,0,1
