In [165]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import keras
import warnings

from sklearn.preprocessing import MinMaxScaler
from hpelm import ELM
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
warnings.filterwarnings("ignore")

In [120]:
data = pd.read_csv("Group_14_data_cleaned.csv", index_col=0)
data.head()

Unnamed: 0_level_0,Bidder_ID,Bidder_Tendency,Bidding_Ratio,Successive_Outbidding,Last_Bidding,Auction_Bids,Starting_Price_Average,Early_Bidding,Winning_Ratio,Auction_Duration,Class
Auction_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
732,_***i,0.2,0.4,0.0,2.8e-05,0.0,0.993593,2.8e-05,0.666667,0.444444,0
732,g***r,0.02439,0.2,0.0,0.013123,0.0,0.993593,0.013123,0.944444,0.444444,0
732,t***p,0.142857,0.2,0.0,0.003042,0.0,0.993593,0.003042,1.0,0.444444,0
732,7***n,0.1,0.2,0.0,0.097477,0.0,0.993593,0.097477,1.0,0.444444,0
900,z***z,0.051282,0.222222,0.0,0.001318,0.0,0.0,0.001242,0.5,0.666667,0


In [7]:
data['Class'].value_counts()

0    5646
1     675
Name: Class, dtype: int64

In [163]:
# Split the data into training and testing sets
X = data.drop(['Class', 'Bidder_ID'], axis=1)
y = data['Class']

## ELM

In [208]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert input data to numpy arrays
X_train = X_train.values
y_train = y_train.values
X_test = X_test.values
y_test = y_test.values

# Initialize the ELM model with the desired parameters
model = ELM(X_train.shape[1], 1, classification="c", batch=10, accelerator="cpu", precision="single")

model.add_neurons(256, "tanh")


# Train the model
model.train(X_train, y_train)

# Test the model
y_pred = model.predict(X_test)
y_pred = np.round(y_pred)
# Evaluate the classifier's performance on the test data
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[[1119   14]
 [   1  131]]
              precision    recall  f1-score   support

           0       1.00      0.99      0.99      1133
           1       0.90      0.99      0.95       132

    accuracy                           0.99      1265
   macro avg       0.95      0.99      0.97      1265
weighted avg       0.99      0.99      0.99      1265



In [221]:
class ELMClassifier:
    def __init__(self, n_hidden=10, activation="tanh", batch=10):
        self.n_hidden = n_hidden
        self.activation = activation
        self.batch = batch
        self.elm = ELM(inputs=X_train.shape[1], outputs=1, classification="c", batch=batch)
        self.elm.add_neurons(n_hidden, activation)

    def fit(self, X, y):
        self.elm.train(X, y)

    def predict(self, X):
        return self.elm.predict(X)

    def score(self, X, y):
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)

    def get_params(self, deep=True):
        return {'n_hidden': self.n_hidden, 'activation': self.activation, 'batch': self.batch}

    def set_params(self, **params):
        for param, value in params.items():
            setattr(self, param, value)
        return self


# Define the hyperparameters and their ranges
param_grid = {
    'n_hidden': [32, 64, 128],
    'activation': ['tanh']
}

# elm_classifier = ELMClassifier()
# Initialize the random search object
elm_random = RandomizedSearchCV(estimator= ELMClassifier(), param_distributions=param_grid, n_iter=10, cv=5, random_state=42)

# Fit the random search object to the training data
random_search_result = elm_random.fit(X_train, y_train)

# Print the results
print("Best: %f using %s" % (random_search_result.best_score_, random_search_result.best_params_))

# Print the best parameters and corresponding score
print("Best parameters: ", random_search_result.best_params_)
print("Best score: ", random_search_result.best_score_)


Best: nan using {'n_hidden': 32, 'activation': 'tanh'}
Best parameters:  {'n_hidden': 32, 'activation': 'tanh'}
Best score:  nan


## SVM with Linear Kernel 

In [33]:
from scipy import stats
# RANDOM SEARCH FOR 20 COMBINATIONS OF PARAMETERS
rand_list = {"C": stats.uniform(2, 10),
"gamma": stats.uniform(0.1, 5),
'kernel': ['linear']}
rand_search = RandomizedSearchCV(mdl, param_distributions = rand_list, n_iter = 50, n_jobs = 4, cv = 3, random_state = 2017, scoring = auc) 
rand_search.fit(X_train, y_train) 
# rand_search.cv_results_

# Print the best hyperparameters and the corresponding accuracy score
print("Best hyperparameters: ", rand_search.best_params_)
print("Accuracy score: ", rand_search.best_score_)


Best hyperparameters:  {'C': 2.209602254061174, 'gamma': 3.935350823412439, 'kernel': 'linear'}
Accuracy score:  0.9829980312538452


In [34]:
# Use the best hyperparameters to create a new SVM model
best_svm_model = SVC(C=rand_search.best_params_['C'], kernel=rand_search.best_params_['kernel'], gamma=rand_search.best_params_['gamma'])

# Train the new SVM model using the training data
best_svm_model.fit(X_train, y_train)

# Use the trained classifier to predict the class labels for the test data
y_pred = best_svm_model.predict(X_test)


# Evaluate the classifier's performance on the test data
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[1110   21]
 [   3  131]]
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1131
           1       0.86      0.98      0.92       134

    accuracy                           0.98      1265
   macro avg       0.93      0.98      0.95      1265
weighted avg       0.98      0.98      0.98      1265



## SVM with non linear kernel

In [31]:
from scipy import stats
# RANDOM SEARCH FOR 20 COMBINATIONS OF PARAMETERS
rand_list = {"C": stats.uniform(2, 10),
"gamma": stats.uniform(0.1, 1),
'kernel': ['poly', 'rbf']}
rand_search = RandomizedSearchCV(mdl, param_distributions = rand_list, n_iter = 50, n_jobs = 4, cv = 3, random_state = 2017, scoring = auc) 
rand_search.fit(X_train, y_train) 
# rand_search.cv_results_

# Print the best hyperparameters and the corresponding accuracy score
print("Best hyperparameters: ", rand_search.best_params_)
print("Accuracy score: ", rand_search.best_score_)


Best hyperparameters:  {'C': 9.087699346650997, 'gamma': 1.0163457603417565, 'kernel': 'rbf'}
Accuracy score:  0.9886417589006565


In [32]:
# Use the best hyperparameters to create a new SVM model
best_svm_model = SVC(C=rand_search.best_params_['C'], kernel=rand_search.best_params_['kernel'], gamma=rand_search.best_params_['gamma'])

# Train the new SVM model using the training data
best_svm_model.fit(X_train, y_train)

# Use the trained classifier to predict the class labels for the test data
y_pred = best_svm_model.predict(X_test)

# Evaluate the classifier's performance on the test data
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[[1126    5]
 [   5  129]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1131
           1       0.96      0.96      0.96       134

    accuracy                           0.99      1265
   macro avg       0.98      0.98      0.98      1265
weighted avg       0.99      0.99      0.99      1265



## Training a Neural Network

In [55]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint


# Define the neural network model
def create_model(num_layers=1, num_neurons=10):
    model = Sequential()
    model.add(Dense(num_neurons, input_dim=X_train.shape[1], activation='relu'))
    for i in range(num_layers):
        model.add(Dense(num_neurons, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Create a KerasClassifier with fixed batch_size and epochs
model = KerasClassifier(build_fn=create_model, batch_size=32, epochs=10, verbose=2)

# Define the hyperparameter distribution to search
param_dist = {
    'num_layers': [1,2,3],
    'num_neurons': [8,16,32,64],
}

# Perform randomized search
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist, cv=3, n_iter=10)
random_search_result = random_search.fit(X_train, y_train)

# Print the results
print("Best: %f using %s" % (random_search_result.best_score_, random_search_result.best_params_))


Epoch 1/10


  model = KerasClassifier(build_fn=create_model, batch_size=32, epochs=10, verbose=2)


106/106 - 1s - loss: 0.4276 - accuracy: 0.8905 - 710ms/epoch - 7ms/step
Epoch 2/10
106/106 - 0s - loss: 0.1264 - accuracy: 0.9561 - 167ms/epoch - 2ms/step
Epoch 3/10
106/106 - 0s - loss: 0.0642 - accuracy: 0.9736 - 147ms/epoch - 1ms/step
Epoch 4/10
106/106 - 0s - loss: 0.0554 - accuracy: 0.9774 - 139ms/epoch - 1ms/step
Epoch 5/10
106/106 - 0s - loss: 0.0512 - accuracy: 0.9789 - 143ms/epoch - 1ms/step
Epoch 6/10
106/106 - 0s - loss: 0.0475 - accuracy: 0.9786 - 169ms/epoch - 2ms/step
Epoch 7/10
106/106 - 0s - loss: 0.0472 - accuracy: 0.9792 - 176ms/epoch - 2ms/step
Epoch 8/10
106/106 - 0s - loss: 0.0462 - accuracy: 0.9786 - 176ms/epoch - 2ms/step
Epoch 9/10
106/106 - 0s - loss: 0.0448 - accuracy: 0.9783 - 174ms/epoch - 2ms/step
Epoch 10/10
106/106 - 0s - loss: 0.0436 - accuracy: 0.9789 - 162ms/epoch - 2ms/step
53/53 - 0s - loss: 0.0325 - accuracy: 0.9840 - 202ms/epoch - 4ms/step
Epoch 1/10
106/106 - 1s - loss: 0.3939 - accuracy: 0.8941 - 1s/epoch - 11ms/step
Epoch 2/10
106/106 - 0s - los

Epoch 3/10
106/106 - 0s - loss: 0.2095 - accuracy: 0.8923 - 127ms/epoch - 1ms/step
Epoch 4/10
106/106 - 0s - loss: 0.1427 - accuracy: 0.8923 - 127ms/epoch - 1ms/step
Epoch 5/10
106/106 - 0s - loss: 0.1084 - accuracy: 0.9496 - 128ms/epoch - 1ms/step
Epoch 6/10
106/106 - 0s - loss: 0.0797 - accuracy: 0.9730 - 126ms/epoch - 1ms/step
Epoch 7/10
106/106 - 0s - loss: 0.0628 - accuracy: 0.9745 - 125ms/epoch - 1ms/step
Epoch 8/10
106/106 - 0s - loss: 0.0552 - accuracy: 0.9777 - 128ms/epoch - 1ms/step
Epoch 9/10
106/106 - 0s - loss: 0.0515 - accuracy: 0.9777 - 124ms/epoch - 1ms/step
Epoch 10/10
106/106 - 0s - loss: 0.0493 - accuracy: 0.9798 - 124ms/epoch - 1ms/step
53/53 - 0s - loss: 0.0366 - accuracy: 0.9822 - 193ms/epoch - 4ms/step
Epoch 1/10
106/106 - 1s - loss: 0.5351 - accuracy: 0.8781 - 611ms/epoch - 6ms/step
Epoch 2/10
106/106 - 0s - loss: 0.3473 - accuracy: 0.8944 - 126ms/epoch - 1ms/step
Epoch 3/10
106/106 - 0s - loss: 0.1810 - accuracy: 0.9436 - 126ms/epoch - 1ms/step
Epoch 4/10
106/1

106/106 - 0s - loss: 0.0477 - accuracy: 0.9769 - 137ms/epoch - 1ms/step
Epoch 5/10
106/106 - 0s - loss: 0.0482 - accuracy: 0.9783 - 140ms/epoch - 1ms/step
Epoch 6/10
106/106 - 0s - loss: 0.0441 - accuracy: 0.9786 - 138ms/epoch - 1ms/step
Epoch 7/10
106/106 - 0s - loss: 0.0428 - accuracy: 0.9804 - 142ms/epoch - 1ms/step
Epoch 8/10
106/106 - 0s - loss: 0.0427 - accuracy: 0.9810 - 138ms/epoch - 1ms/step
Epoch 9/10
106/106 - 0s - loss: 0.0420 - accuracy: 0.9804 - 136ms/epoch - 1ms/step
Epoch 10/10
106/106 - 0s - loss: 0.0417 - accuracy: 0.9801 - 137ms/epoch - 1ms/step
53/53 - 0s - loss: 0.0332 - accuracy: 0.9822 - 184ms/epoch - 3ms/step
Epoch 1/10
106/106 - 1s - loss: 0.3244 - accuracy: 0.9033 - 662ms/epoch - 6ms/step
Epoch 2/10
106/106 - 0s - loss: 0.0652 - accuracy: 0.9686 - 142ms/epoch - 1ms/step
Epoch 3/10
106/106 - 0s - loss: 0.0487 - accuracy: 0.9751 - 138ms/epoch - 1ms/step
Epoch 4/10
106/106 - 0s - loss: 0.0423 - accuracy: 0.9780 - 137ms/epoch - 1ms/step
Epoch 5/10
106/106 - 0s - l

Epoch 6/10
106/106 - 0s - loss: 0.0739 - accuracy: 0.9650 - 116ms/epoch - 1ms/step
Epoch 7/10
106/106 - 0s - loss: 0.0676 - accuracy: 0.9671 - 125ms/epoch - 1ms/step
Epoch 8/10
106/106 - 0s - loss: 0.0642 - accuracy: 0.9677 - 114ms/epoch - 1ms/step
Epoch 9/10
106/106 - 0s - loss: 0.0613 - accuracy: 0.9685 - 135ms/epoch - 1ms/step
Epoch 10/10
106/106 - 0s - loss: 0.0590 - accuracy: 0.9703 - 132ms/epoch - 1ms/step
53/53 - 0s - loss: 0.0462 - accuracy: 0.9798 - 184ms/epoch - 3ms/step
Epoch 1/10
106/106 - 1s - loss: 0.5593 - accuracy: 0.8585 - 543ms/epoch - 5ms/step
Epoch 2/10
106/106 - 0s - loss: 0.3128 - accuracy: 0.8968 - 112ms/epoch - 1ms/step
Epoch 3/10
106/106 - 0s - loss: 0.1661 - accuracy: 0.9422 - 118ms/epoch - 1ms/step
Epoch 4/10
106/106 - 0s - loss: 0.0956 - accuracy: 0.9683 - 127ms/epoch - 1ms/step
Epoch 5/10
106/106 - 0s - loss: 0.0691 - accuracy: 0.9709 - 143ms/epoch - 1ms/step
Epoch 6/10
106/106 - 0s - loss: 0.0570 - accuracy: 0.9763 - 145ms/epoch - 1ms/step
Epoch 7/10
106/1

In [None]:
# Best: 0.982199 using {'num_neurons': 64, 'num_layers': 2}

## Training the best architecture

In [75]:

best_num_layers = random_search_result.best_params_["num_layers"]
best_num_neurons = random_search_result.best_params_["num_neurons"]
model = create_model(num_layers=best_num_layers, num_neurons=best_num_neurons)
model.fit(X_train, y_train, epochs=50, verbose=2)


Epoch 1/50
158/158 - 1s - loss: 0.1778 - accuracy: 0.9434 - 638ms/epoch - 4ms/step
Epoch 2/50
158/158 - 0s - loss: 0.0468 - accuracy: 0.9775 - 201ms/epoch - 1ms/step
Epoch 3/50
158/158 - 0s - loss: 0.0425 - accuracy: 0.9794 - 225ms/epoch - 1ms/step
Epoch 4/50
158/158 - 0s - loss: 0.0405 - accuracy: 0.9794 - 201ms/epoch - 1ms/step
Epoch 5/50
158/158 - 0s - loss: 0.0389 - accuracy: 0.9810 - 204ms/epoch - 1ms/step
Epoch 6/50
158/158 - 0s - loss: 0.0381 - accuracy: 0.9828 - 202ms/epoch - 1ms/step
Epoch 7/50
158/158 - 0s - loss: 0.0372 - accuracy: 0.9824 - 202ms/epoch - 1ms/step
Epoch 8/50
158/158 - 0s - loss: 0.0349 - accuracy: 0.9838 - 211ms/epoch - 1ms/step
Epoch 9/50
158/158 - 0s - loss: 0.0330 - accuracy: 0.9856 - 208ms/epoch - 1ms/step
Epoch 10/50
158/158 - 0s - loss: 0.0308 - accuracy: 0.9860 - 203ms/epoch - 1ms/step
Epoch 11/50
158/158 - 0s - loss: 0.0286 - accuracy: 0.9875 - 202ms/epoch - 1ms/step
Epoch 12/50
158/158 - 0s - loss: 0.0239 - accuracy: 0.9923 - 204ms/epoch - 1ms/step
E

<keras.callbacks.History at 0x7f8665817d90>

In [76]:

# Use the trained classifier to predict the class labels for the test data
y_pred = model.predict(X_test)
y_pred = np.round(y_pred)
# Evaluate the classifier's performance on the test data
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[1129    2]
 [   3  131]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1131
           1       0.98      0.98      0.98       134

    accuracy                           1.00      1265
   macro avg       0.99      0.99      0.99      1265
weighted avg       1.00      1.00      1.00      1265



Neural network performs slightly better than non-linear SVM. 

In [77]:
# Evaluate the model on the testing set
loss, accuracy = model.evaluate(X_test, y_test)


# Print the accuracy without rounding
print("Accuracy: {:.4f}".format(accuracy))

Accuracy: 0.9960


# Structure Selection

In [106]:
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler


# Standardize the features using the StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit a Lasso regression model on the training data
alpha = 0.006  # regularization strength
lasso = Lasso(alpha=alpha)
lasso.fit(X_train, y_train)

# Select the features with non-zero coefficients
selected_features = X.columns[lasso.coef_ != 0]

# Print the selected features
print("Selected features:", selected_features)


Selected features: Index(['Bidder_Tendency', 'Successive_Outbidding', 'Last_Bidding',
       'Winning_Ratio'],
      dtype='object')


In [113]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif


# Get the feature matrix X and target vector y
X_train_ = data.drop(['Bidder_ID', 'Class'], axis=1)
y_train_ = data['Class']

k = 4
knn = KNeighborsClassifier(n_neighbors=k) 
selector = SelectKBest(score_func=f_classif, k=k)
# selector = SelectKBest(score_func=chi2, k=k)
X_train_new = selector.fit_transform(X_train_, y_train_)


# Print the selected feature names
selected_features = X_train_.columns[selector.get_support()]
print("Selected features:", list(selected_features))



Selected features: ['Bidder_Tendency', 'Bidding_Ratio', 'Successive_Outbidding', 'Winning_Ratio']


Training the model with the selected features to find which feature selection is a better technique. 

In [133]:
# Split the data into training and testing sets
X = data[['Bidder_Tendency', 'Successive_Outbidding', 'Last_Bidding',
       'Winning_Ratio']]
y = data['Class']
X_train_sl_ft_1, X_test_sl_ft_1, y_train_sl_ft_1, y_test_sl_ft_1 = train_test_split(X, y, test_size=0.2)

In [134]:
# Define the neural network model
def create_model_(num_layers=1, num_neurons=10):
    model = Sequential()
    model.add(Dense(num_neurons, input_dim=X_train_.shape[1], activation='relu'))
    for i in range(num_layers):
        model.add(Dense(num_neurons, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [135]:
model = create_model_(num_layers=2, num_neurons=64)
model.fit(X_train_sl_ft_1, y_train_sl_ft_1, epochs=50, verbose=2)

Epoch 1/50
158/158 - 1s - loss: 0.2030 - accuracy: 0.9389 - 646ms/epoch - 4ms/step
Epoch 2/50
158/158 - 0s - loss: 0.0479 - accuracy: 0.9777 - 202ms/epoch - 1ms/step
Epoch 3/50
158/158 - 0s - loss: 0.0476 - accuracy: 0.9775 - 205ms/epoch - 1ms/step
Epoch 4/50
158/158 - 0s - loss: 0.0455 - accuracy: 0.9784 - 204ms/epoch - 1ms/step
Epoch 5/50
158/158 - 0s - loss: 0.0449 - accuracy: 0.9778 - 209ms/epoch - 1ms/step
Epoch 6/50
158/158 - 0s - loss: 0.0448 - accuracy: 0.9798 - 237ms/epoch - 1ms/step
Epoch 7/50
158/158 - 0s - loss: 0.0444 - accuracy: 0.9788 - 211ms/epoch - 1ms/step
Epoch 8/50
158/158 - 0s - loss: 0.0437 - accuracy: 0.9794 - 206ms/epoch - 1ms/step
Epoch 9/50
158/158 - 0s - loss: 0.0416 - accuracy: 0.9800 - 206ms/epoch - 1ms/step
Epoch 10/50
158/158 - 0s - loss: 0.0441 - accuracy: 0.9784 - 202ms/epoch - 1ms/step
Epoch 11/50
158/158 - 0s - loss: 0.0421 - accuracy: 0.9796 - 204ms/epoch - 1ms/step
Epoch 12/50
158/158 - 0s - loss: 0.0433 - accuracy: 0.9796 - 202ms/epoch - 1ms/step
E

<keras.callbacks.History at 0x7f86597d80a0>

In [132]:

# Use the trained classifier to predict the class labels for the test data
y_pred_ = model.predict(X_test_sl_ft_1)
y_pred_ = np.round(y_pred_sl_ft_1)
# Evaluate the classifier's performance on the test data
print(confusion_matrix(y_test_sl_ft_1, y_pred_sl_ft_1))
print(classification_report(y_test_sl_ft_1, y_pred_sl_ft_1))

[[1115   25]
 [   0  125]]
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1140
           1       0.83      1.00      0.91       125

    accuracy                           0.98      1265
   macro avg       0.92      0.99      0.95      1265
weighted avg       0.98      0.98      0.98      1265



In [137]:
# Split the data into training and testing sets
X = data[['Bidder_Tendency', 'Bidding_Ratio', 'Successive_Outbidding', 'Winning_Ratio']]
y = data['Class']
X_train_sl_ft_2, X_test_sl_ft_2, y_train_sl_ft_2, y_test_sl_ft_2 = train_test_split(X, y, test_size=0.2)
model = create_model_(num_layers=2, num_neurons=64)
model.fit(X_train_sl_ft_2, y_train_sl_ft_2, epochs=50, verbose=2)

# Use the trained classifier to predict the class labels for the test data
y_pred_ = model.predict(X_test_sl_ft_2)
y_pred_ = np.round(y_pred_sl_ft_2)
# Evaluate the classifier's performance on the test data
print(confusion_matrix(y_test_sl_ft_2, y_pred_sl_ft_2))
print(classification_report(y_test_sl_ft_2, y_pred_sl_ft_2))

Epoch 1/50
158/158 - 1s - loss: 0.2068 - accuracy: 0.9502 - 667ms/epoch - 4ms/step
Epoch 2/50
158/158 - 0s - loss: 0.0466 - accuracy: 0.9790 - 222ms/epoch - 1ms/step
Epoch 3/50
158/158 - 0s - loss: 0.0435 - accuracy: 0.9798 - 232ms/epoch - 1ms/step
Epoch 4/50
158/158 - 0s - loss: 0.0420 - accuracy: 0.9804 - 326ms/epoch - 2ms/step
Epoch 5/50
158/158 - 0s - loss: 0.0421 - accuracy: 0.9810 - 276ms/epoch - 2ms/step
Epoch 6/50
158/158 - 0s - loss: 0.0407 - accuracy: 0.9814 - 286ms/epoch - 2ms/step
Epoch 7/50
158/158 - 0s - loss: 0.0412 - accuracy: 0.9814 - 275ms/epoch - 2ms/step
Epoch 8/50
158/158 - 0s - loss: 0.0408 - accuracy: 0.9800 - 250ms/epoch - 2ms/step
Epoch 9/50
158/158 - 0s - loss: 0.0417 - accuracy: 0.9802 - 214ms/epoch - 1ms/step
Epoch 10/50
158/158 - 0s - loss: 0.0390 - accuracy: 0.9822 - 207ms/epoch - 1ms/step
Epoch 11/50
158/158 - 0s - loss: 0.0397 - accuracy: 0.9822 - 208ms/epoch - 1ms/step
Epoch 12/50
158/158 - 0s - loss: 0.0406 - accuracy: 0.9802 - 229ms/epoch - 1ms/step
E

NameError: name 'y_pred_sl_ft_2' is not defined

In [138]:
# Use the trained classifier to predict the class labels for the test data
y_pred_sl_ft_2 = model.predict(X_test_sl_ft_2)
y_pred_sl_ft_2 = np.round(y_pred_sl_ft_2)
# Evaluate the classifier's performance on the test data
print(confusion_matrix(y_test_sl_ft_2, y_pred_sl_ft_2))
print(classification_report(y_test_sl_ft_2, y_pred_sl_ft_2))

[[1102   25]
 [   0  138]]
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1127
           1       0.85      1.00      0.92       138

    accuracy                           0.98      1265
   macro avg       0.92      0.99      0.95      1265
weighted avg       0.98      0.98      0.98      1265



Both feature selections provide similar accuracies. 
But we are going to conisder Lasso feature selection method for our case, because Lasso is linear and uses L1 regularization to shrink coefficients of the features towards zero which can also prevent the problem of overfitting. 

## Using the best model from part 2 and part 3 on the selected features. 

In [139]:
#  Selected features : ['Bidder_Tendency', 'Successive_Outbidding', 'Last_Bidding', 'Winning_Ratio']

In [140]:
# Split the data into training and testing sets
X = data[['Bidder_Tendency', 'Successive_Outbidding', 'Last_Bidding',
       'Winning_Ratio']]
y = data['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [147]:
# BEST MODEL FROM PART - 2 (Random Forest)
# Best hyperparameters:  
# {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}

# Define the random forest model
rf_model = RandomForestClassifier(n_estimators=100, max_depth=20)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
acc_rf = accuracy_score(y_test, y_pred_rf)

print(acc_rf)

0.9762845849802372


In [148]:
# Define the neural network model
def create_model_(num_layers=1, num_neurons=10):
    model = Sequential()
    model.add(Dense(num_neurons, input_dim=X_train_.shape[1], activation='relu'))
    for i in range(num_layers):
        model.add(Dense(num_neurons, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

model = create_model_(num_layers=2, num_neurons=64)
model.fit(X_train, y_train, epochs=50, verbose=2)

Epoch 1/50
158/158 - 1s - loss: 0.1913 - accuracy: 0.9409 - 766ms/epoch - 5ms/step
Epoch 2/50
158/158 - 0s - loss: 0.0458 - accuracy: 0.9794 - 231ms/epoch - 1ms/step
Epoch 3/50
158/158 - 0s - loss: 0.0430 - accuracy: 0.9816 - 340ms/epoch - 2ms/step
Epoch 4/50
158/158 - 0s - loss: 0.0425 - accuracy: 0.9814 - 279ms/epoch - 2ms/step
Epoch 5/50
158/158 - 0s - loss: 0.0409 - accuracy: 0.9812 - 259ms/epoch - 2ms/step
Epoch 6/50
158/158 - 0s - loss: 0.0409 - accuracy: 0.9814 - 236ms/epoch - 1ms/step
Epoch 7/50
158/158 - 0s - loss: 0.0430 - accuracy: 0.9808 - 210ms/epoch - 1ms/step
Epoch 8/50
158/158 - 0s - loss: 0.0410 - accuracy: 0.9808 - 223ms/epoch - 1ms/step
Epoch 9/50
158/158 - 0s - loss: 0.0408 - accuracy: 0.9808 - 244ms/epoch - 2ms/step
Epoch 10/50
158/158 - 0s - loss: 0.0409 - accuracy: 0.9816 - 283ms/epoch - 2ms/step
Epoch 11/50
158/158 - 0s - loss: 0.0403 - accuracy: 0.9814 - 294ms/epoch - 2ms/step
Epoch 12/50
158/158 - 0s - loss: 0.0410 - accuracy: 0.9800 - 227ms/epoch - 1ms/step
E

<keras.callbacks.History at 0x7f8659c2e9a0>

In [150]:
# Evaluate the model on the testing set
loss, accuracy_nn = model.evaluate(X_test, y_test)




In [151]:
# Print the accuracy without rounding
print("Accuracy of the best part 2 model (Rf) on selected features : {:.4f}".format(acc_rf))
print("Accuracy of the best part 3 model (NN) on selected features: {:.4f}".format(accuracy_nn))

Accuracy of the best part 2 model (Rf) on selected features : 0.9763
Accuracy of the best part 3 model (NN) on selected features: 0.9810


In [224]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

rf = RandomForestClassifier(max_depth=20, min_samples_leaf=1, min_samples_split=5, n_estimators=100)

sfs = SFS(rf,
          k_features=(1, X.shape[1]),
          forward=True,
          floating=True,
          scoring='accuracy',
          cv=5,
          n_jobs=-1)

sfs = sfs.fit(X_train, y_train)



# Get the selected column names
selected_columns = X.columns[list(sfs.k_feature_idx_)]

# Print the selected column names
print("Selected features (RF):", selected_columns)

# Print the best score
print("Best score (RF):", sfs.k_score_)

# Transform X to contain only the selected features
X_selected_rf = sfs.transform(X)

# Train RF and NN models with selected features
rf.fit(X_selected_rf, y)




Selected features (RF): Index(['Successive_Outbidding', 'Last_Bidding', 'Winning_Ratio',
       'Auction_Duration'],
      dtype='object')
Best score (RF): 0.9980229335022266


In [225]:

def create_nn_model():
    model = Sequential()
    model.add(Dense(64, input_dim=X.shape[1], activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

nn = KerasClassifier(build_fn=create_nn_model, epochs=100, batch_size=10, verbose=0)

sfs_nn = SFS(nn,
             k_features=(1, X.shape[1]),
             forward=True,
             floating=True,
             scoring='accuracy',
             cv=5,
             n_jobs=-1)

sfs_nn = sfs_nn.fit(np.array(X), np.array(y))

# Get the selected column names
selected_columns = X.columns[list(sfs_nn.k_feature_idx_)]

# Print the selected column names
print("Selected features (NN):", selected_columns)

# Print the best score
print("Best score (NN):", sfs_nn.k_score_)

# Transform X to contain only the selected features
X_selected_rf = sfs_nn.transform(X)

# Train RF and NN models with selected features
nn.fit(X_selected_rf, y)

print("Selected features (NN):", sfs_nn.k_feature_idx_)
print("Best score (NN):", sfs_nn.k_score_)

X_selected_nn = sfs_nn.transform(X)
nn.fit(np.array(X_selected_nn), np.array(y))


2023-04-30 18:14:59.645952: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-30 18:14:59.646143: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-30 18:14:59.650299: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: 



ValueError: 
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/wrappers/scikit_learn.py", line 248, in fit
    return super().fit(x, y, **kwargs)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/wrappers/scikit_learn.py", line 175, in fit
    history = self.model.fit(x, y, **fit_args)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/var/folders/jx/rs472zns1ds_9lgk5hq47qz80000gp/T/__autograph_generated_file4it_cx3p.py", line 15, in tf__train_function
    retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
ValueError: in user code:

    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/input_spec.py", line 277, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "sequential" "                 f"(type Sequential).
    
    Input 0 of layer "dense" is incompatible with the layer: expected axis -1 of input shape to have value 9, but received input with shape (None, 1)
    
    Call arguments received by layer "sequential" "                 f"(type Sequential):
      • inputs=tf.Tensor(shape=(None, 1), dtype=float32)
      • training=True
      • mask=None


--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/wrappers/scikit_learn.py", line 248, in fit
    return super().fit(x, y, **kwargs)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/wrappers/scikit_learn.py", line 175, in fit
    history = self.model.fit(x, y, **fit_args)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/var/folders/jx/rs472zns1ds_9lgk5hq47qz80000gp/T/__autograph_generated_file4it_cx3p.py", line 15, in tf__train_function
    retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
ValueError: in user code:

    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/input_spec.py", line 277, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "sequential_1" "                 f"(type Sequential).
    
    Input 0 of layer "dense_3" is incompatible with the layer: expected axis -1 of input shape to have value 9, but received input with shape (None, 1)
    
    Call arguments received by layer "sequential_1" "                 f"(type Sequential):
      • inputs=tf.Tensor(shape=(None, 1), dtype=float32)
      • training=True
      • mask=None


--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/wrappers/scikit_learn.py", line 248, in fit
    return super().fit(x, y, **kwargs)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/wrappers/scikit_learn.py", line 175, in fit
    history = self.model.fit(x, y, **fit_args)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/var/folders/jx/rs472zns1ds_9lgk5hq47qz80000gp/T/__autograph_generated_file4it_cx3p.py", line 15, in tf__train_function
    retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
ValueError: in user code:

    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/input_spec.py", line 277, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "sequential_2" "                 f"(type Sequential).
    
    Input 0 of layer "dense_6" is incompatible with the layer: expected axis -1 of input shape to have value 9, but received input with shape (None, 1)
    
    Call arguments received by layer "sequential_2" "                 f"(type Sequential):
      • inputs=tf.Tensor(shape=(None, 1), dtype=float32)
      • training=True
      • mask=None


--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/wrappers/scikit_learn.py", line 248, in fit
    return super().fit(x, y, **kwargs)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/wrappers/scikit_learn.py", line 175, in fit
    history = self.model.fit(x, y, **fit_args)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/var/folders/jx/rs472zns1ds_9lgk5hq47qz80000gp/T/__autograph_generated_file4it_cx3p.py", line 15, in tf__train_function
    retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
ValueError: in user code:

    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/input_spec.py", line 277, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "sequential_3" "                 f"(type Sequential).
    
    Input 0 of layer "dense_9" is incompatible with the layer: expected axis -1 of input shape to have value 9, but received input with shape (None, 1)
    
    Call arguments received by layer "sequential_3" "                 f"(type Sequential):
      • inputs=tf.Tensor(shape=(None, 1), dtype=float32)
      • training=True
      • mask=None


--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/wrappers/scikit_learn.py", line 248, in fit
    return super().fit(x, y, **kwargs)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/wrappers/scikit_learn.py", line 175, in fit
    history = self.model.fit(x, y, **fit_args)
  File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/var/folders/jx/rs472zns1ds_9lgk5hq47qz80000gp/T/__autograph_generated_file4it_cx3p.py", line 15, in tf__train_function
    retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
ValueError: in user code:

    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/Sravan/Applications/anaconda3/lib/python3.9/site-packages/keras/engine/input_spec.py", line 277, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "sequential_4" "                 f"(type Sequential).
    
    Input 0 of layer "dense_12" is incompatible with the layer: expected axis -1 of input shape to have value 9, but received input with shape (None, 1)
    
    Call arguments received by layer "sequential_4" "                 f"(type Sequential):
      • inputs=tf.Tensor(shape=(None, 1), dtype=float32)
      • training=True
      • mask=None

