In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.models import Model
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Load and preprocess the data.
# The CSV is transposed after loading, so the file's first column ends up as
# the first row and supplies the column labels below.
Moein_New = pd.read_csv('C:/input_file.csv', header=0)
Moein_New = Moein_New.transpose()

new_header = Moein_New.iloc[0]  # Get the new header from the first row
Moein_New = Moein_New[1:]  # Remove the first row from the data
Moein_New.columns = new_header  # Set the new header

X = Moein_New.drop('Sample', axis=1)  # Features
y = Moein_New['Sample']  # Target variable

# NOTE(review): the scaler and the importance ranking below are fit on every
# sample, before train_and_evaluate() splits the data into folds, so the
# per-fold validation metrics are likely optimistic -- confirm this is intended.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Integer class codes, numbered in order of first appearance in y.
y_labels = pd.factorize(y)[0]

# Seeded so the importance ranking (and therefore every selected feature set)
# is reproducible between runs; the seed matches the one used by the CV splitter.
rf = RandomForestClassifier(n_estimators=100, random_state=64)
rf.fit(X_scaled, y_labels)
importances = rf.feature_importances_

# Define neural network architecture
def build_model(input_shape, num_classes=None):
    """Build the uncompiled dense classifier.

    Layer stack: Dense(2600, relu) -> Dropout(0.6) -> Dense(400, relu)
    -> Dropout(0.6) -> Dense(40, relu) -> Dense(num_classes, softmax).

    Args:
        input_shape: Number of input features; it is wrapped into a
            one-element shape tuple for the input layer.
        num_classes: Number of softmax output units. When omitted, it is
            taken from the number of distinct values in the module-level
            ``y_labels``, which is what the single-argument call relied on.

    Returns:
        The assembled ``Model``; the caller is responsible for compiling it.
    """
    if num_classes is None:
        num_classes = len(np.unique(y_labels))

    input_layer = Input(shape=(input_shape,))
    x = Dense(2600, activation='relu')(input_layer)
    x = Dropout(0.6)(x)
    x = Dense(400, activation='relu')(x)
    x = Dropout(0.6)(x)
    x = Dense(40, activation='relu')(x)
    output_layer = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    return model

# Training hyperparameters read by train_and_evaluate().
learning_rate = 0.001  # passed to the Adam optimizer
batch_size = 64  # passed to model.fit
epochs = 50  # passed to model.fit

# Function to train, evaluate and save model and features
def train_and_evaluate(X_selected, y_labels, y, feature_count, X_full):
    """Train one network per stratified fold and persist the best one.

    For each of 7 stratified folds a fresh model is built, compiled and fit on
    the fold's training rows. The fold whose final-epoch validation loss is
    lowest wins; its model is saved to disk together with the names of the
    ``feature_count`` most important features.

    Args:
        X_selected: Feature matrix indexed positionally by row
            (``X_selected[idx]``), already restricted to the chosen columns.
        y_labels: Class codes, used only to stratify the folds.
        y: pandas Series of class names; one-hot encoded for training.
        feature_count: Number of top-ranked features; used in the output
            file names and to slice the importance ranking.
        X_full: DataFrame whose ``columns`` name the features ranked by the
            module-level ``importances``.

    Returns:
        Tuple ``(best_model, best_model_info, X_test, y_test)`` where
        ``X_test``/``y_test`` are the held-out rows of the fold that produced
        ``best_model``. The held-out rows are not scored here; model selection
        uses the loss on Keras' ``validation_split`` portion of the training
        rows.

    Raises:
        RuntimeError: If no fold yields a validation loss below infinity
            (for example when every loss is NaN), so there is nothing to save.
    """
    kfold = StratifiedKFold(n_splits=7, shuffle=True, random_state=64)

    # Encode once over the whole target so every fold has the same class
    # columns, even when a class is absent from that fold's train or test rows.
    # float32 keeps the targets numeric whatever pandas' default dummy dtype is.
    y_onehot = pd.get_dummies(y, dtype="float32")

    best_val_loss = float('inf')
    best_model = None
    best_model_info = {}
    best_X_test = None
    best_y_test = None

    for train_idx, test_idx in kfold.split(X_selected, y_labels):
        X_train, X_test = X_selected[train_idx], X_selected[test_idx]
        y_train, y_test = y_onehot.iloc[train_idx], y_onehot.iloc[test_idx]

        model = build_model(X_train.shape[1])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                      loss='categorical_crossentropy', metrics=['accuracy'])

        history = model.fit(X_train, y_train.to_numpy(), epochs=epochs,
                            batch_size=batch_size, validation_split=0.2, verbose=1)

        val_loss = history.history['val_loss'][-1]

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model
            best_model_info = {
                'Feature_Count': feature_count,
                'Validation_Loss': best_val_loss,
                'Validation_Accuracy': history.history['val_accuracy'][-1],
                'Neural_Architecture': [2600, 400, 40]
            }
            # Keep the held-out rows of this fold so the returned test set is
            # one the returned model was never trained on.
            best_X_test, best_y_test = X_test, y_test

    if best_model is None:
        raise RuntimeError(
            f"No fold produced a finite validation loss for feature_count={feature_count}"
        )

    # Save model
    model_save_path = f"C:/best_model_feature_{feature_count}.keras"
    best_model.save(model_save_path)

    # Save features
    selected_features = X_full.columns[np.argsort(importances)[-feature_count:]]
    features_save_path = f"C:/selected_features_{feature_count}.csv"
    selected_features.to_series().to_csv(features_save_path, index=False, header=False)

    return best_model, best_model_info, best_X_test, best_y_test

# Rank all features once, in ascending order of importance; the tail of this
# ranking holds the most important ones.
importance_order = np.argsort(importances)

# Feature indices: the top-k entries of the ranking for each k.
indices_1000 = importance_order[-1000:]
indices_2000 = importance_order[-2000:]
indices_3000 = importance_order[-3000:]
indices_4000 = importance_order[-4000:]
indices_5000 = importance_order[-5000:]

# Feature matrices: the scaled data restricted to each index set.
X_selected_1000 = X_scaled[:, indices_1000]
X_selected_2000 = X_scaled[:, indices_2000]
X_selected_3000 = X_scaled[:, indices_3000]
X_selected_4000 = X_scaled[:, indices_4000]
X_selected_5000 = X_scaled[:, indices_5000]

# Train, evaluate, and save models and features.
# The 5000-feature run goes first, followed by 1000 through 4000.
(
    best_model_5000,
    best_model_info_5000,
    X_test_5000,
    y_test_5000,
) = train_and_evaluate(X_selected_5000, y_labels, y, 5000, X)
(
    best_model_1000,
    best_model_info_1000,
    X_test_1000,
    y_test_1000,
) = train_and_evaluate(X_selected_1000, y_labels, y, 1000, X)
(
    best_model_2000,
    best_model_info_2000,
    X_test_2000,
    y_test_2000,
) = train_and_evaluate(X_selected_2000, y_labels, y, 2000, X)
(
    best_model_3000,
    best_model_info_3000,
    X_test_3000,
    y_test_3000,
) = train_and_evaluate(X_selected_3000, y_labels, y, 3000, X)
(
    best_model_4000,
    best_model_info_4000,
    X_test_4000,
    y_test_4000,
) = train_and_evaluate(X_selected_4000, y_labels, y, 4000, X)
