In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
df = pd.read_csv('train.csv')
X= df

In [4]:
test = pd.read_csv('test.csv')

In [3]:
import numpy as np
import pandas as pd

def preprocess(df):
    df['person_emp_length'] = df['person_emp_length'].fillna(df['person_emp_length'].mean())
    df['loan_int_rate'] = df['loan_int_rate'].fillna(df['loan_int_rate'].mean())

    df['person_emp_length'].replace(0, np.nan, inplace=True)
    df['person_income'].replace(0, np.nan, inplace=True)

    df['person_emp_length'].fillna(df['person_emp_length'].mean(), inplace=True)
    df['person_income'].fillna(df['person_income'].mean(), inplace=True)

    df['loan_to_income'] = (df['loan_amnt'] / df['person_income']) - df['loan_percent_income']
    df['age_income_interaction'] = df['person_age'] * df['person_income']
    df['loan_to_emp_length_ratio'] = df['loan_amnt'] / df['person_emp_length']

    monthly_income = df['person_income'] / 12
    df['monthly_debt'] = df['loan_amnt'] * (1 + df['loan_int_rate']) / 12
    df['dti_ratio'] = df['monthly_debt'] / monthly_income

    df.replace([np.inf, -np.inf], np.nan, inplace=True)

    new_features = ['loan_to_income', 'age_income_interaction', 'loan_to_emp_length_ratio',
                    'monthly_debt', 'dti_ratio']
    df[new_features] = df[new_features].fillna(df[new_features].mean())

    df['risk_flag'] = np.where(
        (df['cb_person_default_on_file'] == 'Y') & (df['loan_grade'].isin(['C', 'D', 'E'])),
        1,
        0
    )

    categorical_columns = [
        'person_home_ownership', 'loan_intent', 'loan_grade',
        'cb_person_default_on_file'
    ]
    for col in categorical_columns:
        df[col] = df[col].astype('category')

    numeric_columns = [
        'loan_to_income', 'age_income_interaction', 'loan_to_emp_length_ratio',
        'monthly_debt', 'dti_ratio', 'risk_flag'
    ]
    for col in numeric_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    numerical_cols = df.select_dtypes(include=np.number).columns
    df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].mean())


In [5]:
preprocess(X)
preprocess(test)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['person_emp_length'].replace(0, np.nan, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['person_income'].replace(0, np.nan, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we ar

In [7]:
y = X.pop('loan_status')

In [8]:
numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns.tolist()

# 6. Remove Outliers
def remove_outliers_iqr(df, columns, target_variable=None):
    for col in columns:
        Q1 = df[col].quantile(0.25)
        Q3 = df[col].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        df = df[(df[col] >= lower_bound) & (df[col] <= upper_bound)]
    if target_variable is not None:
        # Select target rows based on remaining indices in df
        aligned_target = target_variable.loc[df.index]
        # Reset index for both to avoid potential index mismatch later
        df = df.reset_index(drop=True)
        aligned_target = aligned_target.reset_index(drop=True)
        return df, aligned_target
    return df

X_no_outliers, y_no_outliers = remove_outliers_iqr(X, numerical_cols, target_variable=y)

# 7. Encoding Categorical Variables
X_no_outliers = pd.get_dummies(X_no_outliers, drop_first=True)
test = pd.get_dummies(test, drop_first=True)

# Align the columns of X_no_outliers and test
X_no_outliers, test = X_no_outliers.align(test, join='outer', axis=1, fill_value=0)

# Handle any new infinite or NaN values after encoding
X_no_outliers.replace([np.inf, -np.inf], np.nan, inplace=True)
X_no_outliers.fillna(X_no_outliers.mean(), inplace=True)
test.replace([np.inf, -np.inf], np.nan, inplace=True)
test.fillna(test.mean(), inplace=True)

# Convert to numpy arrays
X_np = X_no_outliers.values
y_np = y_no_outliers.values

In [9]:
def create_model(input_dim):
    model = models.Sequential([
        layers.Dense(128, activation='relu', input_shape=(input_dim,)),
        layers.Dropout(0.5),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(32, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(16, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid') # Use sigmoid activation because we are predicting 0 and 1's
    ])
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

# 9. 5-Fold Cross-Validation and Collecting Test Predictions
from sklearn.model_selection import StratifiedKFold

# Initialize StratifiedKFold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Arrays to store the performance metrics and test predictions for each fold
fold_accuracy = []
fold_loss = []
test_preds = []

# Early Stopping Callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

for fold, (train_index, val_index) in enumerate(skf.split(X_np, y_np)):
    print(f"\nFold {fold + 1}")

    # Split data
    X_train_fold, X_val_fold = X_np[train_index], X_np[val_index]
    y_train_fold, y_val_fold = y_np[train_index], y_np[val_index]

    # Feature Scaling within each fold
    scaler = StandardScaler()
    X_train_fold_scaled = scaler.fit_transform(X_train_fold)
    X_val_fold_scaled = scaler.transform(X_val_fold)
    test_scaled = scaler.transform(test.values)

    # Build the model
    model = create_model(input_dim=X_train_fold_scaled.shape[1])

    # Train the model
    history = model.fit(
        X_train_fold_scaled, y_train_fold,
        epochs=100,
        batch_size=32,
        validation_data=(X_val_fold_scaled, y_val_fold),
        callbacks=[early_stopping],
        verbose=1
    )

    # Evaluate the model
    loss, accuracy = model.evaluate(X_val_fold_scaled, y_val_fold, verbose=0)
    print(f'Validation Loss for fold {fold + 1}: {loss:.4f}')
    print(f'Validation Accuracy for fold {fold + 1}: {accuracy:.4f}')

    fold_accuracy.append(accuracy)
    fold_loss.append(loss)

    # Make predictions on the test set
    test_pred = model.predict(test_scaled)
    test_preds.append(test_pred)

# After all folds
print('\nCross-validation results:')
print(f'Average Validation Accuracy: {np.mean(fold_accuracy):.4f}')
print(f'Standard Deviation of Validation Accuracy: {np.std(fold_accuracy):.4f}')
print(f'Average Validation Loss: {np.mean(fold_loss):.4f}')
print(f'Standard Deviation of Validation Loss: {np.std(fold_loss):.4f}')

# Aggregate test predictions
test_preds = np.array(test_preds)  # Shape: (n_folds, n_samples, 1)
# Average the predictions across folds
test_predictions_mean = np.mean(test_preds, axis=0).flatten()

# Convert averaged predictions to binary
test_predictions_binary = (test_predictions_mean >= 0.5).astype(int)



Fold 1


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8888 - loss: 0.3715 - val_accuracy: 0.9260 - val_loss: 0.1924
Epoch 2/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9259 - loss: 0.2429 - val_accuracy: 0.9260 - val_loss: 0.1838
Epoch 3/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9310 - loss: 0.2180 - val_accuracy: 0.9347 - val_loss: 0.1777
Epoch 4/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9341 - loss: 0.2048 - val_accuracy: 0.9414 - val_loss: 0.1725
Epoch 5/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9322 - loss: 0.2052 - val_accuracy: 0.9373 - val_loss: 0.1727
Epoch 6/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9356 - loss: 0.1941 - val_accuracy: 0.9450 - val_loss: 0.1651
Epoch 7/100
[1m877/87

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8921 - loss: 0.3689 - val_accuracy: 0.9260 - val_loss: 0.2081
Epoch 2/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9250 - loss: 0.2351 - val_accuracy: 0.9260 - val_loss: 0.1930
Epoch 3/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9286 - loss: 0.2158 - val_accuracy: 0.9267 - val_loss: 0.1873
Epoch 4/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9324 - loss: 0.2012 - val_accuracy: 0.9277 - val_loss: 0.1862
Epoch 5/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9322 - loss: 0.2038 - val_accuracy: 0.9287 - val_loss: 0.1852
Validation Loss for fold 2: 0.2081
Validation Accuracy for fold 2: 0.9260
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step

Fold 3
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8181 - loss: 0.4376 - val_accuracy: 0.9269 - val_loss: 0.1938
Epoch 2/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9299 - loss: 0.2362 - val_accuracy: 0.9276 - val_loss: 0.1872
Epoch 3/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9278 - loss: 0.2277 - val_accuracy: 0.9276 - val_loss: 0.1832
Epoch 4/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9330 - loss: 0.2109 - val_accuracy: 0.9316 - val_loss: 0.1802
Epoch 5/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9360 - loss: 0.1968 - val_accuracy: 0.9389 - val_loss: 0.1720
Validation Loss for fold 3: 0.1938
Validation Accuracy for fold 3: 0.9269
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step

Fold 4
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8730 - loss: 0.3831 - val_accuracy: 0.9260 - val_loss: 0.1927
Epoch 2/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9277 - loss: 0.2448 - val_accuracy: 0.9273 - val_loss: 0.1856
Epoch 3/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9288 - loss: 0.2195 - val_accuracy: 0.9279 - val_loss: 0.1825
Epoch 4/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9310 - loss: 0.2103 - val_accuracy: 0.9304 - val_loss: 0.1798
Epoch 5/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9365 - loss: 0.1955 - val_accuracy: 0.9369 - val_loss: 0.1770
Validation Loss for fold 4: 0.1927
Validation Accuracy for fold 4: 0.9260
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step

Fold 5
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.7439 - loss: 0.5371 - val_accuracy: 0.9260 - val_loss: 0.1993
Epoch 2/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9271 - loss: 0.2477 - val_accuracy: 0.9273 - val_loss: 0.1806
Epoch 3/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9287 - loss: 0.2209 - val_accuracy: 0.9304 - val_loss: 0.1776
Epoch 4/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9320 - loss: 0.2065 - val_accuracy: 0.9306 - val_loss: 0.1712
Epoch 5/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9327 - loss: 0.2072 - val_accuracy: 0.9361 - val_loss: 0.1701
Validation Loss for fold 5: 0.1993
Validation Accuracy for fold 5: 0.9260
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step

Cross-validation results:
Average Validation 

In [11]:
test_predictions_mean

array([0.40621036, 0.03717132, 0.2952488 , ..., 0.02677658, 0.24226694,
       0.43698555], dtype=float32)

In [12]:
def prepare_user_input(user_input_dict):
    # Convert the dictionary to a DataFrame
    user_input_df = pd.DataFrame([user_input_dict])

    # Preprocess the input data (same as the training data preprocessing)
    preprocess(user_input_df)

    # Handle categorical columns (same as in preprocessing)
    user_input_df = pd.get_dummies(user_input_df, drop_first=True)

    # Align with the training dataset (handle any columns that might be missing)
    user_input_df = user_input_df.reindex(columns=X_no_outliers.columns, fill_value=0)

    # Handle any new infinite or NaN values after encoding
    user_input_df.replace([np.inf, -np.inf], np.nan, inplace=True)
    user_input_df.fillna(user_input_df.mean(), inplace=True)

    # Convert to numpy array for prediction
    user_input_np = user_input_df.values
    return user_input_np


In [13]:
def predict_acceptance(user_input_dict):
    # Prepare the user input
    user_input_np = prepare_user_input(user_input_dict)

    # Feature Scaling (use the scaler fitted during training)
    user_input_scaled = scaler.transform(user_input_np)

    # Get the model's prediction (probability)
    pred_prob = model.predict(user_input_scaled)

    # Output the probability (mean acceptance rate)
    acceptance_prob = pred_prob[0][0]  # Single prediction
    return acceptance_prob


In [22]:
user_input = {
    'person_age': 35,
    'person_income': 60000,
    'person_home_ownership': 'MORTGAGE',  # Categorical value
    'person_emp_length': 10,
    'loan_intent': 'PERSONAL',  # Categorical value
    'loan_grade': 'B',  # Categorical value
    'loan_amnt': 20000,
    'loan_int_rate': 0.05,
    'loan_percent_income': 0.25,
    'cb_person_default_on_file': 'N',  # Categorical value
    'cb_person_cred_hist_length': 5
}

# Get the predicted acceptance probability
predicted_acceptance = predict_acceptance(user_input)
print(f"The predicted probability of loan approval is: {predicted_acceptance * 100:.2f}%")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['person_emp_length'].replace(0, np.nan, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['person_income'].replace(0, np.nan, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we ar

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
The predicted probability of loan approval is: 0.01%


In [17]:
import pandas as pd

# Convert the user input dictionary to a pandas DataFrame
input_df = pd.DataFrame([user_input])

# Check for missing values
print(input_df.isnull().sum())


person_age                    0
person_income                 0
person_home_ownership         0
person_emp_length             0
loan_intent                   0
loan_grade                    0
loan_amnt                     0
loan_int_rate                 0
loan_percent_income           0
cb_person_default_on_file     0
cb_person_cred_hist_length    0
dtype: int64


In [19]:
df['person_emp_length'] = df['person_emp_length'].replace(0, np.nan)
df['person_income'] = df['person_income'].replace(0, np.nan)
df['person_income'] = df['person_income'].fillna(df['person_income'].mean())


In [20]:
input_df = pd.get_dummies(input_df, columns=['person_home_ownership', 'loan_intent', 'loan_grade', 'cb_person_default_on_file'])


In [21]:
input_df

Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,person_home_ownership_RENT,loan_intent_EDUCATION,loan_grade_B,cb_person_default_on_file_N
0,37,35000,0.0,6000,11.49,0.17,14,True,True,True,True


In [23]:
import pandas as pd
import numpy as np

# Sample model function (Replace with your actual model prediction function)
def predict_acceptance(input_data):
    # Assuming you have a trained model, use it for prediction
    # For demonstration purposes, let's say the model predicts a random probability
    return np.random.random()

# Preprocessing the input data
def preprocess_input(user_input):
    # Convert the user input dictionary to a pandas DataFrame
    input_df = pd.DataFrame([user_input])

    # Handle missing values
    # Replace 0 in 'person_emp_length' and 'person_income' with NaN
    input_df['person_emp_length'] = input_df['person_emp_length'].replace(0, np.nan)
    input_df['person_income'] = input_df['person_income'].replace(0, np.nan)

    # Fill missing values in 'person_income' with the mean of that column
    input_df['person_income'] = input_df['person_income'].fillna(input_df['person_income'].mean())

    # Categorical variables need to be handled before model prediction
    # Example: Encoding categorical columns as dummy variables (One-Hot Encoding)
    categorical_cols = ['person_home_ownership', 'loan_intent', 'loan_grade', 'cb_person_default_on_file']
    input_df = pd.get_dummies(input_df, columns=categorical_cols)

    return input_df

# Define user input
user_input = {
    'person_age': 37,
    'person_income': 35000,
    'person_home_ownership': 'RENT',  # Categorical value
    'person_emp_length': 0,
    'loan_intent': 'EDUCATION',  # Categorical value
    'loan_grade': 'B',  # Categorical value
    'loan_amnt': 6000,
    'loan_int_rate': 11.49,
    'loan_percent_income': 0.17,
    'cb_person_default_on_file': 'N',  # Categorical value
    'cb_person_cred_hist_length': 14
}

# Preprocess the user input data
processed_input = preprocess_input(user_input)

# Get the predicted acceptance probability
predicted_acceptance = predict_acceptance(processed_input)

# Output the predicted probability of loan approval
print(f"The predicted probability of loan approval is: {predicted_acceptance * 100:.2f}%")


The predicted probability of loan approval is: 57.76%


In [26]:
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import models, layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import save_model, load_model

# Function to create the model
def create_model(input_dim):
    model = models.Sequential([
        layers.Dense(128, activation='relu', input_shape=(input_dim,)),
        layers.Dropout(0.5),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(32, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(16, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid')  # Sigmoid activation for binary classification
    ])
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

# 5-Fold Cross-Validation and Collecting Test Predictions
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Arrays to store the performance metrics and test predictions for each fold
fold_accuracy = []
fold_loss = []
test_preds = []

# Early Stopping Callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# For storing the best model
best_model = None
best_val_accuracy = 0

# Stratified K-Fold Cross-Validation
for fold, (train_index, val_index) in enumerate(skf.split(X_np, y_np)):
    print(f"\nFold {fold + 1}")

    # Split the data into training and validation sets
    X_train_fold, X_val_fold = X_np[train_index], X_np[val_index]
    y_train_fold, y_val_fold = y_np[train_index], y_np[val_index]

    # Feature Scaling within each fold
    scaler = StandardScaler()
    X_train_fold_scaled = scaler.fit_transform(X_train_fold)
    X_val_fold_scaled = scaler.transform(X_val_fold)
    test_scaled = scaler.transform(test.values)

    # Build the model
    model = create_model(input_dim=X_train_fold_scaled.shape[1])

    # Train the model
    history = model.fit(
        X_train_fold_scaled, y_train_fold,
        epochs=100,
        batch_size=32,
        validation_data=(X_val_fold_scaled, y_val_fold),
        callbacks=[early_stopping],
        verbose=1
    )

    # Evaluate the model
    loss, accuracy = model.evaluate(X_val_fold_scaled, y_val_fold, verbose=0)
    print(f'Validation Loss for fold {fold + 1}: {loss:.4f}')
    print(f'Validation Accuracy for fold {fold + 1}: {accuracy:.4f}')

    fold_accuracy.append(accuracy)
    fold_loss.append(loss)

    # Save the model after each fold (optional)
    model_save_path = f'model_fold_{fold + 1}.h5'
    model.save(model_save_path)
    print(f"Model for fold {fold + 1} saved.")

    # If this is the best model so far, save it as the best model
    if accuracy > best_val_accuracy:
        best_val_accuracy = accuracy
        best_model = model
        model.save('best_model.h5')
        print(f"Best model saved with validation accuracy: {accuracy:.4f}")

    # Make predictions on the test set
    test_pred = model.predict(test_scaled)
    test_preds.append(test_pred)

# After all folds
print('\nCross-validation results:')
print(f'Average Validation Accuracy: {np.mean(fold_accuracy):.4f}')
print(f'Standard Deviation of Validation Accuracy: {np.std(fold_accuracy):.4f}')
print(f'Average Validation Loss: {np.mean(fold_loss):.4f}')
print(f'Standard Deviation of Validation Loss: {np.std(fold_loss):.4f}')

# Aggregate test predictions
test_preds = np.array(test_preds)  # Shape: (n_folds, n_samples, 1)
# Average the predictions across folds
test_predictions_mean = np.mean(test_preds, axis=0).flatten()

# Convert averaged predictions to binary (0 or 1)
test_predictions_binary = (test_predictions_mean >= 0.5).astype(int)

# If desired, save the final model (trained on all data)
final_model = create_model(input_dim=X_np.shape[1])
final_model.fit(X_np, y_np, epochs=100, batch_size=32, verbose=1)
final_model.save('final_model.h5')
print("Final model saved after training on all data.")

# Optionally, load the best model if needed
# model = load_model('best_model.h5')



Fold 1


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8235 - loss: 0.4718 - val_accuracy: 0.9260 - val_loss: 0.1979
Epoch 2/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9237 - loss: 0.2468 - val_accuracy: 0.9282 - val_loss: 0.1850
Epoch 3/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9304 - loss: 0.2176 - val_accuracy: 0.9287 - val_loss: 0.1811
Epoch 4/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9321 - loss: 0.2070 - val_accuracy: 0.9344 - val_loss: 0.1767
Epoch 5/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.9330 - loss: 0.2078 - val_accuracy: 0.9342 - val_loss: 0.1715
Epoch 6/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9369 - loss: 0.1974 - val_accuracy: 0.9406 - val_loss: 0.1670
Epoch 7/100
[1m877/87



Validation Loss for fold 1: 0.1520
Validation Accuracy for fold 1: 0.9552
Model for fold 1 saved.
Best model saved with validation accuracy: 0.9552
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step

Fold 2
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8892 - loss: 0.3697 - val_accuracy: 0.9260 - val_loss: 0.2057
Epoch 2/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9268 - loss: 0.2373 - val_accuracy: 0.9310 - val_loss: 0.1921
Epoch 3/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9305 - loss: 0.2228 - val_accuracy: 0.9337 - val_loss: 0.1843
Epoch 4/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9335 - loss: 0.2012 - val_accuracy: 0.9296 - val_loss: 0.1836
Epoch 5/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9334 - loss: 0.2002 - val_accuracy: 0.9460 - val_loss: 0.1782




Validation Loss for fold 2: 0.2057
Validation Accuracy for fold 2: 0.9260
Model for fold 2 saved.
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step

Fold 3
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8845 - loss: 0.3675 - val_accuracy: 0.9260 - val_loss: 0.2054
Epoch 2/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9276 - loss: 0.2377 - val_accuracy: 0.9313 - val_loss: 0.1876
Epoch 3/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9298 - loss: 0.2158 - val_accuracy: 0.9343 - val_loss: 0.1809
Epoch 4/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9329 - loss: 0.2015 - val_accuracy: 0.9360 - val_loss: 0.1778
Epoch 5/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9318 - loss: 0.2002 - val_accuracy: 0.9377 - val_loss: 0.1744




Validation Loss for fold 3: 0.2054
Validation Accuracy for fold 3: 0.9260
Model for fold 3 saved.
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step

Fold 4
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.8750 - loss: 0.3968 - val_accuracy: 0.9260 - val_loss: 0.2026
Epoch 2/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9275 - loss: 0.2451 - val_accuracy: 0.9269 - val_loss: 0.1904
Epoch 3/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9285 - loss: 0.2233 - val_accuracy: 0.9374 - val_loss: 0.1830
Epoch 4/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9325 - loss: 0.2090 - val_accuracy: 0.9374 - val_loss: 0.1798
Epoch 5/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9320 - loss: 0.2026 - val_accuracy: 0.9393 - val_loss: 0.1764




Validation Loss for fold 4: 0.2026
Validation Accuracy for fold 4: 0.9260
Model for fold 4 saved.
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step

Fold 5
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8676 - loss: 0.3979 - val_accuracy: 0.9260 - val_loss: 0.1916
Epoch 2/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9298 - loss: 0.2371 - val_accuracy: 0.9334 - val_loss: 0.1772
Epoch 3/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9306 - loss: 0.2171 - val_accuracy: 0.9284 - val_loss: 0.1801
Epoch 4/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9286 - loss: 0.2185 - val_accuracy: 0.9400 - val_loss: 0.1732
Epoch 5/100
[1m877/877[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9361 - loss: 0.1964 - val_accuracy: 0.9502 - val_loss: 0.1659




Validation Loss for fold 5: 0.1916
Validation Accuracy for fold 5: 0.9260
Model for fold 5 saved.
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step

Cross-validation results:
Average Validation Accuracy: 0.9319
Standard Deviation of Validation Accuracy: 0.0117
Average Validation Loss: 0.1915
Standard Deviation of Validation Loss: 0.0204


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type int).