In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC, Precision, Recall
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

import gc
# Notebook display settings: never truncate printed DataFrames.
pd.options.display.max_rows = None      # show every row
pd.options.display.max_columns = None   # show every column

# Base directory of the project data files.
# NOTE(review): machine-specific absolute path; not referenced by the
# parquet-loading cell below, which reads from the working directory.
root = 'C:/Users/Pratham/Documents/GitHub/instacart-customer-purchase-prediction/data/'


In [None]:

# 🔹 Load Train and Test Data from Parquet Files
# Each parquet file is read once and split into a feature frame (X_*) and a
# label vector (y_*). Features and labels must not be the same frame: that
# would feed the label to the network as an input and give y as many columns
# as X, while the model has a single sigmoid output.
# NOTE(review): the label column name is assumed from the "Reorder" class
# names used in the confusion-matrix cell; confirm against the parquet schema.
TARGET_COL = "reordered"

train_df = pd.read_parquet("train.parquet")
test_df = pd.read_parquet("test.parquet")

# Fail early with a readable message if the assumed label column is missing.
for split_name, split_df in (("train", train_df), ("test", test_df)):
    if TARGET_COL not in split_df.columns:
        raise KeyError(
            f"Label column {TARGET_COL!r} not found in {split_name}.parquet; "
            f"available columns: {list(split_df.columns)}"
        )

X_train = train_df.drop(columns=[TARGET_COL])
y_train = train_df[TARGET_COL]
X_test = test_df.drop(columns=[TARGET_COL])
y_test = test_df[TARGET_COL]


In [None]:
# 🔹 Standardize Numerical Features
# The scaler is fitted on the training split only; the statistics it learned
# there are then applied unchanged to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# 🔹 Build Keras Neural Network Model
# Seed the Python, NumPy and backend random generators before any layer is
# created so that a Restart & Run All starts from the same initial weights.
# (Some GPU kernels may still be non-deterministic.)
SEED = 42
keras.utils.set_random_seed(SEED)

# One input unit per column of the scaled training matrix.
n_features = X_train_scaled.shape[1]

model = Sequential([
    # Explicit Input object instead of `input_shape=` on the first Dense
    # layer, which Keras 3 warns against for Sequential models.
    keras.Input(shape=(n_features,)),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification output
])



In [None]:
# 🔹 Print a summary of the model built above
model.summary()

In [None]:
# 🔹 Compile Model with Additional Metrics
# Binary cross-entropy loss optimised with Adam; accuracy, precision, recall
# and AUC are tracked alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-3),
    metrics=[
        'accuracy',
        Precision(),
        Recall(),
        AUC(name="auc"),
    ],
)


In [None]:

# 🔹 Train Model
# The test split is passed as validation data, so its metrics are reported
# after every epoch next to the training metrics.
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_test_scaled, y_test),
    verbose=1,
)




In [None]:
# 🔹 Evaluate Model
# evaluate() returns the loss followed by the metrics in the order they were
# given to compile(): accuracy, precision, recall, auc.
loss, accuracy, precision, recall, auc = model.evaluate(X_test_scaled, y_test)

# One print call, one metric per output line.
print(
    f"Test Accuracy: {accuracy:.4f}\n"
    f"Test Precision: {precision:.4f}\n"
    f"Test Recall: {recall:.4f}\n"
    f"Test AUC: {auc:.4f}"
)


In [None]:

# 🔹 Compute F1-Score from Thresholded Predictions
y_pred_probs = model.predict(X_test_scaled)
# Probabilities strictly above 0.5 become class 1, everything else class 0.
y_pred = np.greater(y_pred_probs, 0.5).astype(int)

f1 = f1_score(y_test, y_pred)
print(f"F1-Score: {f1:.4f}")



In [None]:
# 🔹 Confusion Matrix Visualization
# Rows are the true classes, columns the predicted classes.
class_labels = ["No Reorder", "Reorder"]
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Greens",
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel("Predicted Class")
ax.set_ylabel("True Class")
ax.set_title("Confusion Matrix")
plt.show()