In [1]:
!pip install tensorflow



In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Step 1: Load the data
data = pd.read_csv("ecommerce_user_behaviorX.csv")

In [17]:
# Step 2: Preprocessing
# Handling missing values, if any
data.fillna(method='ffill', inplace=True)

In [18]:
# Encode categorical variables
label_encoders = {}
for column in ['User ID', 'Device Type', 'Location']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

In [19]:

# Target variable encoding
target_encoder = LabelEncoder()
data['Authentication Method'] = target_encoder.fit_transform(data['Authentication Method'])

# Splitting the data into features and target
X = data.drop('Authentication Method', axis=1)
y = to_categorical(data['Authentication Method'])

In [20]:
# Standardizing the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [21]:
# Step 3: Building the model
model = Sequential([
    Dense(64, input_shape=(X_train.shape[1],), activation='relu'),
    Dense(32, activation='relu'),
    Dense(y_train.shape[1], activation='softmax')
])

In [22]:
# Compiling the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [23]:
# Step 4: Training the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7cfca48b57b0>

In [24]:
# Step 5: Evaluating the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 74.20%


In [25]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# Predicting on the test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Calculating the metrics
accuracy = accuracy_score(y_true, y_pred_classes)
precision = precision_score(y_true, y_pred_classes, average='macro')
recall = recall_score(y_true, y_pred_classes, average='macro')
f1 = f1_score(y_true, y_pred_classes, average='macro')

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


Accuracy: 74.20%
Precision: 0.78
Recall: 0.77
F1 Score: 0.77


In [29]:
def safe_encode(column, encoder, default='Unknown'):
    try:
        return encoder.transform(column)
    except ValueError:
        # If the category is unknown, assign a default label (e.g., the last label + 1)
        return [len(encoder.classes_)] * len(column)

# Modify the existing make_prediction function to handle unknown categories safely
def make_prediction(input_data):
    df = pd.DataFrame([input_data], columns=['User ID', 'Session Duration', 'Pages Visited', 'Items Viewed',
                                             'Login Frequency', 'Cart Additions', 'Successful Transactions',
                                             'Failed Transactions', 'Device Type', 'Age', 'Location'])

    # Apply encoding safely
    df['Location'] = safe_encode(df['Location'], label_encoders['Location'])
    df['Device Type'] = safe_encode(df['Device Type'], label_encoders['Device Type'])
    df['User ID'] = safe_encode(df['User ID'], label_encoders['User ID'])

    # Standard scaling
    df = scaler.transform(df)

    # Make prediction
    prediction = model.predict(df)
    predicted_class = np.argmax(prediction)
    predicted_auth_method = target_encoder.inverse_transform([predicted_class])[0]
    return predicted_auth_method

# Example input data
input_data = {
    'User ID': 50, 'Session Duration': 10, 'Pages Visited': 2, 'Items Viewed': 1,
    'Login Frequency': 1, 'Cart Additions': 2, 'Successful Transactions': 1,
    'Failed Transactions': 0, 'Device Type': 'Mobile', 'Age': 30, 'Location': 'USA'
}

predicted_auth_method = make_prediction(input_data)
print(f"Predicted Authentication Method: {predicted_auth_method}")

Predicted Authentication Method: OTP


In [30]:
# Optionally save the model
model.save('authentication_model.h5')

  saving_api.save_model(
