In [2]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder




In [3]:
# Loading the dataset
df = pd.read_csv('cleaned_data.csv')


In [4]:
# Ensuring there are no missing values
df.dropna(inplace=True)

In [5]:
# Separating features (X) and target variable (y)
X = df.drop(columns=['y'])  # Assuming 'y' is the target column
y = df['y']


In [6]:
# Encoding categorical target variable if it is not numeric
if y.dtype == 'object':
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)  # Convert categorical labels to numbers


In [7]:
# Identifying categorical and numerical columns
categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include=['number']).columns


In [8]:
# Applying One-Hot Encoding to categorical features
X_encoded = pd.get_dummies(X, columns=categorical_cols, drop_first=True)  # Convert categorical to numeric


In [9]:
# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

In [10]:
# Scaling numerical features
scaler = StandardScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

In [11]:
# Defining model architecture
model = keras.Sequential([
    keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')  # Sigmoid activation for binary classification
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
# Compiling the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',  # Use binary_crossentropy for classification
              metrics=['accuracy'])


In [13]:
# Training the model
history = model.fit(X_train, y_train,
                    epochs=20,
                    validation_split=0.2,
                    batch_size=32,
                    callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)])


Epoch 1/20
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9067 - loss: 0.2366 - val_accuracy: 0.9172 - val_loss: 0.1762
Epoch 2/20
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9220 - loss: 0.1691 - val_accuracy: 0.9248 - val_loss: 0.1693
Epoch 3/20
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9245 - loss: 0.1610 - val_accuracy: 0.9248 - val_loss: 0.1704
Epoch 4/20
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9252 - loss: 0.1602 - val_accuracy: 0.9261 - val_loss: 0.1678
Epoch 5/20
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9275 - loss: 0.1602 - val_accuracy: 0.9256 - val_loss: 0.1665
Epoch 6/20
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9266 - loss: 0.1580 - val_accuracy: 0.9230 - val_loss: 0.1724
Epoch 7/20
[1m765/765[0m 

In [14]:
# Evaluating the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc:.4f}')

[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9226 - loss: 0.1691
Test accuracy: 0.9238


In [15]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Make predictions (convert probabilities to binary values)
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")  # Convert probabilities to 0 or 1

# Compute evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print evaluation results
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

# Display Confusion Matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Accuracy: 0.9238
Precision: 0.6361
Recall: 0.5474
F1 Score: 0.5884

Confusion Matrix:
[[6644  238]
 [ 344  416]]
