In [28]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder




In [29]:
# Read the pre-cleaned CSV (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='cleaned_data.csv')


In [30]:
# Discard every row that contains at least one missing value.
# Rebinding the name (rather than mutating in place) keeps this cell safe to re-run.
df = df.dropna()

In [31]:
# Split the frame into the target series and the feature matrix.
# NOTE(review): 'y' is assumed to be the target column — confirm against the dataset.
y = df['y']
X = df.drop('y', axis='columns')


In [32]:
# Label-encode the target whenever it is not already numeric.
# Checking for "not numeric" (instead of dtype == 'object') also covers pandas
# string and categorical dtypes, which would otherwise reach the model as raw
# labels. Numeric (including bool) targets are left untouched, as before.
if not pd.api.types.is_numeric_dtype(y):
    label_encoder = LabelEncoder()
    # fit_transform maps each distinct label to an integer code and returns an array.
    y = label_encoder.fit_transform(y)


In [33]:
# Collect the column labels of the object-dtype (treated as categorical) features
# and of the numeric features, in that order.
categorical_cols, numerical_cols = (
    X.select_dtypes(include=[dtype_kind]).columns
    for dtype_kind in ('object', 'number')
)


In [34]:
# One-hot encode the categorical features, dropping the first level of each to
# avoid a redundant (perfectly collinear) indicator column.
# dtype=float forces the indicator columns to 0.0/1.0: recent pandas versions
# default to bool indicators, and a frame mixing bool and numeric columns can
# convert to an object-dtype array that the model cannot consume.
X_encoded = pd.get_dummies(
    X,
    columns=categorical_cols,
    drop_first=True,
    dtype=float,
)


In [35]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    random_state=42,
    test_size=0.2,
)

In [36]:
# Standardise the numeric feature columns. The scaler is fitted on the training
# split only and then applied to the test split, so no test-set statistics leak
# into training.
scaler = StandardScaler()

# Work on explicit copies: the frames returned by train_test_split may be
# flagged as derived from X_encoded, in which case column assignment can raise
# SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# StandardScaler raises on a zero-column selection, so skip scaling when the
# dataset has no numeric features.
if len(numerical_cols) > 0:
    X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

In [37]:
# Feed-forward binary classifier: two ReLU hidden layers (128 and 64 units)
# followed by a single sigmoid unit that outputs a probability.
# The input width is declared with an explicit keras.Input layer; passing
# `input_shape=` to the first Dense layer is discouraged for Sequential models
# and emits a warning.
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),  # one input per encoded feature column
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),  # Sigmoid activation for binary classification
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [38]:
# Configure training: Adam optimiser, binary cross-entropy loss (matching the
# single sigmoid output), and accuracy reported as the tracked metric.
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer='adam',
)


In [39]:
# Stop once validation loss has failed to improve for 3 epochs, and roll the
# model back to the weights from its best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Fit for at most 20 epochs in mini-batches of 32, reserving 20% of the
# training data for validation.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_split=0.2,
    callbacks=[early_stopping],
)


Epoch 1/20
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.9091 - loss: 0.2206 - val_accuracy: 0.9221 - val_loss: 0.1747
Epoch 2/20
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9225 - loss: 0.1708 - val_accuracy: 0.9216 - val_loss: 0.1739
Epoch 3/20
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.9233 - loss: 0.1659 - val_accuracy: 0.9259 - val_loss: 0.1692
Epoch 4/20
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.9282 - loss: 0.1588 - val_accuracy: 0.9248 - val_loss: 0.1723
Epoch 5/20
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9268 - loss: 0.1567 - val_accuracy: 0.9277 - val_loss: 0.1694
Epoch 6/20
[1m765/765[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9274 - loss: 0.1584 - val_accuracy: 0.9262 - val_loss: 0.1721


In [40]:
# Score the trained model on the held-out test split; evaluate returns the loss
# followed by the compiled metric (accuracy), which is then reported.
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)
print(f'Test accuracy: {test_acc:.4f}')

[1m239/239[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9204 - loss: 0.1707
Test accuracy: 0.9224
