In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the dataset
df = pd.read_csv("Cleaned-Data.csv")

In [3]:
df['Country'].replace({'China': 1, 'Italy': 2, 'Iran': 3, 'Republic of Korean':4, 'France':5,
                     'Spain':6, 'Germany':7, 'UAE':8, 'Other-EUR':9, 'Other':10   }, inplace=True)

In [5]:
# Separate features and target variable
X = df.iloc[:,0:26]
y = df.iloc[:,26]

In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Standardize the feature values (optional but often helpful)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Define the VAE architecture
original_dim = X_train.shape[1]
latent_dim = 2

In [10]:
# Encoder
encoder_inputs = keras.layers.Input(shape=(original_dim,))
encoder_layer = keras.layers.Dense(128, activation='relu')(encoder_inputs)
z_mean = keras.layers.Dense(latent_dim)(encoder_layer)
z_log_var = keras.layers.Dense(latent_dim)(encoder_layer)

In [11]:
# Sampling layer
def sampling(args):
    z_mean, z_log_var = args
    epsilon = tf.keras.backend.random_normal(shape=(tf.shape(z_mean)[0], latent_dim))
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

z = keras.layers.Lambda(sampling)([z_mean, z_log_var])

In [12]:
# Decoder
decoder_inputs = keras.layers.Input(shape=(latent_dim,))
decoder_layer = keras.layers.Dense(128, activation='relu')(decoder_inputs)
outputs = keras.layers.Dense(original_dim, activation='sigmoid')(decoder_layer)

In [13]:
# Build the VAE model
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name='encoder')
decoder = keras.Model(decoder_inputs, outputs, name='decoder')
vae_outputs = decoder(encoder(encoder_inputs)[2])
vae = keras.Model(encoder_inputs, vae_outputs, name='vae')

In [14]:
# Define the VAE loss
reconstruction_loss = tf.keras.losses.mean_squared_error(encoder_inputs, vae_outputs)
kl_loss = -0.5 * tf.reduce_mean(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
vae_loss = reconstruction_loss + kl_loss

vae.add_loss(vae_loss)

In [15]:
vae.compile(optimizer='adam')

# Train the VAE model
vae.fit(X_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x21ca7b2de20>

In [16]:
# Encode the data
encoded_X_train, _, _ = encoder.predict(X_train)
encoded_X_test, _, _ = encoder.predict(X_test)

# Build a disease prediction model using the encoded features
disease_model = keras.Sequential([
    keras.layers.Input(shape=(latent_dim,)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')  # Use 'sigmoid' for binary classification
])

disease_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the disease prediction model
disease_model.fit(encoded_X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model
test_loss, test_accuracy = disease_model.evaluate(encoded_X_test, y_test)
print("Test accuracy:", test_accuracy)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.10004734992980957
