<a href="https://colab.research.google.com/github/shahad-jeza/my-first-nerural-network---Planar_data_classification_with_one_hidden_layer/blob/main/hospitilization_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np

# Set random seed for reproducibility
np.random.seed(42)

# Function to generate random data for each feature
def generate_data(n_samples):
    data = pd.DataFrame()
    data['Age'] = np.random.randint(18, 90, size=n_samples)
    data['Gender'] = np.random.choice(['Male', 'Female'], size=n_samples)
    data['Diabetes'] = np.random.choice([0, 1], size=n_samples, p=[0.8, 0.2])
    data['Hypertension'] = np.random.choice([0, 1], size=n_samples, p=[0.7, 0.3])
    data['BMI'] = np.random.normal(loc=25, scale=5, size=n_samples)
    data['Albumin'] = np.random.normal(loc=4, scale=1, size=n_samples)
    data['Hemoglobin'] = np.random.normal(loc=14, scale=2, size=n_samples)
    data['Hospitalization'] = np.random.choice([0, 1], size=n_samples, p=[0.9, 0.1])
    return data

# Generate 400 samples of data
data = generate_data(400)

# Make classes imbalanced
data.loc[data.sample(frac=0.8).index, 'Diabetes'] = 0
data.loc[data.sample(frac=0.7).index, 'Hypertension'] = 0
data.loc[data.sample(frac=0.9).index, 'Hospitalization'] = 0

# Display the first few rows of the generated data
print(data.head())
# Save the generated data to a CSV file
data.to_csv('synthetic_data.csv', index=False)


   Age  Gender  Diabetes  Hypertension        BMI   Albumin  Hemoglobin  \
0   69    Male         0             0  28.261614  3.471383   16.531416   
1   32  Female         0             1  17.118039  4.586364   12.267650   
2   89  Female         0             0  32.382702  5.238283   15.938914   
3   78  Female         0             0  31.900457  4.021272   14.854389   
4   38  Female         0             0  21.872186  4.308833   12.707545   

   Hospitalization  
0                0  
1                0  
2                0  
3                0  
4                0  


In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LeakyReLU, BatchNormalization
from tensorflow.keras.models import Model
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load dataset
data = pd.read_csv('/content/synthetic_data.csv')

# Encode categorical variables
encoder = LabelEncoder()
data['Gender'] = encoder.fit_transform(data['Gender'])  # Assuming Gender is a categorical variable

# Define features and target variable
X = data.drop('Hospitalization', axis=1)
y = data['Hospitalization']

# Split dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Separate the minority class
X_minority = X_train_scaled[y_train == 1]
y_minority = y_train[y_train == 1]

# Convert to numpy arrays for GAN training
X_minority = np.array(X_minority)


In [7]:
# GAN parameters
latent_dim = 100
input_dim = X_minority.shape[1]

# Define the generator model
def build_generator(latent_dim, input_dim):
    generator_input = Input(shape=(latent_dim,))
    x = Dense(128)(generator_input)
    x = LeakyReLU(alpha=0.2)(x)
    x = BatchNormalization()(x)
    x = Dense(256)(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = BatchNormalization()(x)
    x = Dense(input_dim, activation='tanh')(x)
    generator = Model(generator_input, x)
    return generator

# Define the discriminator model
def build_discriminator(input_dim):
    discriminator_input = Input(shape=(input_dim,))
    x = Dense(256)(discriminator_input)
    x = LeakyReLU(alpha=0.2)(x)
    x = Dense(128)(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Dense(1, activation='sigmoid')(x)
    discriminator = Model(discriminator_input, x)
    return discriminator

# Build and compile the discriminator
discriminator = build_discriminator(input_dim)
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Build the generator
generator = build_generator(latent_dim, input_dim)

# Create the GAN model
discriminator.trainable = False
gan_input = Input(shape=(latent_dim,))
gan_output = discriminator(generator(gan_input))
gan = Model(gan_input, gan_output)
gan.compile(optimizer='adam', loss='binary_crossentropy')

# Training the GAN
def train_gan(generator, discriminator, gan, X_train, epochs=10000, batch_size=32):
    for epoch in range(epochs):
        # Generate random noise as generator input
        noise = np.random.normal(0, 1, (batch_size, latent_dim))

        # Generate fake samples
        gen_samples = generator.predict(noise)

        # Select a random batch of real samples
        idx = np.random.randint(0, X_train.shape[0], batch_size)
        real_samples = X_train[idx]

        # Create labels for real and fake samples
        real_labels = np.ones((batch_size, 1))
        fake_labels = np.zeros((batch_size, 1))

        # Train the discriminator
        d_loss_real = discriminator.train_on_batch(real_samples, real_labels)
        d_loss_fake = discriminator.train_on_batch(gen_samples, fake_labels)

        # Generate noise for the generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        valid_y = np.ones((batch_size, 1))

        # Train the generator
        g_loss = gan.train_on_batch(noise, valid_y)

        if epoch % 1000 == 0:
            print(f"Epoch: {epoch}, D Loss Real: {d_loss_real}, D Loss Fake: {d_loss_fake}, G Loss: {g_loss}")

# Train the GAN
train_gan(generator, discriminator, gan, X_minority)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch: 6000, D Loss Real: [0.42604944109916687, 1.0], D Loss Fake: [0.31071242690086365, 1.0], G Loss: 1.7572591304779053
Epoch: 7000, D Loss Real: [0.5876321792602539, 1.0], D Loss Fake: [0.6390198469161987, 0.71875], G Loss: 0.9272897243499756
Epoch: 8000, D Loss Real: [0.7216945886611938, 0.0], D Loss Fake: [0.6802090406417847, 0.59375], G Loss: 0.8853548765182495
Epoch: 9000, D Loss Real: [0.7565403580665588, 0.0], D Loss Fake: [0.7577025890350342, 0.0], G Loss: 0.8142532110214233


In [8]:
# Generate synthetic samples
noise = np.random.normal(0, 1, (len(X_minority), latent_dim))
synthetic_samples = generator.predict(noise)

# Combine synthetic samples with original data
X_train_balanced = np.vstack((X_train_scaled, synthetic_samples))
y_train_balanced = np.concatenate((y_train, np.ones(len(synthetic_samples))))




In [10]:
# Train Random Forest Classifier
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train_balanced, y_train_balanced)

# Predictions on test set
y_pred = rf_classifier.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)* 100
print("Accuracy:", accuracy)


Accuracy: 97.5


In [11]:
import joblib

# Save the trained model as a .pkl file
model_filename = 'random_forest_model.pkl'
joblib.dump(rf_classifier, model_filename)
print(f"Model saved to {model_filename}")


Model saved to random_forest_model.pkl
