<a href="https://colab.research.google.com/github/shahad-jeza/my-first-nerural-network---Planar_data_classification_with_one_hidden_layer/blob/main/GAN_on_ckd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Install necessary libraries
!pip install pandas scikit-learn tensorflow numpy

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras import layers, models

# Load dataset
data_url = 'kidney_disease.csv'
data = pd.read_csv(data_url)

# Preprocess data
label_encoder = LabelEncoder()
for column in ['rbc', 'pc', 'pcc', 'ba', 'htn', 'dm', 'cad', 'appet', 'pe', 'ane', 'classification']:
    data[column] = label_encoder.fit_transform(data[column])

# Handle missing values for numeric columns
numeric_columns = data.select_dtypes(include=np.number).columns.tolist()
data[numeric_columns] = data[numeric_columns].fillna(data[numeric_columns].mean())

# Handle missing values for categorical columns (if any)
categorical_columns = data.select_dtypes(exclude=np.number).columns.tolist()
data[categorical_columns] = data[categorical_columns].fillna("Unknown")

# Encode categorical variables
data_encoded = pd.get_dummies(data, columns=categorical_columns)

# Separate features and labels
X = data_encoded.drop(columns=['id', 'classification'])
y = data_encoded['classification']

# Normalize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)



In [None]:
# GAN model to balance the dataset
latent_dim = 32

def build_generator(latent_dim, n_outputs):
    model = tf.keras.Sequential()
    model.add(layers.Dense(128, activation='relu', input_dim=latent_dim))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(n_outputs, activation='linear'))
    return model

def build_discriminator(n_inputs):
    model = tf.keras.Sequential()
    model.add(layers.Dense(512, activation='relu', input_dim=n_inputs))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    return model

generator = build_generator(latent_dim, X_train.shape[1])
discriminator = build_discriminator(X_train.shape[1])
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

discriminator.trainable = False

gan_input = layers.Input(shape=(latent_dim,))
generated_data = generator(gan_input)
gan_output = discriminator(generated_data)
gan = tf.keras.Model(gan_input, gan_output)
gan.compile(optimizer='adam', loss='binary_crossentropy')

def generate_fake_samples(generator, latent_dim, n_samples):
    input = np.random.randn(latent_dim * n_samples)
    input = input.reshape(n_samples, latent_dim)
    X = generator.predict(input)
    y = np.zeros((n_samples, 1))
    return X, y

def generate_real_samples(n_samples):
    idx = np.random.randint(0, X_train.shape[0], n_samples)
    X = X_train[idx]
    y = np.ones((n_samples, 1))
    return X, y

def train_gan(gan, generator, discriminator, latent_dim, n_epochs=1000, n_batch=128):
    half_batch = int(n_batch / 2)
    for i in range(n_epochs):
        X_real, y_real = generate_real_samples(half_batch)
        X_fake, y_fake = generate_fake_samples(generator, latent_dim, half_batch)
        discriminator.train_on_batch(X_real, y_real)
        discriminator.train_on_batch(X_fake, y_fake)
        X_gan = np.random.randn(latent_dim * n_batch)
        X_gan = X_gan.reshape(n_batch, latent_dim)
        y_gan = np.ones((n_batch, 1))
        gan.train_on_batch(X_gan, y_gan)

train_gan(gan, generator, discriminator, latent_dim)

n_samples = len(X_train) - sum(y_train == 0)
X_fake, _ = generate_fake_samples(generator, latent_dim, n_samples)
y_fake = np.ones((n_samples, ))

X_balanced = np.vstack((X_train, X_fake))
y_balanced = np.hstack((y_train, y_fake))



In [7]:
from sklearn.ensemble import RandomForestClassifier

# Initialize Random Forest classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

# Evaluate the model
rf_accuracy = rf_model.score(X_test, y_test)
print(f"Random Forest Test Accuracy: {rf_accuracy * 100:.2f}%")


Random Forest Test Accuracy: 97.50%


In [8]:
from sklearn.ensemble import RandomForestClassifier

# Initialize Random Forest classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on the balanced dataset
rf_model.fit(X_balanced, y_balanced)

# Evaluate the model on the test data
rf_accuracy = rf_model.score(X_test, y_test)
print(f"Random Forest Test Accuracy on Balanced Dataset: {rf_accuracy * 100:.2f}%")


Random Forest Test Accuracy on Balanced Dataset: 96.25%
