In [4]:
import pandas as pd
import numpy as np
import random

# Generate synthetic dataset
num_samples = 1000

# Patient Information
patient_ids = [f"P{str(i).zfill(5)}" for i in range(1, num_samples + 1)]
ages = np.random.randint(18, 90, num_samples)
genders = np.random.choice(["Male", "Female"], num_samples)
medical_history = np.random.choice(["Diabetes", "Hypertension", "Cancer", "None"], num_samples)

drug_names = np.random.choice(["DrugA", "DrugB", "DrugC", "DrugD"], num_samples)
dosages = np.random.randint(50, 500, num_samples)
treatment_durations = np.random.randint(5, 60, num_samples)
effectiveness = np.random.uniform(0, 100, num_samples)
side_effects = np.random.choice(["None", "Nausea", "Dizziness", "Fatigue"], num_samples)

disease_types = np.random.choice(["Lung Cancer", "Breast Cancer", "Diabetes", "Heart Disease"], num_samples)
genetic_markers = np.random.choice(["MarkerA", "MarkerB", "MarkerC", "MarkerD"], num_samples)

# Treatment Outcome
response_to_treatment = np.random.choice(["Positive", "Negative"], num_samples)
success_rates = np.random.uniform(50, 100, num_samples)

# Create DataFrame
dataset = pd.DataFrame({
    "Patient_ID": patient_ids,
    "Age": ages,
    "Gender": genders,
    "Medical_History": medical_history,
    "Drug_Name": drug_names,
    "Dosage_mg": dosages,
    "Treatment_Duration_days": treatment_durations,
    "Effectiveness_%": effectiveness,
    "Side_Effects": side_effects,
    "Disease_Type": disease_types,
    "Genetic_Marker": genetic_markers,
    "Response_to_Treatment": response_to_treatment,
    "Success_Rate_%": success_rates
})

# Save to CSV
dataset.to_csv("synthetic_medical_dataset.csv", index=False)

print("Dataset generated successfully!")


Dataset generated successfully!


In [5]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load dataset
dataset = pd.read_csv("synthetic_medical_dataset.csv")

# Encode categorical variables
label_encoders = {}
categorical_columns = ["Gender", "Medical_History", "Drug_Name", "Side_Effects", "Disease_Type", "Genetic_Marker", "Response_to_Treatment"]

for col in categorical_columns:
    le = LabelEncoder()
    dataset[col] = le.fit_transform(dataset[col])
    label_encoders[col] = le

# Selecting features and target variables
X = dataset.drop(columns=["Patient_ID", "Response_to_Treatment"]).values
y = dataset["Response_to_Treatment"].values

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Reshape for CNN input (assuming 1D features per patient)
X = np.expand_dims(X, axis=2)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build CNN model
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X.shape[1], 1)),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    
    Conv1D(filters=128, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.2f}")

# Save the model
model.save("cnn_drug_discovery_model.h5")
print("Model saved successfully!")


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test Accuracy: 0.49
Model saved successfully!


  saving_api.save_model(


In [6]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the trained model
model_path = r"cnn_drug_discovery_model.h5"
model = load_model(model_path)
print("Model loaded successfully!")

# Define label encoders for categorical values
label_encoders = {
    "Gender": LabelEncoder().fit(["Male", "Female"]),
    "Medical_History": LabelEncoder().fit(["Diabetes", "Hypertension", "Cancer", "None"]),
    "Drug_Name": LabelEncoder().fit(["DrugA", "DrugB", "DrugC", "DrugD"]),
    "Side_Effects": LabelEncoder().fit(["None", "Nausea", "Dizziness", "Fatigue"]),
    "Disease_Type": LabelEncoder().fit(["Lung Cancer", "Breast Cancer", "Diabetes", "Heart Disease"]),
    "Genetic_Marker": LabelEncoder().fit(["MarkerA", "MarkerB", "MarkerC", "MarkerD"])
}

# Define a scaler (use values from training phase if available)
scaler = StandardScaler()

# Example input values
input_data = {
    "Age": 45,
    "Gender": "Male",
    "Medical_History": "Diabetes",
    "Drug_Name": "DrugA",
    "Dosage_mg": 200,
    "Treatment_Duration_days": 30,
    "Effectiveness_%": 85.4,
    "Side_Effects": "Nausea",
    "Disease_Type": "Lung Cancer",
    "Genetic_Marker": "MarkerB"
}

# Encode categorical values
for key in label_encoders:
    if key in input_data:
        input_data[key] = label_encoders[key].transform([input_data[key]])[0]

# Convert input data to array
input_array = np.array(list(input_data.values())).reshape(1, -1)

# Normalize input features (use values from training phase if available)
input_array = scaler.fit_transform(input_array)  # Use transform() instead of fit_transform() if scaler was previously trained

# Reshape for CNN input
input_array = np.expand_dims(input_array, axis=2)

# Make prediction
prediction = model.predict(input_array)
predicted_class = (prediction > 0.5).astype(int)

print(f"Predicted Response: {predicted_class[0][0]}")


Model loaded successfully!


ValueError: in user code:

    File "c:\Users\TWINKLE ROY\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\src\engine\training.py", line 2341, in predict_function  *
        return step_function(self, iterator)
    File "c:\Users\TWINKLE ROY\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\src\engine\training.py", line 2327, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\TWINKLE ROY\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\src\engine\training.py", line 2315, in run_step  **
        outputs = model.predict_step(data)
    File "c:\Users\TWINKLE ROY\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\src\engine\training.py", line 2283, in predict_step
        return self(x, training=False)
    File "c:\Users\TWINKLE ROY\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\TWINKLE ROY\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\src\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_1" is incompatible with the layer: expected shape=(None, 11, 1), found shape=(None, 10, 1)


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the trained model
model_path = r"cnn_drug_discovery_model.h5"
model = load_model(model_path)
print("Model loaded successfully!")

# Define label encoders for categorical values
label_encoders = {
    "Gender": LabelEncoder().fit(["Male", "Female"]),
    "Medical_History": LabelEncoder().fit(["Diabetes", "Hypertension", "Cancer", "None"]),
    "Drug_Name": LabelEncoder().fit(["DrugA", "DrugB", "DrugC", "DrugD"]),
    "Side_Effects": LabelEncoder().fit(["None", "Nausea", "Dizziness", "Fatigue"]),
    "Disease_Type": LabelEncoder().fit(["Lung Cancer", "Breast Cancer", "Diabetes", "Heart Disease"]),
    "Genetic_Marker": LabelEncoder().fit(["MarkerA", "MarkerB", "MarkerC", "MarkerD"])
}

# Define a scaler (use values from training phase if available)
scaler = StandardScaler()

# Example input values for drug effectiveness prediction
input_data = {
    "Age": 45,
    "Gender": "Male",
    "Medical_History": "Diabetes",
    "Drug_Name": "DrugB",
    "Dosage_mg": 100,
    "Treatment_Duration_days": 40,
    "Effectiveness_%": 15.4,
    "Side_Effects": "Nausea",
    "Disease_Type": "Lung Cancer",
    "Genetic_Marker": "MarkerB"
}

# Encode categorical values
for key in label_encoders:
    if key in input_data:
        input_data[key] = label_encoders[key].transform([input_data[key]])[0]

# Convert input data to array
input_array = np.array(list(input_data.values())).reshape(1, -1)

# Normalize input features (use values from training phase if available)
input_array = scaler.fit_transform(input_array)  # Use transform() instead of fit_transform() if scaler was previously trained

# Reshape for CNN input
input_array = np.expand_dims(input_array, axis=2)

# Make prediction for drug effectiveness
prediction = model.predict(input_array)
effectiveness_score = prediction[0][0] * 100  # Convert to percentage

print(f"Predicted Drug Effectiveness: {effectiveness_score:.2f}%")


Model loaded successfully!


ValueError: in user code:

    File "c:\Users\TWINKLE ROY\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\src\engine\training.py", line 2341, in predict_function  *
        return step_function(self, iterator)
    File "c:\Users\TWINKLE ROY\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\src\engine\training.py", line 2327, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\TWINKLE ROY\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\src\engine\training.py", line 2315, in run_step  **
        outputs = model.predict_step(data)
    File "c:\Users\TWINKLE ROY\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\src\engine\training.py", line 2283, in predict_step
        return self(x, training=False)
    File "c:\Users\TWINKLE ROY\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\TWINKLE ROY\AppData\Local\Programs\Python\Python38\lib\site-packages\keras\src\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_2" is incompatible with the layer: expected shape=(None, 11, 1), found shape=(None, 10, 1)


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import joblib  # Import joblib for saving the scaler

# Load dataset
dataset = pd.read_csv("synthetic_medical_dataset.csv")

# Encode categorical variables
label_encoders = {}
categorical_columns = ["Gender", "Medical_History", "Drug_Name", "Side_Effects", "Disease_Type", "Genetic_Marker", "Response_to_Treatment"]

for col in categorical_columns:
    le = LabelEncoder()
    dataset[col] = le.fit_transform(dataset[col])
    label_encoders[col] = le

# Selecting features and target variables
# Selecting features and target variables
import joblib

# Selecting features (Ensure "Response_to_Treatment" is excluded)
X = dataset.drop(columns=["Patient_ID", "Response_to_Treatment"]).values
y = dataset["Response_to_Treatment"].values

# Save feature names (to ensure consistency during prediction)
feature_names = list(dataset.drop(columns=["Patient_ID", "Response_to_Treatment"]).columns)
joblib.dump(feature_names, "feature_names.pkl")  # Save feature names

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Save the trained scaler
joblib.dump(scaler, "scalers.pkl")
print("Scaler and feature names saved successfully!")



# Reshape for CNN input (assuming 1D features per patient)
X = np.expand_dims(X, axis=2)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build CNN model
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X.shape[1], 1)),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),
    
    Conv1D(filters=128, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc:.2f}")

# Save the model
model.save("cnn_drug_discovery_model.h5")
print("Model saved successfully!")


Scaler and feature names saved successfully!
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test Accuracy: 0.50
Model saved successfully!


  saving_api.save_model(


In [None]:
import joblib
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder

# Load model, scaler, and feature names
model = load_model("cnn_drug_discovery_model.h5")
scaler = joblib.load("scalers.pkl")
expected_features = joblib.load("feature_names.pkl")  # Load expected feature names
print("Model, scaler, and feature names loaded successfully!")

# Print expected features for debugging
print("Expected Features from Training:", expected_features)

# Define label encoders
label_encoders = {
    "Gender": LabelEncoder().fit(["Male", "Female"]),
    "Medical_History": LabelEncoder().fit(["Diabetes", "Hypertension", "Cancer", "None"]),
    "Drug_Name": LabelEncoder().fit(["DrugA", "DrugB", "DrugC", "DrugD"]),
    "Side_Effects": LabelEncoder().fit(["None", "Nausea", "Dizziness", "Fatigue"]),
    "Disease_Type": LabelEncoder().fit(["Lung Cancer", "Breast Cancer", "Diabetes", "Heart Disease"]),
    "Genetic_Marker": LabelEncoder().fit(["MarkerA", "MarkerB", "MarkerC", "MarkerD"])
}

# Example input values (Ensure all expected features are included)
input_data = {
    "Age": 45,
    "Gender": "Male",
    "Medical_History": "Diabetes",
    "Drug_Name": "DrugA",
    "Dosage_mg": 900,
    "Treatment_Duration_days": 10,
    "Effectiveness_%": 95.4,
    "Side_Effects": "Nausea",
    "Disease_Type": "Lung Cancer",
    "Genetic_Marker": "MarkerB"
}

# Encode categorical values
for key in label_encoders:
    if key in input_data:
        input_data[key] = label_encoders[key].transform([input_data[key]])[0]

# Ensure input_data has all expected features
for feature in expected_features:
    if feature not in input_data:
        print(f"Warning: Missing feature '{feature}' in input data. Assigning default value 0.")
        input_data[feature] = 0  # Default value (adjust if needed)

# Convert input data to NumPy array in correct order
input_array = np.array([input_data[feature] for feature in expected_features]).reshape(1, -1)

# Verify feature count consistency
if input_array.shape[1] != scaler.n_features_in_:
    raise ValueError(f"Feature mismatch: Expected {scaler.n_features_in_}, but got {input_array.shape[1]}.")

# Normalize input using the pre-trained scaler
input_array = scaler.transform(input_array)

# Reshape for CNN input
input_array = np.expand_dims(input_array, axis=2)

# Make prediction
prediction = model.predict(input_array)
effectiveness_score = prediction[0][0] * 100  # Convert to percentage

print(f"Predicted Drug Effectiveness: {effectiveness_score:.2f}%")


Model, scaler, and feature names loaded successfully!
Expected Features from Training: ['Age', 'Gender', 'Medical_History', 'Drug_Name', 'Dosage_mg', 'Treatment_Duration_days', 'Effectiveness_%', 'Side_Effects', 'Disease_Type', 'Genetic_Marker', 'Success_Rate_%']
Predicted Drug Effectiveness: 69.87%
