In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib
import matplotlib.pyplot as plt

In [None]:
# Make the Google Drive folder visible to the Colab runtime
from google.colab import drive
drive.mount('/content/drive')

# Load the sample spreadsheet and keep only rows that have no missing values
data_path = "/content/drive/My Drive/Senior Design/Pathology_Project/Student_Version.xlsx"
df = pd.read_excel(data_path).dropna()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Extract features and target
# X collects the second column of every spectrum CSV, y the matching "Survival" value.
X = []
y = []
# (sample id, error) for every spectrum that could not be loaded, so that
# dropped samples are visible instead of disappearing silently.
skipped_samples = []

for index, sample in df["sample"].items():
    spectrum_path = f"/content/drive/My Drive/Senior Design/Pathology_Project/713_1217Spectra_csv_Tranformed/sample{sample}.csv"
    try:
        data = pd.read_csv(spectrum_path, header=None, names=["Column1", "Column2"])
        # Look the label up *before* appending anything, so a failure here can
        # never leave X one element longer than y.
        target = df.loc[index, "Survival"]
    except Exception as exc:
        # Loading stays best-effort (a bad sample is skipped, not fatal),
        # but the reason is recorded for the summary below.
        skipped_samples.append((sample, repr(exc)))
        continue
    X.append(data["Column2"])
    y.append(target)

print(f"Loaded {len(X)} spectra; skipped {len(skipped_samples)}.")
for sample, reason in skipped_samples:
    print(f"  sample {sample}: {reason}")

In [None]:
# Turn the collected feature rows and labels into numpy arrays
X, y = np.array(X), np.array(y)

In [None]:
# Standardize each feature column of X.
# Note: the scaler's statistics are learned from every row of X.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Train Test Split
# Split the raw (unscaled) features, with a fixed seed so the held-out set --
# and therefore the reported test MAE -- is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=50)

# Fit the scaler on the training rows only and reuse those statistics for the
# test rows. Splitting an array that was scaled with statistics from *all* rows
# would leak information about the test set into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Neural Network Model
tf.random.set_seed(50)

# One input per feature column of the training matrix
n_features = X_train.shape[1]

# Batch-norm on the inputs, two ReLU hidden layers each followed by dropout,
# and a single linear output unit.
layer_stack = [
    BatchNormalization(input_shape=(n_features,)),
    Dense(units=100, activation="relu"),
    Dropout(rate=0.2),
    Dense(units=50, activation="relu"),
    Dropout(rate=0.1),
    Dense(units=1),
]
model = tf.keras.Sequential(layer_stack)

# Mean absolute error is both the training loss and the reported metric;
# Adam is used with its default settings.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.mae,
    metrics=["mae"],
)


In [None]:
# Halve the learning rate when val_loss has not improved for 5 epochs,
# never going below 1e-6.
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=5,
    min_lr=1e-6,
)


In [None]:
# Early stopping to prevent overfitting.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss when training stops; without it the model keeps the weights from the
# last epoch, i.e. `patience` epochs past the best one, and those are the
# weights that get evaluated afterwards.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# The last 20% of the training rows are held out as the validation set that
# both callbacks monitor.
history = model.fit(X_train, y_train, epochs=100, validation_split=0.2, callbacks=[early_stopping, lr_scheduler])


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


In [None]:
# Draw every series recorded during training against the epoch index
history_frame = pd.DataFrame(history.history)
ax = history_frame.plot()
ax.set_ylabel("Loss Function")
ax.set_xlabel("Epochs")

In [None]:
# Score the model on the held-out test rows; evaluate() returns the loss
# followed by the compiled "mae" metric.
test_loss, mae = model.evaluate(X_test, y_test)
print("Mean Absolute Error:", mae)

In [None]:
# # Save the improved model
# model.save("/content/drive/My Drive/Bishal/improve_accuracy2.h5")