<a href="https://colab.research.google.com/github/ogbidaniel/CodeLibrary/blob/main/Recurrent_Neural_Network_for_Breast_Cancer_Recurrence_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Notebook Outline

1. Setup Notebook
2. Load Data
3. Define Model in PyTorch
4. Train Model
5. Evaluate

## Setup Notebook

In [None]:
# Connect your Google Drive to the notebook runtime.
# After this cell runs, Drive is mounted at /content/drive, which is the
# prefix the data-loading cell reads the CSV from.
# NOTE(review): the `google.colab` package is presumably only importable
# inside a Colab runtime - confirm before running this notebook elsewhere.

from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

## Load the dataset into the Notebook using Pandas

In [None]:
# Load the gene-expression table from the mounted Google Drive into the
# `data` DataFrame. The path is a placeholder: point it at your own CSV.
# The split cell below expects this table to contain a 'DFS_STATUS' column.

data = pd.read_csv('/content/drive/MyDrive/gene_expression.csv')


## Split the data
Training `80%`, testing `20%`.

In [None]:
# `data` is the DataFrame loaded in the previous cell.
# Separate features (every column except the target) from the target column.
# NOTE(review): assumes all remaining columns are numeric and that DFS_STATUS
# is already encoded as 0/1 - TODO confirm against the CSV.
X = data.drop(columns=['DFS_STATUS'])
y = data['DFS_STATUS']

# Split FIRST (80% train / 20% test, fixed seed for a repeatable split).
# Scaling before splitting would let test-set statistics leak into the
# preprocessing and make the evaluation optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same
# (train-derived) mean/std to the held-out test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Convert to PyTorch tensors (float32 for both features and targets, since
# the loss used later takes floating-point targets).
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

## Define the Dataset class and DataLoaders

In [None]:
class BreastCancerDataset(Dataset):
    """Map-style dataset pairing a feature container with a target container.

    Both containers are stored as given and indexed with the same key, so
    item ``i`` is ``(X[i], y[i])``. The dataset length is the length of ``y``.
    """

    def __init__(self, X, y):
        """Keep references to the features ``X`` and the targets ``y``."""
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of targets."""
        targets = self.y
        return len(targets)

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        features = self.X[index]
        target = self.y[index]
        return features, target

In [None]:
# Training split: wrap the tensors, then batch them in a fresh random order
# on every pass over the loader.
train_dataset = BreastCancerDataset(X=X_train_tensor, y=y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# Test split: same batch size, but iterated in stored order.
test_dataset = BreastCancerDataset(X=X_test_tensor, y=y_test_tensor)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

## Define the Recurrent Neural Network Model

In [None]:
class RNNModel(nn.Module):
    """Stacked ``nn.RNN`` followed by a linear read-out of the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each recurrent layer's hidden state.
        num_layers: Number of stacked recurrent layers.
        output_size: Number of values produced per sample by the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        # Kept as attributes so callers can inspect the configuration.
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True -> inputs/outputs are (batch, seq_len, features).
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the RNN over ``x`` and project the final time step.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)`` holding raw (un-activated)
            outputs of the linear head.
        """
        # When no initial hidden state is passed, nn.RNN starts from zeros
        # that already match the input's device and dtype. Building a float32
        # zeros tensor by hand (as before) broke as soon as the model/input
        # were cast to another dtype, e.g. model.double().
        out, _ = self.rnn(x)

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

input_size = X_train.shape[1]  # Number of features
hidden_size = 128  # Number of hidden units in RNN
num_layers = 2  # Number of RNN layers
output_size = 1  # Binary classification

# Seed torch's global RNG immediately before building the model so the
# initial weights are identical on every Restart & Run All. Later draws from
# the same global RNG also become repeatable on a fresh run.
torch.manual_seed(42)

model = RNNModel(input_size, hidden_size, num_layers, output_size)

## Training Loop

In [None]:
# Optimizer: Adam over every parameter of the model, learning rate 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Loss: binary cross-entropy computed directly from raw logits (the sigmoid
# is applied inside the loss, so the model must output un-activated scores).
criterion = nn.BCEWithLogitsLoss()

In [None]:
# Training the model
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    samples_seen = 0

    for X_batch, y_batch in train_loader:
        # Reshape input tensor to have the shape (batch_size, sequence_length, input_size);
        # each sample is fed to the RNN as a sequence of length 1.
        X_batch = X_batch.view(X_batch.size(0), 1, -1)

        # Forward pass
        outputs = model(X_batch)
        # Drop only the trailing output dimension. A bare .squeeze() would
        # also remove the batch dimension when the last batch holds a single
        # sample, and the loss would then reject the input/target shape mismatch.
        loss = criterion(outputs.squeeze(-1), y_batch)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by batch size so the
        # epoch figure is a true per-sample average even with a short last batch.
        batch_size = y_batch.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Report the mean loss over the whole epoch rather than only the last
    # mini-batch (also avoids a NameError when the loader yields no batches).
    epoch_loss = running_loss / max(samples_seen, 1)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

## Evaluation

In [None]:
# Evaluate on the held-out test set.
model.eval()
y_pred = []
y_true = []
with torch.no_grad():  # inference only - no gradients needed
    for X_batch, y_batch in test_loader:
        # Reshape input tensor to have the shape (batch_size, sequence_length, input_size)
        X_batch = X_batch.view(X_batch.size(0), 1, -1)

        # Predict: logits -> probabilities -> hard 0/1 labels.
        outputs = model(X_batch)
        # squeeze(-1) keeps the batch dimension even for a batch of one;
        # a bare .squeeze() would yield a 0-d tensor whose .tolist() is a
        # plain float, which list.extend() cannot consume.
        predictions = torch.round(torch.sigmoid(outputs.squeeze(-1)))
        y_pred.extend(predictions.tolist())
        y_true.extend(y_batch.tolist())

# Convert to numpy arrays
y_pred = np.array(y_pred)
y_true = np.array(y_true)

# Calculate accuracy (fraction of predictions equal to the true label)
accuracy = (y_pred == y_true).mean()
print(f'Accuracy: {accuracy:.4f}')