In [None]:
"""
[V1]
* resnest50_fast_2s2x40d
* Add Max./Min. Channels

[V2]
* 
* 
* 

[TODO]
* Separate gene expression, cell vaibility and other features
* PCGrad (Project Conflicting Gradients)
* Tuning resolution and image size

ResNeSt:
https://github.com/zhanghang1989/ResNeSt
"""

In [2]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier
import lightgbm as lgb
import torch
import torch.nn as nn
import torch.optim as optim
from warnings import filterwarnings
filterwarnings(action='ignore')

# Load the Breast Cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target
trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.2)
# Scale the features using a standard scaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert the data to PyTorch tensors
X_tensor = torch.Tensor(X_scaled)
y_tensor = torch.Tensor(y)

# Define the VAE architecture using PyTorch
class VAE(nn.Module):
    def __init__(self, input_dim, encoding_dim):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, encoding_dim),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, input_dim),
            nn.ReLU()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Define the VAE model
input_dim = X_scaled.shape[1]
encoding_dim = 10
vae = VAE(input_dim, encoding_dim)

# Define the loss function for VAE
criterion = nn.MSELoss()
optimizer = optim.Adam(vae.parameters(), lr=0.001)

# Define the number of folds for k-fold cross-validation
num_folds = 5
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Initialize lists to store results from each fold
vae_classifiers = []
lgb_classifiers = []

# Perform k-fold cross-validation
for train_indices, val_indices in kfold.split(X_scaled):
    X_train, X_val = X_scaled[train_indices], X_scaled[val_indices]
    y_train, y_val = y[train_indices], y[val_indices]

    X_train_tensor = torch.Tensor(X_train)
    y_train_tensor = torch.Tensor(y_train)
    X_val_tensor = torch.Tensor(X_val)

    # Train the VAE
    num_epochs = 50
    batch_size = 32
    for epoch in range(num_epochs):
        for i in range(0, len(X_train), batch_size):
            inputs = X_train_tensor[i:i+batch_size]
            targets = X_train_tensor[i:i+batch_size]

            # Forward pass
            outputs = vae(inputs)
            loss = criterion(outputs, targets)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Extract features using the encoder part of the VAE
    X_train_encoded = vae.encoder(X_train_tensor).detach().numpy()
    X_val_encoded = vae.encoder(X_val_tensor).detach().numpy()

    # Train the VAE as a classifier
    vae_classifier = MLPClassifier(hidden_layer_sizes=(32, 16), activation='relu', random_state=42)
    vae_classifier.fit(X_train_encoded, y_train)

    # Train LightGBM classifier
    lgb_classifier = lgb.LGBMClassifier()
    lgb_classifier.fit(X_train, y_train)

    # Append the classifiers to the respective lists
    vae_classifiers.append(vae_classifier)
    lgb_classifiers.append(lgb_classifier)



In [3]:
from sklearn.metrics import accuracy_score
# Create an ensemble of models
ensemble_model = VotingClassifier(
    estimators=[('vae', vae_classifiers[0]), ('lgb', lgb_classifiers[0])],
    voting='hard'
)

# Train the ensemble model
ensemble_model.fit(X_scaled, y)

# Predict using the ensemble model
ensemble_pred = ensemble_model.predict(X_scaled)

# Evaluate the ensemble model
ensemble_accuracy = accuracy_score(y, ensemble_pred)
print("Ensemble Accuracy:{}%".format(ensemble_accuracy*100))

Ensemble Accuracy:100.0%


In [4]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_breast_cancer
from torch.utils.data import DataLoader, TensorDataset

# Step 1: Prepare the Data
data = load_breast_cancer()
features = data.data
labels = data.target

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features, labels, test_size=0.2, random_state=42)

# Step 2: Build and Train VAE
# Define VAE architecture and training process (encoder and decoder)

# Step 3: Extract Latent Features
# Use the trained encoder to extract latent features from input data

# Step 4: Prepare the Reduced Feature Dataset
# Combine extracted latent features with labels
train_dataset = TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train))
val_dataset = TensorDataset(torch.Tensor(X_val), torch.Tensor(y_val))

# Step 5: Build and Train Neural Network
# Define neural network architecture for classification

class Net(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

input_dim = X_train.shape[1]  # Dimension of the input features
hidden_dim = 64  # Number of hidden units in the neural network
num_classes = len(np.unique(labels))  # Number of classes in the dataset

model = Net(input_dim, hidden_dim, num_classes)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Create data loaders for training and validation sets
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Train the neural network
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets.long())
        loss.backward()
        optimizer.step()
    
    # Evaluate on validation set
    model.eval()
    with torch.no_grad():
        val_loss = 0
        val_correct = 0
        total_samples = 0
        for inputs, targets in val_loader:
            outputs = model(inputs)
            val_loss += criterion(outputs, targets.long()).item()
            _, predicted = torch.max(outputs, 1)
            val_correct += (predicted == targets).sum().item()
            total_samples += targets.size(0)
        
        val_loss /= len(val_loader)
        val_accuracy = val_correct / total_samples
        
        print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")


Epoch [1/10], Validation Loss: 1.4485, Validation Accuracy: 0.8158
Epoch [2/10], Validation Loss: 0.6308, Validation Accuracy: 0.7719
Epoch [3/10], Validation Loss: 0.3629, Validation Accuracy: 0.8947
Epoch [4/10], Validation Loss: 0.3054, Validation Accuracy: 0.8772
Epoch [5/10], Validation Loss: 0.6064, Validation Accuracy: 0.7456
Epoch [6/10], Validation Loss: 0.3072, Validation Accuracy: 0.8860
Epoch [7/10], Validation Loss: 0.1904, Validation Accuracy: 0.9123
Epoch [8/10], Validation Loss: 0.1881, Validation Accuracy: 0.9298
Epoch [9/10], Validation Loss: 0.2328, Validation Accuracy: 0.8947
Epoch [10/10], Validation Loss: 0.1429, Validation Accuracy: 0.9298
