In [1]:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import h5py
import gc
import time


gc.collect()  # Clear unused memory
try:
    torch.cuda.empty_cache()
except RuntimeError as e:
    print(f"Warning: Unable to empty cache. Error: {e}")


start = time.time()

In [2]:
# Load the .h5 file into memory once
h5_file_path_train = r"C:\Users\nadav.k\Documents\DS\DL_classification\classification_data\training_10perc_of_20_subset.h5"
h5_file_path_test = r"C:\Users\nadav.k\Documents\DS\DL_classification\classification_data\testing_10perc_of_20_subset.h5"

# Open the H5 files
h5_train = h5py.File(h5_file_path_train, 'r')
h5_test = h5py.File(h5_file_path_test, 'r')

# Extract datasets
train_sen1_data = h5_train['sen1']
train_sen2_data = h5_train['sen2']
train_labels = h5_train['label']
# train_labels = h5_train['new_labels']

test_sen1_data = h5_test['sen1']
test_sen2_data = h5_test['sen2']
test_labels = h5_test['label']
# test_labels = h5_test['new_labels']


In [3]:
class SatelliteDataset(Dataset):
    def __init__(self, sen1_data, sen2_data, labels):
        self.sen1_data = sen1_data
        self.sen2_data = sen2_data
        self.labels = labels

    def __len__(self):
        return self.labels.shape[0]

    def __getitem__(self, idx):
        sen1_image = self.sen1_data[idx]
        sen2_image = self.sen2_data[idx]
        label = self.labels[idx]

        # Convert to PyTorch tensors
        sen1_image = torch.tensor(sen1_image, dtype=torch.float32).permute(2, 0, 1)
        sen2_image = torch.tensor(sen2_image, dtype=torch.float32).permute(2, 0, 1)

        # Convert one-hot encoded label to class index
        label = torch.tensor(label, dtype=torch.float32)
        label = torch.argmax(label).long()

        return sen1_image, sen2_image, label


In [4]:
# Create datasets
train_dataset = SatelliteDataset(train_sen1_data, train_sen2_data, train_labels)
test_dataset = SatelliteDataset(test_sen1_data, test_sen2_data, test_labels)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [5]:
class ConvNet(nn.Module):
    def __init__(self, num_classes=17):
        super(ConvNet, self).__init__()

        # Sentinel-1 branch
        self.sen1_conv1 = nn.Conv2d(8, 32, kernel_size=3, padding=1)
        self.sen1_dropout1 = nn.Dropout(p=0.25)
        self.sen1_conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.sen1_dropout2 = nn.Dropout(p=0.25)
        self.sen1_pool = nn.MaxPool2d(2, 2)

        # Sentinel-2 branch
        self.sen2_conv1 = nn.Conv2d(10, 32, kernel_size=3, padding=1)
        self.sen2_dropout1 = nn.Dropout(p=0.25)
        self.sen2_conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.sen2_dropout2 = nn.Dropout(p=0.25)
        self.sen2_pool = nn.MaxPool2d(2, 2)

        # Fully connected layers after concatenation
        self.fc1 = nn.Linear(64 * 16 * 16 * 2, 128)
        self.fc1_dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(128, 64)
        self.fc2_dropout = nn.Dropout(p=0.5)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, sen1, sen2):
        # Sentinel-1 forward pass
        x1 = F.relu(self.sen1_conv1(sen1))
        x1 = self.sen1_dropout1(x1)
        x1 = self.sen1_pool(F.relu(self.sen1_conv2(x1)))
        x1 = self.sen1_dropout2(x1)
        x1 = x1.view(x1.size(0), -1)

        # Sentinel-2 forward pass
        x2 = F.relu(self.sen2_conv1(sen2))
        x2 = self.sen2_dropout1(x2)
        x2 = self.sen2_pool(F.relu(self.sen2_conv2(x2)))
        x2 = self.sen2_dropout2(x2)
        x2 = x2.view(x2.size(0), -1)

        # Concatenate both branches
        x = torch.cat((x1, x2), dim=1)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc1_dropout(x)
        x = F.relu(self.fc2(x))
        x = self.fc2_dropout(x)
        x = self.fc3(x)

        return x


In [6]:
# Training function with visualization and memory clearing
# def train_model(model, train_loader, criterion, optimizer, num_epochs, device='cuda'):
def train_model(model, train_loader, criterion, optimizer, num_epochs, device='cpu'):
    model.to(device)
    model.train()

    train_losses = []  # List to store training loss for visualization

    for epoch in range(num_epochs):
        running_loss = 0.0
        epoch_loss = 0.0

        for i, (sen1, sen2, labels) in enumerate(train_loader):
            sen1, sen2, labels = sen1.to(device), sen2.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(sen1, sen2)  # Forward pass
            loss = criterion(outputs, labels)  # Compute loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights

            running_loss += loss.item()
            epoch_loss += loss.item()

            # Clear memory for each batch (optional but not usually necessary here)
            del outputs, loss

            if i % 100 == 99:  # Print every 100 mini-batches
                print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {running_loss / 100:.4f}')
                running_loss = 0.0

        # Average loss for the epoch
        epoch_loss /= len(train_loader)
        train_losses.append(epoch_loss)
        print(f'Epoch [{epoch + 1}/{num_epochs}] Average Loss: {epoch_loss:.4f}')

        # Clear unused memory after each epoch
        torch.cuda.empty_cache()  # Clear GPU memory
        gc.collect()  # Trigger garbage collection for CPU memory

    print('Training complete')

    # Visualization of training loss
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, marker='o', label='Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training Loss Over Epochs')
    plt.legend()
    plt.grid()
    plt.show()


In [7]:
# def evaluate_model(model, test_loader, criterion, device='cuda'):
def evaluate_model(model, test_loader, criterion, device='cpu'):
    model.to(device)
    model.eval()

    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    val_losses = []  # Optional, to track across batches if needed
    val_accuracies = []  # Optional, to track across batches if needed

    with torch.no_grad():
        for sen1, sen2, labels in test_loader:
            sen1, sen2, labels = sen1.to(device), sen2.to(device), labels.to(device)
            outputs = model(sen1, sen2)

            # Calculate loss
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            # Calculate accuracy
            predicted = torch.argmax(outputs, dim=1)
            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    avg_loss = total_loss / len(test_loader)
    accuracy = correct_predictions / total_samples

    print(f'Average Test Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}')

    # Visualization (optional)
    plt.figure(figsize=(6, 4))
    plt.bar(['Loss', 'Accuracy'], [avg_loss, accuracy])
    plt.title('Evaluation Results')
    plt.ylabel('Value')
    plt.grid()
    plt.show()

    return avg_loss, accuracy


In [8]:
# Initialize model, loss function, and optimizer
model = ConvNet(num_classes=17)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [9]:
# # Train the model
# gc.collect()
# train_model(model, train_loader, criterion, optimizer, num_epochs=50, device='cuda')

In [10]:
# # Perform evaluation
# gc.collect()
# evaluate_model(model, test_loader, criterion, device='cuda')

In [11]:
end = time.time()
print((end-start)//60)

0.0


In [12]:
def objective(trial, train_dataset, val_dataset):
    # Hyperparameter search space
    kernel_size = trial.suggest_categorical("kernel_size", [3, 5, 7])
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_int("batch_size", 32, 256, step=32)

    # Model initialization
    model = ConvNet(num_classes=17)
    model.to(device)

    # Optimizer and criterion
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    num_epochs = 5
    best_val_accuracy = 0.0

    for epoch in range(num_epochs):
        # Training loop
        model.train()
        for sen1_inputs, sen2_inputs, labels in train_loader:
            sen1_inputs, sen2_inputs, labels = (
                sen1_inputs.to(device),
                sen2_inputs.to(device),
                labels.to(device),
            )
            optimizer.zero_grad()
            outputs = model(sen1_inputs, sen2_inputs)  # Pass both inputs to the model
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation loop
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for sen1_inputs, sen2_inputs, labels in val_loader:
                sen1_inputs, sen2_inputs, labels = (
                    sen1_inputs.to(device),
                    sen2_inputs.to(device),
                    labels.to(device),
                )
                outputs = model(sen1_inputs, sen2_inputs)
                _, predictions = torch.max(outputs, 1)
                correct += (predictions == labels).sum().item()
                total += labels.size(0)

        val_accuracy = correct / total
        best_val_accuracy = max(best_val_accuracy, val_accuracy)

    return best_val_accuracy


In [13]:
# Define the device
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
# Run the study with datasets passed as arguments
study = optuna.create_study(direction="maximize")  # Maximize validation accuracy
study.optimize(lambda trial: objective(trial, train_dataset, test_dataset), n_trials=50)  # Pass datasets
#
# Print the best hyperparameters
print("Best Hyperparameters:", study.best_params)
print("Best Validation Accuracy:", study.best_value)



[I 2024-11-28 12:07:27,979] A new study created in memory with name: no-name-e750b205-b6fa-42b6-b912-16420816b291
[I 2024-11-28 12:09:13,128] Trial 0 finished with value: 0.5635566687539136 and parameters: {'kernel_size': 5, 'dropout_rate': 0.37728644038496684, 'learning_rate': 0.0014080529386716804, 'batch_size': 96}. Best is trial 0 with value: 0.5635566687539136.
[I 2024-11-28 12:10:53,965] Trial 1 finished with value: 0.4852849092047589 and parameters: {'kernel_size': 7, 'dropout_rate': 0.233781521545016, 'learning_rate': 0.00015452206072928617, 'batch_size': 128}. Best is trial 0 with value: 0.5635566687539136.
[I 2024-11-28 12:12:25,360] Trial 2 finished with value: 0.5397620538509705 and parameters: {'kernel_size': 3, 'dropout_rate': 0.36631682102444063, 'learning_rate': 0.002158105093367089, 'batch_size': 256}. Best is trial 0 with value: 0.5635566687539136.
[I 2024-11-28 12:14:00,324] Trial 3 finished with value: 0.4658735128365686 and parameters: {'kernel_size': 7, 'dropout_r

KeyboardInterrupt: 

In [None]:
# import sklearn
# print(sklearn.__version__)

optuna.visualization.plot_param_importances(study)
# optuna.visualization.plot_optimization_history(study)


In [None]:
# optuna.visualization.plot_optimization_history(study)


In [None]:
torch.save(model.state_dict(), r"C:\Users\nadav.k\Documents\DS\DL_classification\saved_models\two_stas_best_manual_fulldata_new_labels.pth")