In [None]:
import rasterio
import torch
import torch.nn as nn
import numpy
import tqdm
import glob
import os
import pandas as pd

In [2]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print('DEVICE using {}'.format(DEVICE))

DEVICE using cuda


In [3]:
from Model import cnn_model
from Model import cnn_model_attention

cnn1d = cnn_model.CNN1D
cnn1d_lstm = cnn_model.CNNLSTM
cnn2d = cnn_model.CNN2D
cnn2d_lstm = cnn_model.CNNLSTM2D
cnn1d_att = cnn_model_attention.CNN1DATT
cnn1dlstm_att = cnn_model_attention.CNNLSTMATT
cnn2d_att = cnn_model_attention.CNN2DATT
cnn2dlstm_att = cnn_model_attention.CNNLSTM2DATT

model_1D = {'model_cnn1d': cnn1d, 'model_cnn1d_lstm': cnn1d_lstm, 'model_cnn1d_att': cnn1d_att, 'model_cnn1dlstm_att': cnn1dlstm_att}
model_2D = {'model_cnn2d': cnn2d, 'model_cnn2d_lstm': cnn2d_lstm, 'model_cnn2d_att': cnn2d_att, 'model_cnn2dlstm_att': cnn2dlstm_att}

In [6]:
mapping_object = '4class'
data = pd.read_csv("./merged_data2_{}.csv".format(mapping_object))
data = data.dropna()
data.head()

Unnamed: 0,Class2,V4,V5,V6,V7,V8,V9,V10,V11,V12,...,V175,V176,V177,V178,V179,V180,V181,V182,V183,V184
0,4,0.051166,0.066034,0.063299,0.162909,0.17384,0.103043,0.014811,0.03894,0.054497,...,0.935761,-0.089926,-11.703547,-8.832947,2.8706,1.344082,0.050672,-1.567327,2.481404,2.481186
1,4,0.041163,0.073449,0.084577,0.097461,0.093803,0.045156,0.029049,0.014436,0.127593,...,1.269115,-0.145843,-11.510554,-7.380278,4.130276,1.812655,-0.764677,-0.599178,2.482623,2.475474
2,3,0.072416,0.129937,0.134894,0.097134,0.072244,0.057077,0.07075,0.059574,0.075071,...,1.244691,-0.129092,-11.651522,-7.962222,3.6893,2.267453,-0.142272,-1.5906,2.479603,2.477288
3,3,0.074673,0.111059,0.101137,0.076099,0.066573,0.047397,0.071096,0.087114,0.10663,...,1.828652,-0.176416,-12.470089,-5.601049,6.86904,1.481105,1.242591,1.165478,2.48127,2.469046
4,3,0.072276,0.119697,0.113054,0.085009,0.063719,0.042546,0.047473,0.063731,0.077336,...,1.314358,-0.124873,-12.745337,-8.258361,4.486977,1.469534,-0.189085,-1.052814,2.481925,2.477782


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X = data.drop(columns=['Class2']).values
y = data['Class2'].values

y -= y.min()

#scaler = StandardScaler()
#X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# for 1D CNN
from torch.utils.data import DataLoader, TensorDataset

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((703, 181), (79, 181), (703,), (79,))

In [15]:
padded_arr = numpy.pad(X, ((0, 0), (0, 9)), mode='constant', constant_values=0)
print(padded_arr.shape)

(782, 190)


In [16]:
#for 2D CNN
X_reshaped = padded_arr.reshape(-1, 1, 19, 10)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X_reshaped, y, test_size=0.1, random_state=42)

X_train_tensor2 = torch.tensor(X_train2, dtype=torch.float32)
y_train_tensor2 = torch.tensor(y_train2, dtype=torch.long)
X_test_tensor2 = torch.tensor(X_test2, dtype=torch.float32)
y_test_tensor2 = torch.tensor(y_test2, dtype=torch.long)

train_dataset2 = TensorDataset(X_train_tensor2, y_train_tensor2)
test_dataset2 = TensorDataset(X_test_tensor2, y_test_tensor2)

train_loader2 = DataLoader(train_dataset2, batch_size=8, shuffle=True)
test_loader2 = DataLoader(test_dataset2, batch_size=8, shuffle=False)

X_train2.shape, X_test2.shape, y_train2.shape, y_test2.shape

((703, 1, 19, 10), (79, 1, 19, 10), (703,), (79,))

In [17]:
# Define training parameters
num_epochs = 2500
learning_rate = 0.001
input_size = X_train_tensor.shape[1] 
num_classes = len(set(y_train))

# Loss function
criterion = torch.nn.CrossEntropyLoss()

print(input_size)
print(num_classes)

181
4


In [18]:
from torch.optim import SGD, Adam

# Loop through each model in the model_list
for model_name, model_class in model_1D.items():
    print(f"Training {model_name}")

    # Initialize the model and move it to the device
    model = model_class(input_size=input_size, num_classes=num_classes).to(DEVICE)
    optimizer = SGD(model.parameters(), lr=learning_rate)

    # Metrics storage
    metrics = {
        "epoch": [],
        "train_loss": [],
        "train_accuracy": [],
        "test_loss": [],
        "test_accuracy": []
    }

    # Training loop
    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:  
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)  

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Calculate training accuracy
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        avg_train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct_train / total_train

        # Model Evaluation (on the test set)
        model.eval()
        correct_val = 0
        total_val = 0
        running_val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in test_loader:  # Assume test_loader is already defined
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        avg_val_loss = running_val_loss / len(test_loader)
        val_accuracy = 100 * correct_val / total_val

        # Print metrics for each epoch
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, Test Loss: {avg_val_loss:.4f}, Test Accuracy: {val_accuracy:.2f}%")

        # Save metrics to dictionary
        metrics["epoch"].append(epoch + 1)
        metrics["train_loss"].append(avg_train_loss)
        metrics["train_accuracy"].append(train_accuracy)
        metrics["test_loss"].append(avg_val_loss)
        metrics["test_accuracy"].append(val_accuracy)

    # Save the model (optional)
    torch.save(model.state_dict(), f"{model_name}_{mapping_object}_best.pth")
    print(f"{model_name} saved as {model_name}_{mapping_object}_best.pth\n")

    # Save the metrics to a CSV file
    df = pd.DataFrame(metrics)
    df.to_csv(f"{model_name}_{mapping_object}_metrics.csv", index=False)
    print(f"Metrics for {model_name} saved as {model_name}_{mapping_object}_metrics.csv\n")

Training model_cnn1d
Epoch [1/2500], Train Loss: 1.3588, Train Accuracy: 33.85%, Test Loss: 1.2757, Test Accuracy: 49.37%
Epoch [2/2500], Train Loss: 1.3227, Train Accuracy: 36.13%, Test Loss: 1.2775, Test Accuracy: 49.37%
Epoch [3/2500], Train Loss: 1.3242, Train Accuracy: 36.13%, Test Loss: 1.2280, Test Accuracy: 43.04%
Epoch [4/2500], Train Loss: 1.3046, Train Accuracy: 40.54%, Test Loss: 1.1951, Test Accuracy: 55.70%
Epoch [5/2500], Train Loss: 1.2746, Train Accuracy: 46.09%, Test Loss: 1.1467, Test Accuracy: 48.10%
Epoch [6/2500], Train Loss: 1.2808, Train Accuracy: 43.67%, Test Loss: 1.1448, Test Accuracy: 51.90%
Epoch [7/2500], Train Loss: 1.2517, Train Accuracy: 44.95%, Test Loss: 1.0955, Test Accuracy: 55.70%
Epoch [8/2500], Train Loss: 1.2234, Train Accuracy: 47.94%, Test Loss: 1.0534, Test Accuracy: 59.49%
Epoch [9/2500], Train Loss: 1.2301, Train Accuracy: 46.66%, Test Loss: 1.0579, Test Accuracy: 55.70%
Epoch [10/2500], Train Loss: 1.1994, Train Accuracy: 49.22%, Test Loss

In [19]:
from torch.optim import SGD
input_size = (1, 19, 10)
num_classes = len(set(y_train))

# Loop through each model in the model_list
for model_name, model_class in model_2D.items():
    print(f"Training {model_name}")

    # Initialize the model and move it to the device
    model = model_class(input_size=input_size, num_classes=num_classes).to(DEVICE)
    optimizer = SGD(model.parameters(), lr=learning_rate)

    # Metrics storage
    metrics = {
        "epoch": [],
        "train_loss": [],
        "train_accuracy": [],
        "test_loss": [],
        "test_accuracy": []
    }

    # Training loop
    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader2:  # Assume train_loader is already defined
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)  # Move to device

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Calculate training accuracy
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        avg_train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct_train / total_train

        # Model Evaluation (on the test set)
        model.eval()
        correct_val = 0
        total_val = 0
        running_val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in test_loader2:  # Assume test_loader is already defined
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        avg_val_loss = running_val_loss / len(test_loader)
        val_accuracy = 100 * correct_val / total_val

        # Print metrics for each epoch
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, Test Loss: {avg_val_loss:.4f}, Test Accuracy: {val_accuracy:.2f}%")

        # Save metrics to dictionary
        metrics["epoch"].append(epoch + 1)
        metrics["train_loss"].append(avg_train_loss)
        metrics["train_accuracy"].append(train_accuracy)
        metrics["test_loss"].append(avg_val_loss)
        metrics["test_accuracy"].append(val_accuracy)

    # Save the model (optional)
    torch.save(model.state_dict(), f"{model_name}_{mapping_object}_best.pth")
    print(f"{model_name} saved as {model_name}_{mapping_object}_best.pth\n")

    # Save the metrics to a CSV file
    df = pd.DataFrame(metrics)
    df.to_csv(f"{model_name}_{mapping_object}_metrics.csv", index=False)
    print(f"Metrics for {model_name} saved as {model_name}_metrics_{mapping_object}.csv\n")

Training model_cnn2d
Epoch [1/2500], Train Loss: 1.4341, Train Accuracy: 39.69%, Test Loss: 1.2580, Test Accuracy: 49.37%
Epoch [2/2500], Train Loss: 1.2616, Train Accuracy: 46.51%, Test Loss: 1.3211, Test Accuracy: 49.37%
Epoch [3/2500], Train Loss: 1.2427, Train Accuracy: 50.64%, Test Loss: 1.0626, Test Accuracy: 58.23%
Epoch [4/2500], Train Loss: 1.2436, Train Accuracy: 48.22%, Test Loss: 0.9817, Test Accuracy: 56.96%
Epoch [5/2500], Train Loss: 1.2314, Train Accuracy: 50.21%, Test Loss: 1.2870, Test Accuracy: 48.10%
Epoch [6/2500], Train Loss: 1.2085, Train Accuracy: 49.79%, Test Loss: 1.0182, Test Accuracy: 56.96%
Epoch [7/2500], Train Loss: 1.2184, Train Accuracy: 50.07%, Test Loss: 1.2408, Test Accuracy: 44.30%
Epoch [8/2500], Train Loss: 1.1865, Train Accuracy: 52.20%, Test Loss: 1.0578, Test Accuracy: 62.03%
Epoch [9/2500], Train Loss: 1.1710, Train Accuracy: 50.78%, Test Loss: 1.1902, Test Accuracy: 58.23%
Epoch [10/2500], Train Loss: 1.1103, Train Accuracy: 53.77%, Test Loss

In [20]:
from Model import cnn_model_multiscale

cnn1d_multibranch = cnn_model_multiscale.MultiBranchCNN1D
cnn1d_multibranchattlstm = cnn_model_multiscale.MultiBranchCNN1DATTLSTM

model_1D_md = {'model_multibranch_cnn1d': cnn1d_multibranch, 'model_multibranch_cnn1d_lstm': cnn1d_multibranchattlstm}

In [25]:
from torch.optim import SGD, Adam

input_size = X_train_tensor.shape[1]
num_classes = len(set(y_train))

# Loop through each model in the model_list
for model_name, model_class in model_1D_md.items():
    print(f"Training {model_name}")

    # Initialize the model and move it to the device
    model = model_class(input_size=input_size, num_classes=num_classes).to(DEVICE)
    optimizer = SGD(model.parameters(), lr=learning_rate)

    # Metrics storage
    metrics = {
        "epoch": [],
        "train_loss": [],
        "train_accuracy": [],
        "test_loss": [],
        "test_accuracy": []
    }

    # Training loop
    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for inputs, labels in train_loader:  
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)  

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Calculate training accuracy
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        avg_train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct_train / total_train

        # Model Evaluation (on the test set)
        model.eval()
        correct_val = 0
        total_val = 0
        running_val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in test_loader:  # Assume test_loader is already defined
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_val_loss += loss.item()

                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        avg_val_loss = running_val_loss / len(test_loader)
        val_accuracy = 100 * correct_val / total_val

        # Print metrics for each epoch
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, Test Loss: {avg_val_loss:.4f}, Test Accuracy: {val_accuracy:.2f}%")

        # Save metrics to dictionary
        metrics["epoch"].append(epoch + 1)
        metrics["train_loss"].append(avg_train_loss)
        metrics["train_accuracy"].append(train_accuracy)
        metrics["test_loss"].append(avg_val_loss)
        metrics["test_accuracy"].append(val_accuracy)

    # Save the model (optional)
    torch.save(model.state_dict(), f"{model_name}_{mapping_object}_best.pth")
    print(f"{model_name} saved as {model_name}_{mapping_object}_best.pth\n")

    # Save the metrics to a CSV file
    df = pd.DataFrame(metrics)
    df.to_csv(f"{model_name}_{mapping_object}_metrics.csv", index=False)
    print(f"Metrics for {model_name} saved as {model_name}_{mapping_object}_metrics.csv\n")

Training model_multibranch_cnn1d


RuntimeError: Expected 2D (unbatched) or 3D (batched) input to conv1d, but got input of size: [1, 1, 1, 181]