In [25]:
import pandas as pd

# read in labels to split test/train set

palsa_labels = pd.read_csv("/home/nadjaflechner/Palsa_data/dataset_200m/palsa_labels.csv", index_col= 0)
palsa_labels.loc[palsa_labels['palsa_percentage'] > 0, 'palsa'] = 1
palsa_labels.loc[palsa_labels['palsa_percentage'] == 0, 'palsa'] = 0

binary_df = palsa_labels.drop('palsa_percentage', axis = 1)
binary_df.to_csv("/home/nadjaflechner/Palsa_data/binary_palsa_labels_200m.csv")


# train = palsa_labels.sample(frac=0.8,random_state=200)
# test = palsa_labels.drop(train.index)

# # train = palsa_labels[:11000]
# # test = palsa_labels[11000:14000]

# train.to_csv("/home/nadjaflechner/Palsa_data/train_100mtest.csv")
# test.to_csv("/home/nadjaflechner/Palsa_data/test100mtesst.csv")

In [3]:
palsa_labels.shape

(67876, 2)

In [4]:
67876*0.8

54300.8

In [16]:
palsa_labels.head(4)

Unnamed: 0,palsa_percentage,palsa
758_66_55_2018_crop_8311,8,1.0
758_66_55_2018_crop_8315,36,1.0
758_66_55_2018_crop_8316,9,1.0
758_66_55_2018_crop_8318,0,0.0


In [17]:
import os 

binary_df = palsa_labels.drop('palsa_percentage', axis = 1)
binary_df.to_csv("/Users/nadja/Documents/UU/Thesis/generated_tifs/binary_palsa_labels.csv")


In [28]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import rasterio
import numpy as np

# Define the custom dataset
class ImageDataset(Dataset):
    def __init__(self, image_dir, labels_df):
        self.image_dir = image_dir
        self.labels_df = labels_df

    def __len__(self):
        return len(self.labels_df)

    def __getitem__(self, idx):
        img_name = self.labels_df.index[idx]
        img_path = os.path.join(self.image_dir, f"{img_name}.tif")

        # Open the TIF image using rasterio
        with rasterio.open(img_path) as src:
            # Read the image data
            image_data = src.read()
        image_array = np.array(image_data)
        image_tensor = torch.from_numpy(image_array)
        image_tensor = image_tensor.float()

        label = self.labels_df.iloc[idx, 0]

        return image_tensor, label

# Define the CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc = nn.Linear(40000, 2)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        # print(x.shape)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.maxpool(x)
        # print(x.shape)
        x = self.conv3(x)
        x = self.relu(x)
        x = self.maxpool(x)
        # print(x.shape)
        x = x.view(x.size(0), -1)
        # print(x.shape)
        x = self.fc(x)
        return x


# Set the paths and parameters
image_dir = "/home/nadjaflechner/Palsa_data/dataset_100m/"
labels_file = "/home/nadjaflechner/Palsa_data/binary_palsa_labels_100m.csv"
batch_size = 40
num_epochs = 10
learning_rate = 0.01

# Load the labels from the CSV file
labels_df = pd.read_csv(labels_file, index_col=0).head(3000)

# Split the dataset into training and validation sets
train_df = labels_df.head(2400)
val_df = labels_df.drop(train_df.index)

# Create the datasets and data loaders
train_dataset = ImageDataset(image_dir, train_df)
val_dataset = ImageDataset(image_dir, val_df)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

# Create the CNN model
model = CNN()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    model.train()
    for images, labels in train_dataloader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels.long())
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluate the model on the validation set
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in val_dataloader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Validation Accuracy: {accuracy:.2f}%")

# Save the trained model
torch.save(model.state_dict(), "cnn_model.pth")

Epoch [1/10], Loss: 0.6459
Epoch [2/10], Loss: 0.6217
Epoch [3/10], Loss: 0.6687
Epoch [4/10], Loss: 0.6557
Epoch [5/10], Loss: 0.5872
Epoch [6/10], Loss: 0.5863
Epoch [7/10], Loss: 0.6598
Epoch [8/10], Loss: 0.6956
Epoch [9/10], Loss: 0.5488
Epoch [10/10], Loss: 0.6538
Validation Accuracy: 56.50%


#### Below is current working version 24 april

In [31]:
import torch
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import rasterio
import numpy as np

# Define the custom dataset
class ImageDataset(Dataset):
    def __init__(self, image_dir, labels_df):
        self.image_dir = image_dir
        self.labels_df = labels_df

    def __len__(self):
        return len(self.labels_df)

    def __getitem__(self, idx):
        img_name = self.labels_df.index[idx]
        img_path = os.path.join(self.image_dir, f"{img_name}.tif")

        # Open the TIF image using rasterio
        with rasterio.open(img_path) as src:
            # Read the image data
            image_data = src.read()
        image_array = np.array(image_data)
        image_tensor = torch.from_numpy(image_array)
        image_tensor = image_tensor.float()

        label = self.labels_df.iloc[idx, 0]

        return image_tensor, label

class PermafrostCNN(nn.Module):
    def __init__(self):
        super(PermafrostCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=2, dilation=2)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=2, dilation=2)
        self.relu4 = nn.ReLU(inplace=True)
        self.pool4 = nn.AdaptiveAvgPool2d((1, 1))
        
        self.fc = nn.Linear(256, 2)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        
        x = self.conv3(x)
        x = self.relu3(x)
        x = self.pool3(x)
        
        x = self.conv4(x)
        x = self.relu4(x)
        x = self.pool4(x)
        
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        
        return x
    


# Set the paths and parameters
image_dir = "/home/nadjaflechner/Palsa_data/dataset_100m/"
labels_file = "/home/nadjaflechner/Palsa_data/binary_palsa_labels_100m.csv"
batch_size = 40
num_epochs = 10
learning_rate = 0.001

# Load the labels from the CSV file
labels_df = pd.read_csv(labels_file, index_col=0).head(5000)

# Split the dataset into training and validation sets
train_df = labels_df.head(2400)
val_df = labels_df.drop(train_df.index)

# Create the datasets and data loaders
train_dataset = ImageDataset(image_dir, train_df)
val_dataset = ImageDataset(image_dir, val_df)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

# Create the CNN model
model = PermafrostCNN()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    model.train()
    for images, labels in train_dataloader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels.long())
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluate the model on the validation set
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in val_dataloader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Validation Accuracy: {accuracy:.2f}%")

# Save the trained model
torch.save(model.state_dict(), "cnn_model.pth")

Epoch [1/10], Loss: 0.5751
Epoch [2/10], Loss: 0.6789
Epoch [3/10], Loss: 0.6126
Epoch [4/10], Loss: 0.6290
Epoch [5/10], Loss: 0.6854
Epoch [6/10], Loss: 0.6870
Epoch [7/10], Loss: 0.6816
Epoch [8/10], Loss: 0.6943
Epoch [9/10], Loss: 0.6051
Epoch [10/10], Loss: 0.6276
Validation Accuracy: 60.23%


#### 200x200 version

In [None]:
import torch
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import rasterio
import numpy as np

# Define the custom dataset
class ImageDataset(Dataset):
    def __init__(self, image_dir, labels_df):
        self.image_dir = image_dir
        self.labels_df = labels_df

    def __len__(self):
        return len(self.labels_df)

    def __getitem__(self, idx):
        img_name = self.labels_df.index[idx]
        img_path = os.path.join(self.image_dir, f"{img_name}.tif")

        # Open the TIF image using rasterio
        with rasterio.open(img_path) as src:
            # Read the image data
            image_data = src.read()
        image_array = np.array(image_data)
        image_tensor = torch.from_numpy(image_array)
        image_tensor = image_tensor.float()

        label = self.labels_df.iloc[idx, 0]

        return image_tensor, label

class PermafrostCNN(nn.Module):
    def __init__(self):
        super(PermafrostCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.relu4 = nn.ReLU(inplace=True)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv5 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.relu5 = nn.ReLU(inplace=True)
        self.pool5 = nn.AdaptiveAvgPool2d((1, 1))
        
        self.fc = nn.Linear(512, 2)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        
        x = self.conv3(x)
        x = self.relu3(x)
        x = self.pool3(x)
        
        x = self.conv4(x)
        x = self.relu4(x)
        x = self.pool4(x)
        
        x = self.conv5(x)
        x = self.relu5(x)
        x = self.pool5(x)
        
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        
        return x
    


# Set the paths and parameters
image_dir = "/home/nadjaflechner/Palsa_data/dataset_200m/"
labels_file = "/home/nadjaflechner/Palsa_data/binary_palsa_labels_200m.csv"
batch_size = 40
num_epochs = 10
learning_rate = 0.001

# Load the labels from the CSV file
labels_df = pd.read_csv(labels_file, index_col=0).head(5000)

# Split the dataset into training and validation sets
train_df = labels_df.head(2400)
val_df = labels_df.drop(train_df.index)

# Create the datasets and data loaders
train_dataset = ImageDataset(image_dir, train_df)
val_dataset = ImageDataset(image_dir, val_df)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

# Create the CNN model
model = PermafrostCNN()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    model.train()
    for images, labels in train_dataloader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels.long())
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Evaluate the model on the validation set
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in val_dataloader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Validation Accuracy: {accuracy:.2f}%")

# Save the trained model
torch.save(model.state_dict(), "cnn_model.pth")