<a href="https://colab.research.google.com/github/seismosmsr/machine_learning/blob/main/LandCover_UNET_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install rasterio scikit-image tensorflow keras gdown


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import os
import gdown
import zipfile
import rasterio
from skimage.transform import resize
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import random
from keras.utils import to_categorical
from skimage.util import random_noise

In [4]:
# Download the training dataset
url = 'https://drive.google.com/uc?id=1f4eGmykyiczmNz2VPeNNmQ7aC7q8N_hD'
output = '/content/california_land_use.zip'
gdown.download(url, output, quiet=False)

# Extract the dataset
cwd = os.getcwd()
with zipfile.ZipFile(output, 'r') as zip_ref:
    zip_ref.extractall(cwd+'/sample_data')


In [36]:
# Download the inference dataset
#https://drive.google.com/file/d/1mn00JDt51KlhyiVTfPJjRk7Ymzd_zB7n/view?usp=drive_link
url = 'https://drive.google.com/uc?id=1mn00JDt51KlhyiVTfPJjRk7Ymzd_zB7n'
output = '/content/california_land_use.zip'
gdown.download(url, output, quiet=False)

# Extract the dataset
cwd = os.getcwd()
with zipfile.ZipFile(output, 'r') as zip_ref:
    zip_ref.extractall(cwd+'/sample_data/inference_data/')

Downloading...
From: https://drive.google.com/uc?id=1mn00JDt51KlhyiVTfPJjRk7Ymzd_zB7n
To: /content/california_land_use.zip
100%|██████████| 331M/331M [00:05<00:00, 58.1MB/s]


In [35]:
import os
import rasterio
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision import models

class CustomDataset(Dataset):
    def __init__(self, rgb_dir, labels_dir, transform=None):
        self.rgb_dir = rgb_dir
        self.labels_dir = labels_dir
        self.transform = transform
        self.filenames = os.listdir(rgb_dir)

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        with rasterio.open(os.path.join(self.rgb_dir, self.filenames[idx])) as src:
            rgb_image = src.read().transpose((1, 2, 0))
        with rasterio.open(os.path.join(self.labels_dir, self.filenames[idx])) as src:
            label_image = src.read(1) - 1  # adjust labels to be 0-indexed
        if self.transform:
            rgb_image = self.transform(rgb_image)
            label_image = torch.from_numpy(label_image).long()
        return rgb_image, label_image


# Transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Datasets
train_dataset = CustomDataset('/content/sample_data/training/rgbNIR', '/content/sample_data/training/labels', transform=transform)
val_dataset = CustomDataset('/content/sample_data/validation/rgbNIR', '/content/sample_data/validation/labels', transform=transform)

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2,drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=True, num_workers=2,drop_last=True)

# Initialize device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Model
model = models.segmentation.deeplabv3_resnet50(pretrained=True)
model.backbone.conv1 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
model.classifier[4] = nn.Conv2d(256, 9, kernel_size=(1, 1), stride=(1, 1))

# Transfer model to GPU if available
model = model.to(device)

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # Transfer inputs and labels to GPU
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)['out']
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:    # print every 10 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0
    model.eval()
    with torch.no_grad():
        running_val_loss = 0.0
        for i, data in enumerate(val_loader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)['out']
            val_loss = criterion(outputs, labels)
            running_val_loss += val_loss.item()
        print('[%d] validation loss: %.3f' % (epoch + 1, running_val_loss / len(val_loader)))

print('Finished Training')




[1,    10] loss: 2.208
[1,    20] loss: 1.753
[1,    30] loss: 1.375
[1,    40] loss: 1.093
[1,    50] loss: 1.165
[1,    60] loss: 1.042
[1,    70] loss: 0.957
[1,    80] loss: 0.985
[1,    90] loss: 0.950
[1,   100] loss: 0.883
[1,   110] loss: 0.847
[1,   120] loss: 0.819
[1,   130] loss: 0.858
[1,   140] loss: 0.844
[1,   150] loss: 0.829
[1,   160] loss: 0.732
[1,   170] loss: 0.797
[1,   180] loss: 0.729
[1,   190] loss: 0.767
[1,   200] loss: 0.744
[1,   210] loss: 0.705
[1,   220] loss: 0.673
[1,   230] loss: 0.798
[1,   240] loss: 0.720
[1,   250] loss: 0.729
[1,   260] loss: 0.756
[1,   270] loss: 0.730
[1,   280] loss: 0.634
[1,   290] loss: 0.699
[1,   300] loss: 0.653
[1,   310] loss: 0.642
[1,   320] loss: 0.692
[1,   330] loss: 0.583
[1,   340] loss: 0.660
[1] validation loss: 0.901
[2,    10] loss: 0.608
[2,    20] loss: 0.657
[2,    30] loss: 0.573
[2,    40] loss: 0.563
[2,    50] loss: 0.608
[2,    60] loss: 0.661
[2,    70] loss: 0.524
[2,    80] loss: 0.586
[2,    

In [37]:
import os
import rasterio
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision import models

class SampleDataset(Dataset):
    def __init__(self, sample_dir, transform=None):
        self.sample_dir = sample_dir
        self.transform = transform
        self.filenames = os.listdir(sample_dir)

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        with rasterio.open(os.path.join(self.sample_dir, self.filenames[idx])) as src:
            sample_image = src.read().transpose((1, 2, 0))
        if self.transform:
            sample_image = self.transform(sample_image)
        return sample_image


# Transforms
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Dataset
sample_dataset = SampleDataset('/content/sample_data/inference_data', transform=transform)

# DataLoader
sample_loader = DataLoader(sample_dataset, batch_size=1, shuffle=False)

# Initialize device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model.eval()

# Inference loop
results = []
for data in sample_loader:
    inputs = data.to(device)
    with torch.no_grad():
        outputs = model(inputs)['out']
    _, predicted_labels = torch.max(outputs, dim=1)
    predicted_labels = predicted_labels.squeeze().cpu().numpy()
    results.append(predicted_labels)

# Save results as new GeoTIFF files
for i, filename in enumerate(sample_dataset.filenames):
    input_filepath = os.path.join('/content/sample_data/inference_data', filename)
    output_filepath = os.path.join('/content/drive/MyDrive/Colab_Demo/Murrieta_Result_4/Predictions', filename)

    with rasterio.open(input_filepath) as src:
        meta = src.meta

    num_classes = outputs.shape[1]
    meta.update(count=num_classes, dtype=np.uint8)

    with rasterio.open(output_filepath, 'w', **meta) as dst:
        for band in range(num_classes):
            dst.write(results[i] == band, band + 1)


In [55]:
softmax = torch.nn.Softmax(dim=1)
for i, data in enumerate(sample_loader):
    inputs = data[0].to(device)  # Access the sample image from the batch
    filename = sample_dataset.filenames[i]
    output_filepath = os.path.join('/content/drive/MyDrive/Colab_Demo/Murrieta_Result_4/Predictions', filename)

    with torch.no_grad():
        outputs = model(inputs)['out']
        _, predicted_labels = torch.max(outputs, dim=1)
        probabilities = softmax(outputs) 
        predicted_labels = predicted_labels.squeeze().cpu().numpy()

    with rasterio.open(os.path.join('/content/sample_data/inference_data', filename)) as src:
        meta = src.meta

    num_classes = outputs.shape[1]
    meta.update(count=num_classes, dtype=np.uint8)

    with rasterio.open(output_filepath, 'w', **meta) as dst:
        for band in range(num_classes):
            dst.write((predicted_labels == band).astype(np.uint8), band + 1)


KeyboardInterrupt: ignored

In [54]:
# results[0] 

_, predicted_labels = torch.max(results[0], dim=0)

predicted_labels.numpy()

TypeError: ignored