In [1]:
import tensorflow as tf
import scipy.optimize
import scipy
import pandas as pd
from tensorflow import keras
import os
import random
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications import EfficientNetB0
import matplotlib.pyplot as plt
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset,random_split
from sklearn.metrics import classification_report, f1_score, roc_auc_score
import torch.optim.lr_scheduler as lr_scheduler
import torch.nn as nn
import torchvision.models as models
from tqdm import tqdm
from torchvision import transforms

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Load the data
df = pd.read_csv('/content/drive/My Drive/data/train data/metadata.csv')
df.head()

Unnamed: 0,date,id_coord,plume,set,lat,lon,coord_x,coord_y,path
0,20230223,id_6675,yes,train,31.52875,74.330625,24,47,images/plume/20230223_methane_mixing_ratio_id_...
1,20230103,id_2542,yes,train,35.538,112.524,42,37,images/plume/20230103_methane_mixing_ratio_id_...
2,20230301,id_6546,yes,train,21.06,84.936667,58,15,images/plume/20230301_methane_mixing_ratio_id_...
3,20230225,id_6084,yes,train,26.756667,80.973333,28,62,images/plume/20230225_methane_mixing_ratio_id_...
4,20230105,id_2012,yes,train,34.8,40.77,59,44,images/plume/20230105_methane_mixing_ratio_id_...


In [4]:
# Split the data
train_df, valid_df = train_test_split(df, train_size =0.7, test_size=0.3, random_state=42)

In [5]:
# extract file names from path
train_df['new_path'] = train_df['path'].str.replace(r'images/(plume|no_plume)/', '', regex=True) + ".tif"
valid_df['new_path'] = valid_df['path'].str.replace(r'images/(plume|no_plume)/', '', regex=True) + ".tif"

In [6]:
#For each train and validation datasets seggregate data into separate folders based on plume or no plume
import os
import random
import shutil

# Set up paths
data_dir = '/content/drive/My Drive/data/train data/images'
train_dir= 'train'
val_dir = 'validation'

plume_dir = 'plume'
no_plume_dir = 'no_plume'

# Create validation directories
os.makedirs(os.path.join(train_dir, plume_dir), exist_ok=True)
os.makedirs(os.path.join(train_dir, no_plume_dir), exist_ok=True)
os.makedirs(os.path.join(val_dir, plume_dir), exist_ok=True)
os.makedirs(os.path.join(val_dir, no_plume_dir), exist_ok=True)

for index, row in train_df.iterrows():
    image_file = row['new_path']
    is_plume = row['plume']

    if is_plume == 'yes':
      if not os.path.exists(os.path.join(train_dir, plume_dir, image_file)):
        shutil.move(os.path.join(data_dir, plume_dir, image_file),
                os.path.join(train_dir, plume_dir))
    else:
      if not os.path.exists(os.path.join(train_dir, no_plume_dir, image_file)):
        shutil.move(os.path.join(data_dir, no_plume_dir, image_file),
                os.path.join(train_dir, no_plume_dir))

for index, row in valid_df.iterrows():
    image_file = row['new_path']
    is_plume = row['plume']

    if is_plume == 'yes':
      if not os.path.exists(os.path.join(val_dir, plume_dir, image_file)):
        shutil.move(os.path.join(data_dir, plume_dir, image_file),
                os.path.join(val_dir, plume_dir))
    else:
      if not os.path.exists(os.path.join(val_dir, no_plume_dir, image_file)):
        shutil.move(os.path.join(data_dir, no_plume_dir, image_file),
                os.path.join(val_dir, no_plume_dir))


In [7]:
class CustomDataset(Dataset):
    def __init__(self, root_dir, augment=False):
        self.root_dir = root_dir
        self.augment = augment
        self.image_list = []
        self.labels = []
        self.augmented_images = []

        plume_images = []
        no_plume_images = []

        classes = os.listdir(root_dir)
        for class_name in classes:
            class_dir = os.path.join(root_dir, class_name)
            if os.path.isdir(class_dir):
                image_names = os.listdir(class_dir)
                tiff_image_names = [img_name for img_name in image_names if img_name.lower().endswith('.tiff') or img_name.lower().endswith('.tif')]

                if class_name == 'plume':
                    plume_images.extend([os.path.join(class_dir, img_name) for img_name in tiff_image_names])
                else:
                    no_plume_images.extend([os.path.join(class_dir, img_name) for img_name in tiff_image_names])

                self.labels.extend([1 if class_name == 'plume' else 0] * len(tiff_image_names))

        # Calculate the number of images in each class
        num_images = min(len(plume_images), len(no_plume_images))

        if augment:
            augmented_images = []
            for _ in range(num_images):
                # Randomly select plume and no plume images
                plume_image_path = random.choice(plume_images)
                no_plume_image_path = random.choice(no_plume_images)

                # Load and augment plume image
                plume_image = Image.open(plume_image_path)
                plume_image = plume_image.resize((64, 64))  # Resize to 64x64
                transform = transforms.Compose([
                    transforms.RandomHorizontalFlip(),
                    transforms.RandomRotation(10),
                    transforms.ToTensor()
                ])
                augmented_plume_image = transform(plume_image)
                augmented_images.append(augmented_plume_image)

                # Load and augment no plume image
                no_plume_image = Image.open(no_plume_image_path)
                no_plume_image = no_plume_image.resize((64, 64))  # Resize to 64x64
                augmented_no_plume_image = transform(no_plume_image)
                augmented_images.append(augmented_no_plume_image)

            self.augmented_images = augmented_images

        self.image_list = plume_images[:num_images] + no_plume_images[:num_images]
        self.labels = [1] * num_images + [0] * num_images

        if augment:
            num_augmented_images = len(self.augmented_images)
            self.image_list.extend(self.augmented_images)
            self.labels.extend(self.labels[:num_augmented_images])

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, idx):
        image = self.image_list[idx]
        label = self.labels[idx]

        if isinstance(image, str):
            image = Image.open(image)
            image = image.resize((64, 64))  # Resize to 64x64

            transform = transforms.Compose([
                transforms.ToTensor()
            ])
            image = transform(image)

        return image, label

In [8]:
root_dir = "train"
val_dir = "validation"

train_dataset = CustomDataset(root_dir, augment=True)
val_dataset = CustomDataset(val_dir, augment=False)


# Define batch size for training and validation
batch_size = 32

# Create the train DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Create the validation DataLoader
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Verify the train and validation data loaders
for images, labels in train_loader:
    print(f"Train batch - Images: {images.shape}, Labels: {labels.shape}")
    break

for images, labels in val_loader:
    print(f"Validation batch - Images: {images.shape}, Labels: {labels.shape}")
    break

Train batch - Images: torch.Size([32, 1, 64, 64]), Labels: torch.Size([32])
Validation batch - Images: torch.Size([32, 1, 64, 64]), Labels: torch.Size([32])


In [13]:
#We can check that our train dataset is balanced between plume and no plume
num_plumes = sum(label == 1 for label in train_dataset.labels)
num_no_plumes = sum(label == 0 for label in train_dataset.labels)

print("Number of training samples:", len(train_dataset))
print(f"Number of plumes: {num_plumes}")
print(f"Number of no plumes: {num_no_plumes}")

Number of training samples: 568
Number of plumes: 284
Number of no plumes: 284


#WideResNet50

In [10]:
# Define the device for training (CPU or GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-trained ResNet50 model
resnet50 = models.wide_resnet50_2(weights='Wide_ResNet50_2_Weights.IMAGENET1K_V2')

# Modify the first layer to accept single-channel grayscale images
resnet50.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

# Modify the last fully connected layer for binary classification with softmax activation
num_classes = 2  # 2 classes: 1 or 0
resnet50.fc = nn.Sequential(
    nn.Linear(resnet50.fc.in_features, num_classes)
)

Downloading: "https://download.pytorch.org/models/wide_resnet50_2-9ba9bcbe.pth" to /root/.cache/torch/hub/checkpoints/wide_resnet50_2-9ba9bcbe.pth
100%|██████████| 263M/263M [00:23<00:00, 11.7MB/s]


In [11]:
# Move the model to the device
resnet50 = resnet50.to(device)

# Define the loss function (criterion)
criterion = nn.CrossEntropyLoss()

# Define the optimizer
optimizer = torch.optim.Adam(resnet50.parameters(), lr=0.001)

num_epochs = 40

In [12]:
from sklearn.metrics import classification_report, f1_score, roc_auc_score
best_auc = 0.0
# Training loop
for epoch in tqdm(range(num_epochs), desc="Epochs"):
    resnet50.train()  # Set the model to training mode

    epoch_loss = 0.0  # Accumulator for epoch loss
    for images, labels in train_loader:
        images = images.to(device, dtype=torch.float)
        labels = labels.to(device)

        # Forward pass
        outputs = resnet50(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the loss
        epoch_loss += loss.item()

    # Compute the average loss for the epoch
    epoch_loss /= len(train_loader)
    tqdm.write(f"Epoch {epoch+1}/{num_epochs} - Average Loss: {epoch_loss}")

    # Validation loop
    resnet50.eval()  # Set the model to evaluation mode

    true_probabilities = []
    predicted_probabilities = []

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device, dtype=torch.float)
            labels = labels.to(device)

            # Forward pass
            outputs = resnet50(images)
            probabilities = torch.softmax(outputs, dim=1)

            # Collect predicted probabilities and true labels
            predicted_probabilities.extend(probabilities[:, 1].cpu().numpy())  # Assuming binary classification
            true_probabilities.extend(labels.cpu().numpy())

    # Compute the AUC
    auc = roc_auc_score(true_probabilities, predicted_probabilities)
    print(f"Validation AUC: {auc}")

    # Check if current accuracy is better than the previous best accuracy
    if auc > best_auc:
        best_auc = auc
        torch.save(resnet50.state_dict(), f"resnet50_{best_auc:.4f}.pth")  # Save the model with AUC in the filename
        print("Saved")

Epochs:   0%|          | 0/40 [00:09<?, ?it/s]

Epoch 1/40 - Average Loss: 0.7008037236001756
Validation AUC: 0.7882653061224489


Epochs:   2%|▎         | 1/40 [00:10<07:00, 10.79s/it]

Saved


Epochs:   5%|▌         | 2/40 [00:12<03:33,  5.63s/it]

Epoch 2/40 - Average Loss: 0.6173184596829944
Validation AUC: 0.6154336734693878


Epochs:   5%|▌         | 2/40 [00:14<03:33,  5.63s/it]

Epoch 3/40 - Average Loss: 0.4349158720837699
Validation AUC: 0.8364158163265306


Epochs:   8%|▊         | 3/40 [00:15<02:35,  4.21s/it]

Saved


Epochs:  10%|█         | 4/40 [00:17<01:59,  3.33s/it]

Epoch 4/40 - Average Loss: 0.31937407039933735
Validation AUC: 0.7385204081632654


Epochs:  12%|█▎        | 5/40 [00:19<01:40,  2.86s/it]

Epoch 5/40 - Average Loss: 0.34237472381856704
Validation AUC: 0.7965561224489796


Epochs:  15%|█▌        | 6/40 [00:21<01:28,  2.59s/it]

Epoch 6/40 - Average Loss: 0.2711321682565742
Validation AUC: 0.6463647959183674


Epochs:  18%|█▊        | 7/40 [00:23<01:19,  2.40s/it]

Epoch 7/40 - Average Loss: 0.1668978457649549
Validation AUC: 0.7889030612244897


Epochs:  20%|██        | 8/40 [00:25<01:12,  2.26s/it]

Epoch 8/40 - Average Loss: 0.14379617577004764
Validation AUC: 0.767219387755102


Epochs:  22%|██▎       | 9/40 [00:27<01:07,  2.18s/it]

Epoch 9/40 - Average Loss: 0.07712962074826162
Validation AUC: 0.8026147959183674


Epochs:  25%|██▌       | 10/40 [00:29<01:03,  2.11s/it]

Epoch 10/40 - Average Loss: 0.11857124314539963
Validation AUC: 0.5070153061224489


Epochs:  28%|██▊       | 11/40 [00:31<01:00,  2.07s/it]

Epoch 11/40 - Average Loss: 0.2507282081577513
Validation AUC: 0.5012755102040817


Epochs:  30%|███       | 12/40 [00:33<00:57,  2.07s/it]

Epoch 12/40 - Average Loss: 0.21262330851621097
Validation AUC: 0.6948341836734694


Epochs:  32%|███▎      | 13/40 [00:35<00:56,  2.07s/it]

Epoch 13/40 - Average Loss: 0.1689421675271458
Validation AUC: 0.7244897959183674


Epochs:  35%|███▌      | 14/40 [00:37<00:53,  2.05s/it]

Epoch 14/40 - Average Loss: 0.11053391132089826
Validation AUC: 0.6693239795918368


Epochs:  38%|███▊      | 15/40 [00:39<00:50,  2.02s/it]

Epoch 15/40 - Average Loss: 0.09326186703724994
Validation AUC: 0.7949617346938775


Epochs:  40%|████      | 16/40 [00:41<00:48,  2.01s/it]

Epoch 16/40 - Average Loss: 0.13897912359486023
Validation AUC: 0.764030612244898


Epochs:  42%|████▎     | 17/40 [00:43<00:46,  2.00s/it]

Epoch 17/40 - Average Loss: 0.10989986546337605
Validation AUC: 0.7850765306122449


Epochs:  45%|████▌     | 18/40 [00:45<00:43,  2.00s/it]

Epoch 18/40 - Average Loss: 0.06401857159410913
Validation AUC: 0.7866709183673469


Epochs:  48%|████▊     | 19/40 [00:47<00:42,  2.02s/it]

Epoch 19/40 - Average Loss: 0.06917118014664286
Validation AUC: 0.8134566326530611


Epochs:  50%|█████     | 20/40 [00:49<00:40,  2.03s/it]

Epoch 20/40 - Average Loss: 0.09838763614081675
Validation AUC: 0.7930484693877551


Epochs:  52%|█████▎    | 21/40 [00:51<00:38,  2.02s/it]

Epoch 21/40 - Average Loss: 0.08384091906353003
Validation AUC: 0.7855548469387755


Epochs:  55%|█████▌    | 22/40 [00:53<00:36,  2.01s/it]

Epoch 22/40 - Average Loss: 0.07245785598125723
Validation AUC: 0.7420280612244898


Epochs:  57%|█████▊    | 23/40 [00:55<00:34,  2.00s/it]

Epoch 23/40 - Average Loss: 0.07409490225836635
Validation AUC: 0.7806122448979591


Epochs:  60%|██████    | 24/40 [00:57<00:31,  2.00s/it]

Epoch 24/40 - Average Loss: 0.05383376751302017
Validation AUC: 0.8198341836734694


Epochs:  62%|██████▎   | 25/40 [00:59<00:30,  2.01s/it]

Epoch 25/40 - Average Loss: 0.10661433176654908
Validation AUC: 0.7933673469387756


Epochs:  65%|██████▌   | 26/40 [01:01<00:28,  2.04s/it]

Epoch 26/40 - Average Loss: 0.060670214601688914
Validation AUC: 0.7576530612244897


Epochs:  68%|██████▊   | 27/40 [01:03<00:26,  2.04s/it]

Epoch 27/40 - Average Loss: 0.061640865486373916
Validation AUC: 0.7206632653061225


Epochs:  70%|███████   | 28/40 [01:05<00:24,  2.03s/it]

Epoch 28/40 - Average Loss: 0.046890373225323856
Validation AUC: 0.7069515306122449


Epochs:  72%|███████▎  | 29/40 [01:07<00:22,  2.02s/it]

Epoch 29/40 - Average Loss: 0.10570986067048377
Validation AUC: 0.7538265306122449


Epochs:  75%|███████▌  | 30/40 [01:09<00:20,  2.02s/it]

Epoch 30/40 - Average Loss: 0.10426322308679421
Validation AUC: 0.7940051020408163


Epochs:  78%|███████▊  | 31/40 [01:11<00:18,  2.02s/it]

Epoch 31/40 - Average Loss: 0.0668113346522053
Validation AUC: 0.7713647959183674


Epochs:  80%|████████  | 32/40 [01:13<00:16,  2.04s/it]

Epoch 32/40 - Average Loss: 0.05247772730783456
Validation AUC: 0.8070790816326531


Epochs:  82%|████████▎ | 33/40 [01:15<00:14,  2.07s/it]

Epoch 33/40 - Average Loss: 0.046894665374161884
Validation AUC: 0.7726403061224489


Epochs:  85%|████████▌ | 34/40 [01:17<00:12,  2.06s/it]

Epoch 34/40 - Average Loss: 0.03710214377173947
Validation AUC: 0.8032525510204082


Epochs:  88%|████████▊ | 35/40 [01:19<00:10,  2.05s/it]

Epoch 35/40 - Average Loss: 0.0242405469083072
Validation AUC: 0.7924107142857143


Epochs:  90%|█████████ | 36/40 [01:22<00:08,  2.05s/it]

Epoch 36/40 - Average Loss: 0.026113977657385275
Validation AUC: 0.8105867346938777


Epochs:  92%|█████████▎| 37/40 [01:24<00:06,  2.04s/it]

Epoch 37/40 - Average Loss: 0.13039602006837311
Validation AUC: 0.7123724489795918


Epochs:  95%|█████████▌| 38/40 [01:26<00:04,  2.04s/it]

Epoch 38/40 - Average Loss: 0.22460928559303284
Validation AUC: 0.7302295918367347


Epochs:  98%|█████████▊| 39/40 [01:28<00:02,  2.07s/it]

Epoch 39/40 - Average Loss: 0.1745430175215006
Validation AUC: 0.7563775510204082


Epochs:  98%|█████████▊| 39/40 [01:30<00:02,  2.07s/it]

Epoch 40/40 - Average Loss: 0.15734893093920416
Validation AUC: 0.8517219387755103


Epochs: 100%|██████████| 40/40 [01:30<00:00,  2.27s/it]

Saved



