# Creating Labels

This code creates labels in the TreeOrNoTree-2 folder.

In [29]:
import os
import json
import csv

# Function to extract filenames and labels from JSON annotations
def extract_filenames_and_labels(json_file, tree_category_id=1):
    """Derive one binary label per image from a COCO-style annotation file.

    Args:
        json_file (str): Path to a JSON file containing an ``images`` list
            (entries with ``id`` and ``file_name``) and an ``annotations``
            list (entries with ``image_id`` and ``category_id``).
        tree_category_id (int, optional): The ``category_id`` that marks an
            image as positive. Defaults to 1, the value that used to be
            hard-coded.

    Returns:
        tuple[list, list]: ``(filenames, labels)`` as two parallel lists.
        A label is 1 when at least one annotation with ``tree_category_id``
        points at the image, otherwise 0.
    """
    # Be explicit about the encoding so non-ASCII file names are decoded the
    # same way regardless of the platform default.
    with open(json_file, encoding='utf-8') as f:
        data = json.load(f)

    # Map of image IDs to filenames
    image_id_to_filename = {image['id']: image['file_name'] for image in data['images']}

    # Set of image IDs that carry at least one positive annotation
    image_ids_with_trees = {
        annotation['image_id']
        for annotation in data['annotations']
        if annotation['category_id'] == tree_category_id
    }

    # Generate filenames and labels as parallel lists
    filenames = list(image_id_to_filename.values())
    labels = [1 if image_id in image_ids_with_trees else 0 for image_id in image_id_to_filename]

    return filenames, labels

# Main function to process directories and generate CSV files
def process_directories(base_dir, sub_dirs=('test', 'train', 'valid')):
    """Write a ``filename,label`` CSV next to every JSON annotation file.

    For each sub-directory of ``base_dir`` named in ``sub_dirs``, every file
    ending in ``.json`` is passed to ``extract_filenames_and_labels`` and the
    result is written to a CSV with the same base name in the same directory.
    One "Processed ..." line is printed per converted file.

    Args:
        base_dir (str): Directory that contains the split sub-directories.
        sub_dirs (iterable of str, optional): Names of the sub-directories to
            process. Defaults to the three splits that used to be hard-coded.

    Raises:
        FileNotFoundError: If one of the sub-directories does not exist
            (raised by ``os.listdir``).
    """
    for sub_dir in sub_dirs:
        current_dir = os.path.join(base_dir, sub_dir)

        # os.listdir order is arbitrary; sort so processing and log order are
        # reproducible between runs and platforms.
        for file in sorted(os.listdir(current_dir)):
            if not file.endswith('.json'):
                continue

            json_file_path = os.path.join(current_dir, file)
            filenames, labels = extract_filenames_and_labels(json_file_path)

            # Create a corresponding CSV file. UTF-8 is spelled out because
            # the CSV is later read with pandas.read_csv, which decodes as
            # UTF-8 by default.
            csv_file_path = os.path.join(current_dir, os.path.splitext(file)[0] + '.csv')
            with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(['filename', 'label'])  # Write header
                writer.writerows(zip(filenames, labels))

            print(f"Processed {json_file_path} -> {csv_file_path}")

# Example usage: build the label CSVs for every split under base_dir.
# NOTE(review): base_dir is an absolute, machine-specific path; anyone running
# this notebook elsewhere has to edit it first (consider a configurable data
# directory instead).
base_dir = '/Users/marco/Library/CloudStorage/OneDrive-DelftUniversityofTechnology/Control&Simulation/deeplearning/github/StreetviewCropTypeMapping/TreeOrNoTree-2'  # Update this to your base directory path
process_directories(base_dir)



Processed /Users/marco/Library/CloudStorage/OneDrive-DelftUniversityofTechnology/Control&Simulation/deeplearning/github/StreetviewCropTypeMapping/TreeOrNoTree-2/test/_annotations.coco.json -> /Users/marco/Library/CloudStorage/OneDrive-DelftUniversityofTechnology/Control&Simulation/deeplearning/github/StreetviewCropTypeMapping/TreeOrNoTree-2/test/_annotations.coco.csv
Processed /Users/marco/Library/CloudStorage/OneDrive-DelftUniversityofTechnology/Control&Simulation/deeplearning/github/StreetviewCropTypeMapping/TreeOrNoTree-2/train/_annotations.coco.json -> /Users/marco/Library/CloudStorage/OneDrive-DelftUniversityofTechnology/Control&Simulation/deeplearning/github/StreetviewCropTypeMapping/TreeOrNoTree-2/train/_annotations.coco.csv
Processed /Users/marco/Library/CloudStorage/OneDrive-DelftUniversityofTechnology/Control&Simulation/deeplearning/github/StreetviewCropTypeMapping/TreeOrNoTree-2/valid/_annotations.coco.json -> /Users/marco/Library/CloudStorage/OneDrive-DelftUniversityofTechn

In [30]:
import os
import pandas as pd
import csv
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image

# Define paths
# NOTE(review): absolute, machine-specific path; it has to be edited before
# this cell can run on another machine.
base_dir = "/Users/marco/Library/CloudStorage/OneDrive-DelftUniversityofTechnology/Control&Simulation/deeplearning/github/StreetviewCropTypeMapping/TreeOrNoTree-2"
# Maps a phase name (used as key of `loaders` below) to the sub-directory of
# base_dir that holds that split. Keys and values currently coincide.
sub_dirs = {
    "train": "train",
    "valid": "valid",
    "test": "test"
}

# Custom Dataset Class
class TreeDataset(Dataset):
    """Image/label dataset backed by a CSV file.

    The CSV is read with ``pandas.read_csv``; its first column is used as the
    image file name (relative to ``img_dir``) and its second column as the
    integer label.
    """

    def __init__(self, csv_file, img_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            img_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.tree_frame = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation CSV."""
        return len(self.tree_frame)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is always delivered as 3-channel RGB (before ``transform``
        is applied); the label is a Python ``int``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.img_dir, self.tree_frame.iloc[idx, 0])
        # Image.open is lazy and keeps the file handle open. Using it as a
        # context manager closes the handle, and convert('RGB') both forces
        # the pixel data to load and guarantees three channels, so grayscale
        # or RGBA files do not break a 3-channel Normalize downstream.
        with Image.open(img_name) as img:
            image = img.convert('RGB')
        label = int(self.tree_frame.iloc[idx, 1])

        if self.transform:
            image = self.transform(image)

        return image, label

# Transformation for the image: resize to a fixed 600x600, convert to a
# tensor, then normalise each of the three channels.
# NOTE(review): the mean/std values look like the commonly used ImageNet
# channel statistics, presumably chosen to match the pretrained ResNet;
# confirm.
transform = transforms.Compose([
    transforms.Resize((600, 600)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Loaders: one DataLoader per phase, keyed by phase name.
loaders = {}

for phase in ('train', 'valid', 'test'):
    dir_path = os.path.join(base_dir, sub_dirs[phase])
    csv_file = os.path.join(dir_path, "_annotations.coco.csv")

    # NOTE(review): images are looked up in an 'images' sub-folder here, while
    # the training cell further down passes the split directory itself as
    # img_dir; confirm which layout matches the data on disk.
    dataset = TreeDataset(
        csv_file=csv_file,
        img_dir=os.path.join(dir_path, 'images'),
        transform=transform,
    )

    # Larger batches for training; smaller ones for validation/testing to
    # reduce memory usage.
    batch_size = 32 if phase == 'train' else 16

    # Only the training split is shuffled.
    loaders[phase] = DataLoader(dataset, batch_size=batch_size, shuffle=(phase == 'train'))

# Now, you can use loaders['train'], loaders['valid'], and loaders['test'] for your training, validation, and testing loops.


In [39]:
from torch.nn.modules.loss import BCEWithLogitsLoss
from torch.optim import lr_scheduler
import torch
from torch.utils.data import DataLoader
from torchvision import models, transforms
import torch.nn as nn
from torch.nn.modules.loss import BCEWithLogitsLoss
from torch.optim import Adam
from tqdm import tqdm

# Run on the GPU when one is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize model: a ResNet-50 whose final fully connected layer is replaced
# by a single-output layer (one logit for the binary decision).
# NOTE(review): newer torchvision releases replace `pretrained=` with a
# `weights=` argument; confirm against the installed version.
model = models.resnet50(pretrained=True)
nr_filters = model.fc.in_features  # number of input features of the last layer
model.fc = nn.Linear(nr_filters, 1)  # Adjusting for binary classification
model = model.to(device)

# Loss and optimizer
# BCEWithLogitsLoss takes raw logits, so the model itself has no sigmoid.
loss_fn = BCEWithLogitsLoss()
# Only the parameters of the new `fc` head are handed to the optimizer.
# NOTE(review): the backbone parameters are not marked requires_grad=False
# here, so gradients are presumably still computed for them; confirm whether
# freezing was intended.
optimizer = Adam(model.fc.parameters(), lr=0.01)

# Training step function
def make_train_step(model, optimizer, loss_fn):
    """Build a closure that runs one optimisation step on a single batch.

    Args:
        model: Module producing raw logits for a batch.
        optimizer: Optimizer stepped once per call.
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar
            tensor.

    Returns:
        callable: ``train_step(x, y) -> (loss, acc)`` where ``loss`` is the
        batch loss as a Python float and ``acc`` is the batch accuracy as
        computed by ``accuracy``.
    """
    def train_step(x, y):
        model.train()  # Enter train mode
        yhat = model(x)  # Make prediction (raw logits)
        loss = loss_fn(yhat, y)  # Compute loss
        loss.backward()  # Backpropagate the gradients
        optimizer.step()  # Update parameters
        optimizer.zero_grad()  # Reset gradients

        # accuracy() applies the sigmoid itself, so it must receive the raw
        # logits. Feeding it sigmoid(yhat) squashes twice: every value ends up
        # above 0.5 and is rounded to class 1, making the metric meaningless.
        acc = accuracy(yhat.detach(), y)
        return loss.item(), acc
    return train_step

# Accuracy calculation
def accuracy(preds, labels):
    """Return the fraction of correct binary predictions as a Python float.

    Args:
        preds: Raw logits (NOT probabilities); a sigmoid is applied here
            before rounding to 0/1.
        labels: Tensor of 0/1 targets with the same shape as ``preds``.
    """
    preds_rounded = torch.round(torch.sigmoid(preds))
    correct = (preds_rounded == labels).float()  # convert into float for division
    acc = correct.sum() / len(correct)
    return acc.item()


# Prepare dataset and dataloaders
# Same preprocessing as the earlier cell: fixed 600x600 resize, tensor
# conversion, per-channel normalisation. This rebinds the notebook-level
# name `transform`.
transform = transforms.Compose([
    transforms.Resize((600, 600)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


# NOTE(review): absolute, machine-specific paths. Each *_dir is the split
# folder itself and is used directly as the image directory.
train_csv = '/Users/marco/Library/CloudStorage/OneDrive-DelftUniversityofTechnology/Control&Simulation/deeplearning/github/StreetviewCropTypeMapping/TreeOrNoTree-2/train/_annotations.coco.csv'
train_dir = '/Users/marco/Library/CloudStorage/OneDrive-DelftUniversityofTechnology/Control&Simulation/deeplearning/github/StreetviewCropTypeMapping/TreeOrNoTree-2/train'
valid_csv = '/Users/marco/Library/CloudStorage/OneDrive-DelftUniversityofTechnology/Control&Simulation/deeplearning/github/StreetviewCropTypeMapping/TreeOrNoTree-2/valid/_annotations.coco.csv'
valid_dir = '/Users/marco/Library/CloudStorage/OneDrive-DelftUniversityofTechnology/Control&Simulation/deeplearning/github/StreetviewCropTypeMapping/TreeOrNoTree-2/valid'

# TreeDataset is defined in an earlier cell of this notebook.
train_dataset = TreeDataset(csv_file=train_csv, img_dir=train_dir, transform=transform)
valid_dataset = TreeDataset(csv_file=valid_csv, img_dir=valid_dir, transform=transform)

# Training batches are shuffled; validation batches are not.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=16)

# Train step: closure bound to the model, optimizer and loss defined above.
train_step = make_train_step(model, optimizer, loss_fn)

# Training loop: for each epoch, one pass over train_loader followed by one
# pass over valid_loader, printing the mean loss and accuracy of each pass.
n_epochs = 20
for epoch in range(n_epochs):
    # Training
    model.train()
    train_losses, train_accs = [], []
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{n_epochs}, Train"):
        # Labels are reshaped to (batch, 1) floats so they line up with the
        # single-logit output of the model.
        images, labels = images.to(device), labels.to(device).unsqueeze(1).float()
        loss, acc = train_step(images, labels)
        train_losses.append(loss)
        train_accs.append(acc)
    
    # Plain mean over batches: a smaller final batch weighs as much as a full
    # one. Raises ZeroDivisionError if the loader yields no batches.
    avg_train_loss = sum(train_losses) / len(train_losses)
    avg_train_acc = sum(train_accs) / len(train_accs)
    print(f"Training loss: {avg_train_loss:.4f}, Accuracy: {avg_train_acc:.4f}")
    
    # Validation
    model.eval()
    valid_losses, valid_accs = [], []
    # No gradients are needed while evaluating.
    with torch.no_grad():
        for images, labels in tqdm(valid_loader, desc=f"Epoch {epoch+1}/{n_epochs}, Valid"):
            images, labels = images.to(device), labels.to(device).unsqueeze(1).float()
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            # The raw model outputs (logits) are passed to accuracy() here.
            acc = accuracy(outputs, labels)
            valid_losses.append(loss.item())
            valid_accs.append(acc)
    
    # Same per-batch averaging as for the training metrics.
    avg_valid_loss = sum(valid_losses) / len(valid_losses)
    avg_valid_acc = sum(valid_accs) / len(valid_accs)
    print(f"Validation loss: {avg_valid_loss:.4f}, Accuracy: {avg_valid_acc:.4f}")





Epoch 1/20, Train:   0%|          | 0/22 [00:43<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# TEST

In [None]:
from sklearn.metrics import precision_score, recall_score

# print('Labels', np.array(labels).shape)


# Debug output.
# NOTE(review): `np` and `testloader` are not defined in any earlier visible
# cell, so this line presumably relies on leftover kernel state; confirm
# where they come from.
print(np.array(testloader))

def make_train_step(model, optimizer, loss_fn):
  """Build a closure that runs one optimisation step on a single batch.

  Args:
      model: Module producing raw logits for a batch.
      optimizer: Optimizer stepped once per call.
      loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar tensor.

  Returns:
      callable: ``train_step(x, y) -> (loss, acc)`` where ``loss`` is the loss
      tensor of the batch and ``acc`` is the batch accuracy (Python float).
  """
  def train_step(x,y):
    # Enter train mode BEFORE the forward pass. The evaluation code switches
    # the model to eval mode, so predicting first would run dropout and
    # batch-norm layers in the wrong mode for this step.
    model.train()
    #make prediction
    yhat = model(x)
    #compute loss
    loss = loss_fn(yhat,y)

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    # accuracy() in this cell expects probabilities, hence the sigmoid here.
    yhatsig = torch.sigmoid(yhat)
    acc = accuracy(yhatsig, y)

    return loss, acc
  return train_step

def accuracy(preds, labels):
    """Return the fraction of predictions that match the labels.

    Args:
        preds: Tensor of probabilities (already passed through a sigmoid);
            each one is rounded to 0/1 before comparison.
        labels: 0/1 targets, one per prediction.

    Returns:
        float: Share of matching predictions; 0.0 for an empty batch.
    """
    total = len(preds)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # Compare all predictions in one vectorised operation instead of a
    # per-element Python loop. Flattening keeps (N, 1) and (N,) inputs aligned.
    labels = torch.as_tensor(labels, device=preds.device)
    hits = (torch.round(preds).reshape(-1) == labels.reshape(-1)).sum().item()
    return hits / total

from torch.nn.modules.loss import BCEWithLogitsLoss
from torch.optim import lr_scheduler

# Loss
loss_fn = BCEWithLogitsLoss() # binary cross entropy with a built-in sigmoid, so the model itself needs no sigmoid layer

# Optimizer: only the parameters of the model's final `fc` layer are passed in.
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.01, betas=(0.9, 0.999))

# Train step: closure bound to the model, optimizer and loss above. This
# rebinds the notebook-level name `train_step`.
train_step = make_train_step(model, optimizer, loss_fn)

from tqdm import tqdm

# numpy is used for the metrics below; import it here so the cell does not
# depend on an import made by some other cell.
import numpy as np

losses = []
val_losses = []
accs = []
val_accs = []
epoch_train_losses = []
epoch_test_losses = []
epoch_train_accs = []
epoch_test_accs = []

n_epochs = 1
early_stopping_tolerance = 4
early_stopping_threshold = 1.0
# Number of epochs whose accuracy fell short of the best one seen so far.
# It has to live outside the epoch loop: resetting it on every epoch means it
# can never reach the tolerance.
early_stopping_counter = 0
# True labels and predicted probabilities, accumulated over all batches (and
# over all epochs, since they are never cleared).
y_true = []
y_pred = []
for epoch in range(n_epochs):
  epoch_loss = 0
  epoch_acc = 0
  print(epoch)
  print(len(trainloader))

  #validation doesnt requires gradient
  with torch.no_grad():
    cum_loss = 0
    cum_acc = 0

    #model to eval mode (once per epoch is enough)
    model.eval()

    for x_batch, y_batch in testloader:
      x_batch = x_batch.to(device)
      y_batch = y_batch.unsqueeze(1).float() #convert target to same nn output shape
      y_batch = y_batch.to(device)

      yhat = model(x_batch)
      yhatsig = torch.sigmoid(yhat)
      y_true.extend(y_batch.cpu().numpy())  # Append true labels
      y_pred.extend(yhatsig.cpu().numpy())  # Append predicted probabilities
      val_loss = loss_fn(yhat,y_batch)
      cum_loss += val_loss/len(testloader)
      val_losses.append(val_loss.item())

      val_acc = accuracy(yhatsig, y_batch)
      cum_acc += val_acc/len(testloader)
      val_accs.append(val_acc)


    epoch_test_losses.append(cum_loss)
    epoch_test_accs.append(cum_acc)

    print('Epoch : {}, test loss : {} test acc: {}'.format(epoch+1,cum_loss, cum_acc))

    # Turn probabilities into hard 0/1 labels with a 0.5 threshold. A plain
    # astype(int) truncates every probability below 1.0 to 0, which made
    # precision and recall come out as 0 regardless of the model.
    y_true_flat = np.array(y_true).ravel()
    y_pred_labels = (np.array(y_pred).ravel() > 0.5).astype(int)
    print(np.count_nonzero(y_pred_labels))      # predicted positives
    print(np.count_nonzero(y_true_flat == 1))   # actual positives
    precision = precision_score(y_true_flat, y_pred_labels)
    recall = recall_score(y_true_flat, y_pred_labels)
    print("Precision", precision)
    print("Recall", recall)

    best_loss = min(epoch_test_losses)
    best_acc = max(epoch_test_accs)
    #save best model
    # NOTE(review): state_dict() returns references to the live parameters,
    # not a snapshot. That is harmless while this loop does no training, but
    # take a deep copy if training is added.
    if cum_acc >= best_acc:
      best_acc_model_wts = model.state_dict()
    if cum_loss <= best_loss:
      best_loss_model_wts = model.state_dict()

    #early stopping
    if cum_acc < best_acc:
      early_stopping_counter +=1

    if (early_stopping_counter >= early_stopping_tolerance) or (best_acc >= early_stopping_threshold):
      print("\nTerminating: early stopping")
      break #terminate training

In [None]:
def testModels(model, testloader):
  """Evaluate ``model`` on ``testloader`` and print the mean loss and accuracy.

  Relies on the notebook-level names ``device``, ``loss_fn`` and ``accuracy``.
  Targets are reshaped to ``(batch, 1)`` floats; ``accuracy`` is called with
  the sigmoid of the model output. Nothing is returned.

  Args:
      model: Module to evaluate; it is switched to eval mode.
      testloader: Sized iterable of ``(inputs, targets)`` batches.
  """
  cum_loss = 0
  cum_acc = 0
  # Evaluation only: no gradients needed.
  with torch.no_grad():
    for inputs, targets in testloader:
      inputs = inputs.to(device)
      # convert target to same nn output shape, then move it to the device
      targets = targets.unsqueeze(1).float().to(device)

      # model to eval mode
      model.eval()

      logits = model(inputs)
      probs = torch.sigmoid(logits)

      # Each batch contributes 1/len(testloader) of its metric to the mean.
      n_batches = len(testloader)
      cum_loss = cum_loss + loss_fn(logits, targets) / n_batches
      cum_acc = cum_acc + accuracy(probs, targets) / n_batches

    print('Test loss : {} test acc: {}'.format(cum_loss, cum_acc))

# Optionally restore the weights captured by the evaluation loop before testing:
# model.load_state_dict(best_acc_model_wts)
# NOTE(review): `testloader` is not defined in any earlier visible cell;
# confirm where it comes from.
testModels(model, testloader)


# Inference Section

In [None]:
def saveModel(model, PATH):
  """Write the model's ``state_dict`` to ``PATH`` using ``torch.save``.

  Args:
      model: Module whose parameters and buffers are stored.
      PATH: Destination file path (or file-like object) for ``torch.save``.
  """
  weights = model.state_dict()
  torch.save(weights, PATH)


def loadModel(PATH):
  """Rebuild the single-logit ResNet-18 and load its weights from ``PATH``.

  Args:
      PATH: File holding a ``state_dict`` as written by ``saveModel``.

  Returns:
      The model, on the CPU and in eval mode.
  """
  # Build the bare architecture only. Every parameter and buffer is
  # overwritten by the checkpoint below, so requesting pretrained weights
  # would just trigger a needless download.
  model = models.resnet18()
  nr_filters = model.fc.in_features
  model.fc = nn.Linear(nr_filters, 1)
  # map_location lets a checkpoint that was saved from a GPU be restored on a
  # machine without CUDA; the model itself is created on the CPU anyway.
  model.load_state_dict(torch.load(PATH, map_location="cpu"))
  model.eval()
  return model

# Load the saved classifier. This rebinds the notebook-level name `model`,
# replacing whatever model earlier cells created.
# NOTE(review): `imagesRoot` is not defined in any visible cell; confirm where
# it is set.
PATH = imagesRoot + "fieldOrNot-ResNet18-87%.pt"
model = loadModel(PATH)

# Uncomment to write the current weights back to PATH:
# saveModel(model, PATH)

In [None]:


# List every entry of the source image folder and report how many there are.
# NOTE(review): `imagesRoot` is not defined in any visible cell.
folderPath = imagesRoot+"Thailand16/"
filteredFilenames = os.listdir(folderPath)
print(len(filteredFilenames))

In [None]:
# Work out which source files have not been sorted into a class folder yet.
outFolderPath = imagesRoot +'ThailandFieldOrNot/'
classes = {0: 'field/', 1: 'notField/'}
out = os.listdir(outFolderPath+'field/')
print(len(out))
out += os.listdir(outFolderPath+'notField/')

# Membership tests against a list are linear, which makes the filter below
# quadratic for large folders; a set makes each lookup constant time.
out_set = set(out)
allfiles = [x for x in filteredFilenames if x not in out_set]
print(len(allfiles))


In [None]:
import torch
import torchvision
from torch.utils.data import DataLoader, Dataset
import numpy as np
from PIL import Image
from tqdm import tqdm
import os
import concurrent.futures

class CustomDataset(Dataset):
    """Dataset over the ``.jpg`` files (case-insensitive) of one folder.

    Each item is ``(image, filename)`` where ``image`` is whatever
    ``torchvision.io.read_image`` returns for the file, cast to float.
    NOTE(review): no resizing or normalisation is applied here, unlike the
    transform used by the training cells; confirm that this is intended.
    """

    def __init__(self, folder_path, filenames):
        """Keep ``folder_path`` and those ``filenames`` ending in ``.jpg``."""
        self.folder_path = folder_path
        jpg_names = []
        for name in filenames:
            if name.lower().endswith('.jpg'):
                jpg_names.append(name)
        self.filenames = jpg_names

    def __len__(self):
        """Number of retained file names."""
        return len(self.filenames)

    def __getitem__(self, idx):
        """Read file ``idx`` and return ``(float image tensor, filename)``."""
        name = self.filenames[idx]
        pixels = torchvision.io.read_image(os.path.join(self.folder_path, name))
        return pixels.to(torch.float), name

def save_image(out_folder, class_folder, filename, image):
    """Save an array as an image file at ``out_folder/class_folder/filename``.

    Args:
        out_folder: Root output directory.
        class_folder: Sub-directory for the predicted class.
        filename: Name of the file to write.
        image: Array accepted by ``PIL.Image.fromarray``.
    """
    destination = os.path.join(out_folder, class_folder, filename)
    Image.fromarray(image).save(destination)

def saveModelPreds(folderPath, outFolderPath, filenames, classes, numSaved=5):
    """Classify the images in ``folderPath`` and save each into its class folder.

    Uses the notebook-level ``model``: every batch is passed through it, the
    sigmoid of the first output column is rounded to 0/1, and the image is
    written to ``outFolderPath/<classes[prediction]>/<filename>``.

    Args:
        folderPath: Directory containing the source images.
        outFolderPath: Root directory holding one sub-folder per class.
        filenames: Candidate file names; ``CustomDataset`` keeps the ``.jpg``
            ones.
        classes: Mapping from predicted class index (0 or 1) to the name of
            its sub-folder.
        numSaved: Currently unused; kept so existing calls stay valid.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset = CustomDataset(folderPath, filenames)
    imsLoader = DataLoader(dataset, batch_size=32)

    def _drain(futures):
        """Wait for the queued saves and return how many completed."""
        for fut in futures:
            fut.result()  # re-raises any exception from the worker thread
        return len(futures)

    model.eval()
    outs = 0
    model.to(device)
    with torch.no_grad(), concurrent.futures.ThreadPoolExecutor() as executor:
        pending = []
        for x_batch, fils in tqdm(imsLoader, total=len(imsLoader)):
            x_batch = x_batch.to(device)
            yhat = model(x_batch)
            yhatsig = torch.sigmoid(yhat).cpu().numpy()
            classFolders = [classes[int(index)] for index in np.rint(yhatsig[:, 0])]
            # Back to height x width x channel uint8 arrays for saving.
            im_batch = x_batch.permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)

            # Finish the previous batch's writes before queueing more, so at
            # most one batch of images is held in memory for saving while the
            # writes still overlap with this batch's forward pass.
            outs += _drain(pending)
            # Hand the saves to the thread pool; calling save_image directly
            # would leave the executor unused.
            pending = [
                executor.submit(save_image, outFolderPath, classFolders[j], fils[j], im_batch[j])
                for j in range(len(x_batch))
            ]
        outs += _drain(pending)

    print("Images Classified:", outs)

# Classify the not-yet-processed files and sort them into the class folders.
# NOTE(review): `allfiles` was built in an earlier cell from the listing of
# "Thailand16/", while folderPath here points at "Thailand15/"; confirm that
# the file names and the folder belong together.
outFolderPath = imagesRoot +'ThailandFieldOrNot/'
folderPath = imagesRoot+"Thailand15/"

# Predicted class index -> name of the output sub-folder.
classes = {0: 'field/', 1: 'notField/'}
# saveModelPreds(folderPath, outFolderPath, filteredFilenames, classes)
saveModelPreds(folderPath, outFolderPath, allfiles, classes)

