In [1]:

#####
#####
#####  All training data can be found in TreeOrNoTree-2.zip

import os
import json
import csv

# Function to extract filenames and labels from JSON annotations
def extract_filenames_and_labels(json_file, tree_category_id=1):
    """Derive a binary tree / no-tree label for every image in a COCO-style file.

    Args:
        json_file: Path to a JSON file holding an ``images`` list (entries with
            ``id`` and ``file_name``) and, optionally, an ``annotations`` list
            (entries with ``image_id`` and ``category_id``).
        tree_category_id: ``category_id`` value that marks a tree annotation.
            Defaults to 1, the value this notebook originally hard-coded.

    Returns:
        A ``(filenames, labels)`` tuple of two equally long lists. ``labels[i]``
        is 1 when at least one annotation with ``tree_category_id`` refers to
        the image behind ``filenames[i]``, and 0 otherwise.

    Raises:
        KeyError: If ``images`` is missing, or an entry lacks a required field.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(json_file, encoding='utf-8') as f:
        data = json.load(f)

    # Map of image IDs to filenames (a repeated ID keeps its last filename).
    image_id_to_filename = {image['id']: image['file_name'] for image in data['images']}

    # Set of image IDs that have trees. A file without any annotation may omit
    # the key altogether, in which case every image is labelled 0.
    image_ids_with_trees = {
        annotation['image_id']
        for annotation in data.get('annotations', [])
        if annotation['category_id'] == tree_category_id
    }

    filenames = list(image_id_to_filename.values())
    labels = [int(image_id in image_ids_with_trees) for image_id in image_id_to_filename]

    return filenames, labels

# Main function to process directories and generate CSV files
def process_directories(base_dir, sub_dirs=('test', 'train', 'valid')):
    """Write a ``filename,label`` CSV next to every JSON file of each split.

    For each sub-directory of ``base_dir`` named in ``sub_dirs``, every file
    ending in ``.json`` is passed to ``extract_filenames_and_labels`` and the
    result is written to a CSV with the same stem in the same directory
    (``foo.json`` -> ``foo.csv``). An existing CSV is overwritten.

    Args:
        base_dir: Directory that contains the split sub-directories.
        sub_dirs: Names of the sub-directories to process. Defaults to the
            three splits this notebook uses.

    Raises:
        FileNotFoundError: If one of the split directories does not exist.
    """
    for sub_dir in sub_dirs:
        current_dir = os.path.join(base_dir, sub_dir)
        if not os.path.exists(current_dir):
            # Same exception type os.listdir() would raise, but with a message
            # that tells the user what to fix.
            raise FileNotFoundError(
                f"Split directory not found: {os.path.abspath(current_dir)!r}. "
                "Set base_dir to the folder that contains the split sub-directories."
            )

        # Process each JSON file in the directory (sorted for a stable order)
        for file in sorted(os.listdir(current_dir)):
            if not file.endswith('.json'):
                continue

            json_file_path = os.path.join(current_dir, file)
            filenames, labels = extract_filenames_and_labels(json_file_path)

            # Create a corresponding CSV file. UTF-8 matches the default
            # encoding pandas.read_csv uses when the CSV is loaded later.
            csv_file_path = os.path.join(current_dir, os.path.splitext(file)[0] + '.csv')
            with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)
                writer.writerow(['filename', 'label'])  # Write header
                writer.writerows(zip(filenames, labels))

            print(f"Processed {json_file_path} -> {csv_file_path}")

# Example usage
base_dir = ''  # Update this to your base directory path; left empty, the splits are looked up relative to the current working directory
# The base directory should be laid out as shown below. The sub-directory
# names must be exactly 'test', 'train' and 'valid', because those are the
# names process_directories() looks for. Every *.json file in a sub-directory
# gets a CSV with the same stem written next to it; the later cells read
# '_annotations.coco.csv', i.e. they expect the JSON to be '_annotations.coco.json'.

# directory
# - test
# -- _annotations.coco.json
# -- image 1
# -- image 2
# -- image 3 ...
# - train
# -- _annotations.coco.json
# -- image 1
# -- image 2
# -- image 3 ...
# - valid
# -- _annotations.coco.json
# -- image 1
# -- image 2
# -- image 3 ...




process_directories(base_dir)



FileNotFoundError: [Errno 2] No such file or directory: 'test'

In [None]:
import os
import pandas as pd
import csv
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image

# Dataset location
base_dir = ""  # set this to the base directory laid out as described in the cell above
# Each phase name doubles as the name of its sub-directory under base_dir.
sub_dirs = {split: split for split in ("train", "valid", "test")}

# Custom Dataset Class
class TreeDataset(Dataset):
    """Image dataset driven by a CSV whose first two columns are filename and label.

    Each item is an ``(image, label)`` pair: the image is opened from
    ``img_dir``, converted to RGB and optionally transformed; the label is the
    integer stored in the second CSV column.
    """

    def __init__(self, csv_file, img_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            img_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.tree_frame = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the annotation CSV."""
        return len(self.tree_frame)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Args:
            idx: Row position in the CSV; a tensor index is converted to a
                plain Python value first.

        Returns:
            ``(image, label)`` where ``image`` is an RGB PIL image (or whatever
            ``transform`` turns it into) and ``label`` is an ``int``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.img_dir, self.tree_frame.iloc[idx, 0])
        # Force three channels: grayscale, palette or RGBA files would otherwise
        # yield tensors that do not match a 3-channel Normalize / ResNet input.
        # convert() returns a fully loaded copy, so the file can be closed here.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert('RGB')
        label = int(self.tree_frame.iloc[idx, 1])

        if self.transform:
            image = self.transform(image)

        return image, label

# Transformation for the image: fixed 600x600 size, tensor conversion, then
# per-channel normalisation (presumably the ImageNet statistics the pretrained
# torchvision backbone was trained with -- confirm if the backbone changes).
transform = transforms.Compose([
    transforms.Resize((600, 600)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Loaders
loaders = {}
batch_size = 16  # same batch size for training, validation and testing

for phase in ['train', 'valid', 'test']:
    dir_path = os.path.join(base_dir, sub_dirs[phase])
    csv_file = os.path.join(dir_path, "_annotations.coco.csv")  # replace annotation file name for your annotation file name

    # The images sit directly in the split directory, next to the annotation
    # file (see the layout in the first cell), not in an 'images' sub-folder.
    dataset = TreeDataset(csv_file=csv_file, img_dir=dir_path, transform=transform)

    # Only the training split is shuffled; evaluation order stays fixed.
    loaders[phase] = DataLoader(dataset, batch_size=batch_size, shuffle=(phase == 'train'))

# Now, you can use loaders['train'], loaders['valid'], and loaders['test'] for your training, validation, and testing loops.


In [None]:
from torch.nn.modules.loss import BCEWithLogitsLoss
from torch.optim import lr_scheduler
import torch
from torch.utils.data import DataLoader
from torchvision import models, transforms
import torch.nn as nn
from torch.nn.modules.loss import BCEWithLogitsLoss
from torch.optim import Adam
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

device = "cuda" if torch.cuda.is_available() else "cpu"
writer = SummaryWriter()

# Initialize model
model = models.resnet18(pretrained=True)
# Only the new head is handed to the optimizer below, so freeze the backbone.
# Otherwise autograd still computes gradients for every backbone weight on each
# step, and they pile up because optimizer.zero_grad() only resets the head.
for param in model.parameters():
    param.requires_grad = False
nr_filters = model.fc.in_features  # number of input features of the last layer
model.fc = nn.Linear(nr_filters, 1)  # single-logit head for binary classification (trainable by default)
model = model.to(device)

# Loss and optimizer
loss_fn = BCEWithLogitsLoss()  # expects raw logits, applies the sigmoid internally
optimizer = Adam(model.fc.parameters(), lr=0.01)

# Training step function
def make_train_step(model, optimizer, loss_fn):
    """Build a closure that performs one optimisation step on a batch.

    Args:
        model: Network producing one raw logit per sample.
        optimizer: Optimizer holding the parameters to update.
        loss_fn: Loss taking ``(logits, targets)``.

    Returns:
        ``train_step(x, y) -> (loss, acc)`` where ``loss`` is the batch loss as
        a Python float and ``acc`` the batch accuracy in ``[0, 1]``.
    """
    def train_step(x, y):
        model.train()  # Enter train mode
        yhat = model(x)  # Make prediction (raw logits)
        loss = loss_fn(yhat, y)  # Compute loss
        loss.backward()  # Backpropagate the gradients
        optimizer.step()  # Update parameters
        optimizer.zero_grad()  # Reset gradients

        # accuracy() applies the sigmoid itself, so it must receive the raw
        # logits. Passing sigmoid(yhat) squashed everything into (0.5, 0.74),
        # which rounds to 1 for every sample.
        acc = accuracy(yhat.detach(), y)
        return loss.item(), acc
    return train_step

# Accuracy calculation
def accuracy(preds, labels):
    """Return the fraction of correct binary predictions.

    Args:
        preds: Raw logits (NOT probabilities); the sigmoid is applied here.
        labels: 0/1 targets with the same shape as ``preds``.

    Returns:
        Accuracy as a Python float: correct predictions divided by the size of
        the first dimension.
    """
    preds_rounded = torch.round(torch.sigmoid(preds))
    correct = (preds_rounded == labels).float()  # convert into float for division
    acc = correct.sum() / len(correct)

    return acc.item()


# Preprocessing shared by the training and validation datasets
transform = transforms.Compose(
    [
        transforms.Resize((600, 600)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

### Point these at the split directories laid out as described in the first cell of this notebook
train_dir = '/kaggle/input/tree-or-no-tree-labled/TreeOrNoTree-2/train'
train_csv = '/kaggle/input/tree-or-no-tree-labled/TreeOrNoTree-2/train/_annotations.coco.csv'
valid_dir = '/kaggle/input/tree-or-no-tree-labled/TreeOrNoTree-2/valid'
valid_csv = '/kaggle/input/tree-or-no-tree-labled/TreeOrNoTree-2/valid/_annotations.coco.csv'

# One dataset per split; images are read from the split directory itself
train_dataset = TreeDataset(train_csv, train_dir, transform)
valid_dataset = TreeDataset(valid_csv, valid_dir, transform)

# Shuffle only the training data; validation keeps a fixed order
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=16)
valid_loader = DataLoader(dataset=valid_dataset, shuffle=False, batch_size=16)

# Closure that runs one optimisation step on a batch
train_step = make_train_step(model, optimizer, loss_fn)

# Training loop
# Per epoch: one pass over train_loader, one pass over valid_loader, TensorBoard
# logging, and a checkpoint whenever the validation accuracy improves.
n_epochs = 50
best_valid_acc = 0.0  # Initialize with a baseline accuracy
for epoch in range(n_epochs):
    # Training
    model.train()
    train_loss_sum, train_acc_sum, train_count = 0.0, 0.0, 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{n_epochs}, Train"):
        images, labels = images.to(device), labels.to(device).unsqueeze(1).float()
        loss, acc = train_step(images, labels)
        # loss/acc are per-batch means; weight them by the batch size so a
        # smaller final batch does not count as much as a full one.
        n_samples = labels.size(0)
        train_loss_sum += loss * n_samples
        train_acc_sum += acc * n_samples
        train_count += n_samples

    avg_train_loss = train_loss_sum / train_count
    avg_train_acc = train_acc_sum / train_count
    print(f"Training loss: {avg_train_loss:.4f}, Accuracy: {avg_train_acc:.4f}")

    # Validation
    model.eval()
    valid_loss_sum, valid_acc_sum, valid_count = 0.0, 0.0, 0
    with torch.no_grad():
        for images, labels in tqdm(valid_loader, desc=f"Epoch {epoch+1}/{n_epochs}, Valid"):
            images, labels = images.to(device), labels.to(device).unsqueeze(1).float()
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            acc = accuracy(outputs, labels)
            n_samples = labels.size(0)
            valid_loss_sum += loss.item() * n_samples
            valid_acc_sum += acc * n_samples
            valid_count += n_samples

    avg_valid_loss = valid_loss_sum / valid_count
    avg_valid_acc = valid_acc_sum / valid_count
    print(f"Validation loss: {avg_valid_loss:.4f}, Accuracy: {avg_valid_acc:.4f}")

    # Keep the weights of the best epoch so far next to the TensorBoard logs
    if avg_valid_acc > best_valid_acc:
        best_valid_acc = avg_valid_acc
        model_save_path = os.path.join(writer.log_dir, 'best_model.pth')
        torch.save(model.state_dict(), model_save_path)
        print(f"Saved Best Model at {model_save_path}")

    writer.add_scalar('Loss/train', avg_train_loss, epoch)
    writer.add_scalar('Accuracy/train', avg_train_acc, epoch)
    writer.add_scalar('Loss/valid', avg_valid_loss, epoch)
    writer.add_scalar('Accuracy/valid', avg_valid_acc, epoch)

writer.close()  # Close the TensorBoard SummaryWriter
# Weights after the final epoch (not necessarily the best ones saved above)
torch.save(model.state_dict(), f'model_epoch_{epoch+1}.pth')

    



In [None]:
# Number of samples in the training dataset (shown as the cell's output)
len(train_dataset)


# Testing the model on the test dataset


In [None]:
import torch
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torch.nn.modules.loss import BCEWithLogitsLoss

# Assuming TreeDataset is defined elsewhere and properly imported here

# Define device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Rebuild the architecture used for training: ResNet-18 with a single-logit head.
# pretrained=False because load_state_dict() below overwrites every weight, so
# fetching the ImageNet checkpoint would only add a needless download (and a
# failure point when the session has no internet access).
model = models.resnet18(pretrained=False)
nr_filters = model.fc.in_features
model.fc = torch.nn.Linear(nr_filters, 1)
model = model.to(device)

# Load the weights written at the end of training (change the path if your
# checkpoint lives elsewhere or was saved after a different number of epochs)
model.load_state_dict(torch.load('/kaggle/working/model_epoch_50.pth', map_location=device))
model.eval()  # inference mode: fixed batch-norm statistics

# Test-time preprocessing (same steps as used for training) and data loading
test_transform = transforms.Compose(
    [
        transforms.Resize((600, 600)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

test_dir = '/kaggle/input/tree-or-no-tree-labled/TreeOrNoTree-2/test'
test_csv = '/kaggle/input/tree-or-no-tree-labled/TreeOrNoTree-2/test/_annotations.coco.csv'

# Fixed order: no shuffling is needed for evaluation
test_dataset = TreeDataset(test_csv, test_dir, test_transform)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=16)

# Accuracy helper (same definition as in the training cell, repeated so this
# cell does not depend on that one having been run)
def accuracy(preds, labels):
    """Return the share of samples whose thresholded prediction equals the label.

    ``preds`` are raw logits: they are passed through a sigmoid and rounded to
    0/1 before being compared element-wise with ``labels``. The result is a
    Python float: matches divided by the size of the first dimension.
    """
    probabilities = torch.sigmoid(preds)
    hits = torch.eq(torch.round(probabilities), labels).float()
    return (hits.sum() / len(hits)).item()

# Testing loop: one pass over test_loader without gradient tracking
loss_fn = BCEWithLogitsLoss()
test_loss_sum, test_acc_sum, test_count = 0.0, 0.0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device).unsqueeze(1).float()
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        # Both metrics are per-batch means; weight them by the batch size so a
        # smaller final batch does not count as much as a full one.
        n_samples = labels.size(0)
        test_loss_sum += loss.item() * n_samples
        test_acc_sum += accuracy(outputs, labels) * n_samples
        test_count += n_samples

avg_test_loss = test_loss_sum / test_count
avg_test_acc = test_acc_sum / test_count
print(f"Test loss: {avg_test_loss:.4f}, Accuracy: {avg_test_acc:.4f}")


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import random

def show_images_with_predictions(images, outputs, labels):
    """Plot the given images in one row, each captioned with prediction and truth.

    Args:
        images: Sequence/array of normalised images in channel-first layout
            (each entry is transposed with ``(1, 2, 0)`` before display).
        outputs: Per-image scores; a value above 0.5 is shown as class '1'.
        labels: Per-image ground truth; a value above 0.5 is shown as class '1'.

    One panel is drawn per image (the original version was fixed to five
    panels). Nothing is drawn when ``images`` is empty.
    """
    n_images = len(images)
    if n_images == 0:
        return

    # squeeze=False keeps `axs` two-dimensional even for a single image, so
    # flatten() below works for any count. Five images give the 20x4 figure.
    fig, axs = plt.subplots(1, n_images, figsize=(4 * n_images, 4), squeeze=False)
    for i, ax in enumerate(axs.flatten()):
        img = np.transpose(images[i], (1, 2, 0))
        img = img * np.array([0.229, 0.224, 0.225]) + np.array([0.485, 0.456, 0.406])  # Unnormalize
        img = np.clip(img, 0, 1)

        ax.imshow(img)
        predicted_label = '1' if outputs[i] > 0.5 else '0'
        true_label = '1' if labels[i] > 0.5 else '0'
        ax.text(0, -5, f'Pred: {predicted_label} - True: {true_label}', color='red', fontsize=12, backgroundcolor='white')
        ax.axis('off')
    plt.show()

# Randomly select a batch from the test loader. Draw the batch index first and
# stop iterating once it is reached, instead of holding every batch in memory.
target_batch = random.randrange(len(test_loader))
for batch_idx, random_batch in enumerate(test_loader):
    if batch_idx == target_batch:
        break
images, labels = random_batch
images, labels = images.to(device), labels.to(device).unsqueeze(1).float()

# Inference only: no autograd graph is needed for plotting
model.eval()
with torch.no_grad():
    outputs = model(images)
outputs_sigmoid = torch.sigmoid(outputs).cpu().numpy()
labels = labels.cpu().numpy()

# Select 5 random images from the batch. Sample without replacement so no
# image is shown twice; fall back to sampling with replacement only when the
# batch holds fewer than 5 images, so that 5 panels are always available.
n_show = 5
batch_len = images.size(0)
idxs = np.random.choice(batch_len, n_show, replace=batch_len < n_show)
selected_images = images[idxs].cpu().numpy()
selected_outputs = outputs_sigmoid[idxs]
selected_labels = labels[idxs]

show_images_with_predictions(selected_images, selected_outputs, selected_labels)


In [None]:
# Load the TensorBoard notebook extension (might not be necessary or effective in Kaggle)
%load_ext tensorboard

# Launch TensorBoard pointing to the directory where logs are saved
%tensorboard --logdir '/kaggle/working/runs/Apr02_09-30-51_0c98c022bac7'
