In [1]:
import glob
import numpy as np
import matplotlib.pyplot as plt

import wandb
import torch

import lightning as L
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping

# Select the 'medium' float32 matmul precision setting (PyTorch's global
# speed/precision trade-off knob for float32 matrix multiplications).
torch.set_float32_matmul_precision('medium')

%load_ext autoreload
%autoreload 2
from src import utils, cnn

### Define some parameters
Importantly, set classification to true for cultivar classification, or to false for regression towards yield, stomatal conductance, chlorophyll fluorescence and fertilizer amount.

In [2]:
# Set to True to reattach to the existing wandb run and continue from the
# last saved checkpoint instead of starting a fresh training run
resume = False

# Optimisation settings
epochs = 5000
lr = 1e-5

# Batching: effective batch size = batch_size * num_accumulated_batches
batch_size = 16
num_accumulated_batches = 1  # number of gradient accumulation steps

# wandb project name
project_name = 'outlier_challenge'

# Model checkpointing settings (output directory and base file name)
model_path = 'models/'
model_name = 'resnet50'

### Create dataloaders

In [3]:
# Build the training and validation dataloaders with the project helper,
# using the batch size chosen in the parameters cell.
train_loader, val_loader = utils.get_data_loaders(batch_size)

### Train model

In [None]:
# Set up WandB logger.
# The run id is derived from the project and model names, so a resumed session
# targets the same wandb run; resume='must' is only passed when resuming.
logger_kwargs = dict(name=model_name,
                     project=project_name,
                     id=f'{project_name}_{model_name}',
                     log_model=False)
if resume:
    logger_kwargs['resume'] = 'must'
logger = WandbLogger(**logger_kwargs)

# Set up callbacks
# Checkpoint tied to the monitored 'val/loss' metric, written to '<model_name>_best'.
best_checkpoint_callback = ModelCheckpoint(dirpath=model_path,
                                           filename=f'{model_name}_best',
                                           monitor='val/loss',
                                           enable_version_counter=False)

# Checkpoint not tied to any metric (monitor=None), written to '<model_name>';
# this is the file that is loaded again below when `resume` is True.
last_checkpoint_callback = ModelCheckpoint(dirpath=model_path,
                                           filename=f'{model_name}',
                                           monitor=None,
                                           enable_version_counter=False)

# Stop training once 'val/loss' has stopped decreasing (patience of 20 checks).
early_stopping_callback = EarlyStopping(monitor='val/loss',
                                        min_delta=0.00,
                                        patience=20,
                                        verbose=True,
                                        mode='min')

callbacks = [best_checkpoint_callback, last_checkpoint_callback, early_stopping_callback]

# Create model
model = cnn.CNN(lr=lr)

# Set up trainer
trainer = L.Trainer(max_epochs=epochs,
                    precision='16-mixed',
                    log_every_n_steps=1,
                    logger=logger,
                    callbacks=callbacks,
                    accelerator="gpu",
                    accumulate_grad_batches=num_accumulated_batches)

# Train model; when resuming, continue from the last saved checkpoint
fit_kwargs = {'ckpt_path': f'{model_path}/{model_name}.ckpt'} if resume else {}
try:
    trainer.fit(model, train_loader, val_loader, **fit_kwargs)
finally:
    # Always close the wandb run, even when training raises, so the run is
    # not left open in the kernel.
    wandb.finish()

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
[34m[1mwandb[0m: Currently logged in as: [33mwillap[0m. Use [1m`wandb login --relogin`[0m to force relogin


C:\Users\Billy\miniconda3\envs\matrix\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:639: Checkpoint directory C:\Users\Billy\Downloads\challenge_data\challenge_data\models exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params
--------------------------------------
0 | net    | Sequential | 25.6 M
1 | linear | Linear     | 1.0 K 
--------------------------------------
25.6 M    Trainable params
0         Non-trainable params
25.6 M    Total params
102.232   Total estimated model params size (MB)


Sanity Checking: |                                       | 0/? [00:00<?, ?it/s]

In [11]:
from torchvision.transforms import v2
from torchvision import tv_tensors
import torch

def load_25D(file, start=100, stop=140):
    """Load a volume and reduce it to a three-channel "2.5D" image.

    For each of the first three axes, the slab ``start:stop`` along that axis
    is collapsed with a standard deviation, and the three resulting 2D maps
    are stacked along a new last axis.

    Parameters
    ----------
    file
        Passed unchanged to ``utils.read_file``.
    start, stop : int, optional
        Bounds of the slab taken along each axis before the reduction.
        The defaults (100, 140) reproduce the previously hard-coded slice.

    Returns
    -------
    numpy.ndarray
        The three standard-deviation maps stacked on the last axis, as float64.

    Notes
    -----
    ``np.stack`` requires the three maps to have the same shape, so this
    presumably expects volumes with equal extent along all three axes.
    TODO confirm against ``utils.read_file``.
    """
    volume = utils.read_file(file)
    slab = slice(start, stop)

    # Accumulate in float64. For floating inputs np.std otherwise computes in
    # the array's own dtype, and the saved output of this notebook shows a
    # NumPy warning raised inside the variance computation (presumably an
    # overflow in a low-precision dtype -- confirm the dtype of the volumes).
    img0 = np.std(volume[slab], axis=0, dtype=np.float64)
    img1 = np.std(volume[:, slab], axis=1, dtype=np.float64)
    img2 = np.std(volume[:, :, slab], axis=2, dtype=np.float64)

    return np.stack([img0, img1, img2], axis=-1)

import pandas as pd

def process_data(df):
    """Build one stacked 2.5D sample per row of ``df``.

    Each row must provide ``image_file``, ``mask_file`` and ``dist_file``.
    The three files are loaded with ``load_25D`` and joined along the last
    axis (image first, then mask, then dist). Progress is printed every
    100 rows as ``<row index> <row count>``.

    Returns a NumPy array holding all samples, in row order.
    """
    n_rows = len(df)
    samples = []

    rows = zip(df.image_file, df.mask_file, df.dist_file)
    for idx, (image_path, mask_path, dist_path) in enumerate(rows):
        # Load in the fixed order image -> mask -> dist
        parts = [load_25D(path) for path in (image_path, mask_path, dist_path)]
        samples.append(np.concatenate(parts, axis=-1))

        # Lightweight progress report
        if idx % 100 == 0:
            print(idx, n_rows)

    return np.array(samples)
    
# Pre-compute the stacked 2.5D arrays for both splits from their CSV listings
train_table = pd.read_csv('train_data.csv')
train_data = process_data(train_table)

test_table = pd.read_csv('test_data.csv')
test_data = process_data(test_table)

# Persist both arrays so the expensive preprocessing need not be repeated
np.save('train_data.npy', train_data)
np.save('test_data.npy', test_data)

0 2184


  x = um.multiply(x, x, out=x)


100 2184
200 2184
300 2184
400 2184
500 2184
600 2184
700 2184
800 2184
900 2184
1000 2184
1100 2184
1200 2184
1300 2184
1400 2184
1500 2184
1600 2184
1700 2184
1800 2184
1900 2184
2000 2184
2100 2184
0 547
100 547
200 547
300 547
400 547
500 547


In [None]:
def process_image(image_file, mask_file=None, dist_file=None):
    """Turn one sample into a model-ready CUDA tensor.

    Two call forms are supported:

    * ``process_image(image_file, mask_file, dist_file)`` -- each file is
      loaded with ``load_25D`` and the results are concatenated along the
      last axis (image, mask, dist).
    * ``process_image(sample)`` -- ``sample`` is an already stacked,
      channels-last array. This is the form ``create_detection`` uses;
      presumably it is one entry of the array produced by ``process_data``
      (TODO confirm).

    In both cases the channels are moved to the first axis, the array is
    wrapped as a ``tv_tensors.Image``, passed through ``utils.Standardize``
    and resized to 224x224, then returned as a float32 tensor on the GPU.

    Raises
    ------
    TypeError
        If only one of ``mask_file`` / ``dist_file`` is given, or if a single
        string path is passed without the other two files.
    """
    if (mask_file is None) != (dist_file is None):
        raise TypeError("mask_file and dist_file must be given together")

    if mask_file is None:
        # Single-argument form: the sample is already a stacked array
        if isinstance(image_file, str):
            raise TypeError("a file path requires mask_file and dist_file as well")
        stacked = np.asarray(image_file)
    else:
        stacked = np.concatenate([load_25D(image_file),
                                  load_25D(mask_file),
                                  load_25D(dist_file)], axis=-1)

    # Channels-last -> channels-first
    stacked = np.moveaxis(stacked, -1, 0)

    # Convert to tv_tensors
    image = tv_tensors.Image(torch.tensor(stacked))

    # Apply transforms
    transforms = v2.Compose([utils.Standardize(),
                             v2.Resize(size=(224, 224))])
    image = transforms(image)

    return image.to(torch.float32).cuda()

In [None]:
import vtk
import numpy as np
import os
import argparse
import matplotlib.pyplot as plt
import pickle
import json

# Sorted paths of all files under test/crops whose name contains "label";
# create_detection derives each scan id from these file names.
test_files = np.sort(glob.glob('test/crops/*label*'))

def create_detection(model, X_test, test_files, only_200=True):
    """Run the model over the test samples and write the predictions to JSON.

    Parameters
    ----------
    model
        Trained network; it is moved to the GPU and switched to eval mode.
    X_test
        Indexable collection of samples, aligned with ``test_files``; each
        entry is passed to ``process_image`` as a single argument.
    test_files
        Paths whose file names start with ``<a>_<b>_...``; the scan id is
        ``<a>_<b>``.
    only_200 : bool, optional
        If True, only scans listed in ``test_files_200.txt`` are evaluated and
        the results go to ``test_results_200.json``; otherwise every scan is
        evaluated and the results go to ``test_results.json``.

    The fraction of evaluated scans predicted as outliers is printed
    (0.0 when nothing was evaluated).
    """
    from pathlib import PureWindowsPath

    # The subset list is only needed (and only read) in subset mode
    subset_ids = None
    if only_200:
        listed = np.atleast_1d(np.genfromtxt('test_files_200.txt', str))
        subset_ids = set(listed.tolist())

    model.cuda()
    # Inference mode: no dropout, batch-norm uses running statistics
    model.eval()

    num_outlier = 0

    # Create results
    test_results = []
    with torch.no_grad():
        for i in range(len(test_files)):

            # PureWindowsPath accepts both '\\' and '/' separators, so the
            # file name is extracted correctly on any platform.
            name_parts = PureWindowsPath(str(test_files[i])).name.split('_')
            scan_id = name_parts[0] + '_' + name_parts[1]

            if subset_ids is not None and scan_id not in subset_ids:
                continue

            # NOTE(review): process_image is called with one precomputed
            # sample here and must accept that single-argument form.
            logits = model(process_image(X_test[i])[None, ...])
            prob = torch.sigmoid(logits).detach().cpu().numpy().ravel()[0]

            # Remember to cast bools to int for json serialization
            pred = int(prob > 0.5)
            num_outlier += pred

            test_results.append({"scan_id": scan_id, "outlier": pred})

    # Fraction of evaluated scans flagged as outliers
    print(num_outlier / len(test_results) if test_results else 0.0)

    # Write results to JSON file
    output_name = "test_results_200.json" if only_200 else "test_results.json"
    with open(output_name, 'w') as json_file:
        json.dump(test_results, json_file, indent=4)

# Write predictions for the scans listed in test_files_200.txt.
# NOTE(review): `X_test` is not defined in any cell visible here -- presumably
# the array saved as test_data.npy; confirm it is loaded before this cell runs.
create_detection(model, X_test, test_files, only_200=True)

In [None]:
# Write predictions for every file in `test_files` (test_results.json).
# NOTE(review): `X_test` is not defined in any cell visible here -- confirm
# where it comes from before running this cell on a fresh kernel.
create_detection(model, X_test, test_files, only_200=False)