# Deep learning model based on spectral feature data

This notebook shows the code used to run the spectral deep learning model.

The data used in this notebook was prepared in "msi_processing.ipynb".

This notebook contains the code to run the spectral module on its own. It uses the fastai and PyTorch libraries to load the data and train the model.

The custom functions were created for this project. They are based on fastai and PyTorch forum discussions on how to use multispectral images in deep learning.

In [None]:
# Load libraries

%reload_ext autoreload
%autoreload 2
%matplotlib inline

# Import libraries
from fastai.vision.all import *
import torch
from ipywidgets import IntProgress
from glob import glob

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import cv2

# Custom functions
from msi_utils import *
from fold_utils import *


# Pick the first CUDA device when PyTorch reports one as available; otherwise use the CPU.
# The bare `device` on the last line displays the selection as the cell output.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
device

In [None]:
# Path to where the images are located.
# NOTE(review): `path` is not referenced by any other cell visible in this notebook --
# presumably the item getters imported from msi_utils locate the files themselves; confirm
# before relying on (or changing) this value.
path = Path('/data/g2f_data/input_data/')

## Reference Tables

In [None]:
# Reference tables: the holdout (test) table first, then the combined train/validation table.
df_test, df_train_val = (
    pd.read_csv(csv_file)
    for csv_file in ('/data/fielddata/df_test.csv', '/data/fielddata/df_train_val.csv')
)

In [None]:
# Random split from fastai, seeded so that the same split is produced on every run.
splitter = RandomSplitter(seed=42)

# Apply the splitter to the row positions of the train/validation table and display the result.
row_positions = range_of(df_train_val)
splits = splitter(row_positions)
splits

## Kfold validation

In [None]:
# KFOLD VALIDATION
# For each fold: build the dataloaders for that fold's split, train a fresh xresnet18,
# record the validation loss/metrics, and collect the predictions made on the fold's
# validation items. The per-fold predictions are written to one CSV at the end.
N_FOLDS = 5

# Per-fold metrics, one entry per fold (read by the summary cell that follows)
val_loss = []
rmse_kfold = []
rmse_pct_kfold = []
r2_kfold = []

split_list = kfold_splitter(df=df_train_val)

# Callbacks
csvlogger = CSVLogger('/data/results/spectral_5fold_metrics.csv', append=True)
early_stopping = EarlyStoppingCallback(monitor='valid_loss', patience=5, min_delta=0.01)
cbs = [csvlogger, early_stopping]

# Yield range used to express the RMSE as a percentage; it is the same for every fold,
# so it is computed once instead of inside the loop.
df_ymin, df_ymax = df_train_val['Yield'].min(), df_train_val['Yield'].max()

# One DataFrame of validation predictions per fold
fold_frames = []

for fold in range(N_FOLDS):
    getter = get_fold(split_list, fold=fold)
    # Call MSI dataloader
    msi_fold = DataBlock(blocks=(MSITensorBlock, RegressionBlock),
                         get_items=get_npy,
                         get_y=get_y,
                         splitter=getter)
    msi_dl = msi_fold.dataloaders(df_train_val, bs=8)

    # Learner (a new model per fold so no weights carry over between folds)
    model_msi = xresnet18(n_out=1, c_in=13, pretrained=False, sa=True, p=0.5, ndim=2)
    learn_msi = Learner(msi_dl,
                        model_msi,
                        opt_func=Adam,
                        loss_func=root_mean_squared_error,
                        metrics=[rmse, R2Score()])

    # Disable the fastai progress bar and logging (optional but cleaner).
    # The managers must be separated by a comma: `with a and b:` evaluates the boolean
    # expression and enters only `b`, so the progress bar was never actually disabled.
    with learn_msi.no_bar(), learn_msi.no_logging():
        learn_msi.fit_one_cycle(100, 1e-3, cbs=cbs)

    val_loss_k, rmse_k, r2score_k = learn_msi.validate()
    val_loss.append(val_loss_k)
    rmse_kfold.append(rmse_k)
    rmse_pct_kfold.append(((rmse_k / (df_ymax - df_ymin)) * 100))
    r2_kfold.append(r2score_k)

    # Extract the predictions and save in vis_results
    ypred, yval = learn_msi.get_preds()

    # Item ids are the file stems of the validation items (the old index loop reused `i`,
    # shadowing the fold counter).
    images_id = [item.stem for item in msi_dl.valid_ds.items]

    vis_df = pd.DataFrame()
    vis_df['items'] = images_id
    vis_df['items'] = vis_df['items'].str.replace('id_', '')
    vis_df['predictions'] = ypred.flatten()
    vis_df['target_yield'] = yval
    vis_df = vis_df.merge(df_train_val, how='left', left_on='items', right_on='Barcode')

    fold_frames.append(vis_df)

# DataFrame.append was removed in pandas 2.0; concatenate the per-fold frames once instead.
# Each fold keeps its own row index, as it did with append.
kfold_preds = pd.concat(fold_frames)

# Keep 'predictions' and 'target_yield' as the leading columns, matching the layout the
# previous append-onto-an-empty-frame approach produced.
lead_cols = ['predictions', 'target_yield']
kfold_preds = kfold_preds[lead_cols + [col for col in kfold_preds.columns if col not in lead_cols]]

# Allows you to save the predictions performed on each kfold, and then calculate the desired metrics
kfold_preds.to_csv('/data/results/spectral_5fold_predictions.csv')

In [None]:
# Per-fold summary table: one row per fold with its validation loss, RMSE, RMSE as a
# percentage of the yield range, and R2 score.
# NOTE(review): the earlier comment on this cell mentioned "emb_ps, ps and wd for around
# 18 epochs"; none of those appear in the training cell visible here -- presumably stale.
d = {
    "validation loss": val_loss,
    "rmse": rmse_kfold,
    "rmse %": rmse_pct_kfold,
    "r2score": r2_kfold,
}

fastkfold = pd.DataFrame(data=d)

# np.mean is applied to each 'rmse %' entry individually
fastkfold['rmse %'] = fastkfold['rmse %'].apply(lambda pct: np.mean(pct))

fastkfold.to_csv('/data/results/spetral_5fold_summary_metrics.csv', index=False)
fastkfold

## Test predictions

In [None]:
# Data block for the final model: same blocks and getters as the k-fold cell, but the
# train/validation split comes from the seeded random `splitter` defined earlier.
data_load = DataBlock(
    blocks=(MSITensorBlock, RegressionBlock),
    get_items=get_npy,
    get_y=get_y,
    splitter=splitter,
)
msi_dls = data_load.dataloaders(df_train_val, bs=8)

# Fresh xresnet18 with 13 input channels and a single regression output
model_msi = xresnet18(n_out=1, c_in=13, pretrained=False, sa=True, p=0.5, ndim=2)

# Early stopping on the validation loss; note patience is 3 here (the k-fold cell uses 5)
early_stopping = EarlyStoppingCallback(monitor='valid_loss', patience=3, min_delta=0.01)

learn_msi = Learner(
    msi_dls,
    model_msi,
    opt_func=Adam,
    loss_func=root_mean_squared_error,
    metrics=[rmse, R2Score()],
)

In [None]:
# Train for up to 100 epochs with a maximum learning rate of 1e-3; the early-stopping
# callback may end training sooner.
learn_msi.fit_one_cycle(n_epoch=100, lr_max=1e-3, cbs=early_stopping)

In [None]:
# Save the trained learner under the given path.
# NOTE(review): the previous comment said "Export and save", but only `save` is called
# here (there is no `learn_msi.export`) -- confirm that this is the intended persistence.
learn_msi.save('/data/model_weights/xresnet18_model')

# If you want to load the model use the command below
# NOTE(review): the path in the example below differs from the one passed to `save`
# above -- presumably a leftover from another experiment; verify before using it.
# learn_msi.load('/nbs_dir/g2f/model_weights/VIS_resnet18_v5_8epochs_earlystop')

The prediction of the test (holdout) dataset is made in two parts.

In [None]:
# Part 1 - Load the test set
# Building dataloaders from df_test with `data_load` yields two subsets of the holdout
# rows; subset 0 is predicted in this cell.
test_dls = data_load.dataloaders(df_test)
part_items = test_dls[0].items

# NOTE(review): the loader appended here is not the one passed to get_preds below --
# presumably kept for index-based access to the test loader; confirm whether it is needed.
learn_msi.dls.loaders.append(msi_dls.test_dl(part_items, with_labels=True))
dl_testing = learn_msi.dls.test_dl(part_items, with_labels=True)
predicts, targets = learn_msi.get_preds(dl=dl_testing)
fi = dl_testing.items

# Get the items idx: the id of each item is the stem of its file path
images_id = [item_path.stem for item_path in fi]

# Assemble ids, predictions and targets, then attach the reference data by barcode
test_results = pd.DataFrame({'Items': images_id})
test_results['Items'] = test_results['Items'].str.replace('id_', '')
test_results['Predictions'] = predicts.flatten().tolist()
test_results['Target_yield'] = targets
test_results = test_results.merge(df_test, how='left', left_on='Items', right_on='Barcode')


In [None]:
# Part 2 - Repeat the step above with the second part of the holdout dataset
# (subset 1 of the dataloaders built from df_test)
part_items = test_dls[1].items

# NOTE(review): the loader appended here is not the one passed to get_preds below --
# presumably kept for index-based access to the test loader; confirm whether it is needed.
learn_msi.dls.loaders.append(msi_dls.test_dl(part_items, with_labels=True))
dl_testing = learn_msi.dls.test_dl(part_items, with_labels=True)
predicts, targets = learn_msi.get_preds(dl=dl_testing)
fi = dl_testing.items

# Get the items idx: the id of each item is the stem of its file path
images_id = [item_path.stem for item_path in fi]

# Assemble ids, predictions and targets, then attach the reference data by barcode
test_results1 = pd.DataFrame({'Items': images_id})
test_results1['Items'] = test_results1['Items'].str.replace('id_', '')
test_results1['Predictions'] = predicts.flatten().tolist()
test_results1['Target_yield'] = targets
test_results1 = test_results1.merge(df_test, how='left', left_on='Items', right_on='Barcode')

In [None]:
# Stack the two holdout parts into one table and write it out.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent and,
# like append, keeps each part's own row index (the index is not written to the CSV).
test_df = pd.concat([test_results, test_results1])
test_df.to_csv('/data/results/spetral_prediction_on_holdout_dataset.csv', index=False)