# A Computer Vision Approach for Predicting Sheep Body Weight in Livestock Farms

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os

os.environ["TF_ENABLE_ONEDNN_OPTS"] = '0'
os.environ["KERAS_BACKEND"] = "tensorflow"

import requests, glob, shutil
import os.path

import matplotlib.pyplot as plt

import pandas as pd
import numpy  as np

def dummy_npwarn_decorator_factory():
    """Return a pass-through decorator: it hands back whatever object it is given."""
    def _passthrough(wrapped):
        return wrapped
    return _passthrough

# Install the no-op factory only as a fallback: an existing `np._no_nep50_warning` is kept as is.
# NOTE(review): presumably needed by a package imported below when the installed NumPy
# lacks this private attribute — confirm.
_nep50_hook = getattr(np, '_no_nep50_warning', dummy_npwarn_decorator_factory)
np._no_nep50_warning = _nep50_hook

import tensorflow as tf
import keras

import wandb
from wandb.integration.keras import WandbMetricsLogger, WandbModelCheckpoint, WandbEvalCallback

In [None]:
import sys, os

# Project root (bego-analysis): the parent of the notebook's working directory.
# It is put on sys.path (once) so the local `lib` package can be imported.
module_path = os.path.abspath(os.pardir)
already_on_path = module_path in sys.path
if not already_on_path:
    sys.path.append(module_path)

from lib import metadata, partitioning, datasets, transformations

In [None]:
# Keeping pandas from truncating long strings
pd.set_option('display.max_colwidth', 120)

In [None]:
# Folders constants
# Every path below is derived from `module_path` (the project root added to sys.path above).
data_dir_path = f'{module_path}/data'

# `source` holds the inputs read by this notebook (metadata and raw images);
# `work` receives the files this notebook copies.
source_dir_path = f'{data_dir_path}/source'
work_dir_path   = f'{data_dir_path}/work'

dataset_dir_path   = f'{source_dir_path}/dataset'
work_imgs_dir_path = f'{work_dir_path}/images'
# Destination of the images copied by move_suited_img_to_work_dir and the `img_dir`
# later passed to Dataset.load_data.
suited_imgs_path   = f'{work_imgs_dir_path}/suited'

# Create Metadata Dataset

Load the dataframe with the data acquired during the data collection sessions.

In [None]:
dataset_v0 = metadata.MetadataProvider(source_dir_path=source_dir_path).load_dataframe()
dataset_v0.shape

In [None]:
dataset_v0.groupby(['collect_id','place']).size()

In [None]:
# Keep only the rows whose `label` column equals 0.
# NOTE(review): presumably label 0 marks the images suited for the experiment
# (the next section copies the "suited" images) — confirm against lib/metadata.py.
dataset_v1 = dataset_v0.query('label == 0')
dataset_v1.shape

## Move suited images to work directory

In [None]:
# Index every depth image found under the source images folder by its file name,
# so a metadata row (which stores only the file name) can be resolved to a full path.
# os.path.basename is used instead of splitting on '/' so the keys are correct
# whatever path separator glob returns (e.g. backslashes on Windows).
# NOTE: if two sub-folders contain a file with the same name, the last one found wins.
file_list = glob.glob(f"{source_dir_path}/images/**/*DEPTH_320_240_1.png", recursive=True)
file_dict = {os.path.basename(file): file for file in file_list}

In [None]:
def move_suited_img_to_work_dir(file_name: str, save_dir: str, save_as: str = None):
    """Copy a source image into ``save_dir`` unless the target file is already there.

    Args:
        file_name: Base name of the image; used as the key into ``file_dict``
            to find the source path.
        save_dir: Destination directory (created when it does not exist yet).
        save_as: Name of the copy inside ``save_dir``; defaults to ``file_name``.

    Raises:
        KeyError: If a copy is needed and ``file_name`` is not in ``file_dict``.
    """
    if save_as is None:
        save_as = file_name

    target_path = f'{save_dir}/{save_as}'
    # Test the path that is actually written: checking `file_name` here would
    # re-copy on every call whenever a different `save_as` name is used.
    if os.path.isfile(target_path):
        return

    source_path = file_dict[file_name]
    # A fresh checkout may not have the work folder yet.
    os.makedirs(save_dir, exist_ok=True)
    shutil.copyfile(source_path, target_path)

In [None]:
# Copy the depth image of every selected row into the "suited" work folder.
# The helper is called only for its side effect, so a plain loop is used.
for depth_file_name in dataset_v1['depth']:
    move_suited_img_to_work_dir(file_name=depth_file_name, save_dir=suited_imgs_path)
dataset_v1.shape

# Experiment

**PROCESS** 

Prepare data -> Split data -> Compile the model -> Fit the model -> Predict result from unseen data (test set) -> Evaluate model

## Dataset

*In **run-6** and **run-7**, the code below should be changed to `dataset_exp = dataset_v1.query('place == "Farm Code"').iloc[:,:]`, replacing `Farm Code` with **Farm A** or **Farm B**.*

In [38]:
# Experiment dataset: the full slice selects every row and column of dataset_v1.
dataset_exp = dataset_v1.iloc[:,:]
dataset_exp.shape

(1772, 25)

### Dataset Partition

Partitioning by TAG (animal identification code), with 85% of the animals for Training and 15% for Testing. In **run-5**, the partitioning strategy used was `partitioning.SplitBySingleFieldLogic()`.

In [None]:
# Partition the dataset by the `tag` field, requesting 85% for training.
# NOTE(review): no random seed is passed here, so the split may differ between runs —
# confirm whether SplitRandomBySingleField seeds itself; otherwise results are not reproducible.
split_stg  = partitioning.SplitRandomBySingleField()
partitions = split_stg.split(
    field_name='tag', 
    dataset=dataset_exp,
    train_size=0.85
)
# Number of entries assigned to each partition.
partitions.groupby('partition').size()

In [None]:
# Attach the partition assignment to every record through its `tag`.
# Columns contributed by `partitions` in a previous run of this cell are dropped first,
# so re-running the cell stays idempotent instead of producing `partition_x` /
# `partition_y` columns (which would break the later groupby on 'partition').
partition_cols = partitions.columns.difference(['tag'])
dataset_exp = (
    dataset_exp
    .drop(columns=partition_cols, errors='ignore')
    .merge(partitions, on='tag')
)
dataset_exp.shape

In [None]:
# Number of records per (partition, place) pair, as a list of
# {'partition': ..., 'place': ..., 'qtd': ...} dicts.
records_per_partition_place = dataset_exp.groupby(['partition', 'place']).size()
split_dict = records_per_partition_place.reset_index(name='qtd').to_dict(orient='records')

In [None]:
split_dict

### Init Tracking

In [None]:
# Define the index value and start a run in wandb (https://wandb.ai/), tracking hyperparameters
index = 1
wandb.init(
    # set the wandb project where this run will be logged
    project="collie-x3",
    name=f'run_{index}',

    # track hyperparameters and run metadata with wandb.config
    config={
        # Must mirror the `train_size` given to `split_stg.split(...)` in the
        # partitioning cell (0.85). The value 0.8 recorded before did not
        # describe the split that is actually used.
        "train_size": 0.85,
        # (head-only training epochs, fine-tuning epochs); read below as
        # config.epochs[0] and config.epochs[1].
        "epochs": (3,200),
        # Width of the Dense layer of the regression head.
        "dense_units": 128,
        # Record counts per partition and place, kept with the run for traceability.
        "split": split_dict,
    }
)

# [optional] use wandb.config as your config
config = wandb.config

In [None]:
# Implement your model prediction visualization callback
class WandbClfEvalCallback(WandbEvalCallback):
    """Evaluate the model on a held-out set at fixed epochs, log the scores and save a checkpoint.

    This corresponds to the "Measure metrics" stage of the experiment. No
    ground-truth or prediction table rows are produced: the WandbEvalCallback
    hooks are only used as the place to run the periodic evaluation.

    Args:
        validation_data: ``(x, y)`` pair evaluated at the checkpoint epochs.
        data_table_columns: Forwarded to ``WandbEvalCallback``.
        pred_table_columns: Forwarded to ``WandbEvalCallback``.
        num_samples: Accepted for signature compatibility; currently unused.
    """

    # 1-based epochs at which the model is evaluated and saved.
    EVAL_EPOCHS = (50, 100, 150, 165, 180, 190, 200)

    def __init__(
        self, validation_data, data_table_columns, pred_table_columns, num_samples=100
    ):
        super().__init__(data_table_columns, pred_table_columns)

        self.x = validation_data[0]
        self.y = validation_data[1]

    def add_ground_truth(self, logs=None):
        """Intentionally a no-op: no ground-truth rows are added."""
        pass

    def add_model_predictions(self, epoch, logs=None):
        """Run the "Measure metrics" stage at epochs 50, 100, 150, 165, 180, 190 and 200.

        At those epochs the model is evaluated on the data given at
        construction, the scores are logged to wandb under ``test/*`` and the
        model is saved to ``output/model_run{index}_epoch{N}.keras`` (``index``
        is the notebook-level run index, read at call time).

        Args:
            epoch: 0-based epoch number supplied by Keras.
            logs: Unused.
        """
        if epoch + 1 not in self.EVAL_EPOCHS:
            return

        # `score` is positional: loss first, then the compiled metrics in the
        # order of the notebook's `metrics` list. Keep both in sync.
        score = self.model.evaluate(self.x, self.y)
        wandb.log({
            'test/loss':score[0], 
            'test/r2_score':score[1],
            'test/root_mean_squared_error':score[2],
            'test/mean_squared_error':score[3],
            'test/mean_absolute_error':score[4],
            'test/mean_absolute_percentage_error':score[5],
        })
        # Make sure the folder exists: a missing directory would otherwise
        # abort the run at the first checkpoint, after many epochs of training.
        os.makedirs('output', exist_ok=True)
        self.model.save(f'output/model_run{index}_epoch{epoch+1}.keras')

In [None]:
# Persist the partitioned dataset used by this run next to the model checkpoints.
# The folder is created first so a fresh checkout does not fail on a missing directory.
os.makedirs('output', exist_ok=True)
dataset_exp.to_csv(f'output/dataset{index}.csv')

### Dataset Load and Transformation

Load the dataset with the depth images of the animals and apply the transformations to all images. For more information about the behavior of the methods below, see their descriptions in `lib/transformations.py`.

In [None]:
# Build the train/test arrays: image file names come from the `depth` column and are
# read from `suited_imgs_path`; the regression target is the `weight` column.
# NOTE(review): the train/test assignment presumably comes from the `partition` column
# merged into dataset_exp above — confirm in lib/datasets.py.
(X_train, Y_train), (X_test, Y_test) = datasets.Dataset().load_data(
    dataframe = dataset_exp,
    img_col_name = 'depth', 
    img_dir = suited_imgs_path,
    truth_col_name = 'weight',
    transformations = [
        # NOTE(review): 1950 is used as the max value by both steps below; presumably a
        # depth cut-off in sensor units — confirm in lib/transformations.py.
        transformations.NoiseRemovalSetMaxValue(max_value=1950),
        transformations.AdjustScaleWithFixedMaxValue(max_value=1950),
        # 3 channels and 300x300 match the `input_shape=(300,300,3)` given to the backbone below.
        transformations.Replicate1DtoNDimChannel(dim=3),
        transformations.ResizeImageWithPadding(shape=(300,300)),
    ],
    replicators = []
)

print('Training:', X_train.shape, Y_train.shape)
print(' Testing:', X_test.shape,  Y_test.shape)

### Model Compile & Fit

- Compiles the model based on EfficientNetV2-B3, with weights pre-trained on ImageNet.

In [None]:
def plot_hist(hist):
    """Plot the training and validation mean squared error per epoch.

    Args:
        hist: Object returned by ``model.fit``; its ``history`` dict must hold
            the "mean_squared_error" and "val_mean_squared_error" series.
    """
    fig, ax = plt.subplots()
    ax.plot(hist.history["mean_squared_error"], label="train")
    ax.plot(hist.history["val_mean_squared_error"], label="validation")
    # The curves are MSE (an error), so the title must not call them "accuracy".
    ax.set_title("model MSE")
    ax.set_ylabel("MSE")
    ax.set_xlabel("epoch")
    ax.legend(loc="upper left")
    plt.show()

In [None]:
# Metrics reported by fit() and evaluate().
# Keep this order: WandbClfEvalCallback.add_model_predictions reads the evaluate()
# result by position (score[1]..score[5]) and labels the values in this same order.
metrics = [
    keras.metrics.R2Score(class_aggregation="uniform_average"),
    keras.metrics.RootMeanSquaredError(),
    keras.metrics.MeanSquaredError(), 
    keras.metrics.MeanAbsoluteError(),
    keras.metrics.MeanAbsolutePercentageError()
] 

# EfficientNetV2-B3 backbone without its classification head, initialised with ImageNet weights.
# NOTE(review): Keras EfficientNetV2 models include input preprocessing by default and
# document inputs as pixel values in [0, 255] — confirm that the range produced by
# AdjustScaleWithFixedMaxValue is the one intended here.
base_model = keras.applications.EfficientNetV2B3(
    include_top=False,
    weights='imagenet',
    input_shape=(300,300,3)
)

In [None]:
# Freeze the pretrained weights | inference mode
base_model.trainable = False

# Regression model: random flips -> frozen backbone -> global average pooling ->
# Dense(config.dense_units, relu) -> Dropout(0.3) -> single linear output.
model = keras.models.Sequential([
    keras.Input(shape=(300, 300, 3)),
    keras.layers.RandomFlip(mode="horizontal_and_vertical"),
    base_model,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(config.dense_units, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(1)
])

model.summary(show_trainable=True)

In [None]:
# Stage 1 (transfer learning): train the new head on top of the frozen backbone,
# using Adam with its default settings and MSE as the loss.
model.compile(
    optimizer = keras.optimizers.Adam(), 
    loss = keras.losses.MeanSquaredError(), 
    metrics = metrics
)

# No wandb callbacks are attached here, so this stage is not logged to the run.
# NOTE(review): Keras takes `validation_split` from the last samples of the arrays,
# before shuffling, i.e. not grouped by `tag` — confirm that animals appearing in both
# the training and the validation part is acceptable.
hist_tl = model.fit(
    X_train, 
    Y_train, 
    validation_split=0.15, 
    epochs=config.epochs[0]
)

In [None]:
plot_hist(hist_tl)

In [None]:
# Unfreeze the base_model so the whole network can be fine-tuned.
# NOTE(review): unlike the pattern in the Keras transfer-learning guide, `base_model`
# is not called with `training=False` here — it is a plain layer of the Sequential
# model built above. Once it is trainable, its batchnorm layers are therefore expected
# to run in training mode during fit() and to update their batch statistics.
# If that is not intended, keep the BatchNormalization layers frozen — confirm.
base_model.trainable = True
model.summary(
    show_trainable=True
)

In [None]:
# Stage 2 (fine-tuning): compile again after changing `base_model.trainable`,
# this time with a small learning rate (1e-5).
model.compile(
    optimizer = keras.optimizers.Adam(1e-5), 
    loss = keras.losses.MeanSquaredError(), 
    metrics = metrics
)

# Train for config.epochs[1] epochs. Per-epoch metrics go to wandb through
# WandbMetricsLogger; WandbClfEvalCallback receives the test set as its
# `validation_data` and evaluates/saves the model at its fixed epochs.
hist_ft = model.fit(
    X_train, 
    Y_train, 
    validation_split=0.15, 
    epochs=config.epochs[1],
    callbacks=[
        WandbMetricsLogger(log_freq='epoch'),
        WandbClfEvalCallback(
            validation_data=(X_test, Y_test),
            data_table_columns=[],
            pred_table_columns=[],
        )
    ]
)

# [optional] finish the wandb run, necessary in notebooks
wandb.finish()

In [None]:
plot_hist(hist_ft)

### Model Evaluate

In [None]:
# Final evaluation of the fine-tuned model on the test set.
# `score` is a list: the loss first, then the compiled metrics in the order of `metrics`.
score = model.evaluate(X_test, Y_test, verbose = 0) 
score