# Meat Quality Assessment - Data Exploration

---

We start this project by having a look at the data. 

Because the dataset is very simple and all the images are photos of the same piece of meat taken from the same angle at regular intervals, we don't expect to employ many advanced image processing techniques. However, it is still interesting to come up with some basic characterisation of the images, particularly with respect to how their colour distribution differs between the two classes.

In [None]:
# Enable the IPython autoreload extension (mode 2) so that edits to imported
# local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys
import pathlib

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from loguru import logger

# Make the notebook's parent folder importable so local modules can be loaded
module_path = os.path.abspath(os.pardir)
sys.path.append(module_path)

# Swap loguru's default sink for a stderr sink that only emits INFO and above
logger.remove()
logger.add(sys.stderr, level="INFO")

In [None]:
# Dataset location and notebook output folder, both resolved from the project root
data_path = pathlib.Path(module_path, "data", "meat-quality-assessment-based-on-deep-learning")
output_path = pathlib.Path(module_path, "notebooks", "output")

In [None]:
def display_images(data_path: os.PathLike, class_name: str, n_samples: int = 5, seed: int = 456) -> None:
    """Display a random sample of images from one class in a grid.

    The images are laid out in rows of at most five; grid cells left over in
    the last row are shown empty.

    Args:
        data_path (os.PathLike): Folder containing the ``.jpg`` images of the class.
        class_name (str): Class label; it is lower-cased and used in the figure title.
        n_samples (int, optional): Number of images to draw without replacement.
            Must be between 1 and the number of ``.jpg`` files in ``data_path``.
            Defaults to 5.
        seed (int, optional): Seed of the random generator used to pick the
            images. Defaults to 456.

    Raises:
        ValueError: If ``n_samples`` is smaller than 1 or larger than the
            number of ``.jpg`` files found in ``data_path``.
    """
    # Validate explicitly: ``assert`` statements are stripped when Python runs with -O
    if n_samples < 1:
        raise ValueError(f"n_samples must be at least 1, got {n_samples}")

    # os.listdir() gives no ordering guarantee; sorting makes the sample drawn
    # for a given seed reproducible across machines and runs
    image_paths = sorted(
        os.path.join(data_path, f) for f in os.listdir(data_path) if f.endswith(".jpg")
    )
    if n_samples > len(image_paths):
        raise ValueError(
            f"n_samples={n_samples} exceeds the {len(image_paths)} .jpg image(s) found in {data_path}"
        )

    rng = np.random.default_rng(seed=seed)
    samples = rng.choice(image_paths, size=n_samples, replace=False)

    ncols = min(5, n_samples)
    nrows = int(np.ceil(n_samples / ncols))
    # squeeze=False keeps ``axs`` a 2-D array even when a single image is requested
    fig, axs = plt.subplots(
        nrows=nrows, ncols=ncols, figsize=(ncols * 4, nrows * 2.5), squeeze=False
    )
    flat_axes = axs.flatten()
    for ax, image_path in zip(flat_axes, samples):
        ax.imshow(mpimg.imread(image_path))

    # Hide axis decorations on every cell, including the unused trailing ones
    for ax in flat_axes:
        ax.set_axis_off()

    fig.suptitle(f"Samples of {class_name.lower()} meat")
    plt.tight_layout()
    plt.show()

In [None]:
# Preview 15 randomly chosen photos of fresh meat
display_images(
    data_path=data_path / "Fresh",
    class_name="Fresh",
    n_samples=15,
    seed=999,
)

In [None]:
# Preview 15 randomly chosen photos of spoiled meat
display_images(
    data_path=data_path / "Spoiled",
    class_name="Spoiled",
    n_samples=15,
    seed=3,
)

From an initial visual inspection, we can already make some observations about the data:

- The photos look very similar to each other; this is expected given the nature of the dataset
- The spoiled meat looks darker, although only slightly so compared to when the meat was fresh

As the next step, we decompose the photos into their Red, Green, and Blue (RGB) channels, to check for any differences in this domain.

In [None]:
def display_rgb_images(data_path: os.PathLike, class_name: str, n_samples: int = 5, seed: int = 456) -> None:
    """Display sampled images of one class next to their R, G and B channels.

    Every sampled image gets its own row of four panels: the original image
    followed by its first, second and third colour channel. The mean value of
    each channel is printed underneath the corresponding panel.

    Args:
        data_path (os.PathLike): Folder containing the ``.jpg`` images of the class.
        class_name (str): Class label; it is lower-cased and used in the figure title.
        n_samples (int, optional): Number of images to draw without replacement
            (one figure row per image). Must be between 1 and the number of
            ``.jpg`` files in ``data_path``. Defaults to 5.
        seed (int, optional): Seed of the random generator used to pick the
            images. Defaults to 456.

    Raises:
        ValueError: If ``n_samples`` is smaller than 1 or larger than the
            number of ``.jpg`` files found in ``data_path``.
    """
    # Validate explicitly: ``assert`` statements are stripped when Python runs with -O
    if n_samples < 1:
        raise ValueError(f"n_samples must be at least 1, got {n_samples}")

    # os.listdir() gives no ordering guarantee; sorting makes the sample drawn
    # for a given seed reproducible across machines and runs
    image_paths = sorted(
        os.path.join(data_path, f) for f in os.listdir(data_path) if f.endswith(".jpg")
    )
    if n_samples > len(image_paths):
        raise ValueError(
            f"n_samples={n_samples} exceeds the {len(image_paths)} .jpg image(s) found in {data_path}"
        )

    rng = np.random.default_rng(seed=seed)
    samples = rng.choice(image_paths, size=n_samples, replace=False)

    ncols = 4
    nrows = n_samples
    # squeeze=False keeps ``axs`` a 2-D array even when a single image is requested
    fig, axs = plt.subplots(
        nrows=nrows,
        ncols=ncols,
        figsize=(ncols * 4, nrows * 2.5),
        sharex=True,
        sharey=True,
        squeeze=False,
    )
    for row_axes, image_path in zip(axs, samples):
        sample = mpimg.imread(image_path)
        row_axes[0].imshow(sample)
        for channel_idx, channel_name in enumerate(("R", "G", "B")):
            channel = sample[:, :, channel_idx]
            channel_ax = row_axes[channel_idx + 1]
            channel_ax.imshow(channel)
            channel_ax.set_xlabel(f"Mean {channel_name} value: {np.mean(channel)}")

    axs[0][0].set_title("Original Image")
    axs[0][1].set_title("R Channel")
    axs[0][2].set_title("G Channel")
    axs[0][3].set_title("B Channel")

    # Remove ticks and frames by hand: set_axis_off() would also hide the
    # x-labels that carry the mean channel values
    for a in axs.flatten():
        a.set_xticks([])
        a.set_yticks([])
        for spine in a.spines.values():
            spine.set_visible(False)

    fig.suptitle(f"Samples of {class_name.lower()} meat in RGB")
    plt.tight_layout()
    plt.show()

In [None]:
# Channel-by-channel view of 5 randomly chosen fresh-meat photos
display_rgb_images(
    data_path=data_path / "Fresh",
    class_name="Fresh",
    n_samples=5,
    seed=12,
)

In [None]:
# Channel-by-channel view of 5 randomly chosen spoiled-meat photos
display_rgb_images(
    data_path=data_path / "Spoiled",
    class_name="Spoiled",
    n_samples=5,
    seed=1293,
)