---
# COSC2753 | Machine Learning

## Task 1: Disease Classification using the visual features of the image
---

In [None]:
import os, sys
# Expose the parent of the current working directory to the import system
# so that modules stored one level up can be imported from this notebook.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

# Reproducible results: seed NumPy's global random number generator
np.random.seed(42)

# Misc.: plotting style and dataset directories (relative paths)
plt.style.use('ggplot')
dir_train_dataset = '../data/processed_images/'
# NOTE(review): unlike dir_train_dataset (and the other '../data/...' paths in
# this notebook) this path has no '../' prefix, so it resolves against the
# current working directory itself — confirm this is intentional.
dir_test_dataset = 'data/processed_images/'

In [2]:
# Load the training metadata table
df = pd.read_csv("../data/meta_train.csv")

# Construct full image paths: <train_images>/<label>/<image_id>.
# Iterating over the two columns directly avoids the per-row overhead of a
# row-wise DataFrame.apply(axis=1) and also yields an (empty) column when the
# table has no rows.
df["image_path"] = [
    os.path.join("../data/train_images", label, image_id)
    for label, image_id in zip(df["label"], df["image_id"])
]

# Basic info
print("Total records:", len(df))
df.head()

Total records: 10407


Unnamed: 0,image_id,label,variety,age,image_path
0,100330.jpg,bacterial_leaf_blight,ADT45,45,../data/train_images\bacterial_leaf_blight\100...
1,100365.jpg,bacterial_leaf_blight,ADT45,45,../data/train_images\bacterial_leaf_blight\100...
2,100382.jpg,bacterial_leaf_blight,ADT45,45,../data/train_images\bacterial_leaf_blight\100...
3,100632.jpg,bacterial_leaf_blight,ADT45,45,../data/train_images\bacterial_leaf_blight\100...
4,101918.jpg,bacterial_leaf_blight,ADT45,45,../data/train_images\bacterial_leaf_blight\101...


**Remove Transparency**

In [None]:
def remove_transparency(image: Image.Image) -> Image.Image:
    """Return `image` as plain RGB when its mode can carry transparency.

    Images whose mode is one of ``RGBA``, ``RGBa``, ``LA``, ``La``, ``PA`` or
    ``P`` are converted to ``RGBA`` (unless they already are) and then to
    ``RGB``. Images in any other mode are returned untouched.

    Args:
        image: The image to process.

    Returns:
        The RGB conversion for the modes listed above; otherwise the very
        same object that was passed in.
    """
    alpha_capable_modes = ('RGBA', 'RGBa', 'LA', 'La', 'PA', 'P')
    if image.mode not in alpha_capable_modes:
        return image

    # Route every alpha-capable mode through RGBA so they all reach RGB the same way.
    rgba_image = image if image.mode == 'RGBA' else image.convert('RGBA')

    # NOTE(review): convert('RGB') appears to discard the alpha channel rather
    # than blend the pixels onto a background colour — confirm that is the
    # intended result for transparent inputs.
    return rgba_image.convert('RGB')

**Resize Image**

In [None]:
def resize_crop(image: Image.Image, width: int, height: int) -> Image.Image:
    """Center-crop `image` to the target aspect ratio, then resize it.

    The largest centered region with the aspect ratio ``width / height`` is
    cut out of the image (left/right edges are trimmed when the image is
    relatively wider than the target, top/bottom edges otherwise) and the
    region is resized to ``(width, height)`` with LANCZOS resampling.

    Args:
        image: The source image.
        width: Target width in pixels; must be positive.
        height: Target height in pixels; must be positive.

    Returns:
        A new image of size ``(width, height)``.

    Raises:
        ValueError: If `width` or `height` is not positive.
    """
    if width <= 0 or height <= 0:
        raise ValueError(
            f"width and height must be positive, got {width}x{height}"
        )

    src_width, src_height = image.width, image.height

    # Integer cross-multiplication compares src_width/src_height with
    # width/height exactly; float division followed by int() truncation can
    # lose a pixel (e.g. int(49 * (1 / 49)) == 0).
    if src_width * height > src_height * width:
        # Crop horizontally: keep the full height, trim left and right.
        crop_width = max(1, src_height * width // height)
        left = (src_width - crop_width) // 2
        box = (left, 0, left + crop_width, src_height)
    else:
        # Crop vertically: keep the full width, trim top and bottom.
        crop_height = max(1, src_width * height // width)
        upper = (src_height - crop_height) // 2
        box = (0, upper, src_width, upper + crop_height)

    return image.crop(box).resize((width, height), Image.Resampling.LANCZOS)

**Normalize Image**

In [None]:
def normalize_pixels(image: Image.Image) -> Image.Image:
    """Scale pixel values to [0, 1] and rebuild an 8-bit image from them.

    The pixel data is divided by 255.0 and then multiplied by 255 and cast
    to ``uint8`` again so that an image can be created from it. The returned
    image therefore holds the same 0-255 values as the input; a float
    representation in [0, 1] has to be produced on the array itself (for
    example when the image is loaded for training).

    Args:
        image: The image whose pixel values are processed.

    Returns:
        A new image built from the ``uint8`` pixel array.
    """
    pixel_array = np.array(image)
    unit_range_array = pixel_array / 255.0  # Normalize pixel values to the range [0, 1]

    # Round before casting: astype(np.uint8) truncates, so a product that lands
    # just below the original integer would otherwise drop by one.
    restored_array = np.rint(unit_range_array * 255).astype(np.uint8)
    return Image.fromarray(restored_array)