<a href="https://colab.research.google.com/github/mersalas/MLBS-2025_workshop/blob/main/Lab_2a_Vision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import fastai library
from fastai.vision.all import *

Download either of the following datasets (the code below uses the freshwater fish diseases dataset):


*   [Philippine rice diseases](https://github.com/mersalas/MLBS-2025_workshop/blob/main/Datasets/rice_disease.zip)
*   [Freshwater fish diseases](https://github.com/mersalas/MLBS-2025_workshop/blob/main/Datasets/fish_disease.zip)



In [None]:
# Download the fish disease archive from the workshop GitHub repository
# and save it as fish_disease.zip in the current working directory
!wget https://raw.githubusercontent.com/mersalas/MLBS-2025_workshop/main/Datasets/fish_disease.zip -O fish_disease.zip

In [None]:
# Unzip the downloaded archive into the "fish_disease" folder
from zipfile import ZipFile

# Relative path: the download cell saves the archive into the current working
# directory, so this stays correct even when that directory is not /content.
zip_path = "fish_disease.zip"

with ZipFile(zip_path, "r") as archive:
    archive.extractall("fish_disease")

In [None]:
# Delete the zip file once it has been extracted
import os

# Guarded so that re-running this cell after the archive is gone is a no-op
# instead of raising FileNotFoundError.
if os.path.exists(zip_path):
    os.remove(zip_path)

In [None]:
# Set the path to the dataset
# Relative to the working directory, matching the relative "fish_disease"
# folder the archive is extracted into (no dependence on /content).
path = Path("fish_disease")/"fish_disease"
path.ls()

In [None]:
# Collect the image files found in the Train directory and show them
trn_path = path / 'Train'
files = get_image_files(trn_path)
files

In [None]:
# Open the first training image, print its size, and show a small thumbnail
first_file = files[0]
img = PILImage.create(first_file)
print(img.size)
img.to_thumb(128)

In [None]:
# Get the size of every training image with 8 parallel workers,
# then count how often each size occurs
from fastcore.parallel import *


def f(o):
    "Open the image file `o` and return its `size`."
    opened = PILImage.create(o)
    return opened.size


sizes = parallel(f, files, n_workers=8)
size_counts = pd.Series(sizes).value_counts()
size_counts

In [None]:
# Create dataloaders from the Train folder, holding out 20% of the images
# (fixed seed 42) as the validation set.
# NOTE(review): item_tfms resizes to 62 px while the batch augmentations
# request size=128, so images are presumably upsampled — confirm 62 is intended.
dls = ImageDataLoaders.from_folder(
    trn_path,
    valid_pct=0.2,
    seed=42,
    item_tfms=Resize(62, method='squish'),
    # Was misspelled `bacth_tfms`, so the augmentations never reached `batch_tfms`
    batch_tfms=aug_transforms(size=128, min_scale=0.75),
)

dls.show_batch(max_n=6)

Pick a model architecture here: [The best vision models for fine-tuning](https://www.kaggle.com/code/jhoward/the-best-vision-models-for-fine-tuning)

In [None]:
# Build a vision learner for the 'resnet26d' architecture that tracks
# error rate and accuracy, then switch it to fp16 training
learn = vision_learner(
    dls,
    'resnet26d',
    metrics=[error_rate, accuracy],
    path='.',
)
learn = learn.to_fp16()

In [None]:
# Run the learning-rate finder with the `valley` and `slide` suggestion functions
lr_suggesters = (valley, slide)
learn.lr_find(suggest_funcs=lr_suggesters)

In [None]:
# Fine-tune the model for 3 epochs
# NOTE(review): base_lr is hard-coded rather than taken from the lr_find
# suggestions — confirm 1e-1 is the intended value.
learn.fine_tune(epochs=3, base_lr=1e-1)

In [None]:
# Display results from the fine-tuned learner
learn.show_results()

In [None]:
# Gather the Test images and wrap them in a test dataloader derived from `dls`
tst_path = path / 'Test'
tst_files = get_image_files(tst_path)
tst_dl = dls.test_dl(tst_files)

In [None]:
# Run the learner on the test dataloader; keep only the first returned
# element (the predictions) and ignore the rest
preds = learn.get_preds(dl=tst_dl)[0]

In [None]:
# Index of the highest-valued class along dim 1 for each prediction
pred_idxs = preds.argmax(dim=1)

# Value of that predicted class, looked up at the argmax index
pred_probs = preds.gather(1, pred_idxs.unsqueeze(1)).squeeze(1)

In [None]:
# Translate each predicted index into its class label via the dataloaders' vocab
class_names = dls.vocab
pred_labels = [class_names[int(idx)] for idx in pred_idxs]

In [None]:
# Compile results into a DataFrame
results = pd.DataFrame({
    'filename': [f.name for f in tst_files],
    'predicted_class': pred_labels,
    'probability': pred_probs.numpy()
})

# Display the results
print(results.head())

## Exercise 2a

Train a CNN model incorporating the following:

*   convnext_small_in22k
*   padding
*   test time augmentation
*   Reference: [sample notebook](https://www.kaggle.com/code/jhoward/small-models-road-to-the-top-part-2/)







