In [41]:
import pandas as pd
import numpy as np
import matplotlib as plt
import sys
import os
from PIL import Image
from glob import glob
from tensorflow.keras.applications import EfficientNetB0
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

In [11]:
# get data path
base_path = '/Users/melissaaprilcastro/FeatherFind/Data/CUB_200_2011/images'

In [12]:
''' lets get image paths from data 
recall that data is filed so that
>DATA(file)
  > Bird Species(file)
    > image1...
    ...
    >imagek
  > Bird Species 2
    >image1
    ...
    >imagek
...

'''
image_paths = glob(f'{base_path}/**/*.jpg', recursive=True)
# print total images in data
print(f'total images: {len(image_paths)}')

total images: 11788


In [33]:
# print classes in the folder aka bird species or classifications
print(os.listdir(base_path))

path_directories = os.listdir(base_path)


['005.Crested_Auklet', '015.Lazuli_Bunting', '156.White_eyed_Vireo', '081.Pied_Kingfisher', '135.Bank_Swallow', '200.Common_Yellowthroat', '086.Pacific_Loon', '067.Anna_Hummingbird', '076.Dark_eyed_Junco', '149.Brown_Thrasher', '127.Savannah_Sparrow', '041.Scissor_tailed_Flycatcher', '141.Artic_Tern', '082.Ringed_Kingfisher', '099.Ovenbird', '013.Bobolink', '104.American_Pipit', '023.Brandt_Cormorant', '168.Kentucky_Warbler', '072.Pomarine_Jaeger', '040.Olive_sided_Flycatcher', '185.Bohemian_Waxwing', '144.Common_Tern', '034.Gray_crowned_Rosy_Finch', '126.Nelson_Sharp_tailed_Sparrow', '163.Cape_May_Warbler', '043.Yellow_bellied_Flycatcher', '084.Red_legged_Kittiwake', '089.Hooded_Merganser', '116.Chipping_Sparrow', '029.American_Crow', '.DS_Store', '035.Purple_Finch', '026.Bronzed_Cowbird', '114.Black_throated_Sparrow', '188.Pileated_Woodpecker', '108.White_necked_Raven', '158.Bay_breasted_Warbler', '154.Red_eyed_Vireo', '047.American_Goldfinch', '162.Canada_Warbler', '102.Western_Wood

In [28]:
# totoal folders or classification categories. there should be 200, but more may be counted if hidden items/folders are present
print(f'number of folders aka bird species/classification categories not excluding hidden folders: {len(os.listdir(base_path))}')

'''you can avoid counting hidden folder witjh code below but it is the same
items = [item for item in os.listdir(base_path) if not item.startswith('.')]
print(len(items))
'''
items = [item for item in os.listdir(base_path) if not item.startswith('.')]
print(f'number of folders (aka bird species) after making sure not to count hidden items: {len(items)}')


number of folders aka bird species/classification categories not excluding hidden folders: 201
number of folders (aka bird species) after making sure not to count hidden items: 200


In [52]:
'''here we will make a dataframe and get each image path and include it as well as the label or bird classification'''
# this empty list will store labels and image paths
data = []

# loop through each category folder in base directory
for label in os.listdir(base_path):
    label_path = os.path.join(base_path, label)
    
    # check if it's a directory (skip if it's a file or hidden folder)
    if os.path.isdir(label_path):
        # loop through each img file in the category folder
        for image_file in os.listdir(label_path):
            # create the full path to the image
            image_path = os.path.join(label_path, image_file)
            
            # then add image path and label to the data list
            data.append((label, image_path))  # Keep the order: label first, then image path

# make dataframe with the right data
df = pd.DataFrame(data, columns=['label', 'image_path'])
df

Unnamed: 0,label,image_path
0,005.Crested_Auklet,/Users/melissaaprilcastro/FeatherFind/Data/CUB...
1,005.Crested_Auklet,/Users/melissaaprilcastro/FeatherFind/Data/CUB...
2,005.Crested_Auklet,/Users/melissaaprilcastro/FeatherFind/Data/CUB...
3,005.Crested_Auklet,/Users/melissaaprilcastro/FeatherFind/Data/CUB...
4,005.Crested_Auklet,/Users/melissaaprilcastro/FeatherFind/Data/CUB...
...,...,...
11783,054.Blue_Grosbeak,/Users/melissaaprilcastro/FeatherFind/Data/CUB...
11784,054.Blue_Grosbeak,/Users/melissaaprilcastro/FeatherFind/Data/CUB...
11785,054.Blue_Grosbeak,/Users/melissaaprilcastro/FeatherFind/Data/CUB...
11786,054.Blue_Grosbeak,/Users/melissaaprilcastro/FeatherFind/Data/CUB...


In [63]:
# lets cheeck the minimum number of iimages for one species as we need to amke sure the disparity isnt too large to have a balanced data ser
print(f'{df['label'].value_counts().min()}\n')
# lets check the number of img for each label
print(f'{df['label'].value_counts()}')

41

label
137.Cliff_Swallow        60
128.Seaside_Sparrow      60
143.Caspian_Tern         60
014.Indigo_Bunting       60
092.Nighthawk            60
                         ..
105.Whip_poor_Will       49
008.Rhinoceros_Auklet    48
018.Spotted_Catbird      45
005.Crested_Auklet       44
006.Least_Auklet         41
Name: count, Length: 200, dtype: int64


In [13]:
''' using efficientNet to fine tune for bird classification

This model takes input images of shape (224, 224, 3), and the input data should be in the range [0, 255]. 

Normalization is included as part of the model.

libraries :
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
'''

'''from my understanding, include_top = False is usually for fine tunning the efficientNet model'''

base_model = EfficientNetB0(weights='imagenet', include_top = False)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [34]:
# lets make the data into a dataframe for easy visual
data = pd.DataFrame(base_path)

ValueError: DataFrame constructor not properly called!