In [50]:
import os
import numpy as np
import pandas as pd
import cv2
import pickle

import csv
from sklearn.cluster import KMeans
from keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.utils import img_to_array  # Updated import
from keras.models import Model

In [8]:
# Path to your image dataset (200 folders)
IMAGE_DIR = os.path.join('..', 'Dataset', 'CUB_Dataset', 'CUB_Dataset', 'CUB_200_2011', 'images')
image_folder_path = IMAGE_DIR

# Parameters
IMG_SIZE = (224, 224)  # Image size for the CNN
N_CLUSTERS = 2 # Number of clusters

# Load pre-trained VGG16 model and remove the top layers
base_model = VGG16(weights='imagenet')
vgg_model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)


In [9]:
# Function to load and preprocess images
def load_and_preprocess_image(image_path):
    image = cv2.imread(image_path)
    image = cv2.resize(image, IMG_SIZE)  # Resize image
    image = img_to_array(image)
    image = np.expand_dims(image, axis=0)
    image = preprocess_input(image)  # Preprocess for VGG16
    return image

In [30]:
%%time
def extract_features_from_image():
    
    # Extract features from images
    image_paths, features, class_labels, class_number = [], [], [], []

    for class_folder in os.listdir(image_folder_path):
        class_folder_path = os.path.join(image_folder_path, class_folder)
        # print(class_folder_path, class_folder_path.split('\\')[-1].split('.')[-1].replace('_', ' '))
        print(class_folder_path.split('\\')[-1])
        bird_lable = class_folder_path.split('\\')[-1].split('.')[-1].replace('_', ' ')
        bird_number = class_folder_path.split('\\')[-1].split('.')[0]

        # Ensure it's a directory
        if os.path.isdir(class_folder_path):
            for image_name in os.listdir(class_folder_path):
                image_path = os.path.join(class_folder_path, image_name)
                image_paths.append(image_path)  # Save the image path

                # Load and preprocess image
                image = load_and_preprocess_image(image_path)

                # Extract features
                feature = vgg_model.predict(image)
                features.append(feature.flatten())  # Flatten the feature array
                class_labels.append(bird_lable)
                class_number.append(bird_number)
                
    return (image_paths, features, class_labels, class_number)
    

CPU times: total: 0 ns
Wall time: 1.95 ms


In [47]:
def save_df(extract_features):
    features = np.array(extract_features[1])
    image_paths = np.array(extract_features[0])
    class_number = np.array(extract_features[3])
    class_labels = np.array(extract_features[2])

    df = pd.DataFrame(features)
    df['class_number'] = class_number
    df['class_labels'] = class_labels
    df['image_paths'] = image_paths
    display(df.head())
    
    CSV_PATH = os.path.join('..', 'Dataset', 'images_extract_features.csv')
    df.to_csv(CSV_PATH, index=False)
    
    print(f"File saved to : {CSV_PATH}")
    
def train_kmean_model(N_CLUSTERS, features):
    
    # Apply K-Means clustering
    kmeans = KMeans(n_clusters = N_CLUSTERS, random_state = 42)
    # clusters = kmeans.fit_predict(features) 
    # note : she using fit_predict which is wrong in this case need to use fit to train model
    kmeans.fit(features)
    
    return kmeans

# Function to save the model
def save_model(model, file_path):
    
    with open(file_path, 'wb') as f:
        pickle.dump(model, f)
    print(f"Model saved to {file_path}")

# Function to load the model
def load_model(file_path):
    """
    Loads a model from a file.
    :param file_path: str, path of the saved model
    :return: loaded model
    """
    with open(file_path, 'rb') as f:
        model = pickle.load(f)
    print(f"Model loaded from {file_path}")
    return model

# Function to predict with the loaded model
def predict(model, data):
    """
    Predicts the cluster labels for the given data using the model.
    :param model: trained KMeans model
    :param data: ndarray, data to predict
    :return: ndarray, cluster labels
    """
    return model.predict(data)

## I have few querys ? 

1. How can we that much sure about the split ?
2. What if : it not split / cluster properly ? (Need to find out)

## Lest find 

In [32]:
%%time
extract_features = extract_features_from_image()
print(len(extract_features))

001.Black_footed_Albatross
002.Laysan_Albatross
003.Sooty_Albatross


004.Groove_billed_Ani
005.Crested_Auklet
006.Least_Auklet


007.Parakeet_Auklet
008.Rhinoceros_Auklet
009.Brewer_Blackbird


010.Red_winged_Blackbird
011.Rusty_Blackbird
012.Yellow_headed_Blackbird


013.Bobolink
014.Indigo_Bunting


015.Lazuli_Bunting
016.Painted_Bunting
017.Cardinal


018.Spotted_Catbird
019.Gray_Catbird
020.Yellow_breasted_Chat


021.Eastern_Towhee
022.Chuck_will_Widow


023.Brandt_Cormorant
024.Red_faced_Cormorant
025.Pelagic_Cormorant


026.Bronzed_Cowbird
027.Shiny_Cowbird
028.Brown_Creeper


029.American_Crow
030.Fish_Crow


031.Black_billed_Cuckoo
032.Mangrove_Cuckoo
033.Yellow_billed_Cuckoo


034.Gray_crowned_Rosy_Finch
035.Purple_Finch


036.Northern_Flicker
037.Acadian_Flycatcher
038.Great_Crested_Flycatcher


039.Least_Flycatcher
040.Olive_sided_Flycatcher
041.Scissor_tailed_Flycatcher


042.Vermilion_Flycatcher
043.Yellow_bellied_Flycatcher


044.Frigatebird
045.Northern_Fulmar
046.Gadwall


047.American_Goldfinch
048.European_Goldfinch


049.Boat_tailed_Grackle
050.Eared_Grebe
051.Horned_Grebe


052.Pied_billed_Grebe
053.Western_Grebe


054.Blue_Grosbeak
055.Evening_Grosbeak
056.Pine_Grosbeak


057.Rose_breasted_Grosbeak
058.Pigeon_Guillemot


059.California_Gull
060.Glaucous_winged_Gull
061.Heermann_Gull


062.Herring_Gull
063.Ivory_Gull
064.Ring_billed_Gull


065.Slaty_backed_Gull
066.Western_Gull


067.Anna_Hummingbird
068.Ruby_throated_Hummingbird
069.Rufous_Hummingbird


070.Green_Violetear
071.Long_tailed_Jaeger


072.Pomarine_Jaeger
073.Blue_Jay
074.Florida_Jay


075.Green_Jay
076.Dark_eyed_Junco
077.Tropical_Kingbird


078.Gray_Kingbird
079.Belted_Kingfisher


080.Green_Kingfisher
081.Pied_Kingfisher
082.Ringed_Kingfisher


083.White_breasted_Kingfisher
084.Red_legged_Kittiwake


085.Horned_Lark
086.Pacific_Loon
087.Mallard


088.Western_Meadowlark
089.Hooded_Merganser


090.Red_breasted_Merganser
091.Mockingbird
092.Nighthawk


093.Clark_Nutcracker
094.White_breasted_Nuthatch


095.Baltimore_Oriole
096.Hooded_Oriole
097.Orchard_Oriole


098.Scott_Oriole
099.Ovenbird
100.Brown_Pelican


101.White_Pelican
102.Western_Wood_Pewee


103.Sayornis
104.American_Pipit
105.Whip_poor_Will


106.Horned_Puffin
107.Common_Raven
108.White_necked_Raven


109.American_Redstart
110.Geococcyx


111.Loggerhead_Shrike
112.Great_Grey_Shrike
113.Baird_Sparrow


114.Black_throated_Sparrow
115.Brewer_Sparrow


116.Chipping_Sparrow
117.Clay_colored_Sparrow
118.House_Sparrow


119.Field_Sparrow
120.Fox_Sparrow
121.Grasshopper_Sparrow


122.Harris_Sparrow
123.Henslow_Sparrow


124.Le_Conte_Sparrow
125.Lincoln_Sparrow
126.Nelson_Sharp_tailed_Sparrow


127.Savannah_Sparrow
128.Seaside_Sparrow


129.Song_Sparrow
130.Tree_Sparrow
131.Vesper_Sparrow


132.White_crowned_Sparrow
133.White_throated_Sparrow


134.Cape_Glossy_Starling
135.Bank_Swallow
136.Barn_Swallow


137.Cliff_Swallow
138.Tree_Swallow


139.Scarlet_Tanager
140.Summer_Tanager
141.Artic_Tern


142.Black_Tern
143.Caspian_Tern
144.Common_Tern


145.Elegant_Tern
146.Forsters_Tern


147.Least_Tern
148.Green_tailed_Towhee
149.Brown_Thrasher


150.Sage_Thrasher
151.Black_capped_Vireo


152.Blue_headed_Vireo
153.Philadelphia_Vireo
154.Red_eyed_Vireo


155.Warbling_Vireo
156.White_eyed_Vireo
157.Yellow_throated_Vireo


158.Bay_breasted_Warbler
159.Black_and_white_Warbler


160.Black_throated_Blue_Warbler
161.Blue_winged_Warbler
162.Canada_Warbler


163.Cape_May_Warbler
164.Cerulean_Warbler


165.Chestnut_sided_Warbler
166.Golden_winged_Warbler
167.Hooded_Warbler


168.Kentucky_Warbler
169.Magnolia_Warbler


170.Mourning_Warbler
171.Myrtle_Warbler
172.Nashville_Warbler


173.Orange_crowned_Warbler
174.Palm_Warbler
175.Pine_Warbler


176.Prairie_Warbler
177.Prothonotary_Warbler


178.Swainson_Warbler
179.Tennessee_Warbler
180.Wilson_Warbler


181.Worm_eating_Warbler
182.Yellow_Warbler


183.Northern_Waterthrush
184.Louisiana_Waterthrush
185.Bohemian_Waxwing


186.Cedar_Waxwing
187.American_Three_toed_Woodpecker
188.Pileated_Woodpecker


189.Red_bellied_Woodpecker
190.Red_cockaded_Woodpecker


191.Red_headed_Woodpecker
192.Downy_Woodpecker
193.Bewick_Wren


194.Cactus_Wren
195.Carolina_Wren


196.House_Wren
197.Marsh_Wren
198.Rock_Wren


199.Winter_Wren
200.Common_Yellowthroat


4
CPU times: total: 3h 24min 52s
Wall time: 1h 10min 31s


In [48]:
%%time
# Save dataframe
save_df(extract_features)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4089,4090,4091,4092,4093,4094,4095,class_number,class_labels,image_paths
0,3.042403,2.884922,0.0,7.585155,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,Black footed Albatross,..\Dataset\CUB_Dataset\CUB_Dataset\CUB_200_201...
1,0.0,0.0,9.547824,5.867018,0.0,0.0,0.0,0.0,0.547732,0.614757,...,0.0,0.0,0.0,13.213732,0.0,0.0,0.0,1,Black footed Albatross,..\Dataset\CUB_Dataset\CUB_Dataset\CUB_200_201...
2,4.043432,0.0,0.0,1.765985,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,Black footed Albatross,..\Dataset\CUB_Dataset\CUB_Dataset\CUB_200_201...
3,0.0,0.643451,0.0,11.106439,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.763441,2.361611,0.0,0.0,1,Black footed Albatross,..\Dataset\CUB_Dataset\CUB_Dataset\CUB_200_201...
4,4.628634,3.541798,0.0,2.993902,0.0,0.0,0.0,0.0,0.0,0.0,...,6.767929,0.0,0.0,1.820253,0.0,0.0,0.0,1,Black footed Albatross,..\Dataset\CUB_Dataset\CUB_Dataset\CUB_200_201...


File saved to : ..\Dataset\images_extract_features.csv




KMeans(n_clusters=2, random_state=42)


In [49]:
# Convert features to numpy array
features = np.array(extract_features[1])

# Train Kmean model
clusters = train_kmean_model(N_CLUSTERS, features)
print(clusters)



KMeans(n_clusters=2, random_state=42)
