In [7]:
import numpy as np
import cv2
import pandas as pd
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.preprocessing.image import img_to_array, load_img

In [16]:
# Function to preprocess and load a grayscale image 
# Attention: When I want smaller images like 64x64 I need to consider this in VGG16 - default is 224x224x3 with 3 channels for RGB

def preprocess_grayscale_image(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Load image as grayscale
    image = cv2.resize(image, (64, 64))  # Resize image to 64x64
    image = np.stack((image,)*3, axis=-1)  # Convert to 3-channel RGB
    image = image.astype('float32')
    return image

# Function to extract features 
def extract_features(image_path):
    image = preprocess_grayscale_image(image_path)
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    image = tf.keras.applications.vgg16.preprocess_input(image)  # Preprocess image
    features = model.predict(image, verbose=0)
    return features.flatten()

To use VGG16 with 64x64 grayscale images, you can follow these steps:
1. Convert Grayscale to RGB: Duplicate the grayscale channel to create a 3-channel RGB image.
2. Resize to 64x64
3. Modify the VGG16 Architecture to handle 64x64 input size because the default is different (224x224x3 with 3 channels for RGB). 

In [9]:
# Get the labels of the glaciers for the measurements
file_20_grid = r'n0_metadata19_hmineq0.0_tmin20050000_mean_grid_20.csv'
data_20_grid = pd.read_csv(file_20_grid, low_memory=False)
RGIId_names = np.unique(data_20_grid['RGIId'])

In [10]:
input_layer = Input(shape=(64, 64, 3))

# Load the VGG16 model with weights, excluding the top layers
base_model = VGG16(weights='imagenet', include_top=False, input_tensor=input_layer)

# Create a model that outputs the features from the last convolutional block
model = Model(inputs=base_model.input, outputs=base_model.get_layer('block5_pool').output)

In [17]:
features_list = []
path = r'glacier_geometries_images_small/'

for name in tqdm(RGIId_names, desc="Processing images", unit="image"):
    image_path = path + name + '.png'
    features = extract_features(image_path)
    features_list.append(features)

Processing images: 100%|████████████████████████████████████████████████████████| 2101/2101 [08:06<00:00,  4.32image/s]


In [19]:
picture_features = []
for name in tqdm(data_20_grid['RGIId'], desc="Create Features for Measurements", unit="measurement"):
    for i in range(len(RGIId_names)):
        if name == RGIId_names[i]:
            picture_features.append(features_list[i])  

100%|██████████████████████████████████████████████████████████████████████████| 73111/73111 [00:22<00:00, 3234.24it/s]


In [20]:
# create dataframe
features_df = pd.DataFrame(picture_features)
features_df.columns = [f'pic_flat_feat_{i+1}' for i in range(features_df.shape[1])]
df = pd.concat([data_20_grid, features_df], axis=1)
df.to_csv("n0_wpics_feat_metadata19_hmineq0.0_tmin20050000_mean_grid_20.csv", index=False)