# Imports

In [1]:
import numpy as np
import os
import json
import cv2
from skimage.feature import hog
import tarfile
from utils import *

# Helper Functions

In [2]:
# define a function that determines the mean and standard deviation of each RGB and and L*a*b*
# color-space channel for an image
def compute_channel_stats(image_path):
    # read the image
    img = cv2.imread(image_path)
    
    # convert image to L*a*b* color space
    lab_img = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    
    # compute mean and standard deviation for each color channel (RGB and L*a*b*)
    mean_rgb, std_rgb = cv2.meanStdDev(img)
    mean_lab, std_lab = cv2.meanStdDev(lab_img)
    
    # flatten the results into a feature vector
    channel_stats = np.concatenate((mean_rgb.flatten(), std_rgb.flatten(), mean_lab.flatten(), std_lab.flatten()))
    
    return channel_stats

In [3]:
# define a function that determines the hog descriptors for an image's grayscale representation
def compute_hog_stats(image_path):
    # read the image
    img = cv2.imread(image_path)
    
    # convert image to grayscale
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # compute HOG features
    fd, hog_image = hog(gray_img, orientations=4, pixels_per_cell=(32, 32), visualize=True)
    
    return fd, hog_image

In [4]:
# define a function that loops through each file to generate a dictionary that contains
# the feature vectors of all images in each class
def generate_feature_vectors(files, directory):
    feature_vectors = {}
    hog_images = {}
    
    # iterate over each file
    for class_name, file_name in files:
        # load the image
        img_path = os.path.join(directory, class_name, file_name)
        
        # compute color statistics
        channel_stats = compute_channel_stats(img_path)
        
        # compute HOG features
        hog_stats, hog_image = compute_hog_stats(img_path)
        
        # concatenate color statistics and HOG features
        stats = np.concatenate((channel_stats, hog_stats))
        
        # append feature vector to the correct class in feature_vectors
        if class_name not in feature_vectors:
            feature_vectors[class_name] = []
        feature_vectors[class_name].append(stats)
        
        # append hog_image to the correct class in hog_images
        if class_name not in hog_images:
            hog_images[class_name] = []
        hog_images[class_name].append(hog_image)
        
        
        # TODO:  add additional features to the feature_vector (SIFT, spatial frequencies, texture, others?)
    
    return feature_vectors, hog_images

In [5]:
# define a function to save the feature vector dictionary to disk
def save_feature_vectors(feature_vectors, filename):
    # convert numpy arrays to Python lists
    feature_vectors_dict = {}
    for class_name, vectors in feature_vectors.items():
        feature_vectors_dict[class_name] = [vector.tolist() for vector in vectors]
    
    # save feature_vectors_dict dictionary as JSON
    json_filename = filename.replace('.tar.gz', '.json')
    with open(json_filename, 'w') as f:
        json.dump(feature_vectors_dict, f)
    
    # create tar.gz file
    with tarfile.open(filename, 'w:gz') as tar:
        tar.add(json_filename, arcname=os.path.basename(json_filename))
    
    # remove the temporary JSON file
    os.remove(json_filename)

In [6]:
# define a function to load the feature vector dictionary from disk
def load_feature_vectors(filename):
    # extract the JSON file from the tar.gz file
    with tarfile.open(filename, 'r:gz') as tar:
        tar.extractall()
        json_filename = tar.getnames()[0]  # assuming only one file in the archive
    
    # load the JSON file and convert Python lists back to numpy arrays
    with open(json_filename, 'r') as f:
        feature_vectors_dict = json.load(f)
    
    feature_vectors = {}
    for class_name, vectors in feature_vectors_dict.items():
        feature_vectors[class_name] = [np.array(vector) for vector in vectors]
    
    # remove the extracted JSON file
    os.remove(json_filename)
    
    return feature_vectors


# Import data

In [7]:
# define file directory
directory = '../data/interim/PatternNet/PatternNet/images'

# create a list of classes considered for this project
classes = ['beach', 'chaparral', 'dense_residential', 'forest', 'freeway', 'harbor', 'overpass', 'parking_space', 'river', 'swimming_pool']

# define the train, val, and test sets
train_files, val_files, test_files = generate_splits(classes, directory)

train/validation/test subsets were loaded from a pre-generated file
	Number of train files: 4799
	Number of val files: 1599
	Number of test files: 1601


# Generate Feature Vectors

In [8]:
# generate the set of feature vectors for all images in each class
feature_vectors, hog_images = generate_feature_vectors(train_files, directory)

[ WARN:0@0.506] global /Users/runner/work/opencv-python/opencv-python/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('../data/interim/PatternNet/PatternNet/images/parking_space/parkingspace250.jpg'): can't open/read file: check file path/integrity


error: OpenCV(4.5.5) /Users/runner/work/opencv-python/opencv-python/opencv/modules/imgproc/src/color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


In [None]:
# inspections

print(type(feature_vectors))
print(feature_vectors.keys())
print(type(feature_vectors['beach']))
print(len(feature_vectors['beach']))
print(len(feature_vectors['beach'][0]))

# Save data

In [None]:
# save feature vector to disk
save_feature_vectors(feature_vectors, "../data/processed/feature_vectors_1.tar.gz")

# Import data

In [None]:
# load feature vector from disk
feature_vectors = load_feature_vectors("../data/processed/feature_vectors_1.tar.gz")

In [None]:
# inspections

print(type(feature_vectors))
print(feature_vectors.keys())
print(type(feature_vectors['beach']))
print(len(feature_vectors['beach']))
print(len(feature_vectors['beach'][0]))

In [None]:
#