In [4]:
import cv2 as cv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn import svm
from sklearn import metrics
from skimage.feature import hog

<h1>tasks</h1>
<p>Image normalization: consistent resolution, normalizing pixel values <br>
data augmentation: e.g. randomly applying rotation <br>
feature extraction

</p>

In [13]:
# Bounding box, in pixels, that every preprocessed image is scaled to fit inside.
desired_width, desired_height = 1000, 1000

# Preprocessed copies are written under the output folder, one sub-folder per
# sub-folder found in the raw-data folder.
output_parent_folder = r"processed data"
parent_folder_path = r"raw data"

def resize_image(image, new_width, new_height):
    """Scale ``image`` to fit inside a ``new_width`` x ``new_height`` box.

    The aspect ratio of the input is preserved, so only one of the two output
    dimensions is guaranteed to equal the requested size; the other is less
    than or equal to it.

    Args:
        image: Array whose first two axes are (height, width).
        new_width: Maximum output width in pixels.
        new_height: Maximum output height in pixels.

    Returns:
        The array returned by ``cv.resize`` for the computed target size.
    """
    height, width = image.shape[:2]
    aspect_ratio = width / float(height)

    if new_width / aspect_ratio <= new_height:
        # Width is the limiting dimension.
        resized_width = new_width
        resized_height = int(new_width / aspect_ratio)
    else:
        # Height is the limiting dimension.
        resized_width = int(new_height * aspect_ratio)
        resized_height = new_height

    # int() truncation can produce 0 for very elongated images, and cv.resize
    # rejects an empty target size, so keep at least one pixel per dimension.
    resized_width = max(1, resized_width)
    resized_height = max(1, resized_height)

    return cv.resize(image, (resized_width, resized_height))

def normalize_image(image):
    """Min-max scale ``image`` to the range [0, 1] as ``float32``.

    Args:
        image: Numeric array; it is converted to ``float32`` before scaling.

    Returns:
        A ``float32`` array of the same shape. If every value in ``image`` is
        identical (zero dynamic range) an all-zero array is returned instead
        of dividing by zero and producing NaNs.
    """
    image = image.astype(np.float32)
    min_val = np.min(image)
    max_val = np.max(image)

    value_range = max_val - min_val
    if value_range == 0:
        # Constant image: there is nothing to stretch, avoid 0/0 -> NaN.
        return np.zeros_like(image)

    return (image - min_val) / value_range

# Resize and normalize every image found in each sub-folder of the raw-data
# folder, writing the result to "processed_<subfolder>" under the output folder.
for subfolder_name in os.listdir(parent_folder_path):
    subfolder_path = os.path.join(parent_folder_path, subfolder_name)

    # Only sub-folders are processed; loose files in the parent folder are ignored.
    if not os.path.isdir(subfolder_path):
        continue

    new_name = "processed_" + subfolder_name
    output_folder_path = os.path.join(output_parent_folder, new_name)
    os.makedirs(output_folder_path, exist_ok=True)

    for filename in os.listdir(subfolder_path):
        # Case-insensitive match so files such as "IMG.JPG" are not silently skipped.
        if not filename.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
            continue

        image_path = os.path.join(subfolder_path, filename)
        image = cv.imread(image_path)
        if image is None:
            # cv.imread signals an unreadable/corrupt file by returning None.
            print(f"skipping unreadable image: {image_path}")
            continue

        resized_image = resize_image(image, desired_width, desired_height)
        normalized_image = normalize_image(resized_image)

        # normalize_image yields values in [0, 1]; scale back to 0-255 for saving.
        output_path = os.path.join(output_folder_path, filename)
        cv.imwrite(output_path, normalized_image*255)


def augment_images(rotation_angle, scale_factor, translation_x, translation_y, num):
    """Write rotated, scaled and translated variants of every source image.

    For each image in each sub-folder of ``parent_folder_path`` the image is
    resized and normalized, then three files are written to the matching
    "processed_<subfolder>" output folder:

    * ``rotated<num>_<filename>``: rotated about the image centre by
      ``rotation_angle`` and then rescaled by ``scale_factor``.
    * ``translated<num>_<filename>``: the rotated+scaled image shifted by
      (``translation_x``, ``translation_y``) and placed on a canvas the size
      of the normalized image.
    * ``translated2<num>_<filename>``: the un-rotated normalized image shifted
      by the same offset.

    Args:
        rotation_angle: Rotation in degrees passed to ``cv.getRotationMatrix2D``.
        scale_factor: Uniform scale applied with ``cv.resize`` after rotation.
        translation_x: Horizontal shift in pixels.
        translation_y: Vertical shift in pixels.
        num: Identifier of this augmentation pass. It is embedded in the output
            filenames so that successive calls do not overwrite each other.
    """
    print("function ran")
    for subfolder_name in os.listdir(parent_folder_path):
        subfolder_path = os.path.join(parent_folder_path, subfolder_name)

        # Only sub-folders are processed.
        if not os.path.isdir(subfolder_path):
            continue

        new_name = "processed_" + subfolder_name
        output_folder_path = os.path.join(output_parent_folder, new_name)
        os.makedirs(output_folder_path, exist_ok=True)

        for filename in os.listdir(subfolder_path):
            if not filename.endswith(('.jpg', '.jpeg', '.png', '.bmp')):
                continue

            image_path = os.path.join(subfolder_path, filename)
            image = cv.imread(image_path)
            if image is None:
                # cv.imread signals an unreadable/corrupt file by returning None.
                print(f"skipping unreadable image: {image_path}")
                continue

            resized_image = resize_image(image, desired_width, desired_height)
            normalized_image = normalize_image(resized_image)

            # warpAffine takes the output size as (width, height).
            canvas_size = (normalized_image.shape[1], normalized_image.shape[0])
            centre = (normalized_image.shape[1] / 2, normalized_image.shape[0] / 2)

            # Rotation followed by uniform rescale.
            output_path = os.path.join(output_folder_path, f"rotated{num}_{filename}")
            rotation_matrix = cv.getRotationMatrix2D(centre, rotation_angle, 1)
            augmented_image = cv.warpAffine(normalized_image, rotation_matrix, canvas_size)
            augmented_image = cv.resize(augmented_image, None, fx=scale_factor, fy=scale_factor)
            cv.imwrite(output_path, augmented_image*255)

            # Translation of the rotated+scaled image.
            output_path = os.path.join(output_folder_path, f"translated{num}_{filename}")
            translation_matrix = np.float32([[1, 0, translation_x], [0, 1, translation_y]])
            augmented_image = cv.warpAffine(augmented_image, translation_matrix, canvas_size)
            cv.imwrite(output_path, augmented_image*255)

            # Translation of the un-rotated normalized image.
            output_path = os.path.join(output_folder_path, f"translated2{num}_{filename}")
            augmented_image = cv.warpAffine(normalized_image, translation_matrix, canvas_size)
            cv.imwrite(output_path, augmented_image*255)

    print("function finish")
# One row per augmentation pass:
# (rotation_angle, scale_factor, translation_x, translation_y)
augmentation_passes = [
    (60, 1.3, -10, -5),
    (100, 0.8, 30, 10),
    (140, 0.9, 15, -3),
    (170, 0.8, -8, 3),
    (300, 1.1, 5, 7),
]

# Passes are numbered from 1 and run in the order listed above.
for pass_number, (angle, scale, shift_x, shift_y) in enumerate(augmentation_passes, start=1):
    augment_images(angle, scale, shift_x, shift_y, pass_number)

function ran
function finish
function ran
function finish
function ran
function finish
function ran
function finish
function ran
function finish


<h1>Classes and their corresponding numbers</h1>
0 - earth <br>
1 - jupiter <br>
2 - mars<br>
3 - mercury<br>
4 - neptune<br>
5 - saturn<br>
6 - uranus<br>
7 - venus<br>

In [17]:

def extract_edge_features(image, low_threshold=100, high_threshold=200):
    """Return a flattened Canny edge map of ``image``.

    Args:
        image: 3-channel image in BGR channel order, which is what
            ``cv.imread`` produces and what the caller in this notebook passes.
        low_threshold: Lower hysteresis threshold passed to ``cv.Canny``.
        high_threshold: Upper hysteresis threshold passed to ``cv.Canny``.

    Returns:
        1-D array holding the edge map flattened row by row; its length is
        therefore height * width of the input image.
    """
    # The input comes from cv.imread (BGR), so convert with the BGR code;
    # the RGB code would swap the red and blue luminance weights.
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    edges = cv.Canny(gray, low_threshold, high_threshold)
    return edges.flatten()

def extract_hog_features(image):
    """Compute HOG descriptors for each channel of ``image`` and concatenate them.

    Args:
        image: Either a 2-D single-channel image or a 3-D array whose last
            axis indexes the colour channels.

    Returns:
        1-D array containing the HOG descriptor of every channel, in channel
        order.
    """
    # Compute the descriptor on each channel itself. Running HOG on one shared
    # grayscale image for every channel would only repeat identical features.
    if image.ndim == 2:
        channels = [image]
    else:
        channels = [image[:, :, index] for index in range(image.shape[2])]

    hog_features = [
        hog(
            channel,
            orientations=9,
            pixels_per_cell=(8, 8),
            cells_per_block=(2, 2),
            transform_sqrt=True,
            block_norm='L2-Hys',
        )
        for channel in channels
    ]

    # Concatenate the per-channel descriptors into a single feature vector.
    return np.concatenate(hog_features)


def pad_features(features, desired_length=16777216):
    """Force a 1-D feature vector to exactly ``desired_length`` elements.

    Shorter vectors are right-padded with zeros; longer vectors are truncated.

    Args:
        features: 1-D array of feature values.
        desired_length: Target length. The default is the value that was
            previously hard-coded here (described in the original comment as
            the current maximum array length from the colour-histogram
            function; TODO confirm that it is still the right size).

    Returns:
        Array with exactly ``desired_length`` elements.
    """
    current_length = len(features)

    if current_length < desired_length:
        padding_length = desired_length - current_length
        return np.pad(features, (0, padding_length), mode='constant')

    return features[:desired_length]



def extract_color_histogram(image):
    """Return the concatenated, normalized 256-bin histograms of an image's channels.

    Args:
        image: Multi-channel image accepted by ``cv.split``. At most the
            first three channels are used.

    Returns:
        1-D array holding one 256-bin histogram per processed channel, laid
        out channel after channel.
    """
    # Only the first three channels are processed; extras are ignored.
    chans = cv.split(image)[:3]

    arr = []
    for chan in chans:
        hist = cv.calcHist([chan], [0], None, [256], [0, 256])
        # NOTE(review): positionally this passes alpha=8 and beta=cv.NORM_MINMAX,
        # leaving norm_type at its default, so this is NOT a min-max
        # normalization. Kept as-is to preserve the existing feature values;
        # confirm the intended normalization.
        c_hist = cv.normalize(hist, hist, 8, cv.NORM_MINMAX)
        arr.append(np.array(c_hist))

    return np.array(arr).flatten()
   

# Extract features for every processed image and save them, together with the
# numeric class label, as one text file per image.
parent_folder_path = r"processed data"
features_folder = r"features"
labels_folder = r"labels"

# Labels are assigned by position in this list. Sorting makes the mapping
# deterministic (os.listdir order is arbitrary), and filtering to directories
# keeps stray files from consuming a label or crashing the inner listing.
class_folders = sorted(
    name for name in os.listdir(parent_folder_path)
    if os.path.isdir(os.path.join(parent_folder_path, name))
)

i=0

for subfolder_name in class_folders:

    print(f"scanning {subfolder_name}")

    features_folder_path = os.path.join(features_folder, subfolder_name)
    labels_folder_path = os.path.join(labels_folder, subfolder_name)

    os.makedirs(features_folder_path, exist_ok=True)
    os.makedirs(labels_folder_path, exist_ok=True)

    subfolder_path = os.path.join(parent_folder_path, subfolder_name)
    for filename in os.listdir(subfolder_path):

        if not filename.endswith(('.jpg', '.jpeg', '.png', '.bmp')):
            continue

        image_path = os.path.join(subfolder_path, filename)
        image = cv.imread(image_path)
        if image is None:
            # cv.imread signals an unreadable/corrupt file by returning None.
            print(f"skipping unreadable image: {image_path}")
            continue

        color_histogram_features = extract_color_histogram(image)
        hog_features = extract_hog_features(image)
        edge_features = extract_edge_features(image)

        combined_features = np.concatenate((color_histogram_features, hog_features, edge_features), axis=0)
        labels_arr = np.array([i])

        # splitext handles extensions of any length; slicing off four
        # characters turned "name.jpeg" into "name..txt".
        file_name = os.path.splitext(filename)[0] + ".txt"
        features_output_path = os.path.join(features_folder_path, file_name)
        labels_output_path = os.path.join(labels_folder_path, file_name)
        np.savetxt(features_output_path, combined_features)

        np.savetxt(labels_output_path, labels_arr)
        print("savetxt")
    i+=1
print("finish")
print("new run")

scanning earth
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
savetxt
s

In [None]:
# Write the aggregated feature matrix and label vector to text files in the
# current working directory.
# NOTE(review): `features` and `labels` are not assigned in any cell shown in
# this notebook (the only `labels.append(...)` is commented out), so on a fresh
# kernel this cell is expected to raise NameError. Confirm where these are
# meant to be built before relying on this cell.
np.savetxt("features.txt", features)
np.savetxt("labels.txt", labels)
