# Run Experiments

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import cv2
from skimage import feature, exposure
import colorsys
import os
import random
from tqdm import tqdm
from PIL import Image

from sklearn.model_selection import train_test_split, GridSearchCV, cross_validate
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

import torch
from torch import nn
import torchvision.models as models
from torchvision.models import resnet101, ResNet101_Weights
from torchvision.models import vgg16, VGG16_Weights
from torchvision.models import efficientnet_v2_m, EfficientNet_V2_M_Weights
from torchvision import transforms
from pytorch_model_summary import summary

In [2]:
from utils import *
from HOG_and_DAISY_feature_extraction_methods import *

## Split the data into Train (and Validation) and a Test set

In [3]:
# # load all 30 classes, 220 images per class
# X, Y, idx_to_cl = load_dataset('data', cl_limit=30, img_limit=220)

# # train test split at 80-20 ratio
# X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, shuffle=True, random_state=42)

# print(f"Data shape: {X.shape}")
# print(f"Data labels shape: {Y.shape}\n")
# print(f"X Train shape: {X_train.shape}")
# print(f"Y Train shape: {Y_train.shape}")
# print(f"X Test shape: {X_test.shape}")
# print(f"Y Test shape: {Y_test.shape}")

# # save all as np
# folder_path = Path('train_test_split')
# folder_path.mkdir(parents=True, exist_ok=True)

# with open(folder_path / 'X_train.npy', 'wb') as f:
#     np.save(f, X_train)
# with open(folder_path / 'X_test.npy', 'wb') as f:
#     np.save(f, X_test)
# with open(folder_path / 'Y_train.npy', 'wb') as f:
#     np.save(f, Y_train)
# with open(folder_path / 'Y_test.npy', 'wb') as f:
#     np.save(f, Y_test)

In [4]:
# Read the persisted train/test arrays back from disk.
folder_path = Path('train_test_split')

X_train, Y_train, X_test, Y_test = (
    np.load(folder_path / file_name)
    for file_name in ('X_train.npy', 'Y_train.npy', 'X_test.npy', 'Y_test.npy')
)

# Print the shapes as a quick check on what was loaded.
print(f"X Train shape: {X_train.shape}")
print(f"Y Train shape: {Y_train.shape}")
print(f"X Test shape: {X_test.shape}")
print(f"Y Test shape: {Y_test.shape}")

X Train shape: (5280, 600, 600, 3)
Y Train shape: (5280,)
X Test shape: (1320, 600, 600, 3)
Y Test shape: (1320,)


In [5]:
def process_features():
    """Extract one feature set from the training images and save it to disk.

    Calls ``apply_features`` on the notebook-level ``X_train`` with
    ``extract_small_brightness_hog_features`` registered under the key
    ``'temp'``, then writes the returned feature array to
    ``features/train_small_brightness_HOG_features.npy``.

    Returns
    -------
    None
        The features are only written to disk; nothing is returned.
    """
    # The second value returned by apply_features is not used here.
    features, _ = apply_features(X=X_train, feature_functions={
        'temp': extract_small_brightness_hog_features,
    })

    # Create the output folder first so opening the file for writing does not
    # fail with FileNotFoundError when 'features' does not exist yet.
    feature_folder_path = Path('features')
    feature_folder_path.mkdir(parents=True, exist_ok=True)

    with open(feature_folder_path / 'train_small_brightness_HOG_features.npy', 'wb') as f:
        np.save(f, features)
        
# process_features()

## Plan
 - PCA variance explained
    - load in all features 
    - plot variance explained together
 - Pick a final feature set
    - do PCA
    - do t-SNE

In [6]:
def load_features(features_dir='features'):
    """Load every saved ``.npy`` feature array found directly in a directory.

    Parameters
    ----------
    features_dir : str or Path, default 'features'
        Directory to scan. Only regular files with a ``.npy`` suffix are
        loaded; sub-directories and other files are ignored.

    Returns
    -------
    dict
        Maps each file's stem (file name without ``.npy``) to the array
        returned by ``np.load``. Entries are inserted in case-insensitive
        alphabetical order of file name, so the order does not depend on
        how the operating system enumerates the directory.

    Raises
    ------
    FileNotFoundError
        Propagated from ``Path.iterdir`` if ``features_dir`` does not exist.
    """
    features_dir = Path(features_dir)

    # Sort explicitly: iterdir() yields entries in arbitrary order. The raw
    # name is a tie-breaker for names that differ only by case.
    npy_paths = sorted(
        (path for path in features_dir.iterdir()
         if path.is_file() and path.suffix == '.npy'),
        key=lambda path: (path.name.lower(), path.name),
    )
    return {path.stem: np.load(path) for path in npy_paths}

In [7]:
# Read every saved feature file under features/ into a dict keyed by file stem.
feature_dict = load_features()

In [8]:
# # features scaling
# scaler = StandardScaler()
# features_train_scaled = scaler.fit_transform(multiple_features) # replace input with whatever experiments we are doing
# # TODO: save this scaler for final features only

In [9]:
# # PCA for features, use 95% explained variance for now
# pca = PCA(n_components=0.95)
# train_transformed = pca.fit_transform(features_train_scaled)
# print(train_transformed.shape)

# # TODO: save this pca model for final features

## Plan
 - iterate through the feature_dict
    - for each feature set, train a Log Reg model
    - report accuracy, f1-weighted and time info

In [10]:
def try_models(feature_dictionary, Y_train):
    """Cross-validate a logistic regression on each feature set.

    Parameters
    ----------
    feature_dictionary : dict
        Maps a feature-set name to the feature data passed to
        ``cross_validate`` as ``X``.
    Y_train : array-like
        Labels passed to ``cross_validate`` as ``y`` for every feature set.

    Returns
    -------
    dict
        Maps each feature-set name to the dict returned by
        ``cross_validate`` (10 folds, scored on weighted F1 and accuracy).
    """
    scoring_metrics = ('f1_weighted', 'accuracy')
    cv_results_by_feature = {}

    for feature_name, feature_data in tqdm(feature_dictionary.items()):
        # A fresh, identically configured classifier for every feature set.
        classifier = LogisticRegression(
            penalty="l2",
            tol=1e-4,
            C=1.0,
            solver="lbfgs",
            max_iter=150,
            multi_class="multinomial",
            n_jobs=-1,
        )
        cv_results_by_feature[feature_name] = cross_validate(
            classifier,
            feature_data,
            Y_train,
            scoring=scoring_metrics,
            cv=10,
            n_jobs=-1,
        )

    return cv_results_by_feature

In [11]:
# Cross-validate a logistic regression on every loaded feature set; the recorded run below took about 30 minutes for 22 feature sets.
results_dict = try_models(feature_dict, Y_train)

100%|██████████| 22/22 [30:20<00:00, 82.74s/it] 


In [16]:
# Reduce each feature set's cross-validation output to its mean over folds.
# All five lists follow results_dict's iteration order, so index i refers to
# the same feature set in each of them.
feature_names_list = list(results_dict)
fit_time_list = [np.mean(cv_info['fit_time']) for cv_info in results_dict.values()]
score_time_list = [np.mean(cv_info['score_time']) for cv_info in results_dict.values()]
f1_list = [np.mean(cv_info['test_f1_weighted']) for cv_info in results_dict.values()]
accuracy_list = [np.mean(cv_info['test_accuracy']) for cv_info in results_dict.values()]

In [20]:
# Assemble the per-feature averages into one table, one row per feature set.
fill_dict = dict(
    Feature_Name=feature_names_list,
    Accuracy=accuracy_list,
    F1_Score=f1_list,
    Train_Time=fit_time_list,
    Evaluate_Time=score_time_list,
)
results_df = pd.DataFrame(data=fill_dict)

# Display the table ranked by accuracy, best first. The sorted copy is only
# shown; results_df itself keeps its original row order.
results_df.sort_values(by="Accuracy", ascending=False)

Unnamed: 0,Feature_Name,Accuracy,F1_Score,Train_Time,Evaluate_Time
13,train_ResNet_features,0.917235,0.917039,54.139675,0.015045
4,train_EffNet_features,0.907576,0.907905,33.202282,0.008705
19,train_small_hue_HOG_features,0.310985,0.305891,217.378017,0.03706
21,train_small_saturation_HOG_features,0.309659,0.296947,208.478318,0.04391
10,train_large_hue_HOG_features,0.302652,0.296977,23.463807,0.006006
18,train_small_green_HOG_features,0.302083,0.287884,220.557816,0.041026
17,train_small_gray_HOG_features,0.298864,0.28606,213.320857,0.046007
16,train_small_brightness_HOG_features,0.298106,0.283539,215.857707,0.046792
20,train_small_red_HOG_features,0.29697,0.284441,215.376529,0.048306
15,train_small_blue_HOG_features,0.295076,0.279959,219.546621,0.043214


In [21]:
# Save the summary table. results_df was never reassigned to its sorted copy,
# so rows are written in their original order, with the index as the first CSV column.
results_df.to_csv("Initial_Features_LogReg_results.csv")

## Features to Join
 - RGB, HSV
 - HOG (Small, Red), HOG (Large, Red), HOG (Small, Green), HOG (Large, Green), HOG (Small, Blue), HOG (Large, Blue)
 - HOG (Small, Gray), HOG (Large, Gray), HOG (Small, Hue), HOG (Large, Hue), HOG (Small, Saturation), HOG (Large, Saturation), HOG (Small, Brightness), HOG (Large, Brightness)
 - DAISY (Red), DAISY (Green), DAISY (Blue)
 - RGB, HSV, DAISY (Gray)
 - RGB, HSV, DAISY (Gray), ResNet

In [23]:
# List the names of the loaded feature sets, one per line.
for name in feature_dict:
    print(name)

train_daisy_blue_features
train_daisy_gray_features
train_daisy_green_features
train_daisy_red_features
train_EffNet_features
train_HSV_features
train_large_blue_HOG_features
train_large_brightness_HOG_features
train_large_gray_HOG_features
train_large_green_HOG_features
train_large_hue_HOG_features
train_large_red_HOG_features
train_large_saturation_HOG_features
train_ResNet_features
train_RGB_features
train_small_blue_HOG_features
train_small_brightness_HOG_features
train_small_gray_HOG_features
train_small_green_HOG_features
train_small_hue_HOG_features
train_small_red_HOG_features
train_small_saturation_HOG_features


In [31]:
# Join the red, green and blue DAISY matrices side by side (along axis 1).
RGB_DAISY_features = np.concatenate(
    [
        feature_dict['train_daisy_red_features'],
        feature_dict['train_daisy_green_features'],
        feature_dict['train_daisy_blue_features'],
    ],
    axis=1,
)
RGB_DAISY_features.shape

(5280, 5400)

In [32]:
# Multi-scale red HOG: small-scale columns followed by large-scale columns.
red_hog_features = np.concatenate(
    [
        feature_dict['train_small_red_HOG_features'],
        feature_dict['train_large_red_HOG_features'],
    ],
    axis=1,
)
red_hog_features.shape

(5280, 9000)

In [33]:
# Multi-scale green HOG: small-scale columns followed by large-scale columns.
green_hog_features = np.concatenate(
    [
        feature_dict['train_small_green_HOG_features'],
        feature_dict['train_large_green_HOG_features'],
    ],
    axis=1,
)
green_hog_features.shape

(5280, 9000)

In [34]:
# Multi-scale blue HOG: small-scale columns followed by large-scale columns.
blue_hog_features = np.concatenate(
    [
        feature_dict['train_small_blue_HOG_features'],
        feature_dict['train_large_blue_HOG_features'],
    ],
    axis=1,
)
blue_hog_features.shape

(5280, 9000)

In [35]:
# Multi-scale gray HOG: small-scale columns followed by large-scale columns.
gray_hog_features = np.concatenate(
    [
        feature_dict['train_small_gray_HOG_features'],
        feature_dict['train_large_gray_HOG_features'],
    ],
    axis=1,
)
gray_hog_features.shape

(5280, 9000)

In [36]:
# Multi-scale hue HOG: small-scale columns followed by large-scale columns.
hue_hog_features = np.concatenate(
    [
        feature_dict['train_small_hue_HOG_features'],
        feature_dict['train_large_hue_HOG_features'],
    ],
    axis=1,
)
hue_hog_features.shape

(5280, 9000)

In [37]:
# Multi-scale saturation HOG: small-scale columns followed by large-scale columns.
saturation_hog_features = np.concatenate(
    [
        feature_dict['train_small_saturation_HOG_features'],
        feature_dict['train_large_saturation_HOG_features'],
    ],
    axis=1,
)
saturation_hog_features.shape

(5280, 9000)

In [38]:
# Multi-scale brightness HOG: small-scale columns followed by large-scale columns.
brightness_hog_features = np.concatenate(
    [
        feature_dict['train_small_brightness_HOG_features'],
        feature_dict['train_large_brightness_HOG_features'],
    ],
    axis=1,
)
brightness_hog_features.shape

(5280, 9000)

In [39]:
# Colour features: RGB columns followed by HSV columns.
RGB_HSV_features = np.concatenate(
    [
        feature_dict['train_RGB_features'],
        feature_dict['train_HSV_features'],
    ],
    axis=1,
)
RGB_HSV_features.shape

(5280, 12)

In [40]:
# Combined multi-scale HOG over the red, green and blue channels, in that column order.
hog_RGB_features = np.concatenate(
    [red_hog_features, green_hog_features, blue_hog_features],
    axis=1,
)
hog_RGB_features.shape

(5280, 27000)

In [41]:
# Combined multi-scale HOG over gray, hue, saturation and brightness, in that column order.
hog_gray_HSV_features = np.concatenate(
    [
        gray_hog_features,
        hue_hog_features,
        saturation_hog_features,
        brightness_hog_features,
    ],
    axis=1,
)
hog_gray_HSV_features.shape

(5280, 36000)

In [42]:
# Colour features (RGB + HSV) followed by the ResNet feature columns.
RGB_HSV_ResNet_features = np.concatenate(
    [RGB_HSV_features, feature_dict['train_ResNet_features']],
    axis=1,
)
RGB_HSV_ResNet_features.shape

(5280, 2060)

In [43]:
# RGB + HSV + ResNet columns followed by the multi-scale gray HOG columns.
RGB_HSV_ResNet_GrayHOG_features = np.concatenate(
    [RGB_HSV_ResNet_features, gray_hog_features],
    axis=1,
)
RGB_HSV_ResNet_GrayHOG_features.shape

(5280, 11060)

In [48]:
# Display label -> combined feature matrix for each candidate feature set.
# The labels are later used as legend entries in the PCA plot.
new_feature_combos_dict = {
    "DAISY (RGB)": RGB_DAISY_features,
    "HOG Multi-Scale (Red)": red_hog_features,
    "HOG Multi-Scale (Green)": green_hog_features,
    "HOG Multi-Scale (Blue)": blue_hog_features,
    "HOG Multi-Scale (Gray)": gray_hog_features,
    "HOG Multi-Scale (Hue)": hue_hog_features,
    "HOG Multi-Scale (Saturation)": saturation_hog_features,
    "HOG Multi-Scale (Brightness)": brightness_hog_features,
    # NOTE(review): disabled entry. Presumably left out because this matrix has
    # only 12 columns, fewer than the 500 PCA components requested later. Confirm.
    # "RGB + HSV": RGB_HSV_features,
    "HOG Multi-Scale (RGB)": hog_RGB_features,
    "HOG Multi-Scale (Gray+HSV)": hog_gray_HSV_features,
    "RGB + HSV + ResNet": RGB_HSV_ResNet_features,
    "RGB + HSV + ResNet + HOG (Gray)": RGB_HSV_ResNet_GrayHOG_features,
}

In [49]:
def get_PCA(X_list, n_components=2):
    """Fit a separate PCA to each feature matrix and project that matrix.

    Parameters
    ----------
    X_list : iterable of array-like
        Feature matrices; each one is passed to ``PCA.fit`` and
        ``PCA.transform``.
    n_components : int, float, str or None, default 2
        Forwarded to ``sklearn.decomposition.PCA``. An integer larger than
        ``min(X.shape)`` is lowered to that limit for the affected matrix
        only, so narrow and wide feature sets can share one call instead of
        the narrow one raising ``ValueError``. Non-integer values are
        passed through untouched.

    Returns
    -------
    pca_list : list
        The fitted ``PCA`` object for each matrix, in input order.
    xpca_list : list
        The projection of each matrix onto its own components, in input order.
    """
    pca_list = []
    xpca_list = []
    for X in X_list:
        k = n_components
        # Clamp integer requests only; bool is excluded because it is an int subclass.
        if isinstance(k, (int, np.integer)) and not isinstance(k, bool):
            k = min(int(k), *np.shape(X))
        pca = PCA(n_components=k).fit(X)
        X_pca = pca.transform(X)
        pca_list.append(pca)
        xpca_list.append(X_pca)
    return pca_list, xpca_list

def plot_PCA(X_list, labels, n_components=2):
    """Plot cumulative explained variance against the number of PCA components.

    One curve is drawn per feature matrix, each from a PCA fitted to that
    matrix alone via ``get_PCA``.

    Parameters
    ----------
    X_list : sequence of array-like
        Feature matrices to analyse.
    labels : sequence of str
        Legend label for each matrix; ``labels[i]`` describes ``X_list[i]``.
    n_components : int, float, str or None, default 2
        Forwarded to ``get_PCA``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``; nothing is returned.
    """
    # Only the fitted models are needed; the projected data is not plotted.
    pca_list, _ = get_PCA(X_list, n_components=n_components)

    fig, ax = plt.subplots(figsize=(15, 5))
    for i, pca in enumerate(pca_list):
        cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
        # The k-th cumulative value is the variance explained by the first k
        # components, so the x-axis starts at 1, not 0.
        component_counts = np.arange(1, cumulative_variance.size + 1)
        ax.plot(component_counts, cumulative_variance, label=labels[i])

    ax.set_yticks(np.linspace(0, 1, 8))
    ax.set_xlabel('Number of components')
    ax.set_ylabel('Explained Variances')
    # Anchor the legend outside the axes so it does not cover the curves.
    ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
    # To keep a copy of the figure, call fig.savefig(<path relative to the
    # project>) here, before plt.show().
    plt.show()

In [50]:
# Split the combo dict into two parallel lists. Positions line up because
# the matrices are looked up in the same order as the names.
new_feature_names = [combo_name for combo_name in new_feature_combos_dict]
new_feature_list = [new_feature_combos_dict[combo_name] for combo_name in new_feature_names]

In [51]:
# plot_PCA has no return statement, so x is bound to None; the call is made for the figure it draws.
x = plot_PCA(X_list=new_feature_list, labels=new_feature_names, n_components=500)