# Hyperparameters Evaluation for Random Forest Classifier

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
import warnings
warnings.filterwarnings('ignore')

## Helper Functions

The standard <code>read_data()</code> function seen throughout the project. designed to simplify and standardize the data reading process. <br/>In addition the _Global_ variables associated with its function are defined along with the relitive paths to the data files.<br/>__NOTE:__ in a business environment this function would be placed into a pythin module and imported. Here it is included into the notebook to provide full transparency. 

In [2]:
# required modules
import numpy as np
import pandas as pd
import sqlite3
from sklearn import preprocessing
import colorsys

# groups
COLOURS      = 10
PALETTES     = 11
LRG_PALETTES = 12
IMAGE        = 13
TILES        = 14

# sets
TRAIN        = 20
TEST         = 21
VALIDATE     = 22

# targets
ARTIST       = 30
GENRE        = 31
STYLE        = 32

# array
DATA         = 40
CATAGORY     = 41
CATAGORY_NUM = 42
IMAGE_TAG    = 43
TILE_INDEX   = 44

# colour systems
HLS          = 50
RGB          = 51
RAW          = 52

# an overly complicated method of defining the data file path
data_file  = {COLOURS      : {TRAIN    : {ARTIST : "./data/colour_sets/artist_train_colours.npz",
                                          GENRE  : "./data/colour_sets/genre_train_colours.npz",
                                          STYLE  : "./data/colour_sets/style_train_colours.npz"},
                              TEST     : {ARTIST : "./data/colour_sets/artist_test_colours.npz",
                                          GENRE  : "./data/colour_sets/genre_test_colours.npz",
                                          STYLE  : "./data/colour_sets/style_test_colours.npz"} ,
                              VALIDATE : {ARTIST : "./data/colour_sets/artist_validation_colours.npz",
                                          GENRE  : "./data/colour_sets/genre_validation_colours.npz",
                                          STYLE  : "./data/colour_sets/style_validation_colours.npz"}},
              PALETTES     : {TRAIN    : {ARTIST : "./data/palette_sets/artist_train_palettes.npz",
                                          GENRE  : "./data/palette_sets/genre_train_palettes.npz",
                                          STYLE  : "./data/palette_sets/style_train_palettes.npz"} ,
                              TEST     : {ARTIST : "./data/palette_sets/artist_test_palettes.npz",
                                          GENRE  : "./data/palette_sets/genre_test_palettes.npz",
                                          STYLE  : "./data/palette_sets/style_test_palettes.npz"} ,
                              VALIDATE : {ARTIST : "./data/palette_sets/artist_validation_palettes.npz",
                                          GENRE  : "./data/palette_sets/genre_validation_palettes.npz",
                                          STYLE  : "./data/palette_sets/style_validation_palettes.npz"}},
              LRG_PALETTES : {TRAIN    : {ARTIST : "./data/large_palette_sets/artist_train_palettes.npz",
                                          GENRE  : "./data/large_palette_sets/genre_train_palettes.npz",
                                          STYLE  : "./data/large_palette_sets/style_train_palettes.npz"} ,
                              TEST     : {ARTIST : "./data/large_palette_sets/artist_test_palettes.npz",
                                          GENRE  : "./data/large_palette_sets/genre_test_palettes.npz",
                                          STYLE  : "./data/large_palette_sets/style_test_palettes.npz"} ,
                              VALIDATE : {ARTIST : "./data/large_palette_sets/artist_validation_palettes.npz",
                                          GENRE  : "./data/large_palette_sets/genre_validation_palettes.npz",
                                          STYLE  : "./data/large_palette_sets/style_validation_palettes.npz"}},
              IMAGE        : {TRAIN    : {ARTIST : "./data/full_image_feature_sets/artist_train_features.npz",
                                          GENRE  : "./data/full_image_feature_sets/genre_train_features.npz",
                                          STYLE  : "./data/full_image_feature_sets/style_train_features.npz"} ,
                              TEST     : {ARTIST : "./data/full_image_feature_sets/artist_test_features.npz",
                                          GENRE  : "./data/full_image_feature_sets/genre_test_features.npz",
                                          STYLE  : "./data/full_image_feature_sets/style_test_features.npz"} ,
                              VALIDATE : {ARTIST : "./data/full_image_feature_sets/artist_validation_features.npz",
                                          GENRE  : "./data/full_image_feature_sets/genre_validation_features.npz",
                                          STYLE  : "./data/full_image_feature_sets/style_validation_features.npz"}},
              TILES        : {TRAIN    : {ARTIST : "./data/feature_sets/artist_train_features.npz",
                                          GENRE  : "./data/feature_sets/genre_train_features.npz",
                                          STYLE  : "./data/feature_sets/style_train_features.npz"} ,
                              TEST     : {ARTIST : "./data/feature_sets/artist_test_features.npz",
                                          GENRE  : "./data/feature_sets/genre_test_features.npz",
                                          STYLE  : "./data/feature_sets/style_test_features.npz"} ,
                              VALIDATE : {ARTIST : "./data/feature_sets/artist_validation_features.npz",
                                          GENRE  : "./data/feature_sets/genre_validation_features.npz",
                                          STYLE  : "./data/feature_sets/style_validation_features.npz"}}}

# Create a DB connection between python and the file system
conn = sqlite3.connect("./data/database/artist.db")

# the fuction that provides a standard data read method
def read_data(data_group, data_set, data_target, data_type, colour_system = RGB):
    
    # read the data file
    data = np.load(data_file[data_group][data_set][data_target], allow_pickle=True)

    # if tile index is requested
    if   data_type    == TILE_INDEX:
        # when group is tiles, return the tile indexes
        if data_group == TILES:
            return pd.DataFrame(data["arr_3"], columns = {"tile_index"})
        # otherwise, just return an emtpy dataset
        else:
            return pd.DataFrame(columns = {"tile_index"})
    # if image tags are requested
    elif data_type == IMAGE_TAG:
        return pd.DataFrame(data["arr_2"], columns = {"image_tag"})
    # if catagory data is requested
    elif data_type == CATAGORY:
        return pd.DataFrame(data["arr_1"], columns = {"catagory"})
    # if catagory_num is requested
    elif data_type == CATAGORY_NUM:
        # select data for the appropriate lookup table
        if   data_target == ARTIST: 
            query_string = "SELECT ID, NAME  AS VALUE FROM ARTIST"
        elif data_target == GENRE:
            query_string = "SELECT ID, GENRE AS VALUE FROM GENRE"
        else:
            query_string = "SELECT ID, STYLE AS VALUE FROM STYLE" 
        # build the lookup table
        lookup = pd.read_sql_query(query_string, conn)
        # get the catagory data
        data   = pd.DataFrame(data["arr_1"], columns = {"catagory"})
        # convert the catagory text labels to numeric catagory ids
        return pd.DataFrame(data["catagory"].apply(lambda x: lookup[lookup["VALUE"] == x]["id"].values[0]))
    # data requested
    elif data_group == COLOURS:
        # reshape the data and normalize
        return pd.DataFrame(preprocessing.normalize(data["arr_0"].reshape(data["arr_0"].shape[0],data["arr_0"].shape[1] * data["arr_0"].shape[2])))
    elif data_group == PALETTES or data_group == LRG_PALETTES:

        # extract the colour data
        colour_data = data["arr_0"]
        
        # find the datas dimensions
        records, _, palette_length, palette_layers = np.shape(colour_data)
        
        # reshape
        colour_data = colour_data.reshape(records * palette_length, palette_layers)
        
        # convert to a dataframe
        colour_data_df = pd.DataFrame(colour_data, columns =["r","g","b"])
        
        # process the data with different colour systems
        
        # hue, luminance, saturation
        if colour_system == HLS:
            
            # define a dataframe to hold the hls values
            hls_data_df = pd.DataFrame(columns =["h","l","s"])

            # loop through each record
            for row in colour_data_df.iterrows():
                
                # extract the r,g,b values and devide them by 255 ready to convert
                r, g, b = row[1].values/255

                # convert the values and add them to the dataframe
                hls_data_df.loc[len(hls_data_df)] = colorsys.rgb_to_hls(r, g, b)
            
            # create datasets and normalize
            hue        = pd.DataFrame(preprocessing.normalize(np.array(hls_data_df["h"]).reshape(records, palette_length)))
            luminance  = pd.DataFrame(preprocessing.normalize(np.array(hls_data_df["l"]).reshape(records, palette_length)))
            saturation = pd.DataFrame(preprocessing.normalize(np.array(hls_data_df["s"]).reshape(records, palette_length)))  
            
            # return values
            return hue, luminance, saturation
            
        # red, green, blue unnormalized
        elif colour_system == RAW:
            red   = pd.DataFrame(np.array(colour_data_df["r"]).reshape(records, palette_length))
            green = pd.DataFrame(np.array(colour_data_df["g"]).reshape(records, palette_length))
            blue  = pd.DataFrame(np.array(colour_data_df["b"]).reshape(records, palette_length))
            
            # return values
            return red, green, blue            
            
        # red, green, blue normalized- (default)
        else:
    
            # create datasets and normalize
            red   = pd.DataFrame(preprocessing.normalize(np.array(colour_data_df["r"]).reshape(records, palette_length)))
            green = pd.DataFrame(preprocessing.normalize(np.array(colour_data_df["g"]).reshape(records, palette_length)))
            blue  = pd.DataFrame(preprocessing.normalize(np.array(colour_data_df["b"]).reshape(records, palette_length)))
            
            # return values
            return red, green, blue

    elif data_group == IMAGE:
        # reshape the data and normalize
        return pd.DataFrame(preprocessing.normalize(data["arr_0"].reshape(data["arr_0"].shape[0],data["arr_0"].shape[2])))
    else:
        # normalize the data
        return pd.DataFrame(preprocessing.normalize(data["arr_0"])) 

The <code>get_colours()</code> function provides a list of colours the length of the value <code>no_of_colours</code>. This can be useful for consisten colours when used across multiple plots. 

In [4]:
# required modules
import random

# define colours used 
def get_colours(no_of_colours):
    
    # consistent randomness
    random.seed(42)
    
    # colour list
    colours = []

    # generate colours
    for i in range(no_of_colours):
        colours.append('#%06X' % random.randint(0, 0xFFFFFF))
    
    # return the colour list
    return colours

Function <code>find_best_hyperparameters()</code> uses a _Grid Search_ to loop thought the hyperparameter values. All combinations are displayed with their score. The combination that achieves the best score is highlighted and returned. 

In [4]:
def find_best_hyperparameters(X_train, y_train, group, target, results):

    # define models and parameters
    model = RandomForestClassifier()

    # define grid search
    grid        = dict(n_estimators                    = [100, 1000, 2000],
                       max_features                    = ["sqrt", "log2"],
                       max_depth                       = [10, 25, None])
    
    cv          = RepeatedStratifiedKFold(n_splits     = 5, 
                                          n_repeats    = 2, 
                                          random_state = 42)
    
    grid_search = GridSearchCV(estimator               = model, 
                               param_grid              = grid, 
                               n_jobs                  = -1, 
                               cv                      = cv, 
                               scoring                 ='accuracy',
                               error_score             = 0)
    
    grid_result = grid_search.fit(X_train, y_train)
    
    # results
    results = results.append({"classifier" : "RandomForest",
                              "group"      : group,
                              "target"     : target,
                              "score"      : round(grid_result.best_score_, 6),
                              "params"     : grid_result.best_params_},
                              ignore_index = True)
    
    print("-" * 90, "\nBest score: {} using parametrs: {}".format(round(grid_result.best_score_, 6), grid_result.best_params_))
    print("-" * 90)

    means  = grid_result.cv_results_["mean_test_score"]
    stds   = grid_result.cv_results_["std_test_score" ]
    params = grid_result.cv_results_["params"         ]
    
    for mean, stdev, param in zip(means, stds, params):
        print("Mean: {} StdDev: {} using: {}".format(round(mean, 6), round(stdev, 6), param))
        
    print("-" * 90, "\n")
    
    
    return results

## Apply Various Hyperparameters to the Different Data Groups and Target Catagories to Find Those That Perform Best
As the best results are returned. they are added to the <code>results</code> DataFrame.

In [5]:
# dataframe to hold the best results
results = pd.DataFrame()

In [6]:
results = find_best_hyperparameters(read_data(COLOURS,  TRAIN, ARTIST, DATA), read_data(COLOURS, TRAIN, ARTIST, CATAGORY), "COLOURS", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.596802 using parametrs: {'max_depth': None, 'max_features': 'sqrt', 'n_estimators': 2000}
------------------------------------------------------------------------------------------
Mean: 0.578436 StdDev: 0.01615 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.577571 StdDev: 0.013882 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.574978 StdDev: 0.014352 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.561582 StdDev: 0.018069 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.566335 StdDev: 0.015719 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.565903 StdDev: 0.012506 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.588159 StdDev: 0.015074 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 1

In [7]:
results = find_best_hyperparameters(read_data(COLOURS,  TRAIN, GENRE,  DATA), read_data(COLOURS, TRAIN, GENRE,  CATAGORY), "COLOURS", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.353068 using parametrs: {'max_depth': None, 'max_features': 'sqrt', 'n_estimators': 1000}
------------------------------------------------------------------------------------------
Mean: 0.33535 StdDev: 0.018848 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.339672 StdDev: 0.015559 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.34032 StdDev: 0.012866 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.325843 StdDev: 0.020757 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.333838 StdDev: 0.018135 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.334702 StdDev: 0.019976 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.345722 StdDev: 0.016174 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 10

In [8]:
results = find_best_hyperparameters(read_data(COLOURS,  TRAIN, STYLE,  DATA), read_data(COLOURS, TRAIN, STYLE, CATAGORY), "COLOURS", "STYLE",  results)

------------------------------------------------------------------------------------------ 
Best score: 0.347217 using parametrs: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 100}
------------------------------------------------------------------------------------------
Mean: 0.334165 StdDev: 0.013965 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.336276 StdDev: 0.011428 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.338004 StdDev: 0.010961 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.333205 StdDev: 0.017192 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.334549 StdDev: 0.01444 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.332246 StdDev: 0.012971 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.347217 StdDev: 0.008565 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 100}

In [9]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, ARTIST, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, ARTIST, DATA, RGB), (r, l * f)), read_data(PALETTES, TRAIN, ARTIST, CATAGORY), "PALETTES-RGB", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.418755 using parametrs: {'max_depth': None, 'max_features': 'sqrt', 'n_estimators': 1000}
------------------------------------------------------------------------------------------
Mean: 0.402118 StdDev: 0.007859 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.404062 StdDev: 0.007263 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.405575 StdDev: 0.006629 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.40471 StdDev: 0.014165 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.402766 StdDev: 0.005774 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.404062 StdDev: 0.006979 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.411193 StdDev: 0.01102 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 10

In [10]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, GENRE, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, GENRE, DATA, RGB), (r, l * f)), read_data(PALETTES, TRAIN, GENRE, CATAGORY), "PALETTES-RGB", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.214564 using parametrs: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
------------------------------------------------------------------------------------------
Mean: 0.213051 StdDev: 0.014448 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.213267 StdDev: 0.010261 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.214564 StdDev: 0.009225 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.213915 StdDev: 0.011209 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.212187 StdDev: 0.004731 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.211755 StdDev: 0.009982 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.204408 StdDev: 0.013864 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 10

In [11]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, STYLE, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, STYLE, DATA, RGB), (r, l * f)), read_data(PALETTES, TRAIN, STYLE, CATAGORY), "PALETTES-RGB", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.233973 using parametrs: {'max_depth': None, 'max_features': 'sqrt', 'n_estimators': 2000}
------------------------------------------------------------------------------------------
Mean: 0.225144 StdDev: 0.012572 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.231286 StdDev: 0.014458 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.228599 StdDev: 0.014962 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.226871 StdDev: 0.016865 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.230326 StdDev: 0.010713 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.231862 StdDev: 0.014363 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.216507 StdDev: 0.015823 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 

In [12]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, ARTIST, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, ARTIST, DATA, HLS), (r, l * f)), read_data(PALETTES, TRAIN, ARTIST, CATAGORY), "PALETTES-HLS", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.458513 using parametrs: {'max_depth': None, 'max_features': 'log2', 'n_estimators': 1000}
------------------------------------------------------------------------------------------
Mean: 0.446413 StdDev: 0.017561 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.450086 StdDev: 0.012607 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.452679 StdDev: 0.013726 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.444252 StdDev: 0.014939 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.449438 StdDev: 0.015304 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.449654 StdDev: 0.013334 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.446413 StdDev: 0.014953 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 

In [13]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, GENRE, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, GENRE, DATA, HLS), (r, l * f)), read_data(PALETTES, TRAIN, GENRE, CATAGORY), "PALETTES-HLS", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.219965 using parametrs: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
------------------------------------------------------------------------------------------
Mean: 0.216292 StdDev: 0.008916 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.216292 StdDev: 0.010742 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.215212 StdDev: 0.006199 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.219965 StdDev: 0.014711 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.21694 StdDev: 0.007127 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.21586 StdDev: 0.009171 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.20981 StdDev: 0.015223 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 100}
M

In [14]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, STYLE, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, STYLE, DATA, HLS), (r, l * f)), read_data(PALETTES, TRAIN, STYLE, CATAGORY), "PALETTES-HLS", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.274664 using parametrs: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 2000}
------------------------------------------------------------------------------------------
Mean: 0.260269 StdDev: 0.010966 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.26929 StdDev: 0.011448 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.268138 StdDev: 0.012331 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.261228 StdDev: 0.015078 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.264683 StdDev: 0.013275 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.264491 StdDev: 0.011417 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.264875 StdDev: 0.012461 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 100

In [15]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, ARTIST, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, ARTIST, DATA, RGB), (r, l * f)), read_data(LRG_PALETTES, TRAIN, ARTIST, CATAGORY), "LRG-PALETTES-RGB", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.437122 using parametrs: {'max_depth': None, 'max_features': 'sqrt', 'n_estimators': 2000}
------------------------------------------------------------------------------------------
Mean: 0.423725 StdDev: 0.008649 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.427182 StdDev: 0.007946 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.423509 StdDev: 0.010572 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.426102 StdDev: 0.007596 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.423077 StdDev: 0.008068 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.421132 StdDev: 0.008756 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.430424 StdDev: 0.011961 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 

In [16]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, GENRE, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, GENRE, DATA, RGB), (r, l * f)), read_data(LRG_PALETTES, TRAIN, GENRE, CATAGORY), "LRG-PALETTES-RGB", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.224287 using parametrs: {'max_depth': None, 'max_features': 'log2', 'n_estimators': 1000}
------------------------------------------------------------------------------------------
Mean: 0.21478 StdDev: 0.01142 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.21586 StdDev: 0.00776 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.21089 StdDev: 0.016565 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.21478 StdDev: 0.017547 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.210026 StdDev: 0.010588 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.213699 StdDev: 0.016611 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.214347 StdDev: 0.015549 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 100}
M

In [17]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, STYLE, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, STYLE, DATA, RGB), (r, l * f)), read_data(LRG_PALETTES, TRAIN, STYLE, CATAGORY), "LRG-PALETTES-RGB", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.26334 using parametrs: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 1000}
------------------------------------------------------------------------------------------
Mean: 0.244338 StdDev: 0.017713 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.256238 StdDev: 0.013305 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.250864 StdDev: 0.012944 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.24357 StdDev: 0.010664 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.253551 StdDev: 0.016344 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.250672 StdDev: 0.013797 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.24357 StdDev: 0.014967 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 100}


In [18]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, ARTIST, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, ARTIST, DATA, HLS), (r, l * f)), read_data(LRG_PALETTES, TRAIN, ARTIST, CATAGORY), "LRG-PALETTES-HLS", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.473855 using parametrs: {'max_depth': None, 'max_features': 'log2', 'n_estimators': 1000}
------------------------------------------------------------------------------------------
Mean: 0.466724 StdDev: 0.013023 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.471046 StdDev: 0.01062 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.467373 StdDev: 0.006547 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.462835 StdDev: 0.010096 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.46694 StdDev: 0.009672 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.466724 StdDev: 0.009168 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.468237 StdDev: 0.013914 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 10

In [19]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, GENRE, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, GENRE, DATA, HLS), (r, l * f)), read_data(LRG_PALETTES, TRAIN, GENRE, CATAGORY), "LRG-PALETTES-HLS", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.22796 using parametrs: {'max_depth': None, 'max_features': 'log2', 'n_estimators': 1000}
------------------------------------------------------------------------------------------
Mean: 0.219317 StdDev: 0.009151 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.217589 StdDev: 0.013415 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.221046 StdDev: 0.011966 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.218237 StdDev: 0.019526 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.22299 StdDev: 0.010706 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.217156 StdDev: 0.008413 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.219965 StdDev: 0.00963 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 100

In [20]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, STYLE, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, STYLE, DATA, HLS), (r, l * f)), read_data(LRG_PALETTES, TRAIN, STYLE, CATAGORY), "LRG-PALETTES-HLS", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.282342 using parametrs: {'max_depth': None, 'max_features': 'sqrt', 'n_estimators': 1000}
------------------------------------------------------------------------------------------
Mean: 0.26833 StdDev: 0.014244 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.27428 StdDev: 0.012242 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.275816 StdDev: 0.013909 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.268522 StdDev: 0.010797 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.273704 StdDev: 0.011592 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.270825 StdDev: 0.010211 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.268522 StdDev: 0.017052 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 10

In [21]:
results = find_best_hyperparameters(read_data(IMAGE, TRAIN, ARTIST, DATA), read_data(IMAGE, TRAIN, ARTIST, CATAGORY), "IMAGE", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.61452 using parametrs: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 100}
------------------------------------------------------------------------------------------
Mean: 0.582541 StdDev: 0.014719 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.582973 StdDev: 0.012965 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.578436 StdDev: 0.011218 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.529602 StdDev: 0.013273 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.536949 StdDev: 0.01052 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.53522 StdDev: 0.009657 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.61452 StdDev: 0.009421 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 100}
Me

In [22]:
results = find_best_hyperparameters(read_data(IMAGE, TRAIN, GENRE, DATA), read_data(IMAGE, TRAIN, GENRE, CATAGORY), "IMAGE", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.478176 using parametrs: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 2000}
------------------------------------------------------------------------------------------
Mean: 0.450303 StdDev: 0.016067 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.461322 StdDev: 0.019038 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.459378 StdDev: 0.01694 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.427615 StdDev: 0.015114 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.434529 StdDev: 0.013399 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.434529 StdDev: 0.016539 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.466076 StdDev: 0.018754 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 100

In [23]:
results = find_best_hyperparameters(read_data(IMAGE, TRAIN, STYLE, DATA), read_data(IMAGE, TRAIN, STYLE, CATAGORY), "IMAGE", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.438964 using parametrs: {'max_depth': None, 'max_features': 'sqrt', 'n_estimators': 2000}
------------------------------------------------------------------------------------------
Mean: 0.406718 StdDev: 0.014982 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 100}
Mean: 0.422265 StdDev: 0.014055 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 1000}
Mean: 0.423033 StdDev: 0.015392 using: {'max_depth': 10, 'max_features': 'sqrt', 'n_estimators': 2000}
Mean: 0.390211 StdDev: 0.013577 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 100}
Mean: 0.406142 StdDev: 0.014922 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 1000}
Mean: 0.404223 StdDev: 0.01045 using: {'max_depth': 10, 'max_features': 'log2', 'n_estimators': 2000}
Mean: 0.421305 StdDev: 0.014524 using: {'max_depth': 25, 'max_features': 'sqrt', 'n_estimators': 1

## View the table of best results.
With all the tests completed we can now view the combined best results.

In [24]:
results

Unnamed: 0,classifier,group,params,score,target
0,RandomForest,COLOURS,"{'max_depth': None, 'max_features': 'sqrt', 'n...",0.596802,ARTIST
1,RandomForest,COLOURS,"{'max_depth': None, 'max_features': 'sqrt', 'n...",0.353068,GENRE
2,RandomForest,COLOURS,"{'max_depth': 25, 'max_features': 'sqrt', 'n_e...",0.347217,STYLE
3,RandomForest,PALETTES-RGB,"{'max_depth': None, 'max_features': 'sqrt', 'n...",0.418755,ARTIST
4,RandomForest,PALETTES-RGB,"{'max_depth': 10, 'max_features': 'sqrt', 'n_e...",0.214564,GENRE
5,RandomForest,PALETTES-RGB,"{'max_depth': None, 'max_features': 'sqrt', 'n...",0.233973,STYLE
6,RandomForest,PALETTES-HLS,"{'max_depth': None, 'max_features': 'log2', 'n...",0.458513,ARTIST
7,RandomForest,PALETTES-HLS,"{'max_depth': 10, 'max_features': 'log2', 'n_e...",0.219965,GENRE
8,RandomForest,PALETTES-HLS,"{'max_depth': 25, 'max_features': 'sqrt', 'n_e...",0.274664,STYLE
9,RandomForest,LRG-PALETTES-RGB,"{'max_depth': None, 'max_features': 'sqrt', 'n...",0.437122,ARTIST


## Write the Results to a File
These are saved into a _.csv_ file to be used later in this section of the project.

In [25]:
results.to_csv("./data/hyperparameter_results/RandomForestClassifier.csv", index = False)