# Hyperparameters for Support Vector Classifier

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
import warnings
warnings.filterwarnings('ignore')

## Helper Functions

The standard <code>read_data()</code> function seen throughout the project. designed to simplify and standardize the data reading process. <br/>In addition the _Global_ variables associated with its function are defined along with the relitive paths to the data files.<br/>__NOTE:__ in a business environment this function would be placed into a pythin module and imported. Here it is included into the notebook to provide full transparency. 

In [2]:
# required modules
import numpy as np
import pandas as pd
import sqlite3
from sklearn import preprocessing
import colorsys

# groups
COLOURS      = 10
PALETTES     = 11
LRG_PALETTES = 12
IMAGE        = 13
TILES        = 14

# sets
TRAIN        = 20
TEST         = 21
VALIDATE     = 22

# targets
ARTIST       = 30
GENRE        = 31
STYLE        = 32

# array
DATA         = 40
CATAGORY     = 41
CATAGORY_NUM = 42
IMAGE_TAG    = 43
TILE_INDEX   = 44

# colour systems
HLS          = 50
RGB          = 51
RAW          = 52

# an overly complicated method of defining the data file path
data_file  = {COLOURS      : {TRAIN    : {ARTIST : "./data/colour_sets/artist_train_colours.npz",
                                          GENRE  : "./data/colour_sets/genre_train_colours.npz",
                                          STYLE  : "./data/colour_sets/style_train_colours.npz"},
                              TEST     : {ARTIST : "./data/colour_sets/artist_test_colours.npz",
                                          GENRE  : "./data/colour_sets/genre_test_colours.npz",
                                          STYLE  : "./data/colour_sets/style_test_colours.npz"} ,
                              VALIDATE : {ARTIST : "./data/colour_sets/artist_validation_colours.npz",
                                          GENRE  : "./data/colour_sets/genre_validation_colours.npz",
                                          STYLE  : "./data/colour_sets/style_validation_colours.npz"}},
              PALETTES     : {TRAIN    : {ARTIST : "./data/palette_sets/artist_train_palettes.npz",
                                          GENRE  : "./data/palette_sets/genre_train_palettes.npz",
                                          STYLE  : "./data/palette_sets/style_train_palettes.npz"} ,
                              TEST     : {ARTIST : "./data/palette_sets/artist_test_palettes.npz",
                                          GENRE  : "./data/palette_sets/genre_test_palettes.npz",
                                          STYLE  : "./data/palette_sets/style_test_palettes.npz"} ,
                              VALIDATE : {ARTIST : "./data/palette_sets/artist_validation_palettes.npz",
                                          GENRE  : "./data/palette_sets/genre_validation_palettes.npz",
                                          STYLE  : "./data/palette_sets/style_validation_palettes.npz"}},
              LRG_PALETTES : {TRAIN    : {ARTIST : "./data/large_palette_sets/artist_train_palettes.npz",
                                          GENRE  : "./data/large_palette_sets/genre_train_palettes.npz",
                                          STYLE  : "./data/large_palette_sets/style_train_palettes.npz"} ,
                              TEST     : {ARTIST : "./data/large_palette_sets/artist_test_palettes.npz",
                                          GENRE  : "./data/large_palette_sets/genre_test_palettes.npz",
                                          STYLE  : "./data/large_palette_sets/style_test_palettes.npz"} ,
                              VALIDATE : {ARTIST : "./data/large_palette_sets/artist_validation_palettes.npz",
                                          GENRE  : "./data/large_palette_sets/genre_validation_palettes.npz",
                                          STYLE  : "./data/large_palette_sets/style_validation_palettes.npz"}},
              IMAGE        : {TRAIN    : {ARTIST : "./data/full_image_feature_sets/artist_train_features.npz",
                                          GENRE  : "./data/full_image_feature_sets/genre_train_features.npz",
                                          STYLE  : "./data/full_image_feature_sets/style_train_features.npz"} ,
                              TEST     : {ARTIST : "./data/full_image_feature_sets/artist_test_features.npz",
                                          GENRE  : "./data/full_image_feature_sets/genre_test_features.npz",
                                          STYLE  : "./data/full_image_feature_sets/style_test_features.npz"} ,
                              VALIDATE : {ARTIST : "./data/full_image_feature_sets/artist_validation_features.npz",
                                          GENRE  : "./data/full_image_feature_sets/genre_validation_features.npz",
                                          STYLE  : "./data/full_image_feature_sets/style_validation_features.npz"}},
              TILES        : {TRAIN    : {ARTIST : "./data/feature_sets/artist_train_features.npz",
                                          GENRE  : "./data/feature_sets/genre_train_features.npz",
                                          STYLE  : "./data/feature_sets/style_train_features.npz"} ,
                              TEST     : {ARTIST : "./data/feature_sets/artist_test_features.npz",
                                          GENRE  : "./data/feature_sets/genre_test_features.npz",
                                          STYLE  : "./data/feature_sets/style_test_features.npz"} ,
                              VALIDATE : {ARTIST : "./data/feature_sets/artist_validation_features.npz",
                                          GENRE  : "./data/feature_sets/genre_validation_features.npz",
                                          STYLE  : "./data/feature_sets/style_validation_features.npz"}}}

# Create a DB connection between python and the file system
conn = sqlite3.connect("./data/database/artist.db")

# the fuction that provides a standard data read method
def read_data(data_group, data_set, data_target, data_type, colour_system = RGB):
    
    # read the data file
    data = np.load(data_file[data_group][data_set][data_target], allow_pickle=True)

    # if tile index is requested
    if   data_type    == TILE_INDEX:
        # when group is tiles, return the tile indexes
        if data_group == TILES:
            return pd.DataFrame(data["arr_3"], columns = {"tile_index"})
        # otherwise, just return an emtpy dataset
        else:
            return pd.DataFrame(columns = {"tile_index"})
    # if image tags are requested
    elif data_type == IMAGE_TAG:
        return pd.DataFrame(data["arr_2"], columns = {"image_tag"})
    # if catagory data is requested
    elif data_type == CATAGORY:
        return pd.DataFrame(data["arr_1"], columns = {"catagory"})
    # if catagory_num is requested
    elif data_type == CATAGORY_NUM:
        # select data for the appropriate lookup table
        if   data_target == ARTIST: 
            query_string = "SELECT ID, NAME  AS VALUE FROM ARTIST"
        elif data_target == GENRE:
            query_string = "SELECT ID, GENRE AS VALUE FROM GENRE"
        else:
            query_string = "SELECT ID, STYLE AS VALUE FROM STYLE" 
        # build the lookup table
        lookup = pd.read_sql_query(query_string, conn)
        # get the catagory data
        data   = pd.DataFrame(data["arr_1"], columns = {"catagory"})
        # convert the catagory text labels to numeric catagory ids
        return pd.DataFrame(data["catagory"].apply(lambda x: lookup[lookup["VALUE"] == x]["id"].values[0]))
    # data requested
    elif data_group == COLOURS:
        # reshape the data and normalize
        return pd.DataFrame(preprocessing.normalize(data["arr_0"].reshape(data["arr_0"].shape[0],data["arr_0"].shape[1] * data["arr_0"].shape[2])))
    elif data_group == PALETTES or data_group == LRG_PALETTES:

        # extract the colour data
        colour_data = data["arr_0"]
        
        # find the datas dimensions
        records, _, palette_length, palette_layers = np.shape(colour_data)
        
        # reshape
        colour_data = colour_data.reshape(records * palette_length, palette_layers)
        
        # convert to a dataframe
        colour_data_df = pd.DataFrame(colour_data, columns =["r","g","b"])
        
        # process the data with different colour systems
        
        # hue, luminance, saturation
        if colour_system == HLS:
            
            # define a dataframe to hold the hls values
            hls_data_df = pd.DataFrame(columns =["h","l","s"])

            # loop through each record
            for row in colour_data_df.iterrows():
                
                # extract the r,g,b values and devide them by 255 ready to convert
                r, g, b = row[1].values/255

                # convert the values and add them to the dataframe
                hls_data_df.loc[len(hls_data_df)] = colorsys.rgb_to_hls(r, g, b)
            
            # create datasets and normalize
            hue        = pd.DataFrame(preprocessing.normalize(np.array(hls_data_df["h"]).reshape(records, palette_length)))
            luminance  = pd.DataFrame(preprocessing.normalize(np.array(hls_data_df["l"]).reshape(records, palette_length)))
            saturation = pd.DataFrame(preprocessing.normalize(np.array(hls_data_df["s"]).reshape(records, palette_length)))  
            
            # return values
            return hue, luminance, saturation
            
        # red, green, blue unnormalized
        elif colour_system == RAW:
            red   = pd.DataFrame(np.array(colour_data_df["r"]).reshape(records, palette_length))
            green = pd.DataFrame(np.array(colour_data_df["g"]).reshape(records, palette_length))
            blue  = pd.DataFrame(np.array(colour_data_df["b"]).reshape(records, palette_length))
            
            # return values
            return red, green, blue            
            
        # red, green, blue normalized- (default)
        else:
    
            # create datasets and normalize
            red   = pd.DataFrame(preprocessing.normalize(np.array(colour_data_df["r"]).reshape(records, palette_length)))
            green = pd.DataFrame(preprocessing.normalize(np.array(colour_data_df["g"]).reshape(records, palette_length)))
            blue  = pd.DataFrame(preprocessing.normalize(np.array(colour_data_df["b"]).reshape(records, palette_length)))
            
            # return values
            return red, green, blue

    elif data_group == IMAGE:
        # reshape the data and normalize
        return pd.DataFrame(preprocessing.normalize(data["arr_0"].reshape(data["arr_0"].shape[0],data["arr_0"].shape[2])))
    else:
        # normalize the data
        return pd.DataFrame(preprocessing.normalize(data["arr_0"])) 

The <code>get_colours()</code> function provides a list of colours the length of the value <code>no_of_colours</code>. This can be useful for consisten colours when used across multiple plots. 

In [4]:
# required modules
import random

# define colours used 
def get_colours(no_of_colours):
    
    # consistent randomness
    random.seed(42)
    
    # colour list
    colours = []

    # generate colours
    for i in range(no_of_colours):
        colours.append('#%06X' % random.randint(0, 0xFFFFFF))
    
    # return the colour list
    return colours

Generate sorted lists of _Artists, Genres_ & _Styles_. These are used in a variety of situations. These functions ensure that the lists are always defined in the same order.

In [5]:
# define data lists
artists = list(set(read_data(COLOURS, TRAIN, ARTIST, CATAGORY)["catagory"]))
styles  = list(set(read_data(COLOURS, TRAIN, STYLE,  CATAGORY)["catagory"]))
genres  = list(set(read_data(COLOURS, TRAIN, GENRE,  CATAGORY)["catagory"]))

# sort the lists
artists.sort()
styles.sort()
genres.sort()

Function <code>find_best_hyperparameters()</code> uses a _Grid Search_ to loop thought the hyperparameter values. All combinations are displayed with their score. The combination that achieves the best score is highlighted and returned. 

In [4]:
def find_best_hyperparameters(X_train, y_train, group, target, results):

    # define models and parameters
    model = SVC()

    # define grid search
    grid        = dict(kernel = ["poly", "rbf", "sigmoid"],
                       C      = [0.1, 1.0, 10.0, 100.0],
                       gamma  = ["scale", "auto"])
    
    cv          = RepeatedStratifiedKFold(n_splits     = 5, 
                                          n_repeats    = 2, 
                                          random_state = 42)
    
    grid_search = GridSearchCV(estimator   = model,
                               param_grid  = grid, 
                               n_jobs      = -1, 
                               cv          = cv, 
                               scoring     = "accuracy",
                               error_score = 0)
    
    grid_result = grid_search.fit(X_train, y_train)
    
    # results
    results = results.append({"classifier" : "SVC",
                              "group"      : group,
                              "target"     : target,
                              "score"      : round(grid_result.best_score_, 6),
                              "params"     : grid_result.best_params_},
                              ignore_index = True)
    
    print("-" * 90, "\nBest score: {} using parametrs: {}".format(round(grid_result.best_score_, 6), grid_result.best_params_))
    print("-" * 90)

    means  = grid_result.cv_results_["mean_test_score"]
    stds   = grid_result.cv_results_["std_test_score" ]
    params = grid_result.cv_results_["params"         ]
    
    for mean, stdev, param in zip(means, stds, params):
        print("Mean: {} StdDev: {} using: {}".format(round(mean, 6), round(stdev, 6), param))
        
    print("-" * 90, "\n")
    
    
    return results

## Apply Various Hyperparameters to the Different Data Groups and Target Catagories to Find Those That Perform Best
As the best results are returned. they are added to the <code>results</code> DataFrame.

In [5]:
# dataframe to hold the best results
results = pd.DataFrame()

In [6]:
results = find_best_hyperparameters(read_data(COLOURS,  TRAIN, ARTIST, DATA), read_data(COLOURS, TRAIN, ARTIST, CATAGORY), "COLOURS", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.579732 using parametrs: {'C': 10.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.500864 StdDev: 0.01644 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.418323 StdDev: 0.008147 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.386992 StdDev: 0.005412 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.576491 StdDev: 0.021421 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.541271 StdDev: 0.018938 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.333621 StdDev: 0.013854 using: {'C': 

In [7]:
results = find_best_hyperparameters(read_data(COLOURS,  TRAIN, GENRE,  DATA), read_data(COLOURS, TRAIN, GENRE,  CATAGORY), "COLOURS", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.32433 using parametrs: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
------------------------------------------------------------------------------------------
Mean: 0.301642 StdDev: 0.013424 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.278738 StdDev: 0.014182 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.273336 StdDev: 0.013024 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.32433 StdDev: 0.017392 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.321089 StdDev: 0.015349 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.228392 StdDev: 0.010175 using: {'C': 1

In [8]:
results = find_best_hyperparameters(read_data(COLOURS,  TRAIN, STYLE,  DATA), read_data(COLOURS, TRAIN, STYLE, CATAGORY), "COLOURS", "STYLE",  results)

------------------------------------------------------------------------------------------ 
Best score: 0.347025 using parametrs: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.324184 StdDev: 0.013873 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.269482 StdDev: 0.011854 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.227255 StdDev: 0.006764 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.331286 StdDev: 0.019319 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.347025 StdDev: 0.018638 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.206718 StdDev: 0.018201 using: {'C': 1.0, '

In [9]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, ARTIST, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, ARTIST, DATA, RGB), (r, l * f)), read_data(PALETTES, TRAIN, ARTIST, CATAGORY), "PALETTES-RGB", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.388505 using parametrs: {'C': 10.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.374028 StdDev: 0.007304 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.385048 StdDev: 0.011767 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.365385 StdDev: 0.004661 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.353068 StdDev: 0.002541 using: {'C':

In [10]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, GENRE, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, GENRE, DATA, RGB), (r, l * f)), read_data(PALETTES, TRAIN, GENRE, CATAGORY), "PALETTES-RGB", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.219749 using parametrs: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.219101 StdDev: 0.009872 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.213699 StdDev: 0.002522 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.203976 StdDev: 0.014728 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.219749 StdDev: 0.009089 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 

In [11]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, STYLE, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, STYLE, DATA, RGB), (r, l * f)), read_data(PALETTES, TRAIN, STYLE, CATAGORY), "PALETTES-RGB", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.210749 using parametrs: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.199616 StdDev: 0.013252 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.160653 StdDev: 0.005699 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.190979 StdDev: 0.013907 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.210749 StdDev: 0.011217 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 1.0, 'gamm

In [12]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, ARTIST, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, ARTIST, DATA, HLS), (r, l * f)), read_data(PALETTES, TRAIN, ARTIST, CATAGORY), "PALETTES-HLS", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.434745 using parametrs: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.407519 StdDev: 0.008417 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.355013 StdDev: 0.003273 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.326491 StdDev: 0.008877 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.401469 StdDev: 0.017747 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.434745 StdDev: 0.007424 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.27917 StdDev: 0.0107 using: {'C': 1.0

In [13]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, GENRE, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, GENRE, DATA, HLS), (r, l * f)), read_data(PALETTES, TRAIN, GENRE, CATAGORY), "PALETTES-HLS", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.231417 using parametrs: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.21586 StdDev: 0.014976 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.231417 StdDev: 0.01578 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.207649 StdDev: 0.009449 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.19274 StdDev: 0.013509 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.224719 StdDev: 0.012601 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.166595 StdDev: 0.016149 using: {'C': 1.0

In [14]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, STYLE, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, STYLE, DATA, HLS), (r, l * f)), read_data(PALETTES,   TRAIN, STYLE,  CATAGORY), "PALETTES-HLS", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.243762 using parametrs: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.230134 StdDev: 0.0157 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.227063 StdDev: 0.005268 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.153167 StdDev: 0.00184 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.216891 StdDev: 0.018688 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.243762 StdDev: 0.010524 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.147985 StdDev: 0.014047 using: {'C': 1.0, 'gam

In [15]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, ARTIST, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, ARTIST, DATA, RGB), (r, l * f)), read_data(LRG_PALETTES, TRAIN, ARTIST, CATAGORY), "LRG-PALETTES-RGB", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.401901 using parametrs: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.383751 StdDev: 0.018232 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.334054 StdDev: 0.020244 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.401901 StdDev: 0.013796 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 

In [16]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, GENRE, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, GENRE, DATA, RGB), (r, l * f)), read_data(LRG_PALETTES, TRAIN, GENRE, CATAGORY), "LRG-PALETTES-RGB", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.224071 using parametrs: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.210458 StdDev: 0.015758 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.213267 StdDev: 0.003176 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.173293 StdDev: 0.016045 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.224071 StdDev: 0.011255 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 

In [17]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, STYLE, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, STYLE, DATA, RGB), (r, l * f)), read_data(LRG_PALETTES, TRAIN, STYLE, CATAGORY), "LRG-PALETTES-RGB", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.222265 using parametrs: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.192514 StdDev: 0.009791 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.158157 StdDev: 0.007638 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.158541 StdDev: 0.010935 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.222265 StdDev: 0.014249 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 1.0, 'gamm

In [18]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, ARTIST, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, ARTIST, DATA, HLS), (r, l * f)), read_data(LRG_PALETTES, TRAIN, ARTIST, CATAGORY), "LRG-PALETTES-HLS", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.469965 using parametrs: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.45484 StdDev: 0.013366 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.419188 StdDev: 0.012686 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.329732 StdDev: 0.007132 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.415298 StdDev: 0.022999 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.469965 StdDev: 0.013427 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.312878 StdDev: 0.011062 using: {'C': 1

In [19]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, GENRE, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, GENRE, DATA, HLS), (r, l * f)), read_data(LRG_PALETTES, TRAIN, GENRE, CATAGORY), "LRG-PALETTES-HLS", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.229257 using parametrs: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.223855 StdDev: 0.017839 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.229257 StdDev: 0.017683 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.210674 StdDev: 0.011055 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.203328 StdDev: 0.011813 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.222558 StdDev: 0.012524 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.185393 StdDev: 0.015916 using: {'C': 

In [20]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, STYLE, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, STYLE, DATA, HLS), (r, l * f)), read_data(LRG_PALETTES,   TRAIN, STYLE,  CATAGORY), "LRG-PALETTES-HLS", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.265259 using parametrs: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.251056 StdDev: 0.01737 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.23071 StdDev: 0.005435 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.154894 StdDev: 0.00227 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.227447 StdDev: 0.013217 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.265259 StdDev: 0.010982 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.172745 StdDev: 0.007759 using: {'C': 1.0, 'gam

In [21]:
results = find_best_hyperparameters(read_data(IMAGE, TRAIN, ARTIST, DATA), read_data(IMAGE, TRAIN, ARTIST, CATAGORY), "IMAGE", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.822818 using parametrs: {'C': 10.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.445765 StdDev: 0.015943 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.367978 StdDev: 0.005249 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.503889 StdDev: 0.010319 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.798833 StdDev: 0.014648 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.803803 StdDev: 0.008828 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.803803 StdDev: 0.009602 using: {'C':

In [22]:
results = find_best_hyperparameters(read_data(IMAGE, TRAIN, GENRE, DATA), read_data(IMAGE, TRAIN, GENRE, CATAGORY), "IMAGE", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.580164 using parametrs: {'C': 10.0, 'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
Mean: 0.38051 StdDev: 0.013248 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.335998 StdDev: 0.015423 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.360199 StdDev: 0.015284 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.572602 StdDev: 0.019232 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.557692 StdDev: 0.019047 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.55121 StdDev: 0.016155 using: {'C': 1

In [23]:
results = find_best_hyperparameters(read_data(IMAGE, TRAIN, STYLE, DATA), read_data(IMAGE, TRAIN, STYLE, CATAGORY), "IMAGE", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.533013 using parametrs: {'C': 1.0, 'gamma': 'scale', 'kernel': 'sigmoid'}
------------------------------------------------------------------------------------------
Mean: 0.310557 StdDev: 0.010498 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.241459 StdDev: 0.005092 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.348177 StdDev: 0.01069 using: {'C': 0.1, 'gamma': 'scale', 'kernel': 'sigmoid'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'poly'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'gamma': 'auto', 'kernel': 'sigmoid'}
Mean: 0.517274 StdDev: 0.025856 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}
Mean: 0.515355 StdDev: 0.017726 using: {'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Mean: 0.533013 StdDev: 0.014075 using: {'C': 1.0

## View the table of best results.
With all the tests completed we can now view the combined best results.

In [24]:
results

Unnamed: 0,classifier,group,params,score,target
0,SVC,COLOURS,"{'C': 10.0, 'gamma': 'scale', 'kernel': 'rbf'}",0.579732,ARTIST
1,SVC,COLOURS,"{'C': 1.0, 'gamma': 'scale', 'kernel': 'poly'}",0.32433,GENRE
2,SVC,COLOURS,"{'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}",0.347025,STYLE
3,SVC,PALETTES-RGB,"{'C': 10.0, 'gamma': 'scale', 'kernel': 'rbf'}",0.388505,ARTIST
4,SVC,PALETTES-RGB,"{'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}",0.219749,GENRE
5,SVC,PALETTES-RGB,"{'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}",0.210749,STYLE
6,SVC,PALETTES-HLS,"{'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}",0.434745,ARTIST
7,SVC,PALETTES-HLS,"{'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}",0.231417,GENRE
8,SVC,PALETTES-HLS,"{'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}",0.243762,STYLE
9,SVC,LRG-PALETTES-RGB,"{'C': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}",0.401901,ARTIST


## Write the Results to a File
These are saved into a _.csv_ file to be used later in this section of the project.

In [25]:
results.to_csv("./data/hyperparameter_results/SupportVectorClassifier.csv", index = False)