# Hyperparameters for Logistic Regression Classifier

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings('ignore')

## Helper Functions

The standard <code>read_data()</code> function seen throughout the project. designed to simplify and standardize the data reading process. <br/>In addition the _Global_ variables associated with its function are defined along with the relitive paths to the data files.<br/>__NOTE:__ in a business environment this function would be placed into a pythin module and imported. Here it is included into the notebook to provide full transparency. 

In [2]:
# required modules
import numpy as np
import pandas as pd
import sqlite3
from sklearn import preprocessing
import colorsys

# groups
COLOURS      = 10
PALETTES     = 11
LRG_PALETTES = 12
IMAGE        = 13
TILES        = 14

# sets
TRAIN        = 20
TEST         = 21
VALIDATE     = 22

# targets
ARTIST       = 30
GENRE        = 31
STYLE        = 32

# array
DATA         = 40
CATAGORY     = 41
CATAGORY_NUM = 42
IMAGE_TAG    = 43
TILE_INDEX   = 44

# colour systems
HLS          = 50
RGB          = 51
RAW          = 52

# an overly complicated method of defining the data file path
data_file  = {COLOURS      : {TRAIN    : {ARTIST : "./data/colour_sets/artist_train_colours.npz",
                                          GENRE  : "./data/colour_sets/genre_train_colours.npz",
                                          STYLE  : "./data/colour_sets/style_train_colours.npz"},
                              TEST     : {ARTIST : "./data/colour_sets/artist_test_colours.npz",
                                          GENRE  : "./data/colour_sets/genre_test_colours.npz",
                                          STYLE  : "./data/colour_sets/style_test_colours.npz"} ,
                              VALIDATE : {ARTIST : "./data/colour_sets/artist_validation_colours.npz",
                                          GENRE  : "./data/colour_sets/genre_validation_colours.npz",
                                          STYLE  : "./data/colour_sets/style_validation_colours.npz"}},
              PALETTES     : {TRAIN    : {ARTIST : "./data/palette_sets/artist_train_palettes.npz",
                                          GENRE  : "./data/palette_sets/genre_train_palettes.npz",
                                          STYLE  : "./data/palette_sets/style_train_palettes.npz"} ,
                              TEST     : {ARTIST : "./data/palette_sets/artist_test_palettes.npz",
                                          GENRE  : "./data/palette_sets/genre_test_palettes.npz",
                                          STYLE  : "./data/palette_sets/style_test_palettes.npz"} ,
                              VALIDATE : {ARTIST : "./data/palette_sets/artist_validation_palettes.npz",
                                          GENRE  : "./data/palette_sets/genre_validation_palettes.npz",
                                          STYLE  : "./data/palette_sets/style_validation_palettes.npz"}},
              LRG_PALETTES : {TRAIN    : {ARTIST : "./data/large_palette_sets/artist_train_palettes.npz",
                                          GENRE  : "./data/large_palette_sets/genre_train_palettes.npz",
                                          STYLE  : "./data/large_palette_sets/style_train_palettes.npz"} ,
                              TEST     : {ARTIST : "./data/large_palette_sets/artist_test_palettes.npz",
                                          GENRE  : "./data/large_palette_sets/genre_test_palettes.npz",
                                          STYLE  : "./data/large_palette_sets/style_test_palettes.npz"} ,
                              VALIDATE : {ARTIST : "./data/large_palette_sets/artist_validation_palettes.npz",
                                          GENRE  : "./data/large_palette_sets/genre_validation_palettes.npz",
                                          STYLE  : "./data/large_palette_sets/style_validation_palettes.npz"}},
              IMAGE        : {TRAIN    : {ARTIST : "./data/full_image_feature_sets/artist_train_features.npz",
                                          GENRE  : "./data/full_image_feature_sets/genre_train_features.npz",
                                          STYLE  : "./data/full_image_feature_sets/style_train_features.npz"} ,
                              TEST     : {ARTIST : "./data/full_image_feature_sets/artist_test_features.npz",
                                          GENRE  : "./data/full_image_feature_sets/genre_test_features.npz",
                                          STYLE  : "./data/full_image_feature_sets/style_test_features.npz"} ,
                              VALIDATE : {ARTIST : "./data/full_image_feature_sets/artist_validation_features.npz",
                                          GENRE  : "./data/full_image_feature_sets/genre_validation_features.npz",
                                          STYLE  : "./data/full_image_feature_sets/style_validation_features.npz"}},
              TILES        : {TRAIN    : {ARTIST : "./data/feature_sets/artist_train_features.npz",
                                          GENRE  : "./data/feature_sets/genre_train_features.npz",
                                          STYLE  : "./data/feature_sets/style_train_features.npz"} ,
                              TEST     : {ARTIST : "./data/feature_sets/artist_test_features.npz",
                                          GENRE  : "./data/feature_sets/genre_test_features.npz",
                                          STYLE  : "./data/feature_sets/style_test_features.npz"} ,
                              VALIDATE : {ARTIST : "./data/feature_sets/artist_validation_features.npz",
                                          GENRE  : "./data/feature_sets/genre_validation_features.npz",
                                          STYLE  : "./data/feature_sets/style_validation_features.npz"}}}

# Create a DB connection between python and the file system
conn = sqlite3.connect("./data/database/artist.db")

# the fuction that provides a standard data read method
def read_data(data_group, data_set, data_target, data_type, colour_system = RGB):
    
    # read the data file
    data = np.load(data_file[data_group][data_set][data_target], allow_pickle=True)

    # if tile index is requested
    if   data_type    == TILE_INDEX:
        # when group is tiles, return the tile indexes
        if data_group == TILES:
            return pd.DataFrame(data["arr_3"], columns = {"tile_index"})
        # otherwise, just return an emtpy dataset
        else:
            return pd.DataFrame(columns = {"tile_index"})
    # if image tags are requested
    elif data_type == IMAGE_TAG:
        return pd.DataFrame(data["arr_2"], columns = {"image_tag"})
    # if catagory data is requested
    elif data_type == CATAGORY:
        return pd.DataFrame(data["arr_1"], columns = {"catagory"})
    # if catagory_num is requested
    elif data_type == CATAGORY_NUM:
        # select data for the appropriate lookup table
        if   data_target == ARTIST: 
            query_string = "SELECT ID, NAME  AS VALUE FROM ARTIST"
        elif data_target == GENRE:
            query_string = "SELECT ID, GENRE AS VALUE FROM GENRE"
        else:
            query_string = "SELECT ID, STYLE AS VALUE FROM STYLE" 
        # build the lookup table
        lookup = pd.read_sql_query(query_string, conn)
        # get the catagory data
        data   = pd.DataFrame(data["arr_1"], columns = {"catagory"})
        # convert the catagory text labels to numeric catagory ids
        return pd.DataFrame(data["catagory"].apply(lambda x: lookup[lookup["VALUE"] == x]["id"].values[0]))
    # data requested
    elif data_group == COLOURS:
        # reshape the data and normalize
        return pd.DataFrame(preprocessing.normalize(data["arr_0"].reshape(data["arr_0"].shape[0],data["arr_0"].shape[1] * data["arr_0"].shape[2])))
    elif data_group == PALETTES or data_group == LRG_PALETTES:

        # extract the colour data
        colour_data = data["arr_0"]
        
        # find the datas dimensions
        records, _, palette_length, palette_layers = np.shape(colour_data)
        
        # reshape
        colour_data = colour_data.reshape(records * palette_length, palette_layers)
        
        # convert to a dataframe
        colour_data_df = pd.DataFrame(colour_data, columns =["r","g","b"])
        
        # process the data with different colour systems
        
        # hue, luminance, saturation
        if colour_system == HLS:
            
            # define a dataframe to hold the hls values
            hls_data_df = pd.DataFrame(columns =["h","l","s"])

            # loop through each record
            for row in colour_data_df.iterrows():
                
                # extract the r,g,b values and devide them by 255 ready to convert
                r, g, b = row[1].values/255

                # convert the values and add them to the dataframe
                hls_data_df.loc[len(hls_data_df)] = colorsys.rgb_to_hls(r, g, b)
            
            # create datasets and normalize
            hue        = pd.DataFrame(preprocessing.normalize(np.array(hls_data_df["h"]).reshape(records, palette_length)))
            luminance  = pd.DataFrame(preprocessing.normalize(np.array(hls_data_df["l"]).reshape(records, palette_length)))
            saturation = pd.DataFrame(preprocessing.normalize(np.array(hls_data_df["s"]).reshape(records, palette_length)))  
            
            # return values
            return hue, luminance, saturation
            
        # red, green, blue unnormalized
        elif colour_system == RAW:
            red   = pd.DataFrame(np.array(colour_data_df["r"]).reshape(records, palette_length))
            green = pd.DataFrame(np.array(colour_data_df["g"]).reshape(records, palette_length))
            blue  = pd.DataFrame(np.array(colour_data_df["b"]).reshape(records, palette_length))
            
            # return values
            return red, green, blue            
            
        # red, green, blue normalized- (default)
        else:
    
            # create datasets and normalize
            red   = pd.DataFrame(preprocessing.normalize(np.array(colour_data_df["r"]).reshape(records, palette_length)))
            green = pd.DataFrame(preprocessing.normalize(np.array(colour_data_df["g"]).reshape(records, palette_length)))
            blue  = pd.DataFrame(preprocessing.normalize(np.array(colour_data_df["b"]).reshape(records, palette_length)))
            
            # return values
            return red, green, blue

    elif data_group == IMAGE:
        # reshape the data and normalize
        return pd.DataFrame(preprocessing.normalize(data["arr_0"].reshape(data["arr_0"].shape[0],data["arr_0"].shape[2])))
    else:
        # normalize the data
        return pd.DataFrame(preprocessing.normalize(data["arr_0"])) 

The <code>get_colours()</code> function provides a list of colours the length of the value <code>no_of_colours</code>. This can be useful for consisten colours when used across multiple plots. 

In [4]:
# required modules
import random

# define colours used 
def get_colours(no_of_colours):
    
    # consistent randomness
    random.seed(42)
    
    # colour list
    colours = []

    # generate colours
    for i in range(no_of_colours):
        colours.append('#%06X' % random.randint(0, 0xFFFFFF))
    
    # return the colour list
    return colours

Generate sorted lists of _Artists, Genres_ & _Styles_. These are used in a variety of situations. These functions ensure that the lists are always defined in the same order.

In [5]:
# define data lists
artists = list(set(read_data(COLOURS, TRAIN, ARTIST, CATAGORY)["catagory"]))
styles  = list(set(read_data(COLOURS, TRAIN, STYLE,  CATAGORY)["catagory"]))
genres  = list(set(read_data(COLOURS, TRAIN, GENRE,  CATAGORY)["catagory"]))

# sort the lists
artists.sort()
styles.sort()
genres.sort()

Function <code>find_best_hyperparameters()</code> uses a _Grid Search_ to loop thought the hyperparameter values. All combinations are displayed with their score. The combination that achieves the best score is highlighted and returned. 

In [4]:
def find_best_hyperparameters(X_train, y_train, group, target, results):

    # define models and parameters
    model = LogisticRegression()
    

    # define grid search
    grid        = dict(solver      = ["newton-cg", "lbfgs", "liblinear", "sag", "saga"],
                       penalty     = ["l1", "l2"],
                       C           = [0.1, 1.0, 10.0, 100.0],
                       multi_class = ["ovr", "multinomial"])
    
    cv          = RepeatedStratifiedKFold(n_splits     = 5, 
                                          n_repeats    = 2, 
                                          random_state = 42)
    
    grid_search = GridSearchCV(estimator   = model, 
                               param_grid  = grid, 
                               n_jobs      = -1, 
                               cv          = cv, 
                               scoring     = "accuracy",
                               error_score = 0)
    
    grid_result = grid_search.fit(X_train, y_train)
    
    # results
    results = results.append({"classifier" : "LogisticRegression",
                              "group"      : group,
                              "target"     : target,
                              "score"      : round(grid_result.best_score_, 6),
                              "params"     : grid_result.best_params_},
                              ignore_index = True)
    
    print("-" * 90, "\nBest score: {} using parametrs: {}".format(round(grid_result.best_score_, 6), grid_result.best_params_))
    print("-" * 90)

    means  = grid_result.cv_results_["mean_test_score"]
    stds   = grid_result.cv_results_["std_test_score" ]
    params = grid_result.cv_results_["params"         ]
    
    for mean, stdev, param in zip(means, stds, params):
        print("Mean: {} StdDev: {} using: {}".format(round(mean, 6), round(stdev, 6), param))
        
    print("-" * 90, "\n")
    
    
    return results

## Apply Various Hyperparameters to the Different Data Groups and Target Catagories to Find Those That Perform Best
As the best results are returned. they are added to the <code>results</code> DataFrame.

In [5]:
# dataframe to hold the best results
results = pd.DataFrame()

In [6]:
results = find_best_hyperparameters(read_data(COLOURS,  TRAIN, ARTIST, DATA), read_data(COLOURS, TRAIN, ARTIST, CATAGORY), "COLOURS", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.514693 using parametrs: {'C': 10.0, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'newton-cg'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.394771 StdDev: 0.005234 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.394339 StdDev: 0.004974 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.412489 StdDev: 0.006255 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.412489 StdDev: 0.006255 using: {'C': 0.1, 'multi_class': 'ovr'

In [7]:
results = find_best_hyperparameters(read_data(COLOURS,  TRAIN, GENRE,  DATA), read_data(COLOURS, TRAIN, GENRE,  CATAGORY), "COLOURS", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.303587 using parametrs: {'C': 10.0, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.229473 StdDev: 0.01033 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.232282 StdDev: 0.011407 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.280683 StdDev: 0.013441 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.280683 StdDev: 0.013441 using: {'C': 0.1, 'multi_class': 'ovr', 'penalt

In [8]:
results = find_best_hyperparameters(read_data(COLOURS,  TRAIN, STYLE,  DATA), read_data(COLOURS, TRAIN, STYLE, CATAGORY), "COLOURS", "STYLE",  results)

------------------------------------------------------------------------------------------ 
Best score: 0.304798 using parametrs: {'C': 10.0, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'lbfgs'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.126488 StdDev: 0.011213 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.129367 StdDev: 0.012261 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.26929 StdDev: 0.013919 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.26929 StdDev: 0.013919 using: {'C': 0.1, 'multi_class': 'ovr', 'pen

In [9]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, ARTIST, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, ARTIST, DATA, RGB), (r, l * f)), read_data(PALETTES, TRAIN, ARTIST, CATAGORY), "PALETTES-RGB", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.372083 using parametrs: {'C': 100.0, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'newton-cg'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'multi_class': 'ovr

In [10]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, GENRE, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, GENRE, DATA, RGB), (r, l * f)), read_data(PALETTES, TRAIN, GENRE, CATAGORY), "PALETTES-RGB", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.213483 using parametrs: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.213051 StdDev: 0.002885 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.210026 StdDev: 0.015808 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.210026 StdDev: 0.015808 using: {'C': 0.1, 'multi_class': 'ovr', 'penalt

In [11]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, STYLE, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, STYLE, DATA, RGB), (r, l * f)), read_data(PALETTES, TRAIN, STYLE, CATAGORY), "PALETTES-RGB", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.186948 using parametrs: {'C': 10.0, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.140883 StdDev: 0.001247 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.156622 StdDev: 0.007563 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.156622 StdDev: 0.007563 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty

In [12]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, ARTIST, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, ARTIST, DATA, HLS), (r, l * f)), read_data(PALETTES, TRAIN, ARTIST, CATAGORY), "PALETTES-HLS", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.399957 using parametrs: {'C': 1.0, 'multi_class': 'multinomial', 'penalty': 'l1', 'solver': 'saga'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.366249 StdDev: 0.010855 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.365385 StdDev: 0.011402 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.378997 StdDev: 0.013956 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.378997 StdDev: 0.013956 using: {'C': 0.1, 'multi_class': 'ovr', 'pen

In [13]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, GENRE, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, GENRE, DATA, HLS), (r, l * f)), read_data(PALETTES, TRAIN, GENRE, CATAGORY), "PALETTES-HLS", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.229473 using parametrs: {'C': 0.1, 'multi_class': 'multinomial', 'penalty': 'l1', 'solver': 'saga'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.226448 StdDev: 0.010953 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.228176 StdDev: 0.013006 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.22688 StdDev: 0.016126 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.22688 StdDev: 0.016126 using: {'C': 0.1, 'multi_class': 'ovr', 'penal

In [14]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(PALETTES, TRAIN, STYLE, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(PALETTES, TRAIN, STYLE, DATA, HLS), (r, l * f)), read_data(PALETTES,   TRAIN, STYLE,  CATAGORY), "PALETTES-HLS", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.227447 using parametrs: {'C': 1.0, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.21094 StdDev: 0.005332 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.213244 StdDev: 0.006525 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.226871 StdDev: 0.009747 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.226871 StdDev: 0.009747 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l

In [15]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, ARTIST, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, ARTIST, DATA, RGB), (r, l * f)), read_data(LRG_PALETTES, TRAIN, ARTIST, CATAGORY), "LRG-PALETTES-RGB", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.359551 using parametrs: {'C': 10.0, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'newton-cg'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'multi_class': 'ovr'

In [16]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, GENRE, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, GENRE, DATA, RGB), (r, l * f)), read_data(LRG_PALETTES, TRAIN, GENRE, CATAGORY), "LRG-PALETTES-RGB", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.22299 using parametrs: {'C': 1.0, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.211538 StdDev: 0.003197 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.221046 StdDev: 0.01475 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.22083 StdDev: 0.014916 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2'

In [17]:
# flatten the red, green and blue layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, STYLE, DATA, RGB))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, STYLE, DATA, RGB), (r, l * f)), read_data(LRG_PALETTES, TRAIN, STYLE, CATAGORY), "LRG-PALETTES-RGB", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.188868 using parametrs: {'C': 10.0, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'lbfgs'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.141267 StdDev: 0.0011 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.155854 StdDev: 0.004942 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.155854 StdDev: 0.004942 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2

In [18]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, ARTIST, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, ARTIST, DATA, HLS), (r, l * f)), read_data(LRG_PALETTES, TRAIN, ARTIST, CATAGORY), "LRG-PALETTES-HLS", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.444685 using parametrs: {'C': 1.0, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'lbfgs'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.367329 StdDev: 0.006209 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.36236 StdDev: 0.005977 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.407087 StdDev: 0.013372 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.407087 StdDev: 0.013372 using: {'C': 0.1, 'multi_class': 'ovr', 'pen

In [19]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, GENRE, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, GENRE, DATA, HLS), (r, l * f)), read_data(LRG_PALETTES, TRAIN, GENRE, CATAGORY), "LRG-PALETTES-HLS", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.227528 using parametrs: {'C': 0.1, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'newton-cg'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.226448 StdDev: 0.012589 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.226448 StdDev: 0.012589 using: {'C': 0.1, 'multi_class': 'ovr',

In [20]:
# flatten the hue, luminence and saturation layers
l, r, f = np.shape(                            read_data(LRG_PALETTES, TRAIN, STYLE, DATA, HLS))
results = find_best_hyperparameters(np.reshape(read_data(LRG_PALETTES, TRAIN, STYLE, DATA, HLS), (r, l * f)), read_data(LRG_PALETTES,   TRAIN, STYLE,  CATAGORY), "LRG-PALETTES-HLS", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.241459 using parametrs: {'C': 1.0, 'multi_class': 'multinomial', 'penalty': 'l1', 'solver': 'saga'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.20499 StdDev: 0.007014 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.20595 StdDev: 0.006243 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.234357 StdDev: 0.0064 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.234357 StdDev: 0.0064 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty':

In [21]:
results = find_best_hyperparameters(read_data(IMAGE, TRAIN, ARTIST, DATA), read_data(IMAGE, TRAIN, ARTIST, CATAGORY), "IMAGE", "ARTIST", results)

------------------------------------------------------------------------------------------ 
Best score: 0.809421 using parametrs: {'C': 100.0, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'newton-cg'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.353068 StdDev: 0.002541 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.445765 StdDev: 0.011983 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.445765 StdDev: 0.011983 using: {'C': 0.1, 'multi_class': 'ovr

In [22]:
results = find_best_hyperparameters(read_data(IMAGE, TRAIN, GENRE, DATA), read_data(IMAGE, TRAIN, GENRE, CATAGORY), "IMAGE", "GENRE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.576707 using parametrs: {'C': 10.0, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'liblinear'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.213483 StdDev: 0.002806 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.212619 StdDev: 0.003087 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.341832 StdDev: 0.011801 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.341832 StdDev: 0.011801 using: {'C': 0.1, 'multi_class': 'ovr', 'penal

In [23]:
results = find_best_hyperparameters(read_data(IMAGE, TRAIN, STYLE, DATA), read_data(IMAGE, TRAIN, STYLE, CATAGORY), "IMAGE", "STYLE", results)

------------------------------------------------------------------------------------------ 
Best score: 0.513052 using parametrs: {'C': 10.0, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'liblinear'}
------------------------------------------------------------------------------------------
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'newton-cg'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'lbfgs'}
Mean: 0.144146 StdDev: 0.007849 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'liblinear'}
Mean: 0.0 StdDev: 0.0 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'sag'}
Mean: 0.144146 StdDev: 0.007849 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l1', 'solver': 'saga'}
Mean: 0.330326 StdDev: 0.013644 using: {'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l2', 'solver': 'newton-cg'}
Mean: 0.330326 StdDev: 0.013644 using: {'C': 0.1, 'multi_class': 'ovr', 'penal

## View the table of best results.
With all the tests completed we can now view the combined best results.

In [24]:
results

Unnamed: 0,classifier,group,params,score,target
0,LogisticRegression,COLOURS,"{'C': 10.0, 'multi_class': 'multinomial', 'pen...",0.514693,ARTIST
1,LogisticRegression,COLOURS,"{'C': 10.0, 'multi_class': 'ovr', 'penalty': '...",0.303587,GENRE
2,LogisticRegression,COLOURS,"{'C': 10.0, 'multi_class': 'multinomial', 'pen...",0.304798,STYLE
3,LogisticRegression,PALETTES-RGB,"{'C': 100.0, 'multi_class': 'multinomial', 'pe...",0.372083,ARTIST
4,LogisticRegression,PALETTES-RGB,"{'C': 0.1, 'multi_class': 'ovr', 'penalty': 'l...",0.213483,GENRE
5,LogisticRegression,PALETTES-RGB,"{'C': 10.0, 'multi_class': 'ovr', 'penalty': '...",0.186948,STYLE
6,LogisticRegression,PALETTES-HLS,"{'C': 1.0, 'multi_class': 'multinomial', 'pena...",0.399957,ARTIST
7,LogisticRegression,PALETTES-HLS,"{'C': 0.1, 'multi_class': 'multinomial', 'pena...",0.229473,GENRE
8,LogisticRegression,PALETTES-HLS,"{'C': 1.0, 'multi_class': 'ovr', 'penalty': 'l...",0.227447,STYLE
9,LogisticRegression,LRG-PALETTES-RGB,"{'C': 10.0, 'multi_class': 'multinomial', 'pen...",0.359551,ARTIST


## Write the Results to a File
These are saved into a _.csv_ file to be used later in this section of the project.

In [25]:
results.to_csv("./data/hyperparameter_results/LogisticRegressionClassifier.csv", index = False)