# Data Sanity Check
This __Data Sanity Check__ notebook is intended as a means to check the retrieval of data from both files and from the database. It also introduces the <code>read_data()</code> function, which will be used throughout the project to simplify & standardize the method of reading data.

In [1]:
import numpy as np
import pandas as pd
import sqlite3
from sklearn import preprocessing

## Helper Function

The standard <code>read_data()</code> function seen throughout the project, designed to simplify and standardize the data reading process. <br/>In addition, the _global_ variables associated with its function are defined, along with the relative paths to the data files.<br/>__NOTE:__ in a business environment this function would be placed into a Python module and imported. Here it is included in the notebook to provide full transparency. 

In [2]:
# required modules
import numpy as np
import pandas as pd
import sqlite3
from sklearn import preprocessing
import colorsys

# groups (first-level keys of data_file)
COLOURS, PALETTES, LRG_PALETTES, IMAGE, TILES = 10, 11, 12, 13, 14

# sets (second-level keys of data_file)
TRAIN, TEST, VALIDATE = 20, 21, 22

# targets (third-level keys of data_file)
ARTIST, GENRE, STYLE = 30, 31, 32

# array (which part of a data file is requested)
DATA, CATAGORY, CATAGORY_NUM, IMAGE_TAG, TILE_INDEX = 40, 41, 42, 43, 44

# colour systems
HLS, RGB, RAW = 50, 51, 52

# Every data file path follows the same pattern:
#     ./data/<folder>/<target>_<set>_<suffix>.npz
# so the nested lookup data_file[group][set][target] is generated from the
# three small tables below instead of being typed out path by path.

# group -> (folder name, file name suffix)
_GROUP_LAYOUT = {COLOURS      : ("colour_sets",             "colours"),
                 PALETTES     : ("palette_sets",            "palettes"),
                 LRG_PALETTES : ("large_palette_sets",      "palettes"),
                 IMAGE        : ("full_image_feature_sets", "features"),
                 TILES        : ("feature_sets",            "features")}

# set -> word used for it in the file name
_SET_LABEL    = {TRAIN : "train", TEST : "test", VALIDATE : "validation"}

# target -> word used for it in the file name
_TARGET_LABEL = {ARTIST : "artist", GENRE : "genre", STYLE : "style"}

data_file = {
    group: {
        set_id: {
            target: "./data/{}/{}_{}_{}.npz".format(folder, target_label, set_label, suffix)
            for target, target_label in _TARGET_LABEL.items()
        }
        for set_id, set_label in _SET_LABEL.items()
    }
    for group, (folder, suffix) in _GROUP_LAYOUT.items()
}

# Open a module-level SQLite connection to the project database file
# (path is relative to the notebook's working directory). read_data() uses
# this shared connection for its CATAGORY_NUM lookup-table queries.
conn = sqlite3.connect("./data/database/artist.db")

# the function that provides a standard data read method

def _palette_channel_frames(palette_data, colour_system):
    """Split palette colour data into three per-channel DataFrames.

    palette_data is unpacked as a 4-D array
    (records, <ignored axis>, palette_length, 3 colour components).
    Returns a tuple of three DataFrames, each of shape
    (records, palette_length): (h, l, s) for HLS, otherwise (r, g, b).
    The frames are normalised unless colour_system is RAW.
    """
    # find the data's dimensions
    records, _, palette_length, palette_layers = np.shape(palette_data)
    if palette_layers != 3:
        raise ValueError("palette data must have 3 colour components per entry, "
                         "found {}".format(palette_layers))

    # one row per palette entry, one column per colour component
    pixels = palette_data.reshape(records * palette_length, palette_layers)

    if colour_system == HLS:
        # colorsys expects components in 0..1, so divide by 255 before converting
        # (assumes the stored values are 0-255 RGB - TODO confirm for every file).
        # The converted rows are collected in one list and turned into an array
        # once, rather than appending to a DataFrame one row at a time.
        channels = np.array([colorsys.rgb_to_hls(r, g, b) for r, g, b in pixels / 255],
                            dtype=float).reshape(-1, 3)
        normalise = True
    else:
        channels  = pixels
        # RAW returns the stored values untouched; anything else (RGB, the
        # default) is normalised
        normalise = colour_system != RAW

    frames = []
    for component in range(3):
        grid = channels[:, component].reshape(records, palette_length)
        if normalise:
            grid = preprocessing.normalize(grid)
        frames.append(pd.DataFrame(grid))
    return tuple(frames)


def read_data(data_group, data_set, data_target, data_type, colour_system = RGB):
    """Read one part of a project data file and return it as DataFrame(s).

    Parameters
    ----------
    data_group : COLOURS, PALETTES, LRG_PALETTES, IMAGE or TILES
    data_set : TRAIN, TEST or VALIDATE
    data_target : ARTIST, GENRE or STYLE
        Together these three select the file via
        data_file[data_group][data_set][data_target].
    data_type : what to return from the file
        TILE_INDEX   - "arr_3" as column "tile_index" (TILES group only; an
                       empty frame with that column for any other group)
        IMAGE_TAG    - "arr_2" as column "image_tag"
        CATAGORY     - "arr_1" as column "catagory" (text labels)
        CATAGORY_NUM - the "arr_1" labels converted to numeric ids using the
                       ARTIST / GENRE / STYLE table of the database behind conn
        anything else (normally DATA) - the feature data in "arr_0"
    colour_system : HLS, RAW or RGB (default)
        Only used for feature data of the PALETTES / LRG_PALETTES groups.

    Returns
    -------
    pandas.DataFrame, or for palette feature data a tuple of three DataFrames
    (h, l, s) or (r, g, b). Feature data is passed through
    sklearn's preprocessing.normalize, except palettes requested as RAW.

    Raises
    ------
    KeyError   - unknown group / set / target combination
    IndexError - CATAGORY_NUM requested and a label is missing from the
                 lookup table (same exception type as before)
    ValueError - palette data that does not have 3 colour components
    """
    # NOTE(review): allow_pickle=True lets numpy unpickle object arrays, which
    # can execute arbitrary code - only load data files from a trusted source.
    # The archive is opened in a with-block so its file handle is always closed.
    with np.load(data_file[data_group][data_set][data_target], allow_pickle=True) as data:

        # Column labels are passed as lists throughout: recent pandas versions
        # no longer accept a set here.

        # if tile index is requested
        if data_type == TILE_INDEX:
            # when group is tiles, return the tile indexes
            if data_group == TILES:
                return pd.DataFrame(data["arr_3"], columns = ["tile_index"])
            # otherwise, just return an empty dataset
            return pd.DataFrame(columns = ["tile_index"])

        # if image tags are requested
        if data_type == IMAGE_TAG:
            return pd.DataFrame(data["arr_2"], columns = ["image_tag"])

        # if category data is requested
        if data_type == CATAGORY:
            return pd.DataFrame(data["arr_1"], columns = ["catagory"])

        # if category_num is requested
        if data_type == CATAGORY_NUM:
            # select data for the appropriate lookup table
            if   data_target == ARTIST:
                query_string = "SELECT ID, NAME  AS VALUE FROM ARTIST"
            elif data_target == GENRE:
                query_string = "SELECT ID, GENRE AS VALUE FROM GENRE"
            else:
                query_string = "SELECT ID, STYLE AS VALUE FROM STYLE"

            # build the lookup table; if a label occurs more than once keep the
            # first row, which is the one the previous per-row search returned
            lookup = pd.read_sql_query(query_string, conn).drop_duplicates(subset = "VALUE")

            # label -> id. The id is taken by position (first selected column)
            # so the result does not depend on the letter case SQLite reports
            # for that column name.
            ids_by_label = dict(zip(lookup["VALUE"], lookup.iloc[:, 0]))

            # get the category data
            labels = pd.DataFrame(data["arr_1"], columns = ["catagory"])["catagory"]

            unknown = [label for label in labels.unique() if label not in ids_by_label]
            if unknown:
                raise IndexError("no lookup id found for catagory label(s): {}".format(unknown))

            # convert the category text labels to numeric category ids with a
            # single dictionary lookup per row
            return pd.DataFrame(labels.map(ids_by_label))

        # feature data requested: read the array once, while the file is open
        features = data["arr_0"]

    if data_group == COLOURS:
        # flatten the last two axes into one row per record, then normalise
        flat = features.reshape(features.shape[0], features.shape[1] * features.shape[2])
        return pd.DataFrame(preprocessing.normalize(flat))

    if data_group == PALETTES or data_group == LRG_PALETTES:
        # three frames, one per colour channel
        return _palette_channel_frames(features, colour_system)

    if data_group == IMAGE:
        # drop the middle axis (this reshape only succeeds when it has
        # length 1), then normalise
        flat = features.reshape(features.shape[0], features.shape[2])
        return pd.DataFrame(preprocessing.normalize(flat))

    # remaining group (TILES): normalise the data as stored
    return pd.DataFrame(preprocessing.normalize(features))

## Test Area
Here various calls to the <code>read_data()</code> function are made to demonstrate that the data can be easily retrieved.

In [3]:
# feature data of the COLOURS group for the STYLE validation set (flattened and normalised by read_data)
read_data(COLOURS, VALIDATE, STYLE, DATA)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,758,759,760,761,762,763,764,765,766,767
0,0.171125,0.171125,0.171125,0.049506,0.049506,0.049506,0.034053,0.034053,0.034053,0.021748,...,0.034626,0.050078,0.050078,0.050078,0.063528,0.063528,0.063528,0.523390,0.523390,0.523390
1,0.517343,0.516724,0.517343,0.060367,0.061809,0.061191,0.044709,0.044503,0.044503,0.039970,...,0.026990,0.022869,0.022663,0.021633,0.039558,0.039146,0.040176,0.214066,0.215302,0.215096
2,0.159141,0.159141,0.159141,0.039983,0.039983,0.039983,0.044724,0.044724,0.044724,0.061475,...,0.037138,0.031765,0.031765,0.031765,0.039983,0.039983,0.039983,0.533050,0.533050,0.533050
3,0.131443,0.131443,0.131443,0.041557,0.041557,0.041557,0.104047,0.104047,0.104047,0.187161,...,0.012313,0.008927,0.008927,0.008927,0.011698,0.011698,0.011698,0.162842,0.162842,0.162842
4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.005690,0.004979,0.005690,0.001422,0.008535,0.004267,0.002134,0.011379,0.026315,0.015647
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
335,0.319781,0.235047,0.214416,0.054525,0.043473,0.039052,0.036841,0.031683,0.040525,0.044209,...,0.016947,0.047894,0.019894,0.014736,0.039052,0.025052,0.008842,0.192311,0.149575,0.087682
336,0.116485,0.107329,0.229410,0.018821,0.017295,0.050867,0.021364,0.017295,0.051884,0.013225,...,0.019329,0.090035,0.023399,0.031538,0.092069,0.024925,0.031538,0.626682,0.180069,0.245688
337,0.025989,0.035807,0.019444,0.009626,0.018674,0.015208,0.007508,0.014053,0.016748,0.006160,...,0.019636,0.029069,0.022909,0.027722,0.088555,0.046588,0.032342,0.577339,0.632012,0.472036
338,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [4]:
# feature data of the IMAGE group for the GENRE validation set (normalised by read_data)
read_data(IMAGE, VALIDATE, GENRE, DATA)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279
0,0.006582,0.000000,0.000000,0.027896,0.000000,0.000000,0.000000,0.048670,0.001476,0.000000,...,0.000000,0.000000,0.011457,0.000000,0.000000,0.000000,0.023074,0.000000,0.001053,0.010834
1,0.016554,0.000000,0.000000,0.000000,0.000000,0.005379,0.000000,0.053873,0.027196,0.000000,...,0.025181,0.000000,0.000000,0.024900,0.000000,0.000000,0.000000,0.000000,0.000000,0.063102
2,0.024926,0.000000,0.010351,0.019364,0.007988,0.000000,0.000000,0.018069,0.005126,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.024854,0.000000
3,0.000659,0.037240,0.020711,0.020658,0.000000,0.000000,0.000000,0.062904,0.000000,0.000000,...,0.000000,0.000000,0.003306,0.000000,0.008102,0.000000,0.020000,0.016788,0.000000,0.001930
4,0.010102,0.018667,0.000000,0.044791,0.000000,0.000000,0.001539,0.062939,0.053216,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.006497,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,0.000000,0.000000,0.001913,0.000000,0.064242,0.006258,0.000000,0.000000,0.000000,0.001715,...,0.000038,0.022159,0.000000,0.014875,0.000000,0.018889,0.008885,0.000000,0.152083,0.021358
293,0.000000,0.000000,0.006344,0.000000,0.033495,0.001882,0.000000,0.000000,0.038717,0.000000,...,0.034648,0.000000,0.000000,0.017174,0.005278,0.000000,0.000000,0.000000,0.000000,0.028404
294,0.002087,0.032730,0.064136,0.000000,0.000029,0.033731,0.000000,0.030263,0.033711,0.002534,...,0.000000,0.007479,0.000000,0.000000,0.017531,0.000000,0.030721,0.000000,0.000000,0.000000
295,0.012742,0.002012,0.000000,0.062977,0.003823,0.000000,0.003756,0.009299,0.008343,0.000000,...,0.000000,0.000000,0.028342,0.000000,0.000000,0.004762,0.000000,0.000000,0.070075,0.000000


In [5]:
# feature data of the TILES group for the ARTIST validation set (normalised by read_data)
read_data(TILES, VALIDATE, ARTIST, DATA)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279
0,0.027533,0.007490,0.134417,0.090102,0.013606,0.000000,0.017352,0.025166,0.048273,0.000000,...,0.000000,0.000000,0.117118,0.043346,0.002939,0.001855,0.0,0.000000,0.0,0.012089
1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.026183,0.000000,...,0.000000,0.000000,0.038961,0.020789,0.000000,0.000000,0.0,0.000000,0.0,0.000000
2,0.026003,0.000000,0.000000,0.101362,0.002888,0.002356,0.000000,0.108805,0.105373,0.000000,...,0.000000,0.024882,0.000000,0.000000,0.000000,0.022300,0.0,0.000000,0.0,0.000000
3,0.011170,0.000000,0.000000,0.038902,0.000000,0.014706,0.000000,0.031950,0.002399,0.003453,...,0.000000,0.000000,0.032753,0.000000,0.000360,0.000000,0.0,0.000000,0.0,0.000000
4,0.007799,0.000000,0.000000,0.021099,0.040591,0.001465,0.000000,0.050586,0.045270,0.000000,...,0.000000,0.000000,0.007436,0.000000,0.002665,0.000851,0.0,0.000000,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19003,0.007957,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.145611,0.000000,...,0.000000,0.000000,0.096401,0.024799,0.000000,0.000000,0.0,0.001395,0.0,0.000000
19004,0.041427,0.000000,0.000000,0.015527,0.000000,0.000000,0.000000,0.018151,0.103429,0.000000,...,0.000000,0.013058,0.113551,0.000000,0.000000,0.000000,0.0,0.003936,0.0,0.028075
19005,0.009695,0.001144,0.000000,0.000000,0.011671,0.000000,0.000000,0.030620,0.101848,0.000000,...,0.001351,0.057842,0.087615,0.000452,0.000000,0.000000,0.0,0.000000,0.0,0.019122
19006,0.064459,0.000000,0.000000,0.007490,0.001542,0.000000,0.000000,0.001305,0.082158,0.005941,...,0.000000,0.070625,0.150234,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.101680


In [6]:
# text category labels stored with the COLOURS / STYLE validation file
read_data(COLOURS, VALIDATE, STYLE, CATAGORY)

Unnamed: 0,catagory
0,expressionism
1,cubism
2,expressionism
3,surrealism
4,surrealism
...,...
335,expressionism
336,pop-art
337,romanticism
338,romanticism


In [7]:
# text category labels stored with the IMAGE / GENRE validation file
read_data(IMAGE, VALIDATE, GENRE, CATAGORY)

Unnamed: 0,catagory
0,portrait
1,nude-painting
2,illustration
3,portrait
4,religious-painting
...,...
292,landscape
293,interior
294,portrait
295,genre-painting


In [8]:
# text category labels stored with the TILES / ARTIST validation file
read_data(TILES, VALIDATE, ARTIST, CATAGORY)

Unnamed: 0,catagory
0,M.C. Escher
1,M.C. Escher
2,M.C. Escher
3,M.C. Escher
4,M.C. Escher
...,...
19003,Karl Bodmer
19004,Karl Bodmer
19005,Karl Bodmer
19006,Karl Bodmer


In [9]:
# the same labels converted to numeric ids via the database's ARTIST lookup table
read_data(TILES, VALIDATE, ARTIST, CATAGORY_NUM)

Unnamed: 0,catagory
0,9
1,9
2,9
3,9
4,9
...,...
19003,6
19004,6
19005,6
19006,6


In [10]:
# image tags stored with the COLOURS / GENRE validation file
read_data(COLOURS, VALIDATE, GENRE, IMAGE_TAG)

Unnamed: 0,image_tag
0,09_0025
1,09_0044
2,09_0056
3,09_0070
4,09_0088
...,...
292,12_0119
293,12_0132
294,06_0032
295,06_0055


In [11]:
# tile indexes (read_data only returns values for the TILES group; other groups give an empty frame)
read_data(TILES, VALIDATE, ARTIST, TILE_INDEX)

Unnamed: 0,tile_index
0,0
1,1
2,2
3,3
4,4
...,...
19003,59
19004,60
19005,61
19006,62


In [12]:
# image tags stored with the IMAGE / ARTIST validation file
read_data(IMAGE, VALIDATE, ARTIST, IMAGE_TAG)

Unnamed: 0,image_tag
0,09_0025
1,09_0044
2,09_0056
3,09_0070
4,09_0088
...,...
292,12_0119
293,12_0132
294,06_0032
295,06_0055


In [13]:
# palette data with the default colour system (RGB): three normalised frames, one per channel
r, g, b = read_data(PALETTES, VALIDATE, ARTIST, DATA)

In [14]:
# normalised red channel of the PALETTES data
r

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0.000000,0.031698,0.063396,0.079245,0.110944,0.142642,0.174340,0.221887,0.237736,0.269434,0.301132,0.316981,0.348680,0.364529,0.380378,0.396227
1,0.000000,0.018841,0.037682,0.056523,0.094206,0.113047,0.131888,0.150729,0.188411,0.207252,0.263776,0.301458,0.357981,0.395663,0.433346,0.471028
2,0.000000,0.015758,0.047275,0.078791,0.126066,0.157583,0.189100,0.204858,0.236374,0.252133,0.283649,0.315166,0.346682,0.378199,0.393957,0.393957
3,0.000000,0.019488,0.019488,0.038977,0.058465,0.097442,0.116930,0.155907,0.194883,0.214372,0.253348,0.292325,0.350790,0.389767,0.448232,0.487208
4,0.071497,0.089371,0.107246,0.125120,0.142994,0.160868,0.178743,0.196617,0.214491,0.250240,0.268114,0.303862,0.321737,0.357485,0.393234,0.428982
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,0.015621,0.015621,0.031242,0.062485,0.109348,0.140591,0.187454,0.234318,0.249939,0.265560,0.281181,0.312424,0.359287,0.359287,0.390530,0.390530
293,0.014566,0.058265,0.116531,0.145664,0.160230,0.189363,0.189363,0.218496,0.247628,0.276761,0.305894,0.305894,0.305894,0.364159,0.364159,0.364159
294,0.014139,0.056557,0.098975,0.141393,0.169672,0.212090,0.212090,0.254508,0.254508,0.268647,0.296925,0.311065,0.325204,0.325204,0.353483,0.353483
295,0.062431,0.093647,0.124863,0.140471,0.140471,0.171686,0.187294,0.202902,0.234118,0.265334,0.296549,0.312157,0.343373,0.358981,0.358981,0.374589


In [15]:
# normalised green channel of the PALETTES data
g

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0.000000,0.031698,0.063396,0.079245,0.110944,0.142642,0.174340,0.221887,0.237736,0.269434,0.301132,0.316981,0.348680,0.364529,0.380378,0.396227
1,0.000000,0.018841,0.037682,0.056523,0.094206,0.113047,0.131888,0.150729,0.188411,0.207252,0.263776,0.301458,0.357981,0.395663,0.433346,0.471028
2,0.000000,0.015758,0.047275,0.078791,0.126066,0.157583,0.189100,0.204858,0.236374,0.252133,0.283649,0.315166,0.346682,0.378199,0.393957,0.393957
3,0.000000,0.019488,0.019488,0.038977,0.058465,0.097442,0.116930,0.155907,0.194883,0.214372,0.253348,0.292325,0.350790,0.389767,0.448232,0.487208
4,0.054384,0.072512,0.090640,0.126896,0.145024,0.163152,0.181280,0.199408,0.217536,0.235664,0.271919,0.290047,0.326303,0.362559,0.398815,0.435071
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,0.016418,0.032835,0.131342,0.065671,0.114924,0.164177,0.197013,0.246266,0.295519,0.131342,0.295519,0.328355,0.344772,0.377608,0.328355,0.410443
293,0.016595,0.066381,0.116168,0.149358,0.165954,0.215740,0.215740,0.248931,0.282121,0.215740,0.016595,0.298717,0.331907,0.365098,0.381693,0.414884
294,0.000000,0.031442,0.078606,0.110049,0.141491,0.172934,0.204376,0.220097,0.298704,0.251540,0.267261,0.345867,0.298704,0.314425,0.393031,0.393031
295,0.032564,0.065129,0.097693,0.130258,0.146540,0.162822,0.179105,0.195387,0.227951,0.260516,0.293080,0.325645,0.341927,0.358209,0.374492,0.390774


In [16]:
# normalised blue channel of the PALETTES data
b

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0.000000,0.031698,0.063396,0.079245,0.110944,0.142642,0.174340,0.221887,0.237736,0.269434,0.301132,0.316981,0.348680,0.364529,0.380378,0.396227
1,0.000000,0.018841,0.037682,0.056523,0.094206,0.113047,0.131888,0.150729,0.188411,0.207252,0.263776,0.301458,0.357981,0.395663,0.433346,0.471028
2,0.000000,0.015758,0.047275,0.078791,0.126066,0.157583,0.189100,0.204858,0.236374,0.252133,0.283649,0.315166,0.346682,0.378199,0.393957,0.393957
3,0.000000,0.019488,0.019488,0.038977,0.058465,0.097442,0.116930,0.155907,0.194883,0.214372,0.253348,0.292325,0.350790,0.389767,0.448232,0.487208
4,0.072192,0.090240,0.108288,0.126337,0.144385,0.162433,0.180481,0.198529,0.216577,0.234625,0.270721,0.288769,0.324865,0.360961,0.397058,0.433154
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,0.017145,0.154303,0.085724,0.051434,0.120014,0.171448,0.222883,0.257172,0.394331,0.068579,0.308607,0.360041,0.308607,0.394331,0.034290,0.411476
293,0.037107,0.074214,0.148428,0.018554,0.185535,0.241196,0.315410,0.278303,0.352517,0.241196,0.000000,0.000000,0.389624,0.000000,0.389624,0.463839
294,0.000000,0.022033,0.044065,0.044065,0.066098,0.066098,0.176261,0.088131,0.330489,0.176261,0.088131,0.396587,0.154228,0.242359,0.506751,0.550816
295,0.036352,0.054527,0.072703,0.109055,0.145406,0.127231,0.163582,0.181758,0.218110,0.272637,0.308989,0.327165,0.345340,0.363516,0.381692,0.399868


In [17]:
# image tags stored with the PALETTES / GENRE validation file
read_data(PALETTES, VALIDATE, GENRE, IMAGE_TAG)

Unnamed: 0,image_tag
0,09_0025
1,09_0044
2,09_0056
3,09_0070
4,09_0088
...,...
292,12_0119
293,12_0132
294,06_0032
295,06_0055


In [18]:
# the same palette data converted to HLS: three normalised frames (hue, luminance, saturation)
h, l, s = read_data(PALETTES, VALIDATE, ARTIST, DATA, HLS)

In [19]:
# normalised hue values of the PALETTES data
h

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.577350,0.577350,0.577350,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,0.000000,0.435617,0.281043,0.112417,0.000000,0.337252,0.449669,0.000000,0.417550,0.034590,0.000000,0.449669,0.067450,0.000000,0.087979,0.112417
293,0.398902,0.000000,0.498627,0.088645,0.000000,0.000000,0.398902,0.000000,0.398902,0.000000,0.004749,0.085479,0.498627,0.087758,0.049863,0.000000
294,0.000000,0.113850,0.204929,0.213468,0.227699,0.227699,0.243964,0.243964,0.426936,0.248399,0.261185,0.341549,0.256162,0.256162,0.341549,0.000000
295,0.000000,0.117192,0.175788,0.234383,0.351575,0.263681,0.234383,0.234383,0.234383,0.175788,0.175788,0.351575,0.234383,0.234383,0.351575,0.351575


In [20]:
# normalised luminance values of the PALETTES data
l

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0.000000,0.031698,0.063396,0.079245,0.110944,0.142642,0.174340,0.221887,0.237736,0.269434,0.301132,0.316981,0.348680,0.364529,0.380378,0.396227
1,0.000000,0.018841,0.037682,0.056523,0.094206,0.113047,0.131888,0.150729,0.188411,0.207252,0.263776,0.301458,0.357981,0.395663,0.433346,0.471028
2,0.000000,0.015758,0.047275,0.078791,0.126066,0.157583,0.189100,0.204858,0.236374,0.252133,0.283649,0.315166,0.346682,0.378199,0.393957,0.393957
3,0.000000,0.019488,0.019488,0.038977,0.058465,0.097442,0.116930,0.155907,0.194883,0.214372,0.253348,0.292325,0.350790,0.389767,0.448232,0.487208
4,0.063012,0.081015,0.099019,0.126024,0.144027,0.162031,0.180034,0.198038,0.216041,0.243046,0.270051,0.297056,0.324061,0.360068,0.396075,0.432082
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,0.016783,0.083915,0.083915,0.058741,0.117481,0.159439,0.209788,0.251746,0.327269,0.176222,0.302095,0.344052,0.344052,0.386010,0.226571,0.411184
293,0.025858,0.068955,0.129291,0.094813,0.181008,0.224105,0.258582,0.258582,0.310299,0.275821,0.181008,0.181008,0.353396,0.215485,0.396493,0.430970
294,0.008845,0.044224,0.079602,0.106137,0.132671,0.159205,0.203428,0.194584,0.300720,0.238807,0.221118,0.353788,0.265341,0.300720,0.424546,0.442236
295,0.050450,0.075676,0.100901,0.126126,0.142943,0.151351,0.176577,0.193393,0.227027,0.269069,0.302703,0.319520,0.344745,0.361562,0.369970,0.386787


In [21]:
# normalised saturation values of the PALETTES data
s

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.667230,0.518957,0.424601,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.194609,0.000000,0.259478,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,0.000000,0.448852,0.336639,0.080152,0.000000,0.029530,0.022443,0.000000,0.327288,0.347326,0.000000,0.056107,0.280533,0.000000,0.537688,0.280533
293,0.155569,0.000000,0.031114,0.381852,0.022224,0.000000,0.088897,0.000000,0.062228,0.147381,0.466708,0.466708,0.046671,0.466708,0.373366,0.000000
294,0.415285,0.249171,0.230714,0.276857,0.249171,0.276857,0.126391,0.264272,0.097714,0.190339,0.282394,0.151013,0.316408,0.293142,0.276857,0.000000
295,0.332667,0.332667,0.332667,0.199600,0.058706,0.221778,0.142572,0.130174,0.124750,0.105053,0.133067,0.153539,0.299401,0.374251,0.285144,0.399201


In [22]:
# large palette data with the default colour system (RGB): three normalised frames, one per channel
r, g, b = read_data(LRG_PALETTES, VALIDATE, ARTIST, DATA)

In [23]:
# normalised red channel of the LRG_PALETTES data
r

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,86,87,88,89,90,91,92,93,94,95
0,0.000000,0.000000,0.001335,0.002003,0.003338,0.005340,0.007343,0.009346,0.011348,0.013351,...,0.156873,0.158875,0.160211,0.161546,0.163548,0.165551,0.167554,0.168889,0.168889,0.170224
1,0.000000,0.000752,0.001505,0.001505,0.003010,0.003762,0.006019,0.008277,0.009782,0.011286,...,0.176069,0.178326,0.181336,0.183593,0.185850,0.187355,0.188860,0.189613,0.190365,0.191117
2,0.000000,0.000682,0.001363,0.001363,0.002045,0.003408,0.004090,0.004771,0.005453,0.005453,...,0.166313,0.167676,0.168358,0.169039,0.169721,0.171084,0.171766,0.172447,0.173129,0.173129
3,0.000000,0.000000,0.000828,0.001657,0.002485,0.003313,0.004142,0.004970,0.005798,0.006627,...,0.182238,0.185552,0.188865,0.193007,0.197149,0.200462,0.204604,0.207089,0.208745,0.210402
4,0.020319,0.024834,0.027844,0.029349,0.030854,0.031607,0.031607,0.033112,0.034617,0.034617,...,0.161798,0.164055,0.167818,0.170828,0.174591,0.174591,0.179106,0.182117,0.185879,0.190395
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,0.000692,0.000692,0.001383,0.001383,0.002075,0.003459,0.003459,0.006917,0.006917,0.007609,...,0.164629,0.166013,0.167396,0.168088,0.171546,0.171546,0.172238,0.172238,0.173621,0.175005
293,0.000628,0.000628,0.000628,0.001255,0.005022,0.005649,0.012554,0.016321,0.018831,0.020715,...,0.158185,0.158185,0.158812,0.158812,0.158812,0.159440,0.159440,0.159440,0.159440,0.159440
294,0.000589,0.005893,0.010608,0.016501,0.018269,0.024163,0.025931,0.031235,0.034182,0.034771,...,0.142030,0.142030,0.143209,0.143799,0.145567,0.145567,0.147924,0.149103,0.149103,0.149692
295,0.013233,0.019188,0.025142,0.026466,0.027127,0.031759,0.034405,0.035729,0.037714,0.039698,...,0.150854,0.152177,0.152839,0.154824,0.156147,0.158132,0.159456,0.160117,0.160779,0.162102


In [24]:
# normalised green channel of the LRG_PALETTES data
g

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,86,87,88,89,90,91,92,93,94,95
0,0.000000,0.000000,0.001335,0.002003,0.003338,0.005340,0.007343,0.009346,0.011348,0.013351,...,0.156873,0.158875,0.160211,0.161546,0.163548,0.165551,0.167554,0.168889,0.168889,0.170224
1,0.000000,0.000000,0.000754,0.002263,0.003018,0.003772,0.005281,0.008298,0.009807,0.011316,...,0.175774,0.178792,0.181055,0.183318,0.186336,0.187845,0.189354,0.190862,0.190862,0.191617
2,0.000000,0.000682,0.001363,0.001363,0.002045,0.003408,0.004090,0.004771,0.005453,0.005453,...,0.166313,0.167676,0.168358,0.169039,0.169721,0.171084,0.171766,0.172447,0.173129,0.173129
3,0.000000,0.000000,0.000828,0.001657,0.002485,0.003313,0.004142,0.004970,0.005798,0.006627,...,0.182238,0.185552,0.188865,0.193007,0.197149,0.200462,0.204604,0.207089,0.208745,0.210402
4,0.014515,0.020626,0.022918,0.025210,0.022918,0.025974,0.028265,0.029029,0.027501,0.030557,...,0.164245,0.166537,0.169592,0.173412,0.177232,0.182579,0.181051,0.185635,0.190218,0.194038
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,0.001457,0.016026,0.001457,0.002185,0.004371,0.005099,0.044436,0.013841,0.024768,0.038608,...,0.174101,0.171916,0.163175,0.154433,0.157347,0.174830,0.148605,0.179929,0.152976,0.184300
293,0.000696,0.001392,0.003480,0.013921,0.008353,0.006264,0.011833,0.017401,0.022274,0.024362,...,0.157308,0.167053,0.151739,0.164268,0.170533,0.151739,0.164964,0.171925,0.176797,0.176797
294,0.000000,0.002046,0.006138,0.005456,0.012276,0.017733,0.008184,0.025235,0.034101,0.017051,...,0.138451,0.158230,0.149364,0.157548,0.149364,0.165732,0.171189,0.173235,0.173235,0.173235
295,0.009000,0.014538,0.020077,0.015231,0.024231,0.024923,0.020769,0.030461,0.026307,0.035307,...,0.152998,0.154383,0.156460,0.157152,0.159229,0.160614,0.161998,0.161998,0.163383,0.164768


In [25]:
# normalised blue channel of the LRG_PALETTES data
b

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,86,87,88,89,90,91,92,93,94,95
0,0.000000,0.000000,0.001335,0.002003,0.003338,0.005340,0.007343,0.009346,0.011348,0.013351,...,0.156873,0.158875,0.160211,0.161546,0.163548,0.165551,0.167554,0.168889,0.168889,0.170224
1,0.000000,0.000000,0.000754,0.001508,0.002262,0.003769,0.006031,0.008293,0.009801,0.011308,...,0.176409,0.178671,0.180933,0.183948,0.186210,0.187718,0.189225,0.189979,0.190733,0.191487
2,0.000000,0.000682,0.001363,0.001363,0.002045,0.003408,0.004090,0.004771,0.005453,0.005453,...,0.166313,0.167676,0.168358,0.169039,0.169721,0.171084,0.171766,0.172447,0.173129,0.173129
3,0.000000,0.000000,0.000828,0.001657,0.002485,0.003313,0.004142,0.004970,0.005798,0.006627,...,0.182238,0.185552,0.188865,0.193007,0.197149,0.200462,0.204604,0.207089,0.208745,0.210402
4,0.016054,0.022170,0.025228,0.026756,0.025992,0.028285,0.029814,0.030579,0.030579,0.032108,...,0.163597,0.165890,0.168948,0.172006,0.175064,0.181179,0.179651,0.183473,0.187295,0.192647
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,0.001445,0.002167,0.020224,0.036115,0.068618,0.052727,0.028892,0.087398,0.012279,0.007223,...,0.172628,0.163961,0.138680,0.124235,0.070785,0.150237,0.002167,0.170461,0.028169,0.181296
293,0.000711,0.023463,0.057590,0.097406,0.040526,0.006399,0.011376,0.019197,0.034839,0.007110,...,0.164239,0.175615,0.000711,0.141487,0.149308,0.155707,0.001422,0.179170,0.161395,0.180592
294,0.002064,0.004127,0.007223,0.006191,0.011350,0.014446,0.007223,0.016510,0.025796,0.011350,...,0.084612,0.204306,0.141363,0.183669,0.110408,0.221847,0.235261,0.246612,0.255898,0.262089
295,0.008655,0.013377,0.017311,0.012590,0.023606,0.020458,0.015737,0.026753,0.020458,0.031474,...,0.159733,0.160520,0.162093,0.162880,0.164454,0.166815,0.169175,0.167601,0.170749,0.172323


In [26]:
# large palette data requested as RAW: the stored r, g, b values without normalisation
r, g, b = read_data(LRG_PALETTES, VALIDATE, ARTIST, DATA, RAW)

In [27]:
# unnormalised red channel of the LRG_PALETTES data
r

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,86,87,88,89,90,91,92,93,94,95
0,0,0,2,3,5,8,11,14,17,20,...,235,238,240,242,245,248,251,253,253,255
1,0,1,2,2,4,5,8,11,13,15,...,234,237,241,244,247,249,251,252,253,254
2,0,1,2,2,3,5,6,7,8,8,...,244,246,247,248,249,251,252,253,254,254
3,0,0,1,2,3,4,5,6,7,8,...,220,224,228,233,238,242,247,250,252,254
4,27,33,37,39,41,42,42,44,46,46,...,215,218,223,227,232,232,238,242,247,253
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,1,1,2,2,3,5,5,10,10,11,...,238,240,242,243,248,248,249,249,251,253
293,1,1,1,2,8,9,20,26,30,33,...,252,252,253,253,253,254,254,254,254,254
294,1,10,18,28,31,41,44,53,58,59,...,241,241,243,244,247,247,251,253,253,254
295,20,29,38,40,41,48,52,54,57,60,...,228,230,231,234,236,239,241,242,243,245


In [28]:
# unnormalised green channel of the LRG_PALETTES data
g

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,86,87,88,89,90,91,92,93,94,95
0,0,0,2,3,5,8,11,14,17,20,...,235,238,240,242,245,248,251,253,253,255
1,0,0,1,3,4,5,7,11,13,15,...,233,237,240,243,247,249,251,253,253,254
2,0,1,2,2,3,5,6,7,8,8,...,244,246,247,248,249,251,252,253,254,254
3,0,0,1,2,3,4,5,6,7,8,...,220,224,228,233,238,242,247,250,252,254
4,19,27,30,33,30,34,37,38,36,40,...,215,218,222,227,232,239,237,243,249,254
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,2,22,2,3,6,7,61,19,34,53,...,239,236,224,212,216,240,204,247,210,253
293,1,2,5,20,12,9,17,25,32,35,...,226,240,218,236,245,218,237,247,254,254
294,0,3,9,8,18,26,12,37,50,25,...,203,232,219,231,219,243,251,254,254,254
295,13,21,29,22,35,36,30,44,38,51,...,221,223,226,227,230,232,234,234,236,238


In [29]:
# unnormalised blue channel of the LRG_PALETTES data
b

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,86,87,88,89,90,91,92,93,94,95
0,0,0,2,3,5,8,11,14,17,20,...,235,238,240,242,245,248,251,253,253,255
1,0,0,1,2,3,5,8,11,13,15,...,234,237,240,244,247,249,251,252,253,254
2,0,1,2,2,3,5,6,7,8,8,...,244,246,247,248,249,251,252,253,254,254
3,0,0,1,2,3,4,5,6,7,8,...,220,224,228,233,238,242,247,250,252,254
4,21,29,33,35,34,37,39,40,40,42,...,214,217,221,225,229,237,235,240,245,252
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,2,3,28,50,95,73,40,121,17,10,...,239,227,192,172,98,208,3,236,39,251
293,1,33,81,137,57,9,16,27,49,10,...,231,247,1,199,210,219,2,252,227,254
294,2,4,7,6,11,14,7,16,25,11,...,82,198,137,178,107,215,228,239,248,254
295,11,17,22,16,30,26,20,34,26,40,...,203,204,206,207,209,212,215,213,217,219
