# Extrator de paleta de cores

In [2]:
import os
from collections import Counter
from os import walk

import numpy as np
import pandas as pd
from matplotlib import image as img
from matplotlib import pyplot as plt
from scipy.cluster.vq import whiten
from sklearn.cluster import KMeans

## 1. Encontrar arquivos para extrair paleta

In [4]:
# Folder scanned (recursively) for paintings. Built with os.path.join so the
# separator matches the host OS instead of being hard-coded to backslashes.
# Point it at a parent folder (e.g. 'dataset_artistas') to scan more artists.
folderName = os.path.join('dataset_artistas', 'andy-warhol', '1960')

# Map each image's file name without extension to its full path.
fileList = {}
for paths, dirs, files in walk(folderName):
    for file in files:
        # Case-insensitive so '.JPG' files are picked up as well.
        if file.lower().endswith(".jpg"):
            fileList[os.path.splitext(file)[0]] = os.path.join(paths, file)

fileList


{'300220': 'dataset_artistas\\andy-warhol\\1960\\300220.jpg',
 '306859': 'dataset_artistas\\andy-warhol\\1960\\306859.jpg'}

## 2. Extrair paleta

In [6]:
# Measure how dominant each palette colour is
def dominaciaCores(labels):
    """Return the percentage of entries in ``labels`` carrying each label.

    The result is a list of percentages (0-100), ordered by ascending
    label value. Only labels that actually occur in ``labels`` contribute
    an entry.
    """
    total = len(labels)
    occurrences = Counter(labels)
    return [occurrences[label] / total * 100.0 for label in sorted(occurrences)]

In [7]:
def extratorPaleta(filePath, n_clusters=5, random_state=0):
    """Extract a colour palette from an image file with k-means clustering.

    Parameters
    ----------
    filePath : str
        Path of the image, read with ``matplotlib.image.imread``.
    n_clusters : int, optional
        Number of palette colours (k-means clusters). Defaults to 5.
    random_state : int, optional
        Seed forwarded to ``KMeans`` so results are repeatable. Defaults to 0.

    Returns
    -------
    tuple
        ``(colors, dominacia)`` where ``colors`` is a list of ``(r, g, b)``
        integer tuples, one per cluster centre, and ``dominacia`` is the list
        of percentages produced by ``dominaciaCores`` for the cluster labels.

    Notes
    -----
    Assumes channel values are on a 0-255 scale (as for the ``.jpg`` files
    this notebook selects) -- TODO confirm before feeding other formats.
    """
    # Read the image file
    image = img.imread(filePath)

    # A grayscale image comes back 2-D; replicate it into three channels so
    # the RGB slicing below does not raise IndexError.
    if image.ndim == 2:
        image = np.stack([image, image, image], axis=-1)

    # One row per pixel, columns r/g/b (any extra channel such as alpha is ignored)
    pixels = image[:, :, :3].reshape(-1, 3).astype(float)

    # Scale every channel to unit variance (what scipy's ``whiten`` does).
    # The scale is kept so the centroids can be mapped back with exactly the
    # same factor; a constant channel (std == 0) is left unscaled instead of
    # being collapsed to 0 when un-whitening.
    channel_std = pixels.std(axis=0)
    channel_std[channel_std == 0] = 1.0
    whitened = pixels / channel_std

    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state).fit(whitened)

    # Map the cluster centres back to the original colour scale and round to
    # the nearest integer channel value.
    colors = [
        tuple(int(round(float(value))) for value in center * channel_std)
        for center in kmeans.cluster_centers_
    ]

    # Percentage of pixels assigned to each cluster.
    # NOTE(review): dominaciaCores only reports labels that occur, so the
    # entries line up with ``colors`` only if every cluster received a pixel.
    dominacia = dominaciaCores(kmeans.labels_)

    return (colors, dominacia)


In [9]:
# Extract the colour palette of every painting listed in fileList.
# Each row is [contentId, palette RGB tuples, palette dominance percentages].
# A comprehension keeps loop variables out of the notebook namespace and
# avoids binding a name such as `id`, which would shadow the builtin.
dadosCores = [
    [contentId, *extratorPaleta(path)]
    for contentId, path in fileList.items()
]


Unnamed: 0,contentId,paleta_rgb,paleta_dominancia
0,300220,"[(236, 228, 209), (210, 178, 131), (102, 96, 7...","[78.06201550387597, 5.710594315245478, 4.37168..."
1,306859,"[(238, 232, 204), (84, 62, 43), (204, 202, 184...","[58.225507920110196, 9.573379390495868, 17.659..."


## 3. Salvar em arquivo

In [None]:
# Build the palette table (one row per entry of dadosCores) and write it to
# CSV without the index column.
dfPaleta = pd.DataFrame(
    data=dadosCores,
    columns=['contentId', 'paleta_rgb', 'paleta_dominancia'],
)
dfPaleta.to_csv(path_or_buf='paleta.csv', index=False)

# Testes

In [21]:
# Load the per-artist metadata files, one DataFrame per artist.
dfList = [
    pd.read_json(jsonPath, orient='records')
    for jsonPath in (
        'dataset_artistas/andy-warhol.json',
        'dataset_artistas/frida-kahlo.json',
        'dataset_artistas/sandro-botticelli.json',
        'dataset_artistas/vincent-van-gogh.json',
        'dataset_artistas/pablo-picasso.json',
    )
]

# Stack them into a single frame with a fresh 0..n-1 index.
df = pd.concat(dfList, ignore_index=True)

# Show every row whose artistName occurs more than once.
duplicatedArtist = df.duplicated(['artistName'], keep=False)
df[duplicatedArtist]


Unnamed: 0,title,contentId,artistContentId,artistName,completitionYear,yearAsString,width,image,height,artistUrl,...,technique,sizeX,sizeY,diameter,auction,yearOfTrade,lastPrice,galleryName,tags,description
0,A Cat Named Sam,229254,229251,Warhol Andy,1954.0,1954,335,https://uploads7.wikiart.org/images/andy-warho...,448,andy-warhol,...,,,,,,,,,"animals, cats, Cat, Felidae, Fictional character",
1,Fairy and Christmas Ornaments,301919,229251,Warhol Andy,1955.0,1955,465,https://uploads8.wikiart.org/images/andy-warho...,600,andy-warhol,...,,,,,,,,,,
2,Cherub and Horse,300199,229251,Warhol Andy,1956.0,1956,570,https://uploads4.wikiart.org/images/andy-warho...,452,andy-warhol,...,,,,,,,,,,
3,Advertisement for I. Miller,9223372032559859394,229251,Warhol Andy,1958.0,1958,660,https://uploads3.wikiart.org/00313/images/endi...,479,andy-warhol,...,,,,,,,,,Text,
4,Shoe Advertisement for I.Miller (Harper's Bazzar),9223372032559859125,229251,Warhol Andy,1958.0,1958,660,https://uploads1.wikiart.org/00313/images/endi...,480,andy-warhol,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3560,Chateau Mouton Rothschild,9223372032559859135,223667,Picasso Pablo,1973.0,1973,463,https://uploads2.wikiart.org/00313/images/pabl...,706,pablo-picasso,...,,,,,,,,,"Text, Font",
3561,Bild,223917,223667,Picasso Pablo,,,796,https://uploads0.wikiart.org/images/pablo-pica...,1088,pablo-picasso,...,,,,,,,,,,
3562,Head of the man,224020,223667,Picasso Pablo,,,864,https://uploads0.wikiart.org/images/pablo-pica...,1088,pablo-picasso,...,,65.0,81.0,,,,,,male-portraits,
3563,Mule,224159,223667,Picasso Pablo,,,1088,https://uploads7.wikiart.org/images/pablo-pica...,657,pablo-picasso,...,,,,,,,,,"animals, mules, Tree, Bovine",
