# Abordagem 1 - Extrator de paleta de cores - Algoritmo de clusterização

## 1. Encontrar arquivos para extrair paleta

In [None]:
# Root folder that is scanned recursively for paintings. It is built with
# os.path.join so the cell also works where the path separator is not a
# backslash. To process another artist or a single year, change the
# components, e.g. os.path.join('dataset_artistas', 'andy-warhol', '1960').
folderName = os.path.join('dataset_artistas', 'frida-kahlo')

# Collect one (image path, content id) tuple per .jpg file; the content id is
# the file name without its extension.
fileList = []
for paths, dirs, files in os.walk(folderName):
    for fileName in files:
        if fileName.endswith(".jpg"):
            # Named contentId (not `id`) so the builtin id() is not shadowed
            # at module level for the rest of the session.
            contentId = os.path.splitext(fileName)[0]
            fileList.append((os.path.join(paths, fileName), contentId))

fileList


## 2. Extrair paleta

In [None]:
def dominaciaCores(labels):
    """Return the share, in percent, of the samples carrying each label.

    The result is ordered by ascending label value: position ``k`` holds the
    percentage of the k-th smallest label that occurs in ``labels``. Labels
    that never occur have no entry.
    """
    contagem = Counter(labels)
    percentuais = {
        rotulo: quantidade / len(labels) * 100.0
        for rotulo, quantidade in contagem.items()
    }
    return [percentuais[rotulo] for rotulo in sorted(percentuais)]

In [None]:
def extratorPaleta(filePath, id):
    """Extract a 5-colour palette from an image using k-means clustering.

    Parameters
    ----------
    filePath : path of the image, read with ``img.imread``.
    id : identifier returned unchanged as the first element of the result.

    Returns
    -------
    list
        ``[id, colors, dominacia]``. ``colors`` is a list with one
        ``(r, g, b)`` tuple of ints per cluster centre, in the order of
        ``kmeans.cluster_centers_``. ``dominacia`` is the list produced by
        ``dominaciaCores`` for the cluster labels (percentage of pixels per
        cluster, ordered by label).
    """
    # Read the image file
    image = img.imread(filePath)

    # One row per pixel with the r, g, b channels as columns; any channel
    # after the third is not used.
    pixels = image[:, :, :3].reshape(-1, 3).astype(float)

    # Per-channel scale: the standard deviation over all pixels (numpy's
    # default, ddof=0), i.e. the same unit-variance scaling that
    # scipy.cluster.vq.whiten applies. The very same values are used below to
    # map the centroids back, so the round trip is exact. Previously the
    # centroids were rescaled with pandas' sample std (ddof=1), a different
    # factor from the one used for whitening.
    channel_std = pixels.std(axis=0)
    # A constant channel has std 0: leave it unscaled instead of dividing by
    # zero here and multiplying the centroid by zero afterwards.
    channel_std[channel_std == 0] = 1.0

    kmeans = KMeans(n_clusters=5, random_state=0).fit(pixels / channel_std)

    r_std, g_std, b_std = channel_std

    # Final palette colours: cluster centroids mapped back to channel units
    colors = [
        (int(sr * r_std), int(sg * g_std), int(sb * b_std))
        for sr, sg, sb in kmeans.cluster_centers_
    ]

    # Percentage of pixels that fall in each cluster
    dominacia = dominaciaCores(kmeans.labels_)

    return [id, colors, dominacia]


In [None]:
# Extract the colour palette of every painting listed in fileList; each entry
# of fileList is indexed as (file path, content id).
dadosCores = [extratorPaleta(item[0], item[1]) for item in fileList]

dadosCores


In [None]:
# Experiment: run the palette extraction in parallel worker processes
if __name__ == '__main__':

    PROCESSES = 4
    # The context manager shuts the pool down when the block exits, so the
    # worker processes are not left running. starmap blocks until every
    # result is available, so all results are collected before that happens.
    with mp.Pool(PROCESSES) as pool:
        dadosCores = pool.starmap(paleta_multiprocessing.extratorPaleta, fileList)
       

    # process_pool = mp.Pool(3)
    # data = [(1,3),(3,2),(4,6)]
    # output = process_pool.starmap(paleta_multiprocessing.f_sum,data)
    # output

## 3. Salvar em arquivo

In [None]:
# Tabulate the extracted palettes (one row per entry of dadosCores) and write
# the table to a CSV file without the index column.
colunasPaleta = ['contentId', 'paleta_rgb', 'paleta_dominancia']
dfPaleta = pd.DataFrame(data=dadosCores, columns=colunasPaleta)
dfPaleta.to_csv(path_or_buf='paleta.csv', index=False)

In [None]:
# Draw the palette stored at row 0 as a one-row image
paletaLinha0 = dfPaleta['paleta_rgb'][0]
plt.imshow([paletaLinha0])
plt.show()




In [None]:
# Draw the palette stored at row 86 as a one-row image
paletaLinha86 = dfPaleta['paleta_rgb'][86]
plt.imshow([paletaLinha86])
plt.show()


# Abordagem 2 - Classificar de acordo com paleta - Algoritmo de classificação

In [38]:
import os
from os.path import isfile, join
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from collections import Counter
from scipy.cluster.vq import whiten
from matplotlib import pyplot as plt
from matplotlib import image as img
import multiprocessing as mp
import paleta_multiprocessing
from colormap import rgb2hex
from colormap import hex2rgb


## 1. Definição da paleta

In [48]:
# Load the reference palette and add a 'hex' column computed row by row from
# the r, g and b columns.
paletaCores = pd.read_csv('paleta_cores.csv')
hexPorLinha = paletaCores.apply(
    lambda linha: rgb2hex(linha['r'], linha['g'], linha['b']),
    axis=1,
)
paletaCores['hex'] = hexPorLinha
paletaCores

Unnamed: 0,nome,r,g,b,hex
0,white,255,255,255,#FFFFFF
1,ivory,255,244,216,#FFF4D8
2,cream,255,241,182,#FFF1B6
3,lightyellowglaze,243,239,111,#F3EF6F
4,cadmiumyellowlemon,235,234,100,#EBEA64
...,...,...,...,...,...
115,Payne´sgrey,72,78,85,#484E55
116,black,5,5,5,#050505
117,silver,171,173,176,#ABADB0
118,gold,205,192,141,#CDC08D


## 2. Classificação de acordo com a paleta

In [49]:
# Palette-based classifier: fitted on the palette's r, g, b columns and
# configured to return a single neighbour per query.
coresPaleta = paletaCores[['r', 'g', 'b']]
neigh = NearestNeighbors(n_neighbors=1)
neigh.fit(coresPaleta)

NearestNeighbors(n_neighbors=1)

In [50]:
def calculoImagem(path, classificador):
    """Classify every pixel of the image at ``path`` with ``classificador``.

    The image is read with ``img.imread`` and its first three channels are
    laid out as one row per pixel with columns ``r``, ``g`` and ``b``.
    Returns the flattened result of ``classificador.kneighbors`` called with
    ``return_distance=False``.
    """
    image = img.imread(path)

    # Colour table: one row per pixel, one column per channel
    pixels = pd.DataFrame({
        'r': image[:, :, 0].flatten(),
        'g': image[:, :, 1].flatten(),
        'b': image[:, :, 2].flatten(),
    })

    vizinhos = classificador.kneighbors(pixels[['r', 'g', 'b']], return_distance=False)
    return vizinhos.flatten()

In [51]:
def calculaFreqCores(arrayFrequencia):
    """Count how often each palette index occurs in ``arrayFrequencia``.

    Returns a float array with ``len(paletaCores)`` entries; position ``i``
    holds the number of occurrences of the value ``i`` and positions that
    never occur stay at 0. Despite the local naming in earlier revisions, the
    values are raw counts, not normalised fractions.
    """
    indices, contagens = np.unique(arrayFrequencia, return_counts=True)

    frequencias = np.zeros(len(paletaCores))
    for indice, contagem in zip(indices, contagens):
        frequencias[indice] = contagem

    return frequencias


In [52]:
nomeArtista = 'frida-kahlo'
# os.path.join picks the separator of the running platform; the trailing ''
# keeps the trailing separator this variable has always carried.
pathArtista = os.path.join('dataset_artistas', nomeArtista, '')
# os.listdir returns entries in arbitrary order. Sort them so the folders
# (assumed to be named after years) are processed, and later plotted, in a
# stable ascending order.
anos = sorted(os.listdir(pathArtista))

# One vector of palette-colour counts per entry of `anos`, in the same order
freqAnos = []
for ano in anos:

    # Classify the pixels of every artwork file found in this year's folder
    pathAnos = join(pathArtista, ano)
    listaObras = [join(pathAnos, f) for f in os.listdir(pathAnos) if isfile(join(pathAnos, f))]

    classificoesAno = [calculoImagem(pathObra, neigh) for pathObra in listaObras]

    if classificoesAno:
        frequenciasCoresAno = np.concatenate(classificoesAno)
    else:
        # np.concatenate rejects an empty list; a folder without files
        # contributes an all-zero count vector instead of aborting the run.
        frequenciasCoresAno = np.array([], dtype=int)
    freqAnos.append(calculaFreqCores(frequenciasCoresAno))



In [53]:
# Join the per-year count vectors as columns: rows follow the positions of
# each vector, columns follow the order of freqAnos.
freqAnosArray = np.stack(freqAnos, axis=-1)
freqAnosArray.shape

(120, 28)

## 3. Visualizar a frequência de cores por ano

In [55]:
import plotly.graph_objects as go

In [57]:
# import plotly.express as px
# df = px.data.gapminder()
# fig = px.area(df, x="year", y="pop", color="continent",
# 	      line_group="country")
# fig.show()



x = anos
fig = go.Figure()

# One stacked-area trace per row of freqAnosArray, drawn in the palette colour
# that shares the row's index.
for index, frequenciasCor in enumerate(freqAnosArray):
    print(index, frequenciasCor)
    corHex = paletaCores['hex'][index]
    fig.add_trace(go.Scatter(
        x=x, y=frequenciasCor,
        mode='lines',
        # The legend is enabled below; without a name every entry would be
        # shown as a generic "trace N".
        name=paletaCores['nome'][index],
        line=dict(width=0.5, color=corHex),
        fillcolor=corHex,
        stackgroup='one',
        groupnorm='percent'  # sets the normalization for the sum of the stackgroup
    ))


fig.update_layout(
    showlegend=True,
    xaxis_type='category',
    yaxis=dict(
        type='linear',
        range=[1, 100],
        ticksuffix='%'))

fig.show()

0 [3.648500e+04 1.360600e+04 1.077100e+04 6.856000e+04 1.126900e+05
 3.190200e+04 1.487500e+04 7.273500e+04 4.857100e+04 1.340000e+02
 1.805550e+05 2.803520e+05 5.363080e+05 1.541410e+05 1.066510e+05
 1.772430e+05 1.450194e+06 3.475660e+05 2.683200e+04 2.925690e+05
 4.802200e+04 3.669200e+04 8.890000e+03 1.942000e+04 5.939800e+04
 1.466300e+05 5.370000e+03 2.987030e+05]
1 [2.50172e+05 5.98000e+02 3.77500e+03 4.62178e+05 4.51070e+04 1.15548e+05
 3.21930e+04 5.57790e+04 9.42800e+04 1.73800e+03 2.48895e+05 2.02288e+05
 3.60017e+05 5.95986e+05 6.62766e+05 1.03654e+05 1.88779e+05 8.17092e+05
 1.22070e+04 4.02123e+05 5.82130e+04 1.30669e+05 4.00520e+04 1.04650e+05
 1.05474e+05 5.24960e+04 1.08240e+04 1.29946e+05]
2 [  84546.   20154.   15391.  273331.    9643.   60190.   82295.  203624.
   18292.   15433.   72945.   69295.  507476.  110664.  238818.   12262.
   16057. 1280499.    2726.   53727.   99596.  228681.    8190.   68403.
  164957.   31761.    5466.   50391.]
3 [2.50000e+01 1.48380e+

# Testes

In [None]:
# Artist dataset under inspection. Earlier revisions of this cell also had
# (commented-out) reads of dataset_artistas/<artist>.json for andy-warhol,
# frida-kahlo, sandro-botticelli and vincent-van-gogh.
dfList = [pd.read_json('dataset_artistas/pablo-picasso.json', orient='records')]

df = pd.concat(dfList, ignore_index=True)

# Number of rows per 'completitionYear', leaving out the rows whose genre is
# 'sketch and study'
naoEhEstudo = df['genre'] != 'sketch and study'
df.loc[naoEhEstudo, 'completitionYear'].value_counts()



# df[df.duplicated(['artistName'], keep=False)]


In [None]:
# Artist information, read from the records-oriented JSON file
caminhoArtistas = 'dataset_artistas/artists.json'
dfListArtist = pd.read_json(caminhoArtistas, orient='records')
dfListArtist

