In [2]:
import pandas as pd
import pickle
from colormap import hex2rgb
import numpy as np
from collections import Counter, defaultdict
from tqdm.notebook import trange, tqdm
from sklearn.cluster import AgglomerativeClustering
import json

# Split into columns

In [3]:
df = pd.read_csv("../FinalDataSetCreation/FinalDataset_with_all.csv")
df = df.drop(["Unnamed: 0"],axis = 1)
#df.to_csv("neo4j_data.csv")

In [4]:
df.head()

Unnamed: 0,id,media,url,artist_name,artist_url,work_name,work_year,media_long,dimension,gallery_name,...,education,location,color1,color2,color3,color4,color_group1,color_group2,color_group3,color_group4
0,painting15,painting,https://www.artsy.net/artwork/hebru-brantley-d...,Hebru Brantley,https://www.artsy.net/artist/hebru-brantley,"Death Of The Black Fairy, The Great Debate Pt. 5",2020,Acrylic & Diamond Dust on Canvas,60 × 60 in | 152.4 × 152.4 cm,Corridor Contemporary,...,Clark Atlanta University,Chicago,#E7DFCC,#A69784,#403931,#877C76,69,24,192,246
1,prints369,prints,https://www.artsy.net/artwork/hebru-brantley-f...,Hebru Brantley,https://www.artsy.net/artist/hebru-brantley,Flower N Word,2022,"22-color Hand-Pulled Screen Print, Saunders Wa...",40 × 30 in | 101.6 × 76.2 cm,Pinto Gallery,...,Clark Atlanta University,Chicago,,,,,-1,-1,-1,-1
2,prints826,prints,https://www.artsy.net/artwork/hebru-brantley-m...,Hebru Brantley,https://www.artsy.net/artist/hebru-brantley,Monoprint,2017,Hand-Pulled Screen Print on Mohawk Superfine U...,24 × 18 in | 61 × 45.7 cm,MODCLAIR,...,Clark Atlanta University,Chicago,,,,,-1,-1,-1,-1
3,photography133,photography,https://www.artsy.net/artwork/hebru-brantley-n...,Hebru Brantley,https://www.artsy.net/artist/hebru-brantley,Night Flight,2021,Digital print with screen printed glow in the ...,29 × 36 in | 73.7 × 91.4 cm,Heritage: Urban Art,...,Clark Atlanta University,Chicago,,,,,-1,-1,-1,-1
4,painting37,painting,https://www.artsy.net/artwork/miaz-brothers-ma...,Miaz Brothers,https://www.artsy.net/artist/miaz-brothers,Maestro,2022,Acrylic on canvas,40 1/5 × 29 9/10 in | 102 × 76 cm,Maddox Gallery,...,European Institute of Design,Cyan,#181A15,#2F342E,#43492F,#B8A36C,121,280,147,12


In [5]:
with open('./all_artwork_palettes.pickle','rb') as pickle_file:
    content = pickle.load(pickle_file)

In [6]:
color_list = []
for id in content.keys():
    color_list.append([id]+list(content[id]['c_code'])[:4])

In [7]:
color_df = pd.DataFrame(color_list, columns = ['id', 'color1', 'color2','color3','color4'])
color_df.to_csv("ColorPaletteData.csv")

In [8]:
color_df

Unnamed: 0,id,color1,color2,color3,color4
0,photography560,#FFFFFF,#6E6F69,#DAD4BE,#9A968A
1,mixed-media1297,#151515,#9BC53F,#E5E5E3,#C74138
2,photography574,#F8F8F8,#949399,#D1D1D1,#68696E
3,prints1038,#FFFFFF,#C5963E,#E6E5A5,#E4B4B0
4,works-on-paper1080,#597739,#4F6942,#595731,#3B533D
...,...,...,...,...,...
2028,photography1214,#E0F3FA,#181913,#38424E,#575852
2029,painting534,#FAFAFA,#252324,#CEB000,#FFEA00
2030,prints1021,#EBECE6,#F5AC8C,#F05B33,#900A25
2031,works-on-paper977,#F6E6C2,#EBEFF0,#777753,#F9DF92


# Add color group

In [9]:
colors = color_df.iloc[:, -4:]
all_colors = [a for l in [colors[c].unique() for c in colors.columns] for a in l if isinstance(a, str)]
print("total number of hexcodes: ", len(all_colors))
all_colors = list(set(all_colors))
print("total number of unique hexcodes: ", len(all_colors))

total number of hexcodes:  6725
total number of unique hexcodes:  6230


In [10]:
colors.head()

Unnamed: 0,color1,color2,color3,color4
0,#FFFFFF,#6E6F69,#DAD4BE,#9A968A
1,#151515,#9BC53F,#E5E5E3,#C74138
2,#F8F8F8,#949399,#D1D1D1,#68696E
3,#FFFFFF,#C5963E,#E6E5A5,#E4B4B0
4,#597739,#4F6942,#595731,#3B533D


In [11]:
def dist(hex1, hex2):
    hex1 = hex1.lstrip("#")
    hex2 = hex2.lstrip("#")
    #get red/green/blue int values of hex1
    r1, g1, b1 = tuple(int(hex1[i:i+2], 16) for i in (0, 2, 4))
    # get red/green/blue int values of hex2
    r2, g2, b2 = tuple(int(hex2[i:i+2], 16) for i in (0, 2, 4))
    # calculate differences between reds, greens and blues
    r = 255 - abs(r1 - r2);
    g = 255 - abs(g1 - g2);
    b = 255 - abs(b1 - b2);
    # limit differences between 0 and 1
    r /= 255;
    g /= 255;
    b /= 255;
    # 0 means opposite colors, 1 means same colors
    return (r + g + b) / 3;


In [12]:
N = len(all_colors)

pdist = np.ones((N, N)) # initialize as ones because each color to itself should have similarity = 1

for i in tqdm(range(N)):
    for j in range(i + 1, N):
        if isinstance(all_colors[i], str) and isinstance(all_colors[j], str):
            pdist[i,j] = dist(all_colors[i], all_colors[j])
            pdist[j,i] = pdist[i,j]

  0%|          | 0/6230 [00:00<?, ?it/s]

In [13]:
clustering = AgglomerativeClustering(n_clusters=1000, affinity='precomputed', linkage='complete').fit(pdist)

In [14]:
color_groups = defaultdict(list)
for idx, label in enumerate(clustering.labels_):
    color_groups[label].append(all_colors[idx])

In [15]:
code_to_group = dict()
for group_id, codes in color_groups.items():
    for code in codes:
        code_to_group[code] = group_id

In [37]:
with open("../Final_Streamlit/data/hexcode_to_colorgroup.pickle", "wb") as f:
    pickle.dump(code_to_group, f)

In [17]:
groups = []
for _, c1, c2, c3, c4 in colors.itertuples():
    if isinstance(c1, str) and isinstance(c2, str) and isinstance(c3, str) and isinstance(c4, str):
        g1 = int(code_to_group[c1])
        g2 = int(code_to_group[c2])
        g3 = int(code_to_group[c3])
        g4 = int(code_to_group[c4])
        groups.append([g1, g2, g3, g4])
    else:
        groups.append([-1, -1, -1, -1])
    

In [18]:
color_group_df = pd.DataFrame(groups)
color_group_df.columns = ['color_group1', 'color_group2', 'color_group3', 'color_group4']

In [19]:
color_group_df

Unnamed: 0,color_group1,color_group2,color_group3,color_group4
0,965,325,549,739
1,649,754,339,31
2,164,430,53,281
3,965,118,199,208
4,749,646,445,317
...,...,...,...,...
2028,151,730,107,58
2029,833,441,178,537
2030,413,276,643,391
2031,317,42,465,518


In [20]:
color_df

Unnamed: 0,id,color1,color2,color3,color4
0,photography560,#FFFFFF,#6E6F69,#DAD4BE,#9A968A
1,mixed-media1297,#151515,#9BC53F,#E5E5E3,#C74138
2,photography574,#F8F8F8,#949399,#D1D1D1,#68696E
3,prints1038,#FFFFFF,#C5963E,#E6E5A5,#E4B4B0
4,works-on-paper1080,#597739,#4F6942,#595731,#3B533D
...,...,...,...,...,...
2028,photography1214,#E0F3FA,#181913,#38424E,#575852
2029,painting534,#FAFAFA,#252324,#CEB000,#FFEA00
2030,prints1021,#EBECE6,#F5AC8C,#F05B33,#900A25
2031,works-on-paper977,#F6E6C2,#EBEFF0,#777753,#F9DF92


In [21]:
code_to_group['#FFFFFF']

965

In [22]:
color_df = pd.concat([color_df, color_group_df], axis=1)

In [23]:
color_df.columns

Index(['id', 'color1', 'color2', 'color3', 'color4', 'color_group1',
       'color_group2', 'color_group3', 'color_group4'],
      dtype='object')

# Merge with full dataset 

In [25]:
df = df[['id', 'media', 'url', 'artist_name', 'artist_url', 'work_name',
       'work_year', 'media_long', 'dimension', 'gallery_name', 'gallery_url',
       'bid', 'price', 'img_url', 'full_bio', 'artist_website', 'artist_ins',
       'nationality', 'education', 'location']]

In [26]:
df = df.merge(color_df, how="left", on = "id", suffixes=('_y', ''))
df.drop(df.filter(regex='_y$').columns, axis=1, inplace=True)

In [27]:
df.columns

Index(['id', 'media', 'url', 'artist_name', 'artist_url', 'work_name',
       'work_year', 'media_long', 'dimension', 'gallery_name', 'gallery_url',
       'bid', 'price', 'img_url', 'full_bio', 'artist_website', 'artist_ins',
       'nationality', 'education', 'location', 'color1', 'color2', 'color3',
       'color4', 'color_group1', 'color_group2', 'color_group3',
       'color_group4'],
      dtype='object')

In [30]:
df.head()

Unnamed: 0,id,media,url,artist_name,artist_url,work_name,work_year,media_long,dimension,gallery_name,...,education,location,color1,color2,color3,color4,color_group1,color_group2,color_group3,color_group4
0,painting15,painting,https://www.artsy.net/artwork/hebru-brantley-d...,Hebru Brantley,https://www.artsy.net/artist/hebru-brantley,"Death Of The Black Fairy, The Great Debate Pt. 5",2020,Acrylic & Diamond Dust on Canvas,60 × 60 in | 152.4 × 152.4 cm,Corridor Contemporary,...,Clark Atlanta University,Chicago,#E7DFCC,#A69784,#403931,#877C76,682.0,850.0,37.0,772.0
1,prints369,prints,https://www.artsy.net/artwork/hebru-brantley-f...,Hebru Brantley,https://www.artsy.net/artist/hebru-brantley,Flower N Word,2022,"22-color Hand-Pulled Screen Print, Saunders Wa...",40 × 30 in | 101.6 × 76.2 cm,Pinto Gallery,...,Clark Atlanta University,Chicago,#FFFFFF,#FCE44C,#65C94D,#2E3432,965.0,50.0,579.0,858.0
2,prints826,prints,https://www.artsy.net/artwork/hebru-brantley-m...,Hebru Brantley,https://www.artsy.net/artist/hebru-brantley,Monoprint,2017,Hand-Pulled Screen Print on Mohawk Superfine U...,24 × 18 in | 61 × 45.7 cm,MODCLAIR,...,Clark Atlanta University,Chicago,#1F201B,#F4EDDB,#B1865C,#CBBBAB,128.0,570.0,656.0,580.0
3,photography133,photography,https://www.artsy.net/artwork/hebru-brantley-n...,Hebru Brantley,https://www.artsy.net/artist/hebru-brantley,Night Flight,2021,Digital print with screen printed glow in the ...,29 × 36 in | 73.7 × 91.4 cm,Heritage: Urban Art,...,Clark Atlanta University,Chicago,#161E21,#02855D,#1A5747,#6FBA9B,583.0,452.0,21.0,777.0
4,painting37,painting,https://www.artsy.net/artwork/miaz-brothers-ma...,Miaz Brothers,https://www.artsy.net/artist/miaz-brothers,Maestro,2022,Acrylic on canvas,40 1/5 × 29 9/10 in | 102 × 76 cm,Maddox Gallery,...,European Institute of Design,Cyan,#181A15,#2F342E,#43492F,#B8A36C,942.0,350.0,970.0,759.0


In [33]:
df.to_csv("../FinalDataSetCreation/FinalDataset_with_all.csv")

In [34]:
len(df)

2479