In [43]:
from statistics import mean
from PIL import Image
import sys
import os
import logging


# Route log records to stderr and let every level from DEBUG upward through.
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

# mute all logging
# logging.disable(CRITICAL) suppresses records at CRITICAL severity and below for the
# whole process, so the logging.info progress messages in this notebook are not shown
# while this call is in effect.
logging.disable(logging.CRITICAL)

class ColorCube(object):
    """Bounding box around a list of colors, as used by the median cut algorithm.

    Each color must be indexable with at least three entries, read as
    (red, green, blue) at positions 0, 1 and 2.

    Attributes set by ``__init__``:
        colors       the list that was passed in, or ``[]`` when it was falsy
        red/green/blue  per-channel value lists, in the same order as ``colors``
        size         (red range, green range, blue range); (0, 0, 0) when empty
        max_range    the largest of the three ranges
        max_channel  index (0, 1 or 2) of the channel with the largest range;
                     the first such channel wins on ties
    """

    def __init__(self, colors):
        self.colors = colors or []
        # Build the channel lists from self.colors (not the raw argument) so that
        # None and empty input are handled the same way.
        self.red = [c[0] for c in self.colors]
        self.green = [c[1] for c in self.colors]
        self.blue = [c[2] for c in self.colors]
        if self.colors:
            self.size = (max(self.red) - min(self.red),
                         max(self.green) - min(self.green),
                         max(self.blue) - min(self.blue))
        else:
            # max()/min() raise on an empty sequence; an empty cube has no extent.
            self.size = (0, 0, 0)
        self.max_range = max(self.size)
        self.max_channel = self.size.index(self.max_range)

    def average(self):
        """Return the per-channel mean of the cube as an ``(r, g, b)`` tuple of ints.

        Means are truncated with ``int()``. Raises ``statistics.StatisticsError``
        when the cube holds no colors.
        """
        logging.info('Averaging cube with {} colors'.format(len(self.colors)))
        r = int(mean(self.red))
        g = int(mean(self.green))
        b = int(mean(self.blue))
        return r, g, b

    def split(self):
        """Split the cube at the median of its widest channel.

        Returns a ``(lower, upper)`` pair of new cubes. The colors are sorted by
        the widest channel; ``lower`` gets the first ``len(colors) // 2`` of them
        and ``upper`` gets the rest, so ``lower`` is empty when the cube holds
        fewer than two colors.
        """
        middle = len(self.colors) // 2
        colors = sorted(self.colors, key=lambda c: c[self.max_channel])
        return ColorCube(colors[:middle]), ColorCube(colors[middle:])

    def __lt__(self, other):
        # Cubes order by their widest channel range, so list.sort() puts the cube
        # with the largest range last.
        return self.max_range < other.max_range


def median_cut(img, num_colors, unique=False):
    """Reduce an image to a palette of averaged colors using median cut.

    Args:
        img: image object exposing ``width``, ``height`` and
            ``getcolors(maxcolors)``. The colors it yields must be indexable
            as (red, green, blue); ColorCube reads positions 0, 1 and 2.
        num_colors: desired palette size.
        unique: if true, multiple instances of a single RGB value are only
            counted once and the rest discarded. This is MUCH faster and creates
            a more diverse palette, but is not a "true" median cut.
            For example, if an image had 99 blue pixels (0,0,255) and a single
            red pixel (255,0,0) the respective median cuts would be
                unique = False: [(0,0,255),(5,0,249)]
                unique = True:  [(0,0,255),(255,0,0)]
            False would produce 2 almost identical shades of blue, True would
            result in pure blue/red.

    Returns:
        A list of ``(r, g, b)`` int tuples, one per cube. The list is shorter
        than ``num_colors`` when there are not enough colors to split that many
        times, and empty when the image yields no colors at all.
    """
    colors = []
    logging.info('Creating list of colors')
    # maxcolors is set to the pixel count so getcolors never gives up early.
    for color_count, color in img.getcolors(img.width * img.height):
        if unique:
            colors.append(color)
        else:
            colors.extend([color] * color_count)
    logging.info('Created list of {} colors'.format(len(colors)))
    if not colors:
        # Nothing to build a cube from.
        return []
    logging.info('Creating ColorCube')
    cubes = [ColorCube(colors)]
    logging.info('ColorCube created')

    while len(cubes) < num_colors:
        logging.info('Performing split {}/{}'.format(len(cubes), num_colors - 1))
        cubes.sort()
        # Split the widest cube that still holds at least two colors. A cube with
        # a single color would split into an empty cube, which cannot be averaged.
        splittable = [i for i, cube in enumerate(cubes) if len(cube.colors) > 1]
        if not splittable:
            logging.info('No cube left to split; stopping at {} colors'.format(len(cubes)))
            break
        cubes += cubes.pop(splittable[-1]).split()

    return [c.average() for c in cubes]


def show_median_cut(cuts):
    """Display a palette as a single row of 100x100 swatches via ``Image.show()``.

    ``cuts`` is a sequence of colors accepted by ``Image.new('RGB', size, color)``,
    e.g. the list returned by ``median_cut``.
    """
    side = 100

    # Background strip wide enough for one square per color
    strip = Image.new('RGB', (side * len(cuts), side))

    # Paint the squares left to right
    for position, rgb in enumerate(cuts):
        tile = Image.new('RGB', (side, side), rgb)
        strip.paste(tile, (side * position, 0))

    strip.show()


def merge_palette(img, palette):
    """Return a new RGB image: ``img`` on top, the palette as a swatch strip below.

    Args:
        img: source image (read for ``width`` and ``height``, then pasted).
        palette: sequence of colors accepted by ``Image.new('RGB', size, color)``.

    Returns:
        A new image of size ``(img.width, img.height + strip_height)`` where the
        strip height is ``max(100, img.width / len(palette))`` truncated to int.
        With an empty palette there is nothing to draw, so an RGB copy of
        ``img`` is returned without a strip.
    """
    num_colors = len(palette)
    if num_colors == 0:
        # Avoid dividing by zero below.
        return img.convert('RGB')

    color_height = int(max(100, img.width / num_colors))
    color_y = img.height

    # Create a new image to paste the original in and all the colors of the palette
    merged = Image.new('RGB', (img.width, img.height + color_height))
    # Add in the original image
    merged.paste(img)

    # When the image width is not divisible by the number of palette colors, equal
    # integer widths would leave extra columns of background color on the right.
    # Instead each swatch spans from its own integer boundary to the next one, so
    # some swatches are 1px wider than others and the strip is filled edge to edge.
    for index, color in enumerate(palette):
        left = index * img.width // num_colors
        right = (index + 1) * img.width // num_colors
        if right == left:
            # More colors than pixel columns: this swatch has no room.
            continue
        swatch = Image.new('RGB', (right - left, color_height), color)
        merged.paste(swatch, (left, color_y))

    return merged


In [44]:
from PIL import Image
import os
import pandas as pd

# Palette sizes computed for every image; get_cut returns one palette per entry, in this order.
cut_size = [2, 4, 8]

def get_cut(img_path):
    """Compute the median-cut palettes of one image file.

    The image is converted to RGB and resized to 256x256 before the palettes
    are extracted. Converting first means grayscale, palette-indexed and CMYK
    files give ``median_cut`` the (r, g, b) tuples it expects.

    Args:
        img_path: path of the image file to open.

    Returns:
        A list with one palette per entry of ``cut_size``, in the same order;
        each palette is the list returned by ``median_cut``.

    Raises:
        Whatever ``Image.open`` raises for a missing or unreadable file.
    """
    # The context manager closes the underlying file once the pixels are loaded;
    # convert() and resize() both return independent in-memory images.
    with Image.open(img_path) as source:
        # resize to 256x256
        img = source.convert('RGB').resize((256, 256))
    return [median_cut(img, n) for n in cut_size]

# for each png in ./painting, call `median_cut(image)`
# save all to "painting_cut.csv" format: ObjectID, cut_2, cut_4, cut_8

def get_cut_all(folder_path, csv_name):
    """Compute palettes for every entry of a folder and save them as a CSV.

    Prints the number of directory entries, then a running count after every
    50 images processed successfully. Entries that cannot be processed (for
    example non-image files) are reported together with the error and skipped.

    Args:
        folder_path: directory whose entries are passed to ``get_cut``.
        csv_name: output file name without extension; ``.csv`` is appended.

    The CSV has an ``ObjectID`` column holding the directory entry name,
    followed by one ``cut_<n>`` column per entry of ``cut_size``.
    """
    entries = os.listdir(folder_path)
    print(f"=={len(entries)}==")

    all_cuts = []
    for entry in entries:
        try:
            cuts = get_cut(os.path.join(folder_path, entry))
        except Exception as exc:
            # Exception (not a bare except) so Ctrl-C / kernel interrupt still stops
            # a long run; the error is shown so skipped files can be diagnosed.
            print(f"{entry}: {exc!r}")
            continue
        all_cuts.append([entry] + cuts)
        if len(all_cuts) % 50 == 0:
            print(len(all_cuts))

    # Derive the column names from cut_size so they cannot drift from get_cut's output.
    columns = ['ObjectID'] + [f'cut_{n}' for n in cut_size]
    df = pd.DataFrame(all_cuts, columns=columns)
    df.to_csv(f'{csv_name}.csv', index=False)



In [45]:
WIKIART_PATH = "/Volumes/T7/wikiart/wikiart"

# Keep only the entries of WIKIART_PATH that are directories
folders = [
    entry
    for entry in os.listdir(WIKIART_PATH)
    if os.path.isdir(os.path.join(WIKIART_PATH, entry))
]


In [46]:
# Show the directory names found under WIKIART_PATH
folders

['New_Realism',
 'Art_Nouveau_Modern',
 'Pointillism',
 'Northern_Renaissance',
 'Pop_Art',
 'Cubism',
 'Minimalism',
 'Symbolism',
 'Synthetic_Cubism',
 'Fauvism',
 'Impressionism',
 'Romanticism',
 'Ukiyo_e',
 'Action_painting',
 'Naive_Art_Primitivism',
 'Expressionism',
 'Abstract_Expressionism',
 'Early_Renaissance',
 'Rococo',
 'Post_Impressionism',
 'Analytical_Cubism',
 'Realism',
 'Mannerism_Late_Renaissance',
 'Baroque',
 'Contemporary_Realism',
 'High_Renaissance',
 'Color_Field_Painting']

In [47]:
# Process every folder in turn. Each call writes "<folder name>_cut.csv" using a
# relative path, so the files land in the current working directory.
for f in folders:
    folder_path = os.path.join(WIKIART_PATH, f)
    csv_name = f + "_cut"  # get_cut_all appends the ".csv" extension itself
    get_cut_all(folder_path, csv_name)

==1228==
50
100
classes 2.csv
150
200
250
300
350
400
450
500
550
600
classes.csv
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
==4987==
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500
1550
1600
1650
1700
1750
1800
1850
1900
1950
2000
2050
2100
2150
2200
2250
2300
2350
2400
2450
classes.csv
2500
2550
2600
2650
2700
2750
2800
2850
2900
2950
3000
3050
3100
3150
3200
3250
3300
3350
3400
3450
3500
3550
3600
3650
3700
3750
3800
3850
3900
3950
4000
4050
4100
4150
4200
4250
4300
4350
4400
4450
4500
4550
4600
4650
4700
4750
4800
4850
4900
4950
==1205==
50
100
150
200
250
300
350
400
450
500
550
classes.csv
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
==3248==
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500
1550
1600
classes.csv
1650
1700
1750
1800
1850
1900
1950
2000
2050
2100
2150
2200
2250
2300
2350
2400
2450
25