In this notebook, I am going to extract the domain colors from each image, put them into a dataframe for modeling later.

In [1]:
import cv2
import numpy as np
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt
import os
from glob import glob
import colorsys

In [2]:
# make a function to get the top 4 color for each pic into a dataframe 

# take in 2 parameters, folder path and number of colors 
def color_df(folder_path,num_colors):
    
    # create an empty dataframe
    rgba_df=pd.DataFrame()
    # get all the picture path inside of the folder
    filenames = glob('{}/*'.format(folder_path))
    
    for filename in filenames:
        # read each image
        image = Image.open(filename)
        # Returns a converted copy of this image, map to a palette
        result = image.convert('P',palette=Image.ADAPTIVE,colors=num_colors)
        # convert the image to RGBA
        result = result.convert('RGBA')
        # get colors
        main_colors = result.getcolors()
        # delete the backround color, when the transparence is less than 100
        main_colors = [(count,(r,g,b,a)) for (count,(r,g,b,a)) in main_colors if a > 100]
        
        # take out count
        color_list=[]
        for i in range(0,len(main_colors)):
            color_list.append(main_colors[i][1])
         
        # make a df with rgba only
        df = pd.DataFrame(color_list,columns=['r','g','b','a'])
        
        # insert brand, year, season, num for each color
        df.insert(0,'brand',os.path.basename(filename).split('_')[0])
        df.insert(1,'year',os.path.basename(filename).split('_')[1])
        df.insert(2,'season',os.path.basename(filename).split('_')[2])
        df.insert(3,'num',os.path.basename(filename).split('_')[3])
        df['num']=df['num'].str.extract('(\d+)').astype(int)

        # concat with originial df    
        rgba_df=pd.concat([rgba_df,df])
     
        # from previous step, all the indexs are 1s, reset the index
        rgba_df.reset_index(drop=True,inplace=True)
        
    return rgba_df

In [3]:
folder_path='./selected_brands_2021_yolo_crop_rembg_resize/'
num_colors=5
df = color_df(folder_path,num_colors)

In [4]:
df.tail()

Unnamed: 0,brand,year,season,num,r,g,b,a
5972,isabel-marant,2021,spring,18,234,142,179,253
5973,chanel,2021,spring,8,206,171,151,253
5974,chanel,2021,spring,8,156,154,153,252
5975,chanel,2021,spring,8,90,90,92,253
5976,chanel,2021,spring,8,31,30,32,253


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5977 entries, 0 to 5976
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   brand   5977 non-null   object
 1   year    5977 non-null   object
 2   season  5977 non-null   object
 3   num     5977 non-null   int64 
 4   r       5977 non-null   int64 
 5   g       5977 non-null   int64 
 6   b       5977 non-null   int64 
 7   a       5977 non-null   int64 
dtypes: int64(5), object(3)
memory usage: 373.7+ KB


In [6]:
#df.to_csv('colors_some_brands_rgba_2021.csv',index=False)