In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.image as mpimg

from PIL import Image
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

#### Next, let’s import extcolors and rgb2hex libraries.

In [2]:
import cv2
import extcolors

from colormap import rgb2hex

#### Color extraction

* tolerance: group colors to limit the output and give a better visual representation. Based on a scale from 0 to 100. Where 0 won’t group any color and 100 will group all colors into one.
* limit: upper limit to the number of extracted colors presented in the output.

* We will first try to use tolerance=20 and limit the number of color codes output to 5 colors (limit=6).

#### Try the above approach on the first image on out data folder

In [15]:
colors_x = extcolors.extract_from_path("../data/Challenge_Data/Assets/002dbbd85ef3fe6a2e7d0754fb9f9a1a/_preview.png", tolerance = 20, limit = 5)
colors_x

([((255, 255, 255), 26783),
  ((137, 134, 134), 18275),
  ((198, 196, 196), 2551),
  ((237, 231, 196), 174)],
 300000)

#### Covert colors to pandas.Dataframe

In [3]:
import sys,os
sys.path.append(os.path.abspath(os.path.join('../scripts')))
from feature_extraction_pipeline import *

In [17]:
df_color = color_to_df(colors_x)
df_color

Unnamed: 0,c_code,occurence
0,#FFFFFF,26783
1,#898686,18275
2,#C6C4C4,2551


In [18]:
colors_x = extcolors.extract_from_path("../data/Challenge_Data/Assets/002dbbd85ef3fe6a2e7d0754fb9f9a1a/_preview.png", tolerance = 12, limit = 12)
colors_x

([((255, 255, 255), 22135),
  ((137, 134, 134), 13528),
  ((212, 211, 211), 6954),
  ((174, 172, 172), 1920),
  ((101, 97, 97), 1167),
  ((250, 245, 208), 1075),
  ((187, 181, 154), 965),
  ((155, 149, 127), 39)],
 300000)

In [19]:
df_color = color_to_df(colors_x)
df_color

Unnamed: 0,c_code,occurence
0,#FFFFFF,22135
1,#898686,13528
2,#D4D3D3,6954
3,#AEACAC,1920
4,#656161,1167
5,#FAF5D0,1075
6,#BBB59A,965


* Now we observed and compared between the above two options, we will go with the tolerance=12 and limit the number of color codes output to 10 colors (limit=12).

#### Loop through images on each directory and create a feature extraction csv file for each asset

In [4]:
rootdir = '../data/Challenge_Data/Assets/'
max_dir=10
dir_count=0

max_loop=1
loop=0
p = []
for subdir, dirs, files in os.walk(rootdir):
    # print(subdir)
    # print(dirs)
    # print(files)
    loop+=1
    # p.append(len(dirs))
    if loop > max_loop:
        break # In order just to complete only one walkthrough
    for dir in dirs:
        dir_count+=1
        if dir_count > max_dir:
            break
        dir_path = os.path.join(subdir, dir)
        color_dict = {}
        for _,d, contents in os.walk(dir_path):
            print(len(contents))
            # p.append(contents)
            for content in contents:
                # Loop through all the contents 
                # For our case (to get the dominant colors we will just loop through the images files (.png files))
                if (str(content))[-3:] == "png":
                    # p.append(content)
                    colors_x = extcolors.extract_from_path(f"{dir_path}/{content}", tolerance = 12, limit = 12)
                    df_color = color_to_df(colors_x)
                    color_dict[content] = {
                        'colors':[],
                        'values':[]
                    }
                    # print(df_color.shape)
                    for i,row in df_color.iterrows():
                        if i >=5:
                            break
                        color_dict[content]['colors'].append(row['c_code'])
                        color_dict[content]['values'].append(row['occurence'])
                    # print(colors_x)
                    # break
                # print(content)
            break
        # print(color_dict)
        # print(os.path.join(subdir, dir))
        final_dict = return_sorted_dominant_colors(color_dict) #We grab the top 5 dominant color for that particular creative ad.
        print(final_dict)
        df = convert_color_dict_to_pandas_df(dir,final_dict) 
        """
        for game_id = ed3071a667a11cc56e88ae0489bfe6aa
        {
            '#010101': '502997',
            '#5D0600': '85037',
            '#5D0200': '73722', 
            '#E42000': '66394', 
            '#FE5700': '58892'
        } -->> Top 5 dominant colors
        
        {
            'color_1': '#010101', 
            'color_1_occurance': '502997', 
            'color_2': '#5D0600', 
            'color_2_occurance': '85037', 
            'color_3': '#5D0200', 
            'color_3_occurance': '73722', 
            'color_4': '#E42000', 
            'color_4_occurance': '66394', 
            'color_5': '#FE5700', 'color_5_occurance': '58892', 
            'game_id': 'ed3071a667a11cc56e88ae0489bfe6aa'
        }-->> Top 5 dominant colors to be converted to pandas dataframe

         We the convert the the top 5 dominant colors in to pandas dataframe for each game_id
         *****FINAL DATAFRAME CONTENT******
         ,color_1,color_1_occurance,color_2,color_2_occurance,color_3,color_3_occurance,color_4,color_4_occurance,color_5,color_5_occurance,game_id
        0,#010101,502997,#5D0600,85037,#5D0200,73722,#E42000,66394,#FE5700,58892,ed3071a667a11cc56e88ae0489bfe6aa
        """
        df.to_csv(f"{dir_path}/dominant_color.csv")
        print(df.shape)
        break
print(loop)
print(p)

29
{'#010101': '502997', '#5D0600': '85037', '#5D0200': '73722', '#E42000': '66394', '#FE5700': '58892'}
(1, 11)
2
[]
