# 'Name this Color' App's Colors Data

This notebook demonstrates, step by step, how to collect the color data used in the app: https://www.dkisler.de/projects/shiny/name_the_color/ 

In [1]:
# libraries and functions
from io import StringIO

import pandas as pd
import requests as req
from bs4 import BeautifulSoup as bs

## color code converters rgb to hex and vice versa
def rgb2hex(r, g, b):
    """Return the lowercase ``#rrggbb`` hex code for an RGB triple.

    Every channel is limited to the 0..255 range before it is formatted as
    two hexadecimal digits, so out-of-range values saturate instead of
    producing a malformed code.
    """
    limited = (max(0, min(channel, 255)) for channel in (r, g, b))
    return "#" + "".join("{:02x}".format(value) for value in limited)

def hex2rgb(hexcode):
    """Convert a hex color code into its ``[r, g, b]`` channel values.

    The code may be given with or without the ``#`` marker. Surrounding
    whitespace is removed first: a leading blank would otherwise shift the
    two-character slices and silently yield wrong channel values, because
    ``int()`` tolerates whitespace. The three-digit shorthand (``#abc``) is
    expanded to its six-digit form (``#aabbcc``).

    Returns a list of three integers. Raises ``ValueError`` when a channel
    slice is empty or is not valid hexadecimal.
    """
    digits = hexcode.strip().replace('#', '')
    if len(digits) == 3:
        # shorthand notation: every digit stands for a doubled pair
        digits = ''.join(digit * 2 for digit in digits)
    return [int(digits[i:i + 2], 16) for i in (0, 2, 4)]

## Source 1

Getting the data from http://people.csail.mit.edu/jaffer/Color/resenecolours.txt 

In [2]:
# download the tab-separated Resene colour list, skipping the first 26 lines of the file
d = pd.read_csv(
    'http://people.csail.mit.edu/jaffer/Color/resenecolours.txt',
    sep='\t',
    skiprows=26,
)
# label the four columns: the colour name followed by its RGB channels
d.columns = ['color_name', 'r', 'g', 'b']
# drop the word "Resene " from every colour name
d['color_name'] = d['color_name'].map(lambda name: name.replace('Resene ', ''))
# derive the HEX colour code from the provided RGB channels
d['hex'] = [rgb2hex(*channels) for channels in zip(d['r'], d['g'], d['b'])]

In [3]:
# preview the first rows of the Resene data
d.head()

Unnamed: 0,color_name,r,g,b,hex
0,Abbey,76,79,86,#4c4f56
1,Acadia,27,20,4,#1b1404
2,Acapulco,124,176,161,#7cb0a1
3,Acorn,106,93,27,#6a5d1b
4,Aero Blue,201,255,229,#c9ffe5


## Source 2

Scraping the color data from Wikipedia: https://en.wikipedia.org/wiki/List_of_colors:_A%E2%80%93F

In [4]:
# the colors data are split alphabetically (by their name) into three pages on wiki
pages = [['A', 'F'], ['G', 'M'], ['N', 'Z']]
# one data frame per wiki page; they are combined into d1 after the loop
page_frames = []
for iPage in pages:
    url = 'https://en.wikipedia.org/wiki/List_of_colors:_{0}%E2%80%93{1}'.format(iPage[0], iPage[1])
    # fetch the page; the timeout (seconds) keeps the cell from hanging forever and
    # raise_for_status stops us from parsing an HTTP error page as if it were data
    response = req.get(url, timeout=30)
    response.raise_for_status()
    # locate the colors table; attrs must be a dict (attribute name -> value)
    page = bs(response.content, 'html.parser').find('table', {'class': 'wikitable sortable'})
    if page is None:
        raise ValueError('no colors table found at {0}'.format(url))
    # extract color names and the hex; the markup is wrapped in StringIO because
    # passing literal HTML strings to read_html is deprecated in recent pandas
    df = pd.read_html(StringIO(str(page)))[0]
    # skip the first parsed row and keep only the first two columns
    df = df.iloc[1:, [0, 1]].reset_index(drop=True)
    df.columns = ['color_name', 'hex']
    # convert hex to rgb
    rgb = pd.DataFrame.from_records([hex2rgb(x) for x in df['hex']], columns=['r', 'g', 'b'])
    # concat hex and rgb columns
    df = pd.concat([df, rgb], axis=1)
    page_frames.append(df)
# append three data frames
d1 = pd.concat(page_frames, axis=0)

In [5]:
# preview the first rows of the scraped wiki data
d1.head()

Unnamed: 0,color_name,hex,r,g,b
0,Acid green,#B0BF1A,176,191,26
1,Aero,#7CB9E8,124,185,232
2,Aero blue,#C9FFE5,201,255,229
3,African violet,#B284BE,178,132,190
4,Air Force blue (RAF),#5D8AA8,93,138,168


## Concat the data from both sources

In [6]:
# stack the wiki and Resene tables, normalise the hex codes to upper case,
# remove fully identical rows and renumber the remaining ones from zero
dat = (
    pd.concat([d1, d], axis=0)
    .assign(hex=lambda frame: frame['hex'].map(lambda code: code.upper()))
    .drop_duplicates()
    .reset_index(drop=True)
    [['color_name', 'hex', 'r', 'g', 'b']]
)

In [7]:
# preview the first rows of the combined data
dat.head()

Unnamed: 0,color_name,hex,r,g,b
0,Acid green,#B0BF1A,176,191,26
1,Aero,#7CB9E8,124,185,232
2,Aero blue,#C9FFE5,201,255,229
3,African violet,#B284BE,178,132,190
4,Air Force blue (RAF),#5D8AA8,93,138,168


## Save the data

In [8]:
# write the combined table to 'colors.csv' (relative path), leaving out the row index
dat.to_csv('colors.csv', index = False)

## Identify the colors that have more than one name

In [9]:
# for every hex code, count the non-null entries of each remaining column;
# the 'color_name' column then holds the number of names sharing that code
df_hex = dat.groupby('hex').agg('count').sort_values(by='color_name', ascending=False)

# group the hex codes by their number of names and count the codes in each group
df_color_sum = df_hex.groupby('color_name').agg('count')
df_color_sum['numb_of_names'] = df_color_sum.index

# keep a single count column ('b') and rename it; flat string labels are used on
# purpose, because assigning a nested list to .columns builds a MultiIndex
df_color_sum = df_color_sum[['numb_of_names', 'b']].rename(columns={'b': 'numb_of_cols'})

In [10]:
# display the summary: number of names vs. number of hex codes having that many names
df_color_sum

Unnamed: 0_level_0,numb_of_names,numb_of_cols
color_name,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,2494
2,2,207
3,3,31
4,4,3
5,5,2
