# Simple Dataset EDA

## Imports

In [31]:
import pandas as pd
import ast  # For parsing the RGB tuples from string to Python tuple
import matplotlib.colors as mcolors
from matplotlib.table import Table

## Config and Helpers

In [32]:
# Lift the row cap so displayed frames are rendered in full rather than truncated.
pd.options.display.max_rows = None

In [33]:
def rgb_string_to_tuple(rgb_string):
    """Evaluate a Python-literal string such as "(173, 94, 89)" and return the result.

    Parsing is delegated to ``ast.literal_eval``, so only literal syntax is
    accepted and whatever that call raises on bad input propagates unchanged.
    """
    parsed_literal = ast.literal_eval(rgb_string)
    return parsed_literal

In [34]:
def color_cells(val):
    """Return the CSS ``background-color`` rule for a cell holding an RGB string.

    String values are parsed with ``rgb_string_to_tuple``, scaled from 0-255
    down to 0-1 and converted to a hex colour via matplotlib. Any non-string
    value yields an empty style string.
    """
    # Guard clause: anything that is not a string gets no styling.
    if not isinstance(val, str):
        return ""

    channels = rgb_string_to_tuple(val)
    # matplotlib's colour helpers expect each channel in the 0-1 range.
    unit_channels = [channel / 255 for channel in channels]
    hex_code = mcolors.to_hex(unit_channels)
    return f"background-color: {hex_code}"

## Load Data

In [35]:
# Read the cleaned dataset from the working directory, then show (rows, columns).
df = pd.read_csv(filepath_or_buffer="data_cleaned.csv")
df.shape

(265, 3)

In [36]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,color,tessract_text,easyocr_text
0,"(173, 94, 89)","Twig, Sunset Rose",Twig Sunset Rose
1,"(236, 94, 58)",Neon Orange,Neon Orange
2,"(210, 87, 141)",Breathing Fire,Breathing Fire
3,"(130, 26, 27)",Love Weapon (Ltd. Ed.),Love Weapon (Ltd. Ed.)
4,"(176, 98, 96)",Cosmo,Cosmo


In [37]:
# Shade every cell of the "color" column with the colour it describes.
# Styler.applymap is deprecated since pandas 2.1 in favour of Styler.map, so
# prefer map when this pandas version provides it and fall back to applymap
# on older releases. Both apply color_cells element-wise.
styler = df.style
elementwise_style = styler.map if hasattr(styler, "map") else styler.applymap
styled_df = elementwise_style(color_cells, subset=["color"])

In [38]:
# Bare expression: the notebook renders the styled table as this cell's output.
styled_df

Unnamed: 0,color,tessract_text,easyocr_text
0,"(173, 94, 89)","Twig, Sunset Rose",Twig Sunset Rose
1,"(236, 94, 58)",Neon Orange,Neon Orange
2,"(210, 87, 141)",Breathing Fire,Breathing Fire
3,"(130, 26, 27)",Love Weapon (Ltd. Ed.),Love Weapon (Ltd. Ed.)
4,"(176, 98, 96)",Cosmo,Cosmo
5,"(228, 88, 53)",Morange,Morange
6,"(118, 199, 242)",,amnol
7,"(183, 70, 116)",New York Apple,New York Apple
8,"(72, 28, 25)",BCrowned (Ltd. Ed.),Crowned (Ltd Ed)
9,"(185, 80, 76)",Runway Hit,Runway Hit


## Generate RGB_ORIGINAL_COLORS for Similarity Lookup

In [39]:
# Parse every "(r, g, b)" string in the color column into a Python tuple.
color_tuples = [ast.literal_eval(rgb_text) for rgb_text in df["color"]]

# Rebuild each entry as an explicit 3-tuple; the unpacking fails loudly if an
# entry does not have exactly three components. The result is a plain list.
color_array = []
for red, green, blue in color_tuples:
    color_array.append((red, green, blue))
print(color_array)

[(173, 94, 89), (236, 94, 58), (210, 87, 141), (130, 26, 27), (176, 98, 96), (228, 88, 53), (118, 199, 242), (183, 70, 116), (72, 28, 25), (185, 80, 76), (213, 89, 65), (52, 97, 180), (194, 60, 110), (176, 107, 91), (203, 83, 67), (39, 29, 102), (222, 59, 124), (188, 116, 102), (241, 152, 158), (200, 90, 73), (36, 22, 57), (207, 49, 107), (184, 113, 91), (240, 149, 154), (212, 45, 36), (207, 68, 49), (15, 15, 39), (103, 28, 59), (159, 95, 85), (229, 132, 141), (197, 58, 53), (179, 85, 60), (61, 30, 45), (190, 144, 121), (215, 124, 133), (196, 53, 45), (167, 154, 174), (154, 125, 111), (208, 113, 119), (180, 46, 37), (144, 113, 154), (111, 44, 62), (139, 106, 89), (204, 104, 116), (175, 52, 45), (219, 162, 145), (133, 106, 137), (95, 31, 48), (23, 59, 71), (199, 94, 108), (151, 43, 40), (202, 151, 132), (158, 118, 181), (85, 41, 58), (75, 40, 62), (149, 30, 24), (195, 132, 117), (114, 62, 147), (60, 20, 29), (225, 71, 167), (218, 158, 158), (134, 39, 35), (195, 127, 116), (75, 38, 90), 

In [40]:
# Collect the EasyOCR-read names as a plain Python list, in row order.
names = df.loc[:, "easyocr_text"].tolist()
print(names)

['Twig Sunset Rose', 'Neon Orange', 'Breathing Fire', 'Love Weapon (Ltd. Ed.)', 'Cosmo', 'Morange', 'amnol', 'New York Apple', 'Crowned (Ltd Ed)', 'Runway Hit', 'Flamingo', 'Designer Blue', 'Lickable', 'Quaitzette', 'Prettv Boy', 'Matte Royal', 'Full Fuchsia', 'Back In Voque', 'Vledium Rare', 'Smoked Almond', 'Blue Beat', 'Pink Pigeon', 'Burnt Spice', 'Pure Happine', 'Red Brick Mulling Spices', 'So Chaud', 'Nico Kiss', 'Beetroot', 'Topped With Brandy', 'Little Buddha', 'Lady Danger', 'Toast And Butter', 'Sumac', 'Cafe Au Chic', 'Singer Ruse', 'Duzen Carnations Sweet Sakura', 'ghtly Charrer', 'Simply Smoked', 'Sunny Seoul', 'Dangerous', 'Pick Me Pick Mel', 'Fluid', 'EEss-Presso', 'Giddy', 'Brave Red, Cockney', 'Fleshpol', 'Galaxy Grey', 'Dark Side', 'Young Attitude', 'Fanfare', 'Dare You', 'The Entelope Please', '4Eva', 'Un And Un', 'Uniformly Fabulous', 'Fire Roasted Viva Glam', 'Blankety', 'Model Behavior', 'Apres Soiree', 'Fuchsia Flicker(Ltd Ed)', 'Politely Pink', 'Marsala', 'Viva G

## Full Context Prompt

In [41]:
# Keep only the two columns that feed the prompt.
color_text_df = df[["color", "easyocr_text"]]

# One tagged line per row, pairing the RGB string with its EasyOCR name.
# NOTE(review): the closing markers repeat the opening tag (no "/"); they are
# reproduced as-is -- confirm this is the intended prompt format.
formatted_strings = [
    f"<rgb>{color}<rgb>, <name>{easyocr_text}<name>"
    for color, easyocr_text in zip(
        color_text_df["color"], color_text_df["easyocr_text"]
    )
]

# Collapse the lines into a single newline-delimited block and show it.
result_string = "\n".join(formatted_strings)
print(result_string)

<rgb>(173, 94, 89)<rgb>, <name>Twig Sunset Rose<name>
<rgb>(236, 94, 58)<rgb>, <name>Neon Orange<name>
<rgb>(210, 87, 141)<rgb>, <name>Breathing Fire<name>
<rgb>(130, 26, 27)<rgb>, <name>Love Weapon (Ltd. Ed.)<name>
<rgb>(176, 98, 96)<rgb>, <name>Cosmo<name>
<rgb>(228, 88, 53)<rgb>, <name>Morange<name>
<rgb>(118, 199, 242)<rgb>, <name>amnol<name>
<rgb>(183, 70, 116)<rgb>, <name>New York Apple<name>
<rgb>(72, 28, 25)<rgb>, <name>Crowned (Ltd Ed)<name>
<rgb>(185, 80, 76)<rgb>, <name>Runway Hit<name>
<rgb>(213, 89, 65)<rgb>, <name>Flamingo<name>
<rgb>(52, 97, 180)<rgb>, <name>Designer Blue<name>
<rgb>(194, 60, 110)<rgb>, <name>Lickable<name>
<rgb>(176, 107, 91)<rgb>, <name>Quaitzette<name>
<rgb>(203, 83, 67)<rgb>, <name>Prettv Boy<name>
<rgb>(39, 29, 102)<rgb>, <name>Matte Royal<name>
<rgb>(222, 59, 124)<rgb>, <name>Full Fuchsia<name>
<rgb>(188, 116, 102)<rgb>, <name>Back In Voque<name>
<rgb>(241, 152, 158)<rgb>, <name>Vledium Rare<name>
<rgb>(200, 90, 73)<rgb>, <name>Smoked Almond<name>
