# Color Distribution Visualization

This notebook analyses the chromatic composition of individual commercials in the dataset.
It generates interactive **Sunburst** and **Marimekko** charts that highlight how colors are distributed across scenes and mapped to hierarchical palettes (Basic, Essential, Extended).


In [None]:
import altair as alt
import ast
import os
import pandas as pd
import plotly.express as px

In [None]:
# Optional chart theme, left disabled here (presumably a project-specific Altair theme — confirm before enabling).
# %run utils/altair_coal_theme.py
# Remove Altair's row limit so that DataFrames of any size can be passed to charts.
alt.data_transformers.enable('default', max_rows=None)  # can read more than 5000 rows

## Reference palettes

Before visualization, the data are normalized and joined with palette definitions.
This step ensures consistent color naming across Basic, Essential, and Extended palettes.


In [None]:
# Create _export/colors folder; every chart exported below is written under it.
# exist_ok=True keeps the cell safe to re-run when the folder already exists.
os.makedirs('_export/colors', exist_ok=True)

In [None]:
# Commercial metadata; filtered by commercial_id further down.
commercials_df: pd.DataFrame = pd.read_csv('../general/commercials.csv')
# Palette hierarchy; the cells below read bas_color_name, ess_color_name, ext_color_name,
# ext_hex_code and ext_oklch_coords from it.
reference_palette_hierarchy_df: pd.DataFrame = pd.read_csv('../colors/reference_palette_hierarchy.csv')
# reference_palette_hierarchy_df

In [None]:
# Sort for spectrum colors
# Parse every OKLCH coordinate string once (previously each string was parsed
# twice), then pick the components by position: index 0 is stored as ext_l
# and index 2 as ext_h.
oklch_coords = reference_palette_hierarchy_df['ext_oklch_coords'].apply(ast.literal_eval)
reference_palette_hierarchy_df['ext_l'] = oklch_coords.apply(lambda coords: coords[0])
reference_palette_hierarchy_df['ext_h'] = oklch_coords.apply(lambda coords: coords[2])
reference_palette_hierarchy_df

In [None]:
# Order the reference palette by ext_l, then split it into the chromatic
# ("spectrum") rows and the white/black/grey rows.
reference_palette_hierarchy_df = reference_palette_hierarchy_df.sort_values(by=['ext_l'], ascending=[True])
is_achromatic = reference_palette_hierarchy_df['bas_color_name'].isin(['white', 'black', 'grey'])

# Spectrum colours: per basic colour, the mean of ext_h and the number of rows
# feed the 'order' value that the sunburst uses as wedge size.
spectrum_df = reference_palette_hierarchy_df.loc[~is_achromatic].reset_index(drop=True).assign(
    h_mean=lambda frame: frame.groupby(['bas_color_name'])['ext_h'].transform('mean'),
    count=lambda frame: frame.groupby(['bas_color_name'])['bas_color_name'].transform('count'),
    # Scale order
    order=lambda frame: 1 / frame['count'] * (10000 + (1 - frame['h_mean'])) / 10000,
)

# Greys: same construction, but based on the mean of ext_l instead of ext_h.
greys_df = reference_palette_hierarchy_df.loc[is_achromatic].reset_index(drop=True).assign(
    l_mean=lambda frame: frame.groupby(['bas_color_name'])['ext_l'].transform('mean'),
    count=lambda frame: frame.groupby(['bas_color_name'])['bas_color_name'].transform('count'),
    # Scale order
    order=lambda frame: 1 / frame['count'] * (10000 + frame['l_mean']) / 10000,
)

# spectrum_df

In [None]:
# greys_df

In [None]:
# Draw and export three reference colour wheels: index 0 = spectrum colours,
# index 1 = white/black/grey, index 2 = both subsets concatenated.
for index, df in enumerate([spectrum_df, greys_df, pd.concat([spectrum_df, greys_df])]):
    # Create colour map
    colorMapSubset = dict(zip(df['ext_color_name'], df['ext_hex_code']))
    # print(colorMapSubset)
    # Rings from the centre outwards: basic -> essential -> extended colour names;
    # wedge sizes come from the 'order' column.
    fig = px.sunburst(
        data_frame=df,
        path=['bas_color_name', 'ess_color_name', 'ext_color_name', ],
        values='order',
        # color='closest_essential_hex_color_code',
        # color_discrete_map=colorMapSubset,
        width=600,
        height=600,
        hover_name='ext_hex_code',
    )
    # Colours and labels are overridden on the trace itself.
    # NOTE(review): the marker_colors lookup is done for every label of the trace, so it
    # assumes basic and essential names also occur as ext_color_name keys — confirm,
    # otherwise this raises KeyError.
    fig.update_traces(
        marker_line_width=0.0,
        marker_line_color=None,
        leaf=dict(opacity=1),
        marker_colors=[colorMapSubset[cat] for cat in fig.data[-1].labels],
        sort=True,
        branchvalues='total',
        count='branches',
        # Labels containing an underscore are blanked; all others are shown.
        labels=[x if '_' not in x else '' for x in fig.data[-1]['labels']],
        insidetextorientation='radial',
    )
    # One PDF per wheel, numbered by its position in the list above.
    fig.write_image(f'_export/colors/color-wheel-{index}-start.pdf')
    fig.show()

## Commercial palettes

In [None]:
# Load all commercial palettes
commercial_palettes_df: pd.DataFrame = pd.read_csv('../colors/commercial_palettes.csv')
# commercial_palettes_df

**Select a commercial**

In [None]:
# Id of the commercial whose colour charts are generated below; change it to analyse another one.
sample_commercial_id: str = 'gSnVME7YCVQ'

### Sunburst charts

The Sunburst chart represents the hierarchical structure of color palettes for the selected spot.
Inner rings correspond to higher-level categories (Basic palette), while outer rings expand into Essential and Extended tones.


In [None]:
# Keep only the metadata row(s) of the commercial selected in sample_commercial_id
filtered_commercials_df: pd.DataFrame = commercials_df[commercials_df['commercial_id'] == sample_commercial_id]
filtered_commercials_df

In [None]:
# Palette rows of the selected commercial(s), enriched with the commercial metadata columns.
belongs_to_selection = commercial_palettes_df['commercial_id'].isin(filtered_commercials_df['commercial_id'])
filtered_commercials_palettes_df: pd.DataFrame = commercial_palettes_df.loc[belongs_to_selection].merge(
    filtered_commercials_df, on='commercial_id')
filtered_commercials_palettes_df.loc[filtered_commercials_palettes_df['commercial_id'] == sample_commercial_id]

In [None]:
# Keep only the columns needed for the sunburst charts:
# the commercial id, the closest colour in each of the three palettes, and tf.
palette_columns_to_keep = [
    'commercial_id',
    'closest_color_ext_pal',
    'closest_color_ess_pal',
    'closest_color_bas_pal',
    'tf',
]
reduced_filtered_commercials_palettes_df = filtered_commercials_palettes_df.loc[:, palette_columns_to_keep]
# reduced_filtered_commercials_palettes_df
reduced_filtered_commercials_palettes_df.loc[
    reduced_filtered_commercials_palettes_df['commercial_id'] == sample_commercial_id]


In [None]:
# Load the idf tables of the three palettes (extended, essential, basic).
# Only the colour-name key and the idf column are read; the joins happen in the next cells.
ext_pal_idfs_df: pd.DataFrame = pd.read_csv(
    '../colors/extended_palette_idfs.csv',
    usecols=['closest_color_ext_pal', 'idf']
)
ess_pal_idfs_df: pd.DataFrame = pd.read_csv(
    '../colors/essential_palette_idfs.csv',
    usecols=['closest_color_ess_pal', 'idf']
)
bas_pal_idfs_df: pd.DataFrame = pd.read_csv(
    '../colors/basic_palette_idfs.csv',
    usecols=['closest_color_bas_pal', 'idf']
)

In [None]:
# Extended
# 1. attach the idf of each row's closest extended colour,
# 2. attach the reference-palette row of that colour,
# 3. weight tf by idf,
# 4. keep a single row per (commercial, extended colour).
ext_idf_lookup = ext_pal_idfs_df.set_index('closest_color_ext_pal')
enriched_ext_df: pd.DataFrame = (
    reduced_filtered_commercials_palettes_df
    .join(ext_idf_lookup, on='closest_color_ext_pal')
    .merge(
        reference_palette_hierarchy_df,
        left_on='closest_color_ext_pal',
        right_on='ext_color_name',
        how='left',
    )
    .assign(ext_tf_idf=lambda frame: frame['tf'] * frame['idf'])
    .drop_duplicates(subset=['commercial_id', 'closest_color_ext_pal'])
)
enriched_ext_df.loc[enriched_ext_df['commercial_id'] == sample_commercial_id]

In [None]:
# Essential
# Attach the idf of each row's closest essential colour.
enriched_ess_df = reduced_filtered_commercials_palettes_df.join(ess_pal_idfs_df.set_index('closest_color_ess_pal'),
                                                                on='closest_color_ess_pal')
# Attach the reference-palette rows whose ess_color_name matches; the match key is dropped afterwards.
# NOTE(review): an essential colour can presumably match several hierarchy rows (one per
# extended colour), so this merge may multiply rows before the de-duplication below — confirm.
enriched_ess_df = enriched_ess_df.merge(
    reference_palette_hierarchy_df, left_on='closest_color_ess_pal', right_on='ess_color_name',
    how='left').drop('ess_color_name', axis=1)
# tf weighted by the idf of the essential colour
enriched_ess_df['ess_tf_idf'] = enriched_ess_df['tf'] * enriched_ess_df['idf']
# Keep only 1 row per (commercial, extended colour, essential colour)
enriched_ess_df.drop_duplicates(subset=['commercial_id', 'closest_color_ext_pal', 'closest_color_ess_pal'],
                                inplace=True)
enriched_ess_df

In [None]:
# Basic
# Attach the idf of each row's closest basic colour (lookup on closest_color_bas_pal).
enriched_bas_df = reduced_filtered_commercials_palettes_df.join(
    bas_pal_idfs_df.set_index('closest_color_bas_pal'),
    on='closest_color_bas_pal')
enriched_bas_df

In [None]:
# Attach the reference-palette rows whose bas_color_name matches; the match key is dropped afterwards.
# NOTE: this cell reassigns enriched_bas_df from itself, so re-running it without first
# re-running the previous cell merges the reference palette a second time.
enriched_bas_df = enriched_bas_df.merge(
    reference_palette_hierarchy_df, left_on='closest_color_bas_pal', right_on='bas_color_name',
    how='left').drop('bas_color_name', axis=1)
# tf weighted by the idf of the basic colour
enriched_bas_df['bas_tf_idf'] = enriched_bas_df['tf'] * enriched_bas_df['idf']
# Keep only 1 row per (commercial, extended colour, essential colour, basic colour)
enriched_bas_df.drop_duplicates(
    subset=['commercial_id', 'closest_color_ext_pal', 'closest_color_ess_pal', 'closest_color_bas_pal'],
    inplace=True)
enriched_bas_df

Generate the sunburst charts for the selected sample commercial.

In [None]:
# Lookup table built from the reference palette: extended colour name -> hex code
ref_color_map_subset: dict[str, str] = {
    color_name: hex_code
    for color_name, hex_code in zip(
        reference_palette_hierarchy_df['ext_color_name'],
        reference_palette_hierarchy_df['ext_hex_code'],
    )
}
ref_color_map_subset

In [None]:
# One entry per exported sunburst variant. The key becomes the chart title and part of the
# PDF file name; 'df' is the frame to plot and 'values' the column used for the wedge sizes.
reference_palettes_tf_idfs = {
    'without_tf_idf': {'df': enriched_ext_df, 'values': 'tf'},  # raw tf, no idf weighting
    'extended_tf_idf': {'df': enriched_ext_df, 'values': 'ext_tf_idf'},
    'essential_tf_idf': {'df': enriched_ess_df, 'values': 'ess_tf_idf'},
    'basic_tf_idf': {'df': enriched_bas_df, 'values': 'bas_tf_idf'},
}

In [None]:
# Export one sunburst PDF per selected commercial and per entry of reference_palettes_tf_idfs.
for index, row in filtered_commercials_df.iterrows():
    commercial_id: str = row['commercial_id']

    # Every chart of a commercial goes into its own sub-folder
    commercial_color_chart_folder: str = f'_export/colors/{commercial_id}'
    os.makedirs(commercial_color_chart_folder, exist_ok=True)

    for key, value in reference_palettes_tf_idfs.items():
        palette_df = value['df']
        # Rings from the centre outwards: basic -> essential -> extended closest colours
        fig = px.sunburst(
            data_frame=palette_df[palette_df['commercial_id'] == commercial_id],
            path=['closest_color_bas_pal', 'closest_color_ess_pal', 'closest_color_ext_pal'],
            values=value['values'],
            width=600,
            height=600,
            title=key,
        )
        # Labels generated by plotly for the sectors; used for the colour lookup
        # and to size the list of blank labels.
        sector_labels = fig.data[-1].labels
        fig.update_traces(
            marker_line_width=0.0,
            marker_line_color=None,
            leaf=dict(opacity=1),
            # Each sector is painted with the reference hex code stored under its label
            marker_colors=[ref_color_map_subset[sector] for sector in sector_labels],
            sort=True,
            branchvalues='total',
            count='branches',
            labels=[''] * len(sector_labels),  # hide labels
            hovertemplate='%{color}',
        )
        fig.write_image(f'{commercial_color_chart_folder}/{commercial_id}.sunburst.{key}.pdf')

You will find the PDFs of the exported sunbursts in the `_export/colors/<sample_commercial_id>` folder.

### Marimekko diagrams

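This chart shows the relative proportion of the dominant colors in each scene of the same commercial. Each scene spans its frame range (`start_frame` to `end_frame`) on the horizontal axis, and the height of each colored rectangle encodes how frequently that color occurs within the scene.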


In [None]:
# Inspect the extended-palette frame that the Marimekko cell joins against
enriched_ext_df

In [None]:
# Inspect the raw commercial palettes that the Marimekko cell filters by commercial_id
commercial_palettes_df

In [None]:
def _save_marimekko_pdf(chart, pdf_path: str) -> None:
    """Save an Altair chart to ``pdf_path`` without grid lines or view border.

    The chart is switched to ``width='container'`` / ``height='container'``
    right before saving, as every export in this notebook does.
    """
    chart.configure_axis(grid=False).configure_view(stroke=None).properties(
        width='container', height='container').save(pdf_path)


# Stack settings shared by the three Marimekko variants: within each scene the
# colour frequencies are stacked into the [start_freq, end_freq] interval.
marimekko_stack_options = dict(
    stack='frequency_within_the_scene',
    groupby=['scene'],
    as_=['start_freq', 'end_freq'],
)

for _, row in filtered_commercials_df.iterrows():
    commercial_id: str = row['commercial_id']
    # Create commercial_color_chart_folder
    commercial_color_chart_folder: str = f'_export/colors/{commercial_id}'
    os.makedirs(commercial_color_chart_folder, exist_ok=True)

    ## Get the palette from commercial_id
    commercial_palette_df: pd.DataFrame = commercial_palettes_df[
        commercial_palettes_df['commercial_id'] == commercial_id]

    ### Enrich commercial palette with the tf-idf and OKLCH coordinates of closest_color_ext_pal
    palette_key = ['commercial_id', 'closest_color_ext_pal']
    enriched_commercial_palette_df = commercial_palette_df.set_index(palette_key).join(
        enriched_ext_df[palette_key + ['ext_tf_idf', 'ext_oklch_coords']].set_index(palette_key),
    ).reset_index()
    print(len(enriched_commercial_palette_df))

    # Parse each OKLCH string once and spread its three components
    # (positions 0, 1, 2) over the ext_l, ext_c and ext_h columns.
    oklch_coords = enriched_commercial_palette_df['ext_oklch_coords'].apply(ast.literal_eval)
    for position, column in enumerate(('ext_l', 'ext_c', 'ext_h')):
        enriched_commercial_palette_df[column] = oklch_coords.apply(
            lambda coords, i=position: coords[i])

    # Un-stacked base chart: x spans the frames of a scene (start_frame -> end_frame),
    # y spans the stacked frequency interval, and each rectangle is filled with its own hex code.
    marimekko_base = (alt.Chart(
        enriched_commercial_palette_df[
            ['scene', 'hex_code', 'frequency_within_the_scene', 'start_frame', 'end_frame',
             'ext_l',
             'ext_c',
             'ext_h',
             ]
        ]
    ).mark_rect(
        tooltip=True,
        opacity=1,
    ).encode(
        x=alt.X(
            'start_frame:Q',
            title='Frames',
            scale=alt.Scale(
                padding=0,
                # Clamp the x domain to the frames actually covered by this commercial
                domainMin=enriched_commercial_palette_df['start_frame'].min(),
                domainMax=enriched_commercial_palette_df['end_frame'].max(),
            ),
            axis=None,
        ),
        x2=alt.X2('end_frame:Q'),
        y=alt.Y(
            'start_freq:Q',
            title=None,
            scale=alt.Scale(padding=0),
            axis=None,
        ),
        y2=alt.Y2(
            'end_freq:Q',
        ),
        color=alt.Color(
            'hex_code:N',
            scale=None,  # use the hex codes as literal colours
        ),
        tooltip=[
            alt.Tooltip(
                'scene',
                title='Scene',
            ),
            alt.Tooltip(
                'hex_code',
                title='Hex color code',
            ),
            alt.Tooltip(
                'frequency_within_the_scene',
                title='Frequency within the scene',
            ),
        ],
    ).properties(
        width=400,
        height=400,
    ))

    # Each variant gets exactly one stack transform. Previously the sorted variants
    # were derived from the already-stacked chart and therefore carried two.
    original_colors_marimekko = marimekko_base.transform_stack(**marimekko_stack_options)

    original_colors_marimekko_hue = marimekko_base.transform_stack(
        **marimekko_stack_options,
        sort=[
            # h, l, c
            alt.SortField('ext_h', order='ascending'),
            alt.SortField('ext_l', order='ascending'),
            alt.SortField('ext_c', order='ascending'),
        ],
    )

    original_colors_marimekko_lightness = marimekko_base.transform_stack(
        **marimekko_stack_options,
        sort=[
            # l, h, c
            alt.SortField('ext_l', order='ascending'),
            alt.SortField('ext_h', order='ascending'),
            alt.SortField('ext_c', order='ascending'),
        ],
    )

    ## Save charts to PDF (all three go to the per-commercial folder created above)

    ### Original
    _save_marimekko_pdf(
        original_colors_marimekko,
        f'{commercial_color_chart_folder}/{commercial_id}.marimekko.original.pdf')

    ### Ordered by hue
    _save_marimekko_pdf(
        original_colors_marimekko_hue,
        f'{commercial_color_chart_folder}/{commercial_id}.marimekko.by_hue.pdf')

    ### Ordered by lightness
    _save_marimekko_pdf(
        original_colors_marimekko_lightness,
        f'{commercial_color_chart_folder}/{commercial_id}.marimekko.by_lightness.pdf')


You will find the PDFs of the exported Marimekko charts in the `_export/colors/<sample_commercial_id>` folder.

In [None]:
# original_colors_marimekko.configure_axis(grid=False).configure_view(stroke=None)
# original_colors_marimekko

In [None]:
# original_colors_marimekko_hue

In [None]:
# original_colors_marimekko_lightness