In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go

# Identical to Adam's answer
import plotly.colors
from PIL import ImageColor

pd.set_option('display.max_columns', None)


In [20]:
def get_unit_price_comparison_data(df, sorting_value='ratio_mini_lt_full'):
    '''
    Preprocessing required to compare mini and standard size products with one another.

    Every 'mini size' row is paired with every 'standard size' row of the same product,
    the mini-to-standard unit price ratio is computed, and the pairs are reshaped to
    long format (one row per unit-price observation) and joined back onto `df`.

    Args:
        df: product table. Columns read here: 'swatch_group', 'product_id',
            'product_name', 'brand_name', 'amount_adj', 'unit_price', 'lvl_2_cat'.
        sorting_value: currently unused - the sorting step is disabled below. The
            parameter is kept so existing callers keep working.
    Returns:
        Long-format DataFrame with the pair id columns, 'variable'
        ('unit_price_mini' or 'unit_price_standard'), a numeric 'value', every column
        of `df` joined on the product keys (so each observation is repeated once per
        matching `df` row), plus 'pretty_ratio' and 'display_name'.
    '''
    product_keys = ['product_id', 'product_name', 'brand_name']

    # for each product, compare all mini size to standard using cross join
    df_compare = df[df['swatch_group'] == 'mini size'].merge(
        df[df['swatch_group'] == 'standard size'],
        on=product_keys,
        suffixes=('_mini', '_standard')
    )
    # only calculate ratio in one direction
    df_compare = df_compare[df_compare['amount_adj_mini'] < df_compare['amount_adj_standard']]
    # if ratio < 1, mini is better value per oz, if ratio > 1, standard is better value
    df_compare = df_compare.assign(
        mini_to_standard_ratio=df_compare['unit_price_mini'] / df_compare['unit_price_standard']
    )
    # the surviving (pre-filter) row labels become the pair identifier
    df_compare = df_compare.reset_index().rename(columns={'index': 'prod_rank'})

    # NOTE: sorting is disabled, so `sorting_value` has no effect
    # df_compare = sort_product_comparison_data(df_compare, sorting_value)

    # Melt only the two unit-price columns. Melting every remaining column would stack
    # strings and numbers into a single object-dtype 'value' column just to discard
    # all of the non-price rows again.
    df_compare = df_compare.melt(
        id_vars=['product_id', 'brand_name', 'product_name',
                 'prod_rank', 'amount_adj_mini', 'amount_adj_standard',
                 'mini_to_standard_ratio'],
        value_vars=['unit_price_mini', 'unit_price_standard'],
    )
    df_compare = df_compare.merge(df,
                    on=['product_id', 'brand_name', 'product_name'],
                    how='left')
    df_compare['pretty_ratio'] = df_compare['mini_to_standard_ratio'].round(2).astype(str)
    df_compare['display_name'] = df_compare['brand_name']+",<br>"+df_compare['lvl_2_cat']+" ("+df_compare['pretty_ratio']+")"
    return df_compare

In [21]:
# Read the aggregated product table and reshape it straight into the
# long-format mini-vs-standard comparison frame.
df = get_unit_price_comparison_data(pd.read_csv('../data/agg_prod_data.csv'))

In [25]:
def get_color(colorscale_name, loc):
    """
    Look up the color(s) found at normalized position(s) on a named Plotly colorscale.

    :param colorscale_name: colorscale name handed to plotly's ColorscaleValidator.
    :param loc: a single position, or an iterable of positions (anything exposing
        ``__iter__``), each interpreted by get_continuous_color.
    :return: one color for a scalar `loc`, otherwise a list with one color per position.
    """
    from _plotly_utils.basevalidators import ColorscaleValidator

    # Arguments are the validated property's name and its parent name; the parent
    # name does not matter for this use.
    validator = ColorscaleValidator("colorscale", "")
    # Coerced form is a list of [location, color] pairs
    scale = validator.validate_coerce(colorscale_name)

    if not hasattr(loc, "__iter__"):
        return get_continuous_color(scale, loc)
    return [get_continuous_color(scale, position) for position in loc]
        
# get_color retrieves one or more colors from a continuous Plotly color scale,
# given the name of the color scale and normalized location(s) between 0 and 1.
# Reference: https://stackoverflow.com/questions/62710057/access-color-from-plotly-color-scale

def get_continuous_color(colorscale, intermed):
    """
    Return the color found at position `intermed` on a Plotly continuous colorscale.

    Plotly continuous colorscales map colors onto the range [0, 1]; positions that fall
    between two stops are linearly blended from the neighbouring stop colors.

    Plotly does not expose every colorscale in one common format. Some can be used
    directly:

        colorscale = plotly.colors.PLOTLY_SCALES["Greens"]

    Others are plain swatches that first have to be turned into a colorscale:

        viridis_colors, scale = plotly.colors.convert_colors_to_same_type(plotly.colors.sequential.Viridis)
        colorscale = plotly.colors.make_colorscale(viridis_colors, scale=scale)

    :param colorscale: sequence of (cutoff, color) pairs; colors are RGB strings or
        "#"-prefixed hex strings.
    :param intermed: value in the range [0, 1]; values outside it are clamped to the
        first / last color.
    :return: color in rgb string format
    :rtype: str
    :raises ValueError: if `colorscale` is empty.
    """
    if len(colorscale) == 0:
        raise ValueError("colorscale must have at least one color")

    def hex_to_rgb(hex_color):
        # getcolor yields an (r, g, b) tuple; its str() supplies the parenthesised part
        return "rgb" + str(ImageColor.getcolor(hex_color, "RGB"))

    # At/below the start of the scale, or a single-color scale: the first color wins
    if intermed <= 0 or len(colorscale) == 1:
        first = colorscale[0][1]
        return hex_to_rgb(first) if first[0] == "#" else first
    # At/beyond the end of the scale: the last color wins
    if intermed >= 1:
        last = colorscale[-1][1]
        return hex_to_rgb(last) if last[0] == "#" else last

    # Walk the stops in order: remember the latest stop strictly below `intermed`
    # and halt at the first stop that is not below it.
    for stop, stop_color in colorscale:
        if intermed > stop:
            lower_stop, lower_color = stop, stop_color
            continue
        upper_stop, upper_color = stop, stop_color
        break

    # some color scale names (such as cividis) return hex colors:
    # [[loc1, "hex1"], [loc2, "hex2"], ...] - convert both ends when either is hex
    if lower_color[0] == "#" or upper_color[0] == "#":
        lower_color, upper_color = hex_to_rgb(lower_color), hex_to_rgb(upper_color)

    # Position of `intermed` within the bracketing pair of stops
    fraction = (intermed - lower_stop) / (upper_stop - lower_stop)
    return plotly.colors.find_intermediate_color(
        lowcolor=lower_color,
        highcolor=upper_color,
        intermed=fraction,
        colortype="rgb",
    )

In [72]:
# Colorbar theme: the Plotly colorscale name used for every ratio-coloured element
# in this cell. This will move into global styling.
COLOUR_SCALE='plotly3_r'


# Subplot helpers for the figure below: left pair plot, right scatter - both sharing colour bar
import plotly.subplots as sp


def normalize_colour_value(data_point, all_values):
    """
    Min-max normalizes a data point for use with a colour bar.

    Args:
        data_point: the value to place on the colour scale.
        all_values: re-iterable collection of values defining the scale's range.
    Returns:
        (data_point - min) / (max - min). When every value is identical the range is
        zero, so 0.0 (start of the scale) is returned instead of dividing by zero.
    """
    lowest = min(all_values)
    span = max(all_values) - lowest
    if span == 0:
        # Degenerate range, e.g. a single value: there is nothing to interpolate
        return 0.0
    return (data_point - lowest) / span


def create_join_pair_scatter(df_product_pairs, df_base):
    """
    Build a two-panel figure comparing mini and standard size products.

    Left subplot is a slope/pair plot of mini-standard product pairs
    Y = unit price
    colour = mini-to-standard unit price ratio
    initially, product and brand names were shown with pairs on subplot but
    this takes up too much space - all additional info has been moved to hover

    Right subplot is all products in scatter plot
    X = size
    Y = price
    colour = sharing same ratio colour bar scale from left subplot
    this means only mini and standard size products will have non-grey colours
    4 product size categories will be shown with scatter shape - Refill, Value, Mini, Standard

    Args:
        df_product_pairs: one row per mini/standard pair to draw. Columns read:
            'mini_to_standard_ratio', 'product_name', 'brand_name' and the
            '_mini' / '_standard' variants of 'unit_price', 'amount_a', 'price'
            and 'lvl_2_cat'.
        df_base: one row per plotted product. Columns read: 'amount_a', 'price',
            'swatch_group' and 'mini_to_standard_ratio' (NaN rows are drawn grey).
    Returns:
        The plotly figure holding both subplots.
    """
    ratio_col = 'mini_to_standard_ratio'

    # One colour range for the whole figure, taken from the data behind the visible
    # colour bar so that line colours and scatter colours decode identically.
    # Series.min()/max() skip NaN; the builtin min()/max() return NaN whenever the
    # first value is NaN, and df_base holds NaN for products without a ratio.
    ratio_min = df_base[ratio_col].min()
    ratio_max = df_base[ratio_col].max()
    if pd.isna(ratio_min) or pd.isna(ratio_max):
        # df_base carries no ratios at all: fall back to the plotted pairs' own range
        ratio_min = df_product_pairs[ratio_col].min()
        ratio_max = df_product_pairs[ratio_col].max()
    ratio_range = [ratio_min, ratio_max]

    fig = sp.make_subplots(rows=1, cols=2, column_widths=[0.4, 0.6],
        subplot_titles=("default pairplot title", "default scatterplot title "))

    line_hover_template = '{}<br>{}<br>Size: {} oz.<br>Price: ${}<br>Category: {}<br>Mini-to-Standard Ratio: {:.2f}'

    # pair plot - need to draw twice to use continuous colour scale with line plot...
    # need to plot pairs traces one-by-one
    # this plot should be limited to 10 lines max - otherwise it is too difficult to read - influenced by sorting callback
    for _, row in df_product_pairs.iterrows():
        colour_val_normalized = normalize_colour_value(row[ratio_col], ratio_range)
        unit_prices = [row['unit_price_mini'], row['unit_price_standard']]

        pair_line_trace = go.Scatter(
            x=['Mini', 'Standard'],
            y=unit_prices,
            mode='lines',
            marker=dict(
                color=get_color(COLOUR_SCALE, colour_val_normalized)
            ),
            line=dict(
                width=5
            ),
            showlegend=False,
        )
        fig.add_trace(pair_line_trace, row=1, col=1)

        # Invisible markers on the line ends; they exist only to carry the hover text
        pair_scatter_trace = go.Scatter(
            x=['Mini', 'Standard'],
            y=unit_prices,
            mode='markers',
            marker=dict(
                size=0,
                color="rgba(0,0,0,0)",
                colorscale=COLOUR_SCALE,
                showscale=False,
                cmin=ratio_min,
                cmax=ratio_max,
                colorbar=dict(
                    thickness=25,
                    title='Unit Price Ratio',
                    x=0.9,
                    xref="container",
                ),
            ),
            hovertemplate='Unit Price: %{y:$.2f} <br>%{text}',
            text=[
                line_hover_template.format(
                    row['product_name'], row['brand_name'], row[f'amount_a_{size}'],
                    row[f'price_{size}'], row[f'lvl_2_cat_{size}'], row[ratio_col])
                for size in ('mini', 'standard')
            ],
        )
        fig.add_trace(pair_scatter_trace, row=1, col=1)

    # Guide lines and axis styling are the same for every pair, so add them once
    # rather than once per row (which stacked duplicate shapes on the figure).
    fig.add_vline(x=0, line_width=1, line_dash="dash", line_color="grey", row=1, col=1)
    fig.add_vline(x=1, line_width=1, line_dash="dash", line_color="grey", row=1, col=1)

    fig.update_layout(
        xaxis=dict(
            title='Product Size',
            type='category',
            tickmode='array',
            # really difficult to get categorical axis spacing right
            range=[-0.2, 2 - 0.7],
            linecolor='rgb(204, 204, 204)',
        ),
        yaxis=dict(
            title='Unit Price ($/oz.)',
            showgrid=False,
            zeroline=True,
            showline=False,
            showticklabels=True,
        ),
        showlegend=False,
        plot_bgcolor='white',
    )

    # right side - scatter plot
    marker_shapes = {'mini size':'circle', 'standard size':'square', 'refill size':'diamond', 'value size':'cross'}

    has_ratio = df_base[ratio_col].notna()

    # grey markers, no mini-to-standard ratio
    df_no_ratio = df_base[~has_ratio]

    background_scatter = go.Scatter(
        x=df_no_ratio['amount_a'],
        y=df_no_ratio['price'],
        mode="markers",
        marker=dict(
            color='grey',
            symbol=[marker_shapes[group] for group in df_no_ratio['swatch_group']]
        ),
        opacity=0.7,
    )
    fig.add_trace(background_scatter, row=1, col=2)

    # coloured markers - this trace owns the colour bar shown for the whole figure
    df_w_ratio = df_base[has_ratio]

    scatter_highlight = go.Scatter(
        x=df_w_ratio['amount_a'],
        y=df_w_ratio['price'],
        mode="markers",
        marker=dict(
            color=df_w_ratio[ratio_col],
            colorbar=dict(
                title="Mini-to-Standard <br>Unit Price Ratio"
            ),
            colorscale=COLOUR_SCALE,
            cmin=ratio_min,
            cmax=ratio_max,
            symbol=[marker_shapes[group] for group in df_w_ratio['swatch_group']]
        ),
    )
    fig.add_trace(scatter_highlight, row=1, col=2)

    fig.update_xaxes(title_text="Size (oz.)", row=1, col=2)
    fig.update_yaxes(title_text="Price ($)", row=1, col=2)
    return fig




# Reload the raw product table: an earlier cell rebinds `df` to the long-format
# output of get_unit_price_comparison_data, which is not what this cell needs.
df = pd.read_csv('../data/agg_prod_data.csv')
# Pair every mini size row with every standard size row of the same product (wide format).
# NOTE(review): these pairing steps repeat the first half of
# get_unit_price_comparison_data - consider sharing one helper.
df_compare = df[df['swatch_group']=='mini size'].merge(
    df[df['swatch_group']=='standard size'],
    on=['product_id','product_name','brand_name'],
    suffixes=('_mini','_standard'))

# keep only pairs where the mini really is the smaller of the two
df_compare = df_compare[df_compare['amount_adj_mini']<df_compare['amount_adj_standard']]
# if ratio < 1, mini is better value per oz, if ratio > 1, standard is better value
df_compare['mini_to_standard_ratio'] = df_compare['unit_price_mini'] / df_compare['unit_price_standard']
# the surviving row labels become the pair identifier 'prod_rank'
df_compare = df_compare.reset_index().rename(columns={'index':'prod_rank'})

# Attach the ratio to every product row for the right-hand scatter.
# NOTE(review): a product can have several rows in df_compare (one per mini/standard
# combination), so this left merge repeats that product's rows in `df` - confirm
# the duplicates are intended.
df = df.merge(
    df_compare[['product_id','product_name','brand_name','mini_to_standard_ratio']],
    on=['product_id','product_name','brand_name'],
    how='left')

# value and refill sizes are not part of a mini/standard pair: blank their ratio
df.loc[df['swatch_group'].isin(['value size','refill size']), 'mini_to_standard_ratio'] = np.nan
# Only pairs at positions 50-69 are drawn in the pair plot; the full table feeds the scatter.
# NOTE(review): the slice bounds are hard-coded - presumably a placeholder for a sorted selection.
fig = create_join_pair_scatter(df_compare[50:70], df)


In [73]:
# Render the combined pair plot / scatter figure built in the previous cell
fig.show()

In [56]:
# Preview the first rows of the wide mini-vs-standard pair table.
# NOTE(review): this cell's execution count (56) is lower than that of the cell which
# builds df_compare (72), so the output shown may be stale - re-run top to bottom.
df_compare.head()

Unnamed: 0,prod_rank,index_mini,product_id,product_name,brand_name,swatch_group_mini,amount_a_mini,unit_a_mini,price_mini,internal_product_id_mini,rating_mini,product_reviews_mini,n_loves_mini,lvl_0_cat_mini,lvl_1_cat_mini,lvl_2_cat_mini,sku_mini,amount_b_mini,unit_b_mini,product_multiplier_mini,amount_adj_mini,unit_price_mini,prod_size_rank_mini,index_standard,swatch_group_standard,amount_a_standard,unit_a_standard,price_standard,internal_product_id_standard,rating_standard,product_reviews_standard,n_loves_standard,lvl_0_cat_standard,lvl_1_cat_standard,lvl_2_cat_standard,sku_standard,amount_b_standard,unit_b_standard,product_multiplier_standard,amount_adj_standard,unit_price_standard,prod_size_rank_standard,mini_to_standard_ratio
0,0,11,107319,Cleansing And Softening Refillable Shower Oil ...,L'Occitane,mini size,2.5,oz,13.0,1,4.3497,1800.0,82800.0,Bath & Body,Bath & Shower,Body Wash & Shower Gel,['1317981'],75.0,ml,1.0,2.5,5.2,4671.0,13,standard size,8.4,oz,34.0,1,4.3497,1800.0,82800.0,Bath & Body,Bath & Shower,Body Wash & Shower Gel,['763706'],250.0,ml,1.0,8.4,4.047619,6569.0,1.284706
1,1,15,109908,Translucent Loose Setting Powder,Laura Mercier,mini size,0.33,oz,30.0,1,4.5007,9400.0,848500.0,Makeup,Face,Setting Spray & Powder,['2250520' '2376572' '2250538'],9.3,g,1.0,0.33,90.909091,1398.0,16,standard size,1.0,oz,56.0,1,4.5007,9400.0,848500.0,Makeup,Face,Setting Spray & Powder,['870618' '2391464' '1926435'],29.0,g,1.0,1.0,56.0,2513.0,1.623377
2,2,19,110113,for her Eau de Toilette,Narciso Rodriguez,mini size,0.25,oz,37.0,1,4.5183,656.0,26300.0,Fragrance,Women,Perfume,['1475904'],7.5,ml,1.0,0.25,148.0,1030.0,20,standard size,1.0,oz,75.0,1,4.5183,656.0,26300.0,Fragrance,Women,Perfume,['1410737'],30.0,ml,1.0,1.0,75.0,2514.0,1.973333
3,3,19,110113,for her Eau de Toilette,Narciso Rodriguez,mini size,0.25,oz,37.0,1,4.5183,656.0,26300.0,Fragrance,Women,Perfume,['1475904'],7.5,ml,1.0,0.25,148.0,1030.0,21,standard size,1.6,oz,114.0,1,4.5183,656.0,26300.0,Fragrance,Women,Perfume,['860882'],50.0,ml,1.0,1.6,71.25,3377.0,2.077193
4,4,19,110113,for her Eau de Toilette,Narciso Rodriguez,mini size,0.25,oz,37.0,1,4.5183,656.0,26300.0,Fragrance,Women,Perfume,['1475904'],7.5,ml,1.0,0.25,148.0,1030.0,22,standard size,3.3,oz,150.0,1,4.5183,656.0,26300.0,Fragrance,Women,Perfume,['860890'],100.0,ml,1.0,3.3,45.454545,4998.0,3.256
