In [84]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [85]:
def get_unit_price_comparison_data(df, sorting_value='ratio_mini_lt_full'):
    '''
    Build a long-format table comparing mini and standard unit prices per product.

    Every 'mini size' row is paired with every 'standard size' row of the same
    product (matched on product_id, product_name and brand_name). A pair is kept
    only when the mini amount is strictly smaller than the standard amount. Each
    kept pair is then melted into two rows, one per unit price.

    Args:
        df: DataFrame containing at least the columns product_id, product_name,
            brand_name, swatch_group, amount_adj, unit_price and lvl_2_cat.
        sorting_value: Currently unused (the sorting step that consumed it is
            commented out below). Kept so existing callers keep working.
    Returns:
        DataFrame with the pair identifiers (product keys, prod_rank,
        amount_adj_mini, amount_adj_standard, mini_to_standard_ratio), the
        melted 'variable' / 'value' columns, the remaining columns of `df`
        joined back on the product keys, and the display helpers
        'pretty_ratio' and 'display_name'.
    '''
    product_keys = ['product_id', 'product_name', 'brand_name']

    # Inner join on the product keys: within one product, every mini-size row
    # is matched with every standard-size row.
    mini_rows = df[df['swatch_group'] == 'mini size']
    standard_rows = df[df['swatch_group'] == 'standard size']
    df_compare = mini_rows.merge(
        standard_rows,
        on=product_keys,
        suffixes=('_mini', '_standard')
    )

    # Only calculate the ratio in one direction. Take an explicit copy so the
    # column assignment below writes to an independent frame instead of a
    # filtered slice (which triggers SettingWithCopyWarning).
    mini_is_smaller = df_compare['amount_adj_mini'] < df_compare['amount_adj_standard']
    df_compare = df_compare[mini_is_smaller].copy()

    # if ratio < 1, mini is better value per oz, if ratio > 1, standard is better value
    df_compare['mini_to_standard_ratio'] = (
        df_compare['unit_price_mini'] / df_compare['unit_price_standard']
    )
    # prod_rank is the row position of the pair in the merged frame (pre-filter).
    df_compare = df_compare.reset_index().rename(columns={'index': 'prod_rank'})

    # df_compare = sort_product_comparison_data(df_compare, sorting_value)

    # Melt only the two unit-price columns. Melting every column and filtering
    # afterwards yields the same rows, but mixes strings and numbers into an
    # object-dtype 'value' column; this keeps 'value' numeric.
    df_compare = df_compare.melt(
        id_vars=['product_id', 'brand_name', 'product_name',
                 'prod_rank', 'amount_adj_mini', 'amount_adj_standard',
                 'mini_to_standard_ratio'],
        value_vars=['unit_price_mini', 'unit_price_standard']
    )

    # Bring the original product columns (e.g. lvl_2_cat) back in.
    # NOTE(review): `df` holds several rows per product, so this left merge
    # repeats each melted row once per matching row of `df` - confirm that the
    # duplication is intended.
    df_compare = df_compare.merge(df,
                    on=['product_id', 'brand_name', 'product_name'],
                    how='left')
    df_compare['pretty_ratio'] = df_compare['mini_to_standard_ratio'].round(2).astype(str)
    df_compare['display_name'] = (
        df_compare['brand_name'] + ",<br>" + df_compare['lvl_2_cat']
        + " (" + df_compare['pretty_ratio'] + ")"
    )
    return df_compare

In [86]:
# Load the aggregated product data from a path relative to this notebook.
df = pd.read_csv('../data/agg_prod_data.csv')



In [30]:
# Swap the loaded frame for its mini-vs-standard comparison table.
# NOTE(review): this rebinds `df`, so re-running the cell feeds the comparison
# table back into the function instead of the loaded data.
df = get_unit_price_comparison_data(df)

# Count comparison rows per brand, then keep the brands with more than 30.
prod_comparisons = df['brand_name'].value_counts()
brand_names = prod_comparisons.loc[prod_comparisons.gt(30)].index.tolist()

In [31]:
import plotly.express as px
# Reload the aggregated product data; this replaces whatever `df` held before.
df = pd.read_csv('../data/agg_prod_data.csv')

# Scatter of amount_a against price for the rows whose lvl_2_cat is 'Mascara',
# one colour per brand.
fig = px.scatter(
    df.loc[df['lvl_2_cat'] == 'Mascara'],
    x="amount_a",
    y="price",
    color='brand_name',
    hover_data=['brand_name', 'product_name', 'index', 'unit_price'],
)
fig.show()

In [47]:
# import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np



# Keep only the rows whose brand is Dior.
# NOTE(review): the plot later in this cell reads `mini_to_standard_ratio`,
# `variable` and `value`, which get_unit_price_comparison_data() produces -
# confirm `df` holds that comparison table here and not a fresh read of the CSV.
df = df[df['brand_name']=='Dior']

# This function allows you to retrieve colors from a continuous color scale
# by providing the name of the color scale, and the normalized location between 0 and 1
# Reference: https://stackoverflow.com/questions/62710057/access-color-from-plotly-color-scale

def get_color(colorscale_name, loc):
    """
    Look up the colour(s) of a named colour scale at one or more positions.

    :param colorscale_name: name of the colour scale, passed to plotly's
        ColorscaleValidator for resolution
    :param loc: a single position, or an iterable of positions; each one is
        forwarded to get_continuous_color (which documents the [0, 1] range)
    :return: one colour for a single position, or a list of colours (one per
        element) when ``loc`` is iterable
    """
    from _plotly_utils.basevalidators import ColorscaleValidator

    # The validator arguments are the name of the property being validated and
    # a second string that does not matter for this use.
    validator = ColorscaleValidator("colorscale", "")
    # Expected shape of the result: [[loc1, "rgb1"], [loc2, "rgb2"], ...]
    resolved_scale = validator.validate_coerce(colorscale_name)

    if not hasattr(loc, "__iter__"):
        return get_continuous_color(resolved_scale, loc)

    colors = []
    for position in loc:
        colors.append(get_continuous_color(resolved_scale, position))
    return colors
        

# Identical to Adam's answer
import plotly.colors
from PIL import ImageColor

def get_continuous_color(colorscale, intermed):
    """
    Plotly continuous colorscales assign colors to the range [0, 1]. This function computes the intermediate
    color for any value in that range.

    Plotly doesn't make the colorscales directly accessible in a common format.
    Some are ready to use:

        colorscale = plotly.colors.PLOTLY_SCALES["Greens"]

    Others are just swatches that need to be constructed into a colorscale:

        viridis_colors, scale = plotly.colors.convert_colors_to_same_type(plotly.colors.sequential.Viridis)
        colorscale = plotly.colors.make_colorscale(viridis_colors, scale=scale)

    Values at or below 0 return the first color and values at or above 1 return
    the last color. A value outside the cutoffs actually present in the scale is
    clamped to the nearest end color.

    :param colorscale: A plotly continuous colorscale: a sequence of
        [cutoff, color] pairs whose colors are RGB strings or "#" hex strings.
    :param intermed: value in the range [0, 1]
    :return: color in rgb string format
    :rtype: str
    :raises ValueError: if ``colorscale`` is empty or ``intermed`` is NaN
    """
    import math

    if len(colorscale) < 1:
        raise ValueError("colorscale must have at least one color")
    # NaN fails every comparison below, which used to leave the low color
    # unassigned and surface as an UnboundLocalError. Report it clearly instead.
    if math.isnan(intermed):
        raise ValueError("intermed must be a number in the range [0, 1], got NaN")

    def _as_rgb(color):
        # Some color scale names (such as cividis) yield hex strings; convert
        # those to the "rgb(r, g, b)" form and leave other colors untouched.
        if color[0] == "#":
            return "rgb" + str(ImageColor.getcolor(color, "RGB"))
        return color

    if intermed <= 0 or len(colorscale) == 1:
        return _as_rgb(colorscale[0][1])
    if intermed >= 1:
        return _as_rgb(colorscale[-1][1])

    # Start from the scale's end points so both sides are always defined, then
    # narrow to the two neighbouring cutoffs that bracket `intermed`.
    low_cutoff, low_color = colorscale[0]
    high_cutoff, high_color = colorscale[-1]
    for cutoff, color in colorscale:
        if intermed > cutoff:
            low_cutoff, low_color = cutoff, color
        else:
            high_cutoff, high_color = cutoff, color
            break

    # `intermed` lies outside the cutoffs present in the scale (or the bracket
    # collapsed to one point): nothing to interpolate, avoid dividing by zero.
    if high_cutoff <= low_cutoff:
        return _as_rgb(low_color)

    return plotly.colors.find_intermediate_color(
        lowcolor=_as_rgb(low_color),
        highcolor=_as_rgb(high_color),
        intermed=((intermed - low_cutoff) / (high_cutoff - low_cutoff)),
        colortype="rgb",
    )

fig = go.Figure()

# The ratio bounds are the same for every trace, so compute them once instead
# of re-scanning the column on each loop iteration.
ratio_min = df['mini_to_standard_ratio'].min()
ratio_max = df['mini_to_standard_ratio'].max()
ratio_span = ratio_max - ratio_min

## add the lines+markers
# One trace per distinct ratio. Missing ratios are skipped: they match no rows
# and cannot be placed on the colour scale.
for color_val in df['mini_to_standard_ratio'].dropna().unique():
    # Position of this ratio on the colour scale, in [0, 1]. When every ratio
    # is identical the span is 0, so use the low end instead of dividing 0 by 0.
    color_val_normalized = (color_val - ratio_min) / ratio_span if ratio_span > 0 else 0.0
    # print(f"color_val={color_val}, color_val_normalized={color_val_normalized}")
    df_subset = df[df['mini_to_standard_ratio'] == color_val]
    fig.add_trace(go.Scatter(
        x=df_subset['variable'],
        y=df_subset['value'],
        mode='lines+markers',
        marker=dict(color=get_color('Plasma', color_val_normalized)),
        name=f"line+marker {color_val}",
        legendgroup=f"line+marker {color_val}"
    ))

## add invisible markers to display the colorbar without displaying the markers
fig.add_trace(go.Scatter(
    x=df['variable'],
    y=df['value'],
    mode='markers',
    marker=dict(
        size=0,
        color="rgba(0,0,0,0)",
        colorscale='Plasma',
        cmin=ratio_min,
        cmax=ratio_max,
        colorbar=dict(thickness=40)
    ),
    showlegend=False
))

fig.update_layout(
    # legend anchored near the top-left corner of the plot
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01),
    # pad the y axis by 2 on either side of the plotted values
    yaxis_range=[df['value'].min() - 2, df['value'].max() + 2]
)

fig.show()


In [48]:
# Pair every mini-size row with every standard-size row of the same product
# (inner join on the product keys).
df_compare = df[df['swatch_group']=='mini size'].merge(
    df[df['swatch_group']=='standard size'],
    on=['product_id','product_name','brand_name'],
    suffixes=('_mini','_standard')
)
# Filter and add the ratio in one chain: .assign() returns a new frame, so the
# ratio column is never written onto a filtered slice (SettingWithCopyWarning).
df_compare = (
    df_compare
    # only calculate ratio in one direction
    .loc[lambda pairs: pairs['amount_adj_mini'] < pairs['amount_adj_standard']]
    # if ratio < 1, mini is better value per oz, if ratio > 1, standard is better value
    .assign(
        mini_to_standard_ratio=lambda pairs: pairs['unit_price_mini'] / pairs['unit_price_standard']
    )
)
# df_compare = df_compare.reset_index().rename(columns={'index':'prod_rank'})


In [81]:
scale='plotly3_r'

fig = go.Figure()
temp = df_compare[50:60]#[df_compare['brand_name']=='Anastasia Beverly Hills']

# Colour-scale bounds are shared by every line, so compute them once instead of
# re-scanning the column for each row.
ratio_min = temp['mini_to_standard_ratio'].min()
ratio_max = temp['mini_to_standard_ratio'].max()
ratio_span = ratio_max - ratio_min

for _idx, row in temp.iterrows():

    # Position of this pair's ratio on the colour scale, in [0, 1]. When all
    # ratios are equal the span is 0, so use the low end instead of dividing 0 by 0.
    color_val_normalized = (
        (row['mini_to_standard_ratio'] - ratio_min) / ratio_span if ratio_span > 0 else 0.0
    )

    # line plots: one segment per pair, from mini unit price to standard unit price
    fig.add_trace(go.Scatter(
        x=['Mini','Standard'],
        y=[row['unit_price_mini'], row['unit_price_standard']],
        mode='lines',
        marker=dict(color=get_color(scale, color_val_normalized)),
        line=dict(width=5),
        hovertemplate='Price: %{y:$.2f}<extra></extra>',
        showlegend=False
    ))

## add invisible markers to display the colorbar without displaying the markers
# Added once for the whole figure (it was previously added once per row, which
# stacked one identical colorbar per row). The trace still carries every
# plotted y value and the same 'mini' / 'standard' x labels as before.
if not temp.empty:
    fig.add_trace(go.Scatter(
        x=['mini'] * len(temp) + ['standard'] * len(temp),
        y=temp['unit_price_mini'].tolist() + temp['unit_price_standard'].tolist(),
        mode='markers',
        marker=dict(
            size=0,
            color="rgba(0,0,0,0)",
            colorscale=scale,
            cmin=ratio_min,
            cmax=ratio_max,
            colorbar=dict(
                thickness=25,
                title='Unit Price Ratio',
                x=0.9,
                xref="container",
                # borderwidth=0.01
            ),
        ),
        # showlegend=False
    ))


# dashed guide lines at the first two category positions
fig.add_vline(x=0, line_width=1, line_dash="dash", line_color="grey")
fig.add_vline(x=1, line_width=1, line_dash="dash", line_color="grey")


fig.update_layout(
    xaxis=dict(
        title='Product Size',
        type='category',
        tickmode='array',
        # really difficult to get categorical axis spacing right
        range=[-0.2, 2 - 0.7],
        linecolor='rgb(204, 204, 204)',
        # linewidth=2,
    ),
    yaxis=dict(
        title='Unit Price ($/oz.)',
        showgrid=False,
        zeroline=True,
        showline=False,
        showticklabels=True,
    ),
    autosize=False,
    width=700,
    height=500,

    margin=dict(
        autoexpand=False,
        l=180,
        r=180,
        t=110,
    ),
    showlegend=False,
    plot_bgcolor='white'
)



# annotations = []
# y_data = df_compare[['unit_price_mini','unit_price_standard']][0:10].values
# labels = df_compare['brand_name'][0:10]
# # Adding labels
# for y_trace, label in zip(y_data, labels):
#     # # labeling the left_side of the plot
#     # annotations.append(dict(xref='paper', x=1, y=y_trace[1],
#     #                               xanchor='right', yanchor='middle',
#     #                               text=label,# + ' {}$/oz.'.format(y_trace[0]),
#     #                               font=dict(family='Arial',
#     #                                         size=10),
#     #                               showarrow=False))

# fig.update_layout(annotations=annotations)

fig.show()

In [87]:
# Reload the aggregated product data, replacing whatever `df` held before.
df = pd.read_csv('../data/agg_prod_data.csv')

Unnamed: 0,index,product_id,product_name,brand_name,swatch_group,amount_a,unit_a,price,internal_product_id,rating,...,lvl_0_cat,lvl_1_cat,lvl_2_cat,sku,amount_b,unit_b,product_multiplier,amount_adj,unit_price,prod_size_rank
0,0,1018539,Master Mattes™ Liquid Eyeliner,MAKEUP BY MARIO,standard size,0.04,oz,33.0,1,3.8494,...,Makeup,Eye,Eyeliner,['2389542'],1.2,ml,1.0,0.04,825.0,155.0
1,1,1479545,Pro Longwear Fluidline eyeliner,MAC Cosmetics,standard size,0.1,oz,32.0,1,3.9024,...,Makeup,Eye,Eyeliner,['2091122' '2091114' '2091072'],3.0,g,1.0,0.1,320.0,346.0
2,2,4546778,Mineralize Timecheck Lotion,MAC Cosmetics,standard size,1.0,oz,62.0,1,4.5,...,Skincare,Moisturizers,Moisturizers,['2099588'],30.0,ml,1.0,1.0,62.0,2512.0
3,3,4845668,Strobe Cream,MAC Cosmetics,standard size,1.7,oz,49.0,1,4.3269,...,Skincare,Moisturizers,Moisturizers,['2099505' '2099513'],50.0,ml,1.0,1.7,28.823529,3595.0
4,4,4879887,Prep + Prime Highlighter,MAC Cosmetics,standard size,0.12,oz,39.5,1,4.5714,...,Makeup,Face,Highlighter,['2096923' '2096949' '2096931' '2096956'],3.6,ml,1.0,0.12,329.166667,481.0


In [111]:

# Create subplots
import plotly.subplots as sp

# Define the colour scale before its first use in this cell, so the scatter
# traces below do not depend on a value left over from an earlier cell.
scale='plotly3_r'

fig = sp.make_subplots(rows=1, cols=2, column_widths=[0.5, 0.5])


categories = df.swatch_group.unique()
marker_shapes = ['circle', 'square', 'diamond', 'cross']

# Create a Scatter plot for each category (right-hand subplot)
for i, category in enumerate(categories):
    data = df[df['swatch_group']==category]
    # Cycle through the shapes so more than four categories do not raise IndexError.
    marker_shape = marker_shapes[i % len(marker_shapes)]

    scatter = go.Scatter(
        x=data['price'],
        y=data['amount_a'],
        name=category,
        mode='markers',
        marker=dict(
            symbol=marker_shape,
            size=10,  # Adjust marker size as needed
            colorscale=scale,
        )
    )
    fig.add_trace(scatter, row=1, col=2)

# Customize the layout. This does not depend on the category, so it is applied
# once rather than on every loop iteration.
fig.update_layout(
    title='Scatter Plot with Categorical Values and Marker Shapes',
    showlegend=True
)


temp = df_compare[50:60]#[df_compare['brand_name']=='Anastasia Beverly Hills']

# Colour-scale bounds are shared by every line, so compute them once instead of
# re-scanning the column for each row.
ratio_min = temp['mini_to_standard_ratio'].min()
ratio_max = temp['mini_to_standard_ratio'].max()
ratio_span = ratio_max - ratio_min

for _idx, row in temp.iterrows():

    # Position of this pair's ratio on the colour scale, in [0, 1]. When all
    # ratios are equal the span is 0, so use the low end instead of dividing 0 by 0.
    color_val_normalized = (
        (row['mini_to_standard_ratio'] - ratio_min) / ratio_span if ratio_span > 0 else 0.0
    )

    # line plots (left-hand subplot): one segment per pair
    fig.add_trace(go.Scatter(
        x=['Mini','Standard'],
        y=[row['unit_price_mini'], row['unit_price_standard']],
        mode='lines',
        marker=dict(color=get_color(scale, color_val_normalized)),
        line=dict(width=5),
        hovertemplate='Price: %{y:$.2f}<extra></extra>',
        showlegend=False
        ),
        row=1, col=1
    )

## add invisible markers to display the colorbar without displaying the markers
# Added once for the whole figure (it was previously added once per row, which
# stacked one identical colorbar per row). The trace still carries every
# plotted y value and the same 'mini' / 'standard' x labels as before.
if not temp.empty:
    fig.add_trace(go.Scatter(
        x=['mini'] * len(temp) + ['standard'] * len(temp),
        y=temp['unit_price_mini'].tolist() + temp['unit_price_standard'].tolist(),
        mode='markers',
        marker=dict(
            size=0,
            color="rgba(0,0,0,0)",
            colorscale=scale,
            cmin=ratio_min,
            cmax=ratio_max,
            colorbar=dict(
                thickness=25,
                title='Unit Price Ratio',
                x=0.9,
                xref="container",
                # borderwidth=0.01
            ),
        ),
        # showlegend=False
        ),
        row=1,
        col=1
    )

fig.update_layout(
    xaxis=dict(
        title='Product Size',
        type='category',
        tickmode='array',
        # really difficult to get categorical axis spacing right
        range=[-0.2, 2 - 0.7],
        linecolor='rgb(204, 204, 204)',
        # linewidth=2,
    ),
    yaxis=dict(
        title='Unit Price ($/oz.)',
        showgrid=False,
        zeroline=True,
        showline=False,
        showticklabels=True,
    ),
    # autosize=False,
    # width=700,
    # height=500,

    # margin=dict(
    #     autoexpand=False,
    #     l=180,
    #     r=180,
    #     t=110,
    # ),
    # this overrides the showlegend=True set above
    showlegend=False,
    plot_bgcolor='white'
)

# Show the plot
fig.show()

