# Van Westendorp (VW) price sensitivity study analysis using Colab

In [21]:
# Compute cumulative frequencies
def cdf(df, col):
    '''
    Build the empirical cumulative distribution of one column.

    Input: A data frame and the name of the column to summarise
    Output: A pd.DataFrame with one row per distinct value of the column:
            'Price' holds the value and `col` holds the cumulative share
            of counted rows whose value is less than or equal to it
    '''
    # Number of rows carrying each distinct value
    counts = df.groupby(col)[col].count()

    # Share of rows per value, accumulated in ascending value order
    cumulative = (counts / sum(counts)).cumsum()

    # Move the values out of the index into a 'Price' column
    return cumulative.rename(col).rename_axis('Price').reset_index()

def cdf_table(df, price_cols, interpolate=True):
    '''
    Re-creating R's function output$data_vanwestendorp

    Input:
        df: data frame holding the four price answers
        price_cols: names of the four price columns, ordered as
            [too cheap, cheap, expensive, too expensive]
        interpolate: when true, resample the curves on a 0.01 price grid
            and fill the gaps between observed prices linearly
    Output: A pd.DataFrame sorted by 'Price' with the columns 'too_cheap',
        'cheap', 'expensive', 'too_expensive', 'Not Cheap' and
        'Not Expensive'
    '''
    # rename() returns a new frame, so the result has to be kept; otherwise
    # price_cols is ignored and the lookups below fail for any other names.
    # Rebinding the local name also leaves the caller's frame untouched.
    df = df.rename(columns={price_cols[0]: "too_cheap", price_cols[1]: "cheap", price_cols[2]: "expensive", price_cols[3]: "too_expensive"})

    cdfs = [cdf(df, "too_cheap"), cdf(df, "cheap"), cdf(df, "expensive"), cdf(df, "too_expensive")]
    cdfs = reduce(lambda left, right: pd.merge(left, right, on=['Price'], how='outer'), cdfs).sort_values('Price')

    # Carry each curve forward across prices it was not observed at; prices
    # below a curve's first observation get 0. ffill() replaces the
    # deprecated fillna(method='ffill').
    cdfs = cdfs.ffill().fillna(0)

    # The two "cheap" curves are reversed (share answering at or above a price)
    cdfs["too_cheap"] = 1 - cdfs["too_cheap"]
    cdfs["cheap"] = 1 - cdfs["cheap"]
    cdfs['Not Cheap'] = 1 - cdfs["cheap"]
    cdfs['Not Expensive'] = 1 - cdfs["expensive"]

    # Remove tiny negative values left by floating-point subtraction
    cdfs = cdfs.clip(lower=0)

    if interpolate:
        # Round both the observed prices and the grid to cents BEFORE merging.
        # np.arange accumulates float error, so a grid value can differ from
        # the observed price in the last bits; merging first and
        # de-duplicating afterwards could then keep the empty grid row and
        # throw the observed row away.
        cdfs['Price'] = cdfs['Price'].astype(float).round(2)
        cdfs = cdfs.drop_duplicates(['Price'], keep='last')

        low = cdfs.Price.min()
        high = cdfs.Price.max()
        grid = pd.DataFrame({'Price': np.arange(low, high, 0.01)}).round(2).drop_duplicates()

        cdfs = pd.merge(grid, cdfs, on='Price', how='outer').sort_values('Price')
        cdfs = cdfs.interpolate(method='linear', limit_direction='forward')

        # Anything still missing takes the value the curve has at the low end
        cdfs["too_cheap"] = cdfs["too_cheap"].fillna(1)
        cdfs["cheap"] = cdfs["cheap"].fillna(0)
        cdfs["expensive"] = cdfs["expensive"].fillna(0)
        cdfs["too_expensive"] = cdfs["too_expensive"].fillna(0)
        cdfs['Not Cheap'] = cdfs['Not Cheap'].fillna(0)
        cdfs['Not Expensive'] = cdfs['Not Expensive'].fillna(1)

        cdfs = cdfs.reset_index(drop=True)
    return cdfs


# Plot function
def plot_function(cdfs, 
                  Point_of_Marginal_Cheapness, PMC_height,
                  Point_of_Marginal_Expensiveness, PME_height,
                  Indifference_Price_Point, IPP_height,
                  Optimal_Price_Point, OPP_height,
                  title=""):
    '''
    Draw the four cumulative curves and the four key price points.

    Input: the curve table (needs 'Price', 'too_expensive', 'Not Expensive',
           'Not Cheap' and 'too_cheap' columns), the x position and height
           of each key price point, and an optional subtitle
    Output: None; the figure is displayed with iplot
    '''
    line_width = 1
    marker_size = 3
    prices = cdfs.Price.values

    def curve_trace(var, colour):
        # One line trace per curve, with a per-point hover label
        shares = cdfs[var].values
        hover = [f"{var}<br>Price: ${price:.2f}<br>Participants: {val*100:.2f}%"
                 for (price, val) in zip(prices, shares)]
        return go.Scatter(
            x=prices,
            y=shares,
            text=hover,
            mode='lines',
            opacity=0.8,
            marker={'size': marker_size, 'color': colour},
            hoverinfo='text',
            line={'color': colour, 'width': line_width},
            name=var,
        )

    def point_trace(label, price, height, colour, legend_prefix=""):
        # One single-marker trace per key price point
        return go.Scatter(
            x=[price],
            y=[height],
            text=[f"{label}: ${price:.2f}<br>Participants: {height*100:.2f}%"],
            mode='markers',
            opacity=1,
            marker={'size': 7, 'color': colour},
            hoverinfo='text',
            name=f"{legend_prefix}{label}<br>${price:.2f}",
        )

    curve_specs = (
        ("too_expensive", "red"),
        ("Not Expensive", "orange"),
        ("Not Cheap", "blue"),
        ("too_cheap", "green"),
    )
    curves = [curve_trace(var, colour) for var, colour in curve_specs]

    points = [
        # Only this legend entry starts with a line break
        point_trace("Point of Marginal Cheapness", Point_of_Marginal_Cheapness, PMC_height, "blue", legend_prefix="<br>"),
        point_trace("Point of Marginal Expensiveness", Point_of_Marginal_Expensiveness, PME_height, "red"),
        point_trace("Indifference Price Point", Indifference_Price_Point, IPP_height, "orange"),
        point_trace("Optimal Price Point", Optimal_Price_Point, OPP_height, "green"),
    ]

    layout = go.Layout(
        title=f"Van Westendorp's Price Sensitivity Meter<br>{title}",
        # Pad the x axis by 5 price units on each side of the data
        xaxis=dict(title='$ Price', range=(cdfs.Price.min()-5, cdfs.Price.max()+5)),
        yaxis=dict(title='% of Participants', range=(-0.1,1.1)),
        template="plotly_white",
    )

    iplot(go.Figure(data=curves + points, layout=layout))


# Get results
def _first_crossing(cdfs, first, second):
    '''
    Find where two curves of the table first swap order.

    Input: the curve table and the names of the two curve columns
    Output: (price, height) taken from the row right after the first sign
            change of `first - second`; height is the mean of the two curves
    Raises: IndexError when the sign of the difference never changes
    '''
    signs = np.sign((cdfs[first] - cdfs[second]).to_numpy())
    # diff is non-zero between row i and i+1 when the sign changes; +1 picks
    # the row after the change
    crossings = np.flatnonzero(np.diff(signs)) + 1
    if crossings.size == 0:
        raise IndexError(f"curves '{first}' and '{second}' never intersect")
    row = cdfs.iloc[crossings[0]]
    return row['Price'], row[[first, second]].mean()


# Get results
def results(df, price_cols, plot=True, plot_title=""):
    '''
    Print the key Van Westendorp price points and optionally plot them.

    Input:
        df: data frame holding the four price answers
        price_cols: names of the four price columns, passed to cdf_table
        plot: when true, draw the chart with plot_function
        plot_title: subtitle passed to plot_function
    Output: None; the accepted price range, the indifference price point
        and the optimal price point are printed
    Raises: IndexError when a required pair of curves never intersects
    '''
    cdfs = cdf_table(df, price_cols)

    # Each point is the first crossing of a pair of curves; the height is
    # only needed to place the marker on the plot.
    Point_of_Marginal_Cheapness, PMC_height = _first_crossing(cdfs, "too_cheap", 'Not Cheap')
    Point_of_Marginal_Expensiveness, PME_height = _first_crossing(cdfs, "too_expensive", 'Not Expensive')
    Indifference_Price_Point, IPP_height = _first_crossing(cdfs, 'Not Cheap', 'Not Expensive')
    Optimal_Price_Point, OPP_height = _first_crossing(cdfs, "too_expensive", "too_cheap")

    print(f"Accepted Price Range: ${Point_of_Marginal_Cheapness:.2f} - ${Point_of_Marginal_Expensiveness:.2f}")
    print(f"Indifference Price Point: ${Indifference_Price_Point:.2f}")
    print(f"Optimal Price Point: ${Optimal_Price_Point:.2f}")

    if plot:
        plot_function(cdfs, 
                      Point_of_Marginal_Cheapness, PMC_height,
                      Point_of_Marginal_Expensiveness, PME_height,
                      Indifference_Price_Point, IPP_height,
                      Optimal_Price_Point, OPP_height,
                      plot_title)


'''
If you'd like to contribute to make this code better, write me at Twitter @vivmarquez
If you thought it was useful, also tweet me, it would make me happy :)
'''

"\nIf you'd like to contribute to make this code better, write me at Twitter @vivmarquez\nIf you thought it was useful, also tweet me, it would make me happy :)\n"

In [26]:
# Display the curve table built from df (interpolated, since interpolate defaults to True)
cdf_table(df, my_cols)

Unnamed: 0,Price,too_cheap,cheap,expensive,too_expensive,Not Cheap,Not Expensive
0,0.00,5.012146e-01,8.133603e-01,0.0,0.000000,0.18664,1.0
1,0.01,5.011741e-01,8.133603e-01,0.0,0.000000,0.18664,1.0
2,0.02,5.011336e-01,8.133603e-01,0.0,0.000000,0.18664,1.0
3,0.03,5.010931e-01,8.133603e-01,0.0,0.000000,0.18664,1.0
4,0.04,5.010526e-01,8.133603e-01,0.0,0.000000,0.18664,1.0
...,...,...,...,...,...,...,...
14996,149.96,1.110223e-16,2.220446e-16,1.0,0.999988,1.00000,0.0
14997,149.97,1.110223e-16,2.220446e-16,1.0,0.999991,1.00000,0.0
14998,149.98,1.110223e-16,2.220446e-16,1.0,0.999994,1.00000,0.0
14999,149.99,1.110223e-16,2.220446e-16,1.0,0.999997,1.00000,0.0


In [2]:
from functools import reduce

import numpy as np
import pandas as pd
#import VanWestendorp_PriceSensitivityMeter as VWPSM
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from google.colab import drive # to allow colab save file in my drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [3]:
# Switch to the project folder on the mounted drive, then show the current path and its contents
%cd "/content/drive/Shared drives/Global Pricing/5 - Useful files/4 - GitHub/global_pricing/VW"
%pwd
%ls

/content/drive/Shared drives/Global Pricing/5 - Useful files/4 - GitHub/global_pricing/VW
 Bolivia.xlsx
'Copy of Price Sensitivity Meter - Van Westendorp Method (Step by step in Python).ipynb'
 test.csv
 test.xlsx
 vw_response_taiwan.csv
 vw_response_taiwan.xlsx
 VW_Test.ipynb


In [4]:

# Load the survey export and give the four price questions short column names
my_data = (
    pd.read_excel("/content/drive/Shared drives/Global Pricing/5 - Useful files/4 - GitHub/global_pricing/VW/vw_response_taiwan.xlsx")
    .rename(columns={
        "What delivery fee would be so low that you would doubt the quality of the service and not order (too cheap)?": "too_cheap",
        "What delivery fee would you see as a bargain (great value for the money)?": "cheap",
        "What delivery fee value would you see as not cheap (but you would still place an order)?": "expensive",
        "What delivery fee value would you see as too expensive (you would no longer consider placing an order)?": "too_expensive",
    })
)

my_data.shape

Unnamed: 0,Start Date,End Date,Response Type,IP Address,Progress,Duration (in seconds),Finished,Recorded Date,Response ID,Recipient Last Name,Recipient First Name,Recipient Email,External Data Reference,Location Latitude,Location Longitude,Distribution Channel,User Language,Q_RecaptchaScore,Q_RelevantIDDuplicate,Q_RelevantIDDuplicateScore,Q_RelevantIDFraudScore,Q_BallotBoxStuffing,"Keeping your recent order experience in mind, how likely are you to recommend foodpanda to a friend or colleague? - Group","Keeping your recent order experience in mind, how likely are you to recommend foodpanda to a friend or colleague?",Which of the following order steps contributed most to your score?,What factor contributed most to your score above? [NOT SHOWN] - Selected Choice,What factor contributed most to your score above? [NOT SHOWN] - Other - Text,Browser Meta Info - Browser,Browser Meta Info - Version,Browser Meta Info - Operating System,Browser Meta Info - Resolution,Please rate the following attributes related to your waiting & delivery experience: - Order delivery time,Please rate the following attributes related to your waiting & delivery experience: - Communication of order status,Please rate the following attributes related to your waiting & delivery experience: - Live tracking functionality,Please rate the following attributes related to your waiting & delivery experience: - How professional the delivery rider was,Please rate the following attributes related to your waiting & delivery experience: - How well the meal was packaged,Please rate the following attributes related to your waiting & delivery experience: - Order status page,Please rate the following attributes related to your waiting & delivery experience: - How close the actual delivery time was to the expected time,Please rate the following attributes related to your waiting & delivery experience: - How clearly the delivery riders communicated with you,Please rate the following attributes related to your 
waiting & delivery experience: - How well the delivery riders presented themselves,...,RecipientEmail,sid,SurveyID,segmentRecency,email,last_order_date,customer_id,customerId,externalId,dhhOptinStatus,segmantDiscountOther,segmentVariety,propensityToOrder,predictedFutureValue,lastNpsAfterOrder,lastFailedOrderStatus,lastFailedOrderRest,firstOrderTimestamp,lastOrderTimestamp,voucherCode,paymentMethod,preferredOrderPeriod,preferredMainCuisineLocal,platform,lastVendorId,lastVendorNameEnglish,lastVendorNameLocal,loyaltyStatus,loyaltyNba,lastOrderedVertical,segmentLoyalty,VendorNameAllVerts,CityNameAllVerts,Q_PopulateResponse,Q_URL,Performance,Q-URL,Q1.2a,Q.CNPS,Q_DataPolicyViolations
0,2020-04-15 19:02:31,2020-04-15 19:05:05,Survey Preview,,100,153,True,2020-04-15 19:05:05,R_R8Cm7jkICCPcjiV,,,,,50.118805,8.684296,preview,EN,0.9,,0.0,0.0,,Detractor,5,"Checkout process (e.g. Payment, Voucher, Confi...",,,Chrome,80.0.3987.163,Windows NT 10.0,1920x1080,,,,,,,,,,...,,SV_6lZ3uvHUtVuMXOZ,SV_6lZ3uvHUtVuMXOZ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,https://deliveryhub.eu.qualtrics.com/jfe4/prev...,0.0,,,,
1,2020-04-15 19:16:02,2020-04-15 19:17:25,IP Address,123.193.83.117,100,83,True,2020-04-15 19:17:26,R_Z7QovRYInomZjc5,,,,,25.047806,121.531799,anonymous,ZH-T,0.9,,0.0,5.0,,Detractor,6,The Order was cancelled,,,Chrome,80.0.3987.162,Android 8.1.0,360x720,,,,,,,,,,...,,SV_6lZ3uvHUtVuMXOZ,SV_6lZ3uvHUtVuMXOZ,,jassson12251031@gmail.com,,,3031557.0,TW_3031557,,N,L,0.380969,,,failed_delivery,dh,2019-09-29 00:00:00 UTC,2020-04-14 00:00:00 UTC,KFC50YUM,cash,multi weekday | multi daytime,中港,android,z5nj,肯德基 (文山興隆店),,Active not Steady nor Frequent,,restaurants,Active not Steady nor Frequent,肯德基 (文山興隆店),Taipei City,,https://deliveryhub.eu.qualtrics.com/jfe/form/...,0.0,,,,
2,2020-04-15 19:15:14,2020-04-15 19:17:38,IP Address,114.136.94.199,100,143,True,2020-04-15 19:17:39,R_1gtC2S5ebJl8GZS,,,,,25.047806,121.531799,anonymous,ZH-T,0.9,,0.0,5.0,,Detractor,5,foodpanda's brand in general (e.g. trustworthi...,,,Safari iPhone,Unknown,iPhone,375x667,,,,,,,,,,...,,SV_6lZ3uvHUtVuMXOZ,SV_6lZ3uvHUtVuMXOZ,,super2pm66@gmail.com,,,4211942.0,TW_4211942,,N,N,0.894418,,,,,2020-04-14 00:00:00 UTC,2020-04-14 00:00:00 UTC,First7,cash,single weekday | single daytime,中港,iOS,y0bt,福生園,,Acquisition,,restaurants,Acquisition,福生園,Taipei City,,https://deliveryhub.eu.qualtrics.com/jfe/form/...,0.0,,,,
3,2020-04-15 19:22:00,2020-04-15 19:24:55,IP Address,175.97.53.108,100,175,True,2020-04-15 19:24:56,R_1DHKUfoW4wdr2oJ,,,,,25.047806,121.531799,anonymous,ZH-T,0.9,,0.0,5.0,,Promoter,10,Waiting and Delivery of meal (e.g. Communicati...,,,Safari iPhone,Unknown,iPhone,414x736,Dissatisfied,Dissatisfied,Dissatisfied,Neither satisfied nor dissatisfied,,,Very Dissatisfied,,,...,,SV_6lZ3uvHUtVuMXOZ,SV_6lZ3uvHUtVuMXOZ,,abcdefg12390012@gmail.com,,,3285834.0,TW_3285834,,N,H,0.911108,,,,,2019-10-18 00:00:00 UTC,2020-04-14 00:00:00 UTC,,cash,multi weekday | multi daytime,中港,iOS,b7tz,ㄔˇ留香,,Loyal,online_payment,restaurants,Loyal,ㄔˇ留香,Yilan County,,https://deliveryhub.eu.qualtrics.com/jfe/form/...,0.0,,,,
4,2020-04-15 19:18:13,2020-04-15 19:28:43,IP Address,49.216.188.222,100,630,True,2020-04-15 19:28:44,R_ewZCPl69fFsDAC5,,,,,25.047806,121.531799,anonymous,ZH-T,0.9,,0.0,5.0,,Passive,8,Waiting and Delivery of meal (e.g. Communicati...,,,Safari iPhone,Unknown,iPhone,414x736,,,,Satisfied,Satisfied,Neither satisfied nor dissatisfied,Satisfied,,Satisfied,...,,SV_6lZ3uvHUtVuMXOZ,SV_6lZ3uvHUtVuMXOZ,,ch041305@yahoo.com.tw,,,209921.0,TW_209921,,N,H,,,9.0,failed_payment,customer,2016-02-12 00:00:00 UTC,2020-04-14 00:00:00 UTC,STAYHOME,cash|card,multi weekday | multi daytime,中港,iOS,y3wy,Noona 努娜炸雞,,Loyal,,restaurants,Loyal,Noona 努娜炸雞,Taichung City,,https://deliveryhub.eu.qualtrics.com/jfe/form/...,0.8,,,,


In [0]:
#df.loc[(df['First_name'] == 'Bill') | (df['First_name'] == 'Emma'), 'name_match'] = 'Match'  

In [0]:
# define qualification
def validation(row):
    '''
    Decide whether one survey response qualifies for the analysis.

    Input: a row (or any mapping) with the keys "Response Type", "Finished",
           "too_cheap", "cheap", "expensive" and "too_expensive"
    Output: True when the response type is "IP Address", the survey was
            finished, cheap < expensive < too_expensive, and either
            too_cheap < cheap or both too_cheap and cheap are 0;
            False otherwise
    '''
    # Reject anything that is not a finished "IP Address" response
    if not (row["Response Type"] == "IP Address" and row["Finished"] == True):
        return False

    # The two lowest answers must be increasing, unless both are zero
    lower_pair_ok = (row["too_cheap"] == 0 and row["cheap"] == 0) or row["too_cheap"] < row["cheap"]

    # The remaining answers must be strictly increasing
    return bool(lower_pair_ok
                and row["cheap"] < row["expensive"]
                and row["expensive"] < row["too_expensive"])

# Apply validation() to every row (axis=1) and store the True/False result in a 'validation' column
my_data = my_data.assign(validation=my_data.apply(validation, axis=1))

In [0]:
# Work on a copy so the full response table in my_data stays available
my_data_filtere = my_data.copy()
# Keep only the rows flagged as valid. The filter reads from the copy made
# above: df itself is not assigned anywhere before this cell, so filtering
# df here only worked with leftover kernel state.
df = my_data_filtere[my_data_filtere['validation'] == True]

In [0]:
# The four price columns, in the order cdf_table and results expect them
my_cols = [
    "too_cheap",
    "cheap",
    "expensive",
    "too_expensive",
]


In [22]:
# Print the key price points for df and draw the chart (plot defaults to True)
results(df,my_cols)

Output hidden; open in https://colab.research.google.com to view.

In [0]:
# NOTE(review): validate() is not defined in any visible cell; presumably it came from
# the commented-out VanWestendorp_PriceSensitivityMeter import. Confirm before re-running.
my_validate = validate(my_data, my_cols)
my_validate.shape

Total data set contains 922 cases, 600 cases were kept (transitive price preferences).



(600, 6)

In [0]:
# Scratch inputs for stepping through cdf_table by hand.
# NOTE(review): this rebinds df to the full my_data, replacing whatever subset df held before.
df = my_data
price_cols = my_cols

In [0]:
# Map the four price columns to the standard curve names; rename() returns a new frame, so the result is kept
df = df.rename(columns={price_cols[0]: "too_cheap", price_cols[1]: "cheap", price_cols[2]: "expensive", price_cols[3]: "too_expensive"})

In [0]:
# One cumulative-distribution table per price column, in the order too_cheap, cheap, expensive, too_expensive
cdfs = [cdf(df, 'too_cheap'), cdf(df, "cheap"), cdf(df, "expensive"), cdf(df, 'too_expensive')]