# Libraries

In [14]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Personas

In [96]:
personas = {
    'Caffeine': {'Caffeine Junkie Richard': 10, 'Moderate Mindy': 5, 'Stingy Steve': 7, 'Hippie Hendrix': 3},
    'Sugar': {'Caffeine Junkie Richard': 5, 'Moderate Mindy': 5, 'Stingy Steve': 5, 'Hippie Hendrix': 0},
    'Artificial Sweetener': {'Caffeine Junkie Richard': 6, 'Moderate Mindy': 3, 'Stingy Steve': 5, 'Hippie Hendrix': 0},
    'Price': {'Caffeine Junkie Richard': 0, 'Moderate Mindy': 4, 'Stingy Steve': 10, 'Hippie Hendrix': 0},
    'Size': {'Caffeine Junkie Richard': 6, 'Moderate Mindy': 4, 'Stingy Steve': 6, 'Hippie Hendrix': 3},
    'Calories': {'Caffeine Junkie Richard': 4, 'Moderate Mindy': 5, 'Stingy Steve': 5, 'Hippie Hendrix': 2},
    'Energy Drink': {'Caffeine Junkie Richard': 8, 'Moderate Mindy': 5, 'Stingy Steve': 5, 'Hippie Hendrix': 0},
    'Coffee': {'Caffeine Junkie Richard': 5, 'Moderate Mindy': 5, 'Stingy Steve': 3, 'Hippie Hendrix': 8},
    'Tea': {'Caffeine Junkie Richard': 0, 'Moderate Mindy': 5, 'Stingy Steve': 2, 'Hippie Hendrix': 10}
}

df_persona = pd.DataFrame(personas)
df_persona.head()

Unnamed: 0,Caffeine,Sugar,Artificial Sweetener,Price,Size,Calories,Energy Drink,Coffee,Tea
Caffeine Junkie Richard,10,5,6,0,6,4,8,5,0
Moderate Mindy,5,5,3,4,4,5,5,5,5
Stingy Steve,7,5,5,10,6,5,5,3,2
Hippie Hendrix,3,0,0,0,3,2,0,8,10


In [97]:
scaler = StandardScaler()
df_persona_std = pd.DataFrame(scaler.fit_transform(df_persona), index=df_persona.index, columns=df_persona.columns)
df_persona_std.head()

Unnamed: 0,Caffeine,Sugar,Artificial Sweetener,Price,Size,Calories,Energy Drink,Coffee,Tea
Caffeine Junkie Richard,1.450105,0.57735,1.091089,-0.855186,0.96225,0.0,1.218544,-0.140028,-1.12833
Moderate Mindy,-0.483368,0.57735,-0.218218,0.122169,-0.57735,0.816497,0.174078,-0.140028,0.199117
Stingy Steve,0.290021,0.57735,0.654654,1.588203,0.96225,0.816497,0.174078,-1.260252,-0.597351
Hippie Hendrix,-1.256757,-1.732051,-1.527525,-0.855186,-1.347151,-1.632993,-1.566699,1.540308,1.526564


Caffeine: 0-10 = LOW-HIGH\
Sugar: 0-10 = LOW-HIGH\
Artificial Sweetener: 0-10 = HATE-LOVE\
Price: 0-10 = DONT_CARE-CHEAP\
Size: 0-10 = SMALL-LARGE\
Calories: 0-10 = LOW-HIGH\
Energy Drink: 0-10 = HATE-LOVE\
Coffee: 0-10 = HATE-LOVE\
Tea: 0-10 = HATE-LOVE

In [99]:
nick = [7, 5, 2, 6, 7, 5, 4, 10, 5]
richard = [10, 0, 5, 5, 10, 3, 10, 10, 0]
kabir = []

df_test_subject = pd.DataFrame([nick], columns=df_persona.columns)
df_test_subject_std = scaler.transform(df_test_subject)
distances = np.sqrt(np.sum((df_persona_std - df_test_subject_std)**2, axis=1))
closest_persona = distances.idxmin()
print(f"Nick: {closest_persona}")

df_test_subject = pd.DataFrame([richard], columns=df_persona.columns)
df_test_subject_std = scaler.transform(df_test_subject)
distances = np.sqrt(np.sum((df_persona_std - df_test_subject_std)**2, axis=1))
closest_persona = distances.idxmin()
print(f"Richard: {closest_persona}")

# df_test_subject = pd.DataFrame([kabir], columns=df_persona.columns)
# df_test_subject_std = scaler.transform(df_test_subject)
# distances = np.sqrt(np.sum((df_persona_std - df_test_subject_std)**2, axis=1))
# closest_persona = distances.idxmin()
# print(f"Kabir: {closest_persona}")

Nick: Moderate Mindy
Richard: Caffeine Junkie Richard


# Caffeine

In [100]:
df_caffeine = pd.read_csv('./Caffeine.csv')
df_caffeine.head()

Unnamed: 0,Location,Drink Name,Type,Price ($),Caffeine (mg),Calories,Volume (oz),Sugar (g),Artificial Sweetner,Hot/Cold
0,Campus Market,Redbull,Energy Drink,4.85,80.0,110,8.4,26,N,Cold
1,Campus Market,Redbull (large),Energy Drink,6.8,114.0,160,12.0,38,N,Cold
2,Campus Market,Redbull Sugarfree,Energy Drink,4.85,80.0,10,8.4,0,Y,Cold
3,Campus Market,Redbull Sugarfree (large),Energy Drink,6.8,114.0,20,12.0,0,Y,Cold
4,Campus Market,Yerba Mate (Normal),Tea,4.2,150.0,120,16.0,27,N,Cold


In [101]:
df_caffeine['Artificial Sweetener'] = df_caffeine['Artificial Sweetner'].apply(lambda x: 1 if x == 'Y' else 0)
df_caffeine.drop(columns=['Artificial Sweetner'], inplace=True)
df_caffeine['Cold'] = df_caffeine['Hot/Cold'].apply(lambda x: 1 if x == 'Cold' else 0)
df_caffeine.drop(columns=['Hot/Cold'], inplace=True)
df_caffeine['Energy Drink'] = df_caffeine['Type'].apply(lambda x: 1 if x == 'Energy Drink' else 0)
df_caffeine['Coffee'] = df_caffeine['Type'].apply(lambda x: 1 if x == 'Coffee' else 0)
df_caffeine['Tea'] = df_caffeine['Type'].apply(lambda x: 1 if x == 'Tea' else 0)
df_caffeine.drop(columns=['Type'], inplace=True)

df_caffeine.rename(columns={"Price ($)": "Price",
                            "Caffeine (mg)": "Caffeine",
                            "Sugar (g)": "Sugar",
                            "Volume (oz)": "Size"}, inplace=True)
df_caffeine.head()

Unnamed: 0,Location,Drink Name,Price,Caffeine,Calories,Size,Sugar,Artificial Sweetener,Cold,Energy Drink,Coffee,Tea
0,Campus Market,Redbull,4.85,80.0,110,8.4,26,0,1,1,0,0
1,Campus Market,Redbull (large),6.8,114.0,160,12.0,38,0,1,1,0,0
2,Campus Market,Redbull Sugarfree,4.85,80.0,10,8.4,0,1,1,1,0,0
3,Campus Market,Redbull Sugarfree (large),6.8,114.0,20,12.0,0,1,1,1,0,0
4,Campus Market,Yerba Mate (Normal),4.2,150.0,120,16.0,27,0,1,0,0,1


In [102]:
relevant_features = df_caffeine[['Caffeine', 'Sugar', 'Artificial Sweetener', 'Price', 'Size', 'Calories', 'Energy Drink', 'Coffee', 'Tea']]
standardized_drinks = scaler.transform(relevant_features)

def find_closest_persona(drink_features):
    distances = np.sqrt(np.sum((df_persona_std - drink_features)**2, axis=1))
    return distances.idxmin()

df_caffeine_persona = df_caffeine.copy()
df_caffeine_persona['Persona'] = [find_closest_persona(drink) for drink in standardized_drinks]
df_caffeine_persona.head()

Unnamed: 0,Location,Drink Name,Price,Caffeine,Calories,Size,Sugar,Artificial Sweetener,Cold,Energy Drink,Coffee,Tea,Persona
0,Campus Market,Redbull,4.85,80.0,110,8.4,26,0,1,1,0,0,Stingy Steve
1,Campus Market,Redbull (large),6.8,114.0,160,12.0,38,0,1,1,0,0,Stingy Steve
2,Campus Market,Redbull Sugarfree,4.85,80.0,10,8.4,0,1,1,1,0,0,Caffeine Junkie Richard
3,Campus Market,Redbull Sugarfree (large),6.8,114.0,20,12.0,0,1,1,1,0,0,Caffeine Junkie Richard
4,Campus Market,Yerba Mate (Normal),4.2,150.0,120,16.0,27,0,1,0,0,1,Stingy Steve


In [104]:
df_caffeine_persona['Persona'].value_counts()

Persona
Stingy Steve               40
Caffeine Junkie Richard    26
Name: count, dtype: int64