# 2.2 Plot customer journeys in 3D PCA food space, inspect further dimensions

In [2]:
from game.utils import run_query
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import plotly.express as px
import nbformat
import plotly.graph_objects as go

from game.flaskapp_andrius.api import preprocesser
from game.src.feature_generator import get_embedding, get_vector_list, get_euc_dist_from_origin
from game.src.game_2_builder import get_scaled_pc_by_fg
from game.src.ab_test_preprocessor import calc_new_delta, get_last_order_hist, get_all_orders, get_order_hist_from_observed

In [3]:
# Load the recipe table: one row per recipe with its PCA coordinates (PC_*)
# and descriptive columns such as title, food_group and key_ingredient.
df_full = pd.read_csv(filepath_or_buffer='../data/recipe_pca.csv')
df_full.head(n=1)

Unnamed: 0,id,PC_1,PC_2,PC_3,PC_4,PC_5,PC_6,PC_7,PC_8,PC_9,...,PC_19,PC_20,PC_21,PC_22,PC_23,PC_24,title,food_group,key_ingredient,image_url
0,1,0.173857,0.670006,-0.116477,0.130757,0.124489,-0.063687,-0.231062,-0.119998,-0.117414,...,-0.131298,0.046059,-0.014324,-0.084446,-0.006169,-0.192258,Seared beef with spring vegetable medley,Beef,Grass-fed heritage breed Yorkshire beef,https://mindfulchef-uat.imgix.net/recipes/1/2e...


In [4]:
# Encode the categorical food group as integer codes so it can drive a
# numeric colour scale.
# Factorize once and reuse both results: the per-row codes and the
# code -> label lookup (the position in `fg_labels` is the code).
fg_codes, fg_labels = pd.factorize(df_full['food_group'])
df_full['fg_code'] = fg_codes
print(fg_labels)

Index(['Beef', 'Chicken', 'Vegan', 'Pork', 'Fish', 'Lamb'], dtype='object')


In [5]:
# Load the raw order-history export (comma separated).
order_history = pd.read_csv('../data/order_history_g2.csv', sep=',')

# Preprocess the order history table.
# NOTE(review): the next three steps promote the first data row to column
# names — presumably the real header ends up as row 0 after read_csv; confirm
# against the CSV layout.
# Move the current index into a regular column so it is part of the row data.
order_history = order_history.reset_index()
# Use the values of the first row as the column names ...
order_history.columns = order_history.iloc[0].tolist()
# ... and drop that row (index label 0), since it now duplicates the header.
order_history = order_history.drop(0)
# Rows without a recipe id cannot be used downstream; discard them.
order_history = order_history.dropna(subset=['id'])
# Cast the identifier columns to int (this must follow the dropna above,
# because missing ids cannot be cast to int).
order_history['id'] = order_history['id'].astype(int)
order_history['delivery_id'] = order_history['delivery_id'].astype(int)
order_history['customer_id'] = order_history['customer_id'].astype(int)
order_history.head(1)

Unnamed: 0,delivery_id,delivery_date,customer_id,allergens,calories,carbs,cuisine,fat,food_group,protein,...,id,cooking_time,image_url,instructions,key_ingredient,title,description,price_1p_pence,price_2p_pence,price_4p_pence
1,8089256,2020-03-28,100043,{Celery},623.4,75.5,Mexican,22.81,Vegan,26.84,...,570,30,https://mindfulchef-uat.imgix.net/recipes/570/...,Boil a kettle. Rinse the brown rice and place ...,Sunflower family organic sunflower mince,"Sunflower 'mince' chilli, kidney beans & guac","It’s all the comfort of chilli con carne, minu...",900,1200,2200


# 1 customer journey

In [6]:
# One row per distinct customer, with that customer's orders (as returned by
# get_all_orders) attached in an 'orders' column.
g_customers = order_history['customer_id'].unique()
g_historical = pd.DataFrame({'customer_id': g_customers})
g_historical['orders'] = g_historical['customer_id'].apply(
    lambda customer_id: get_all_orders(order_history, customer_id)
)

In [7]:
# Orders of the customer in row 15 of g_historical, selected by column name.
# Indexing the row Series with an integer position (`.iloc[15][1]`) is
# deprecated in pandas 2.x and silently depends on column order.
# NOTE(review): the variable name suggests this customer orders vegan
# recipes — confirm against the data.
vegan_cust = g_historical['orders'].iloc[15]

In [8]:
# Orders of the customer in row 15 of g_historical, selected by column name
# rather than by integer position on the row Series (deprecated in pandas 2.x).
test_list = g_historical['orders'].iloc[15]

In [9]:
# Keep only the recipes whose id appears in the selected customer's orders.
df_sample = df_full.loc[df_full['id'].isin(test_list)]

In [10]:
# 3D scatter of the selected customer's recipes in PC 2-4 space, coloured by
# food group and annotated with the recipe title.
fig = px.scatter_3d(df_sample, x='PC_2', y='PC_3', z='PC_4', color='food_group', text='title')
fig.update_traces(marker=dict(size=3))

fig.update_layout(
    # Axis titles name the principal component actually plotted on each axis
    # (x=PC_2, y=PC_3, z=PC_4).
    scene=dict(xaxis_title='2', yaxis_title='3', zaxis_title='4'),
    # Anchor the legend inside the plot area; 'constant' item sizing keeps the
    # legend symbols readable despite the small marker size.
    legend=dict(
        yanchor="bottom",
        y=0.3,
        xanchor="right",
        x=0.95,
        itemsizing='constant',
    ),
)

fig.show()

# Different dimensions of whole recipe table

**Hypothesis**: Principal components 1/2 differentiate between food groups/key ingredients. Perhaps the later PCs (3, 4, 5) capture secondary descriptors beyond food group?

- Hard to say: clustering by food group is still present in the higher dimensions.

In [11]:
# Vegan recipes only, viewed in PC 3-5 space.
fig = px.scatter_3d(
    df_full.loc[df_full['food_group'] == 'Vegan'],
    x='PC_3',
    y='PC_4',
    z='PC_5',
    color='food_group',
)
fig.update_traces(marker={'size': 3})
# Axis titles give the principal component number shown on each axis.
fig.update_layout(scene={'xaxis_title': '3', 'yaxis_title': '4', 'zaxis_title': '5'})

fig.show()

In [12]:
# Whole recipe table in PC 2-4 space, coloured by food group.
fig = px.scatter_3d(df_full, x='PC_2', y='PC_3', z='PC_4', color='food_group')
fig.update_traces(marker=dict(size=3))
# Title each axis with the principal component it shows (x=PC_2, y=PC_3,
# z=PC_4); blank titles leave the figure unreadable on its own.
fig.update_layout(scene=dict(xaxis_title='2', yaxis_title='3', zaxis_title='4'))

fig.show()