In [1]:
## Initialization and imports
import pandas as pd 
import numpy as np
from scipy import sparse

from sklearn.model_selection import train_test_split
from sklearn.decomposition import NMF

## Changing directory to top folder (All programs run from top)
## The project root can be overridden with the SMUNCH_PROJECT_ROOT environment
## variable so the notebook can run on machines other than the author's;
## when the variable is unset the original hard-coded location is used.
import os
os.chdir(os.environ.get('SMUNCH_PROJECT_ROOT',
                        '/Users/nschumacher/docs/galvanize/smunch-user-food-analysis'))

from db.python_db import run_sql_query

from matplotlib import pyplot as plt
%matplotlib inline  
%config InlineBackend.figure_format='retina'

## Set random seed
## Seeds NumPy's *global* RNG once; any later cell drawing from that global
## generator therefore depends on the order/number of cells executed before it.
np.random.seed(seed=14)

In [2]:
## Load the pre-built ratings table and preview a few random rows
ratings_sql = "SELECT * from noah.cust_ratings"
df = run_sql_query(ratings_sql)
df.sample(3)

Unnamed: 0,cust_id,meal_id,meal_name,category,restaurant_name,delivery_tm,meal_rating
14991,0030N00002LQqcxQAD,a050N00000zZfz8QAC,Fajita Veggies Quinoa Bowl,freakyfit,Chupenga,2018-05-22 09:00:00+00:00,5.0
25804,0030N00002LQqMKQA1,a050N00000zZg49QAC,Grilled Teriyaki Salmon Bento,livinglight,Hashi Izakaya & Japanese Kitchen,2017-08-31 09:00:00+00:00,4.0
9812,0030N00002iBdCJQA0,a050N00000zbFdZQAU,Rice Noodle Salad with Chicken Breast,livinglight,Dave B.,2019-02-06 11:20:00+00:00,4.5


In [3]:
## Keep only the columns needed to build the customer x meal rating matrix
rating_cols = ['cust_id', 'meal_name', 'meal_rating']
df = df[rating_cols]
df.sample(3)

Unnamed: 0,cust_id,meal_name,meal_rating
38716,0030N00002LQpCqQAL,Bibimbap with Minced Beef,4.0
37601,0030N00002LQpGrQAL,Fajita Veggies Quinoa Bowl,5.0
58627,0030N00002LQpm9QAD,Lamb and Veggie Curry,3.0


In [4]:
## Pivot into the customer x meal rating matrix that NMF factorises below.
## Repeated (customer, meal) ratings are averaged; pairs without any rating become 0.
table = df.pivot_table(
    values='meal_rating',
    index=['cust_id'],
    columns=['meal_name'],
    aggfunc='mean',
    fill_value=0,
)
table.shape

(4931, 838)

In [5]:
## Independent snapshot of the pivoted matrix taken before the per-meal averages
## are filled in below, i.e. unrated customer/meal pairs are still 0 here.
table_empty = table.copy()

In [6]:
## Query to get the average rating and the number of ratings for each meal.
## Only rows with order_type = 'single' and a non-null rating_score are aggregated;
## results are grouped by product_name, which is exposed as meal_name.
querry = '''
SELECT
    product_name as meal_name,
    AVG(rating_score) as avg_meal_rating,
    COUNT(rating_score) as rating_count
FROM 
    bi.executed_order_employee
WHERE
    order_type = 'single' and rating_score IS NOT NULL
GROUP BY
    product_name'''
avg = run_sql_query(querry)
avg.sample(4)

Unnamed: 0,meal_name,avg_meal_rating,rating_count
636,'Caeser's Delight' Chicken Salad with Anchovie...,3.846154,65
193,Paneer Tikka 'Naanwich' Roll,4.25,40
765,Club Chicken Bowl with Ceasar Dressing (only t...,5.0,1
460,Chicken Avocado Arepa,3.818898,381


In [7]:
## Helper that substitutes a fallback average for the "no rating" marker
def con_avg(x, avg):
    """Return ``avg`` when ``x`` equals 0, otherwise return ``x`` unchanged.

    x   : a single cell value from the rating matrix.
    avg : the value to use in place of a 0 entry.
    """
    return avg if x == 0 else x
    
## For each column get avg value and assign it to missing info.
## Vectorised: one lookup Series (meal_name -> avg rating) is aligned to the
## table's columns and broadcast over all customers, instead of calling a
## Python function once per cell of the matrix.
avg_by_meal = (avg.drop_duplicates(subset='meal_name')      # first match wins, like .values[0] did
                  .set_index('meal_name')['avg_meal_rating'])

## Fail with the offending names (instead of an opaque IndexError or silent NaNs)
## when a pivoted meal has no row in the averages query.
meals_without_avg = table.columns.difference(avg_by_meal.index)
if len(meals_without_avg) > 0:
    raise KeyError('No average rating found for meals: {}'.format(list(meals_without_avg)))

col_avgs = avg_by_meal.reindex(table.columns).values.astype(float)
ratings = table.values
## 0 is the "not rated" marker produced by the pivot's fill_value.
## NOTE(review): a genuine rating of 0 would be overwritten too -- confirm the
## rating scale never includes 0.
table = pd.DataFrame(np.where(ratings == 0, col_avgs, ratings),
                     index=table.index, columns=table.columns)

table.sample(2)

meal_name,'Amatriciana' Casarecce Pasta with Guanciale Bacon (Pork cheek),'Be Veggie' Tofu Burger in Brioche Bun,'Berliner Bowl' with Beef-Meatballs,'Caeser's Delight' Chicken Salad with Anchovies & Fresh Veggies in Herb Dressing,'Com Chay' Lemongrass Tofu with Pan-fried Sesame-Veggies,'Com Ga' Lemongrass Chicken with Pan-fried Sesame-Veggies,'New Classic' Beef Burger with Bacon in Brioche Bun,'Pad Lao' Rice Noodles with Chicken,'Pad Lao' Rice Noodles with Tofu,'Pasta Bella Bologna' Fusili in Bolognese Beef Sauce (750ml),...,Wild Rice Bowl with Salmon,Winter Bowl with Yoghurt Dressing (only the bread contains gluten),Wok Chicken in Cocos-Curry,Woked Vegetables & Udon Noodles,Wrap with Mixed Salad & Balsamic Dressing,Za'tar Turkey with Couscous Salad,Zucchini-Aubergine-Paprika Sandwich with Bulgur-Chickpea Salad,Zula Special Chicken Salad,​Beef Kofte & Vermicelli-Rice ​,​​Japanese Bowl with Salmon Ceviche
cust_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0030N00002LQpa4QAD,4.1,2.769231,4.192308,3.846154,3.2,3.333333,3.429293,3.818182,3.714286,3.648352,...,5.0,2.0,4.125,3.518519,3.190476,2.833333,3.524648,3.181818,3.364103,4.578947
0030N00002LQpeqQAD,4.1,2.769231,4.192308,3.846154,3.2,3.333333,3.429293,3.818182,3.714286,3.648352,...,5.0,2.0,4.125,3.518519,3.190476,2.833333,3.524648,3.181818,3.364103,4.578947


In [8]:
## Dense rating matrix (values rounded to 3 decimals) used as the NMF input
X = table.values.round(3)
X

array([[4.1  , 2.769, 4.192, ..., 3.182, 3.364, 4.579],
       [4.1  , 2.769, 4.192, ..., 3.182, 3.364, 4.579],
       [4.1  , 2.769, 4.192, ..., 3.182, 3.364, 4.579],
       ...,
       [4.1  , 2.769, 4.192, ..., 3.182, 3.364, 4.579],
       [4.1  , 2.769, 4.192, ..., 3.182, 3.364, 4.579],
       [4.1  , 2.769, 4.192, ..., 3.182, 3.364, 4.579]])

In [9]:
## Creating NMF object
## random_state is pinned so the factorisation does not depend on how far the
## global NumPy RNG has advanced (e.g. after re-running cells above).
nmf = NMF(max_iter=100, n_components=3, solver='cd', random_state=14)

W = np.round(nmf.fit_transform(X), 5)   ## W matrix: one row per row of X, one column per hidden topic
H = np.round(nmf.components_, 5)        ## H matrix: one row per hidden topic, one column per column of X (meal)

In [10]:
## Display W: the output of nmf.fit_transform(X), rounded to 5 decimals
W

array([[1.2326 , 0.07823, 0.12481],
       [1.23794, 0.07399, 0.07448],
       [1.23683, 0.01153, 0.12368],
       ...,
       [1.2394 , 0.07113, 0.07353],
       [1.23847, 0.07098, 0.07396],
       [1.23941, 0.07911, 0.06609]])

In [11]:
## Display H: nmf.components_, rounded to 5 decimals
H

array([[3.28617e+00, 2.21927e+00, 3.35680e+00, ..., 2.56153e+00,
        2.67403e+00, 3.67062e+00],
       [0.00000e+00, 2.97000e-03, 3.87410e-01, ..., 6.40000e-04,
        6.48880e-01, 0.00000e+00],
       [3.81880e-01, 2.48100e-01, 8.10300e-02, ..., 1.16520e-01,
        6.21900e-02, 4.17260e-01]])

In [12]:
## Reconstruct the rating matrix from the two NMF factors (rounded to 3 decimals)
preds = np.matmul(W, H).round(3)

## Explore hidden topics