In [1]:
## Initialization and imports
import pandas as pd 
import numpy as np
from matplotlib import pyplot as plt

## sklearn models and validation
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor

## Change the working directory to the project's top folder (all programs
## are run from the top). The location can be overridden by setting the
## SMUNCH_PROJECT_ROOT environment variable; when it is unset, the original
## hard-coded path is used, so existing setups keep working.
import os
os.chdir(os.environ.get('SMUNCH_PROJECT_ROOT',
                        '/Users/nschumacher/docs/galvanize/smunch-user-food-analysis'))

## Local project modules: SQL helpers and the User class
from db.python_db import connect, run_sql_query
from run2.models.user_class import User


%matplotlib inline  
%config InlineBackend.figure_format='retina'

## Set random seed
## Seeds NumPy's global random number generator with a fixed value.
## NOTE(review): no explicit random_state is passed to the split or the model
## later in the visible cells, so run-to-run reproducibility presumably rests
## on this global seed -- confirm.
np.random.seed(seed=14)

In [2]:
## Handle returned by db.python_db.connect(); it is passed to User(...) below.
## NOTE(review): presumably an open database connection -- it is never closed
## in the visible cells; confirm whether it should be.
conn = connect()

In [3]:
## List of the top 20 ordering customers
## NOTE(review): these look like contact sfids (same '0030N...' shape as the
## user_id used further down) -- confirm. The list is only referenced by the
## commented-out loop in the next cell.
top_20 = ['0030N00002LQq9ZQAT', '0030N00002LQqjPQAT', '0030N00002LQpX3QAL',
       '0030N00002LQpucQAD', '0030N00002LQqNIQA1', '0030N00002LQqAGQA1',
       '0030N00002LQp6XQAT', '0030N00002LQqcbQAD', '0030N00002LQq9lQAD',
       '0030N00002LQpubQAD', '0030N00002LQptWQAT', '0030N00002LQpO3QAL',
       '0030N00002LQq9gQAD', '0030N00002LQq9dQAD', '0030N00002LQq9cQAD',
       '0030N00002LQqvkQAD', '0030N00002LQpwoQAD', '0030N00002LQptYQAT',
       '0030N00002LQpN2QAL', '0030N00002LQptaQAD']
## NOTE(review): the query text contains a %s placeholder, but this call
## supplies no value for it and no connection, and the return value is
## discarded. Presumably a contact sfid was meant to be bound and the result
## kept -- check run_sql_query's signature before relying on this cell.
run_sql_query("SELECT DISTINCT(account_sfid_order) FROM bi.executed_order_employee WHERE contact_sfid=%s")

In [3]:
# ## Building user object dictionary
# NOTE(review): this loop is disabled and looks stale -- it calls
# User(i, conn) with two arguments, while the live code below passes three
# (user id, account id, connection).
# users = {}
# for i in top_20:
#     u = User(i, conn)
#     u.build_table()
#     users[i] = u
#     print(i, "done.")

## Build the User object for a single hard-coded user/account pair.
## NOTE(review): this user_id is not one of the ids listed in top_20.
user_id = '0030N00002LQqB9QAL'
account_id = '0010N00004IaGG6QAN'
u = User(user_id, account_id, conn)
## build_table() is defined in run2.models.user_class (not visible here).
## Later cells read u.X and u.y, which it presumably populates -- confirm.
u.build_table()

               meal_id  meal_count  \
0   a050N00000zZg5KQAS           1   
1   a050N00000zZfzCQAS           1   
2   a050N00000zZfyUQAS           2   
3   a050N000010W5ezQAC           1   
4   a050N00000zZg8NQAS           2   
5   a050N00000zZg5EQAS           3   
6   a050N00000zZgH3QAK           3   
7   a050N00000zZg8AQAS           8   
8   a050N000010W5f9QAC           1   
9   a050N00000za4nqQAA           7   
10  a050N000010XhDBQA0           1   
11  a050N000010W5f8QAC           1   
12  a050N00000zZgFTQA0           1   
13  a050N00000zZg5LQAS           2   
14  a050N00000zbGGHQA2           1   
15  a050N00000zbGCDQA2           1   
16  a050N00000zZg63QAC           5   
17  a050N00000zZfz2QAC           1   
18  a050N00000zZfysQAC           2   
19  a050N00000zZg0HQAS           2   
20  a050N00000zZg0ZQAS           2   
21  a050N00000zZg44QAC           1   
22  a050N000010W5fIQAS           1   
23  a050N00000zZg0XQAS           3   
24  a050N00000zZg4XQAS           3   
25  a050N000

In [4]:
## Lists the ingredients a user has seen (never-seen ingredients are dropped)
def seen_ingredients(user):
    """Return the column indices of ``user.X`` that contain at least one 1.

    A column whose entries never equal 1 (an ingredient the user has never
    seen) is left out. The returned list of ints, in ascending order, can be
    used to slice ``user.X`` down to the seen columns.
    """
    _, n_columns = user.X.shape
    return [
        col
        for col in range(n_columns)
        if (user.X[:, col] == 1).sum() != 0
    ]

## Gets the cross-validated mean squared error for a regression model
def cross_val(X_train, y_train, func, cv=4):
    """Print and return the cross-validated mean squared error of ``func``.

    Parameters
    ----------
    X_train, y_train
        Training features and targets, forwarded unchanged to
        ``cross_val_score``.
    func
        The estimator to evaluate; its class name is used in the printed line.
    cv
        Forwarded to ``cross_val_score`` as its ``cv`` argument. Defaults to
        4, the value that was previously hard-coded.

    Returns
    -------
    The mean of the per-fold mean squared errors, as a positive number.
    """
    fold_scores = cross_val_score(func, X_train, y_train, cv=cv,
                                  scoring='neg_mean_squared_error')
    # The scorer yields negated MSE per fold, so flip the sign back. Divide by
    # the number of scores actually returned rather than a fixed constant so
    # the average stays correct for any cv setting.
    mse = -sum(fold_scores) / len(fold_scores)
    func_name = str(func.__class__.__name__)
    print("{0:27} Train CV | Mean Square Error: {1:5.4}".format(func_name, mse))
    return mse

In [5]:
## Keep only the feature columns that seen_ingredients() reports for this user
keeps = seen_ingredients(u)
X = u.X[:,keeps]
y = u.y

## Hold out 20% of the rows as a test split.
## NOTE(review): X_test / y_test are not used in the visible cells, and no
## random_state is passed to the split or to the forest.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)
rf_model = RandomForestRegressor(max_depth=30, n_estimators=20)
## cross_val() returns a mean squared error; **.5 converts it to a root mean
## squared error, which is the value this cell displays.
cross_val(X_train, y_train, rf_model)**.5

RandomForestRegressor       Train CV | Mean Square Error: 0.1316


0.3627079659020997

In [6]:
## Display the full target array that the model cell assigns to y
u.y

array([1.        , 1.        , 0.33333333, 0.25      , 0.66666667,
       0.6       , 0.6       , 0.88888889, 0.5       , 1.        ,
       0.33333333, 0.5       , 0.5       , 0.5       , 1.        ,
       0.33333333, 0.55555556, 0.25      , 1.        , 1.        ,
       0.28571429, 1.        , 1.        , 0.42857143, 0.5       ,
       0.5       , 0.28571429, 1.        , 0.5       , 0.25      ,
       1.        , 0.5       , 0.5       , 0.33333333, 0.14285714,
       0.5       , 0.25      , 0.66666667, 1.        , 0.5       ,
       0.09090909, 0.28571429, 0.33333333, 1.        , 1.        ,
       0.5       , 0.33333333, 0.5       , 0.6       , 1.        ,
       0.5       , 0.42857143, 0.28571429, 1.        , 1.        ,
       0.14285714, 1.        , 0.2       , 0.14285714, 1.        ,
       1.        , 0.14285714, 0.5       , 0.25      , 1.        ,
       0.3       , 0.4       , 1.        , 1.        , 0.28571429,
       1.        , 1.        , 1.        , 0.25      , 0.5    