In [1]:
import numpy as np
from pandas import read_csv
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import os
from absl import flags

learning_rate = 0.000000001

dataset = "epi_r.csv"
datapath = "documents/ml_tf/final_project"

In [20]:
def download_and_clean_file(dataset, datapath):
    # set working directory and download csv file; save as data
    os.chdir(datapath)
    data = read_csv(dataset)

    # clean and order file
    data.dropna(inplace=True) # remove NAs
    data.reset_index(inplace=True) # resetting index
    data.drop('index', axis='columns', inplace=True)

    return(data)

In [27]:
def input_columns(dataset,datapath):
    data = download_and_clean_file(dataset,datapath)
    
    # Preparing columns
    data.head(1)              # reads the first line
    rows = len(data)          # counts the number of rows in the file
    shape = data.shape        # shows the shape
    
    
    features = []
    label = []

    rating = data['rating'] # label
    
    calories = data['calories']
    protein = data['protein']
    fat = data['fat']
    sodium = data['sodium']

    dessert = data['dessert'] # possible label #2
    peanut_free = data['peanut free']
    soy_free = data['soy free']
    tree_nut_free = data['tree nut free']
    vegetarian = data['vegetarian']
    gourmet = data['gourmet']
    kosher = data['kosher']
    pescatarian = data['pescatarian']
    quick_easy = data['quick & easy']
    wheat_gluten_free = data['wheat/gluten-free']
    bake = data['bake']
    summer = data['summer']
    dairy_free = data['dairy free']
    side = data['side']
    no_sugar_added = data['no sugar added']
    winter = data['winter']
    fall = data['fall']
    dinner = data['dinner']
    sugar_conscious = data['sugar conscious']
    healthy = data['healthy']
    kidney_friendly = data['kidney friendly']
    onion = data['onion']
    tomato = data['tomato']
    vegetable = data['vegetable']
    milk_cream = data['milk/cream']
    fruit = data['fruit']
    vegan = data['vegan']
    kid_friendly = data['kid-friendly']
    egg = data['egg']
    spring = data['spring']
    herb = data['herb']
    garlic = data['garlic']
    salad = data['salad']
    dairy = data['dairy']
    thanksgiving = data['thanksgiving']
    appetizer = data['appetizer']
    lunch = data['lunch']
    cheese = data['cheese']
    chicken = data['chicken']
    roast = data['roast']
    no_cook = data['no-cook']
    soup_stew = data['soup/stew']
    cocktail_party = data['cocktail party']
    ginger = data['ginger']
    potato = data['potato']
    chill = data['chill']
    grill_barbecue = data['grill/barbecue']
    lemon = data['lemon']
    drink = data['drink']
    sauce = data['sauce']
    low_cal = data['low cal']
    christm as = data['christmas']
    high_fiber = data['high fiber']
    food_processor = data['food processor']

    for k in range(rows): # use loop to put it in the expected format
        # appending features
        features.append([calories[k],protein[k], fat[k], sodium[k], peanut_free[k],soy_free[k],tree_nut_free[k], 
        vegetarian[k],gourmet[k], kosher[k], pescatarian[k], quick_easy[k], wheat_gluten_free[k], bake[k], summer[k], 
        dessert[k], dairy_free[k],side[k], no_sugar_added[k], winter[k], fall[k], dinner[k], sugar_conscious[k], 
        healthy[k], kidney_friendly[k], onion[k], tomato[k], vegetable[k], milk_cream[k], fruit[k], vegan[k], 
        kid_friendly[k],egg[k], spring[k], herb[k], garlic[k], salad[k], dairy[k], thanksgiving[k], appetizer[k], lunch[k],
        cheese[k], chicken[k], roast[k], no_cook[k], soup_stew[k], cocktail_party[k], ginger[k], potato[k],
        chill[k], grill_barbecue[k], lemon[k], drink[k], sauce[k], low_cal[k], christmas[k], high_fiber[k], food_processor[k]])
        
        # creating classes for labels into 5 buckets, i.e less than 1 star rating is class 0, between 1 & 2 star rating is class 1
        if rating[k] <= 1:
            label.append(0)
        elif rating[k]<=2:
            label.append(1)
        elif rating[k]<=3:
            label.append(2)
        elif rating[k]<=4:
            label.append(3)
        else: label.append(4)

    return np.array(label), np.array(features)

In [569]:
def split_data():
# splitting data 70% train 30% test
    label, features = input_columns()
    train_len = int(len(features) * 0.7)
    train_label, train_data = label[:train_len], features[:train_len]
    test_label, test_data = label[train_len:], features[train_len:]
    return train_label, train_data, test_label, test_data

In [584]:
def build_estimator(model_type, model_dir):
# Build 3 layer DNN with 100, 75, 50, 25 units respectively.
    hidden_units = [100, 75, 50, 25]
    
    feature_columns = [tf.feature_column.numeric_column("x",shape=[58])]
    deep_columns = [tf.feature_column.numeric_column("deep",shape=[4])]
    wide_columns = [tf.feature_column.numeric_column("wide",shape=[54])]
    
    if model_type == 'wide':
        return tf.estimator.LinearClassifier(feature_columns=feature_columns,
                                      n_classes=5,
                                      model_dir=model_dir)
        print("wide_model")
    elif model_type == 'deep':
        return tf.estimator.DNNClassifier(feature_columns=feature_columns,
                                      n_classes=5,
                                      hidden_units = hidden_units,
                                      model_dir=model_dir)
        print("deep_model")
    else:
        return tf.estimator.DNNLinearCombinedClassifier(
                                          n_classes=5,
                                          linear_feature_columns=wide_columns,
                                          dnn_feature_columns=deep_columns,
                                          dnn_hidden_units = hidden_units,
                                          model_dir=model_dir)
        print("wide+deep_model")
    

In [586]:
classifier = build_estimator("deep", "test")

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'test', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12ee0b9b0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [593]:
train_input_fn = model_input("deep")
train_input_fn


<function tensorflow.python.estimator.inputs.numpy_io.numpy_input_fn.<locals>.input_fn>

In [594]:
classifier.train(input_fn=train_input_fn, steps=2000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into test/model.ckpt.
INFO:tensorflow:loss = 10730.1, step = 1
INFO:tensorflow:global_step/sec: 484.844
INFO:tensorflow:loss = 262.146, step = 101 (0.208 sec)
INFO:tensorflow:global_step/sec: 676.521
INFO:tensorflow:loss = 173.012, step = 201 (0.145 sec)
INFO:tensorflow:global_step/sec: 715.902
INFO:tensorflow:loss = 135.98, step = 301 (0.140 sec)
INFO:tensorflow:global_step/sec: 713.201
INFO:tensorflow:loss = 156.776, step = 401 (0.140 sec)
INFO:tensorflow:global_step/sec: 728.778
INFO:tensorflow:loss = 139.107, step = 501 (0.136 sec)
INFO:tensorflow:global_step/sec: 720.509
INFO:tensorflow:loss = 126.317, step = 601 (0.139 sec)
INFO:tensorflow:global_step/sec: 731.704
INFO:tensorflow:loss = 145.465, step = 701 

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x12ee0b390>

In [596]:
accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]
print(accuracy_score)
print("\nTest Accuracy: {0:f}\n".format(accuracy_score),"model_type: ", model_type)

INFO:tensorflow:Calling model_fn.


ValueError: Feature x is not in features dictionary.

In [572]:
def model_input(model_type):
    if model_type == 'wide' or model_type == 'deep':
        return tf.estimator.inputs.numpy_input_fn(
        x={"x": np.array(train_data)},
        y=np.array(train_label),
        num_epochs=None,
        shuffle=True)
    
    else:
    # Define the training inputs
        return tf.estimator.inputs.numpy_input_fn(
          x={"wide": np.array(train_data[:,0:4]), "deep": np.array(train_data[:,4:])},
          y=np.array(train_label),
        num_epochs=None,
        shuffle=True)

In [571]:
def train_model(model_type, model_dir):
    classifier = build_estimator(model_type, model_dir)
    train_input_fn = model_input(model_type)
    classifier.train(input_fn=train_input_fn, steps=2000)

In [581]:
def test_model_accuracy(model_type, model_dir):
    accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]
    print(accuracy_score)
    print("\nTest Accuracy: {0:f}\n".format(accuracy_score),"model_type: ", model_type)

In [583]:
# test_model_accuracy(model_type = "wide", "/tmp/fv_a")
test_model_accuracy(model_type = "wide", model_dir = "/tmp/fv_g")
# test_model_accuracy(model_type = "wide+deep", model_dir = "/tmp/fv_e")

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-06-25-07:57:00
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/test_model15/model.ckpt-2000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-06-25-07:57:29
INFO:tensorflow:Saving dict for global step 2000: accuracy = 0.537185, average_loss = 1.25748, global_step = 2000, loss = 160.957
0.537185

Test Accuracy: 0.537185
 model_type:  wide


In [543]:
# classify new samples
new_samples = train_data[10:15]

predict_input_fn = tf.estimator.inputs.numpy_input_fn(
  x={"x": new_samples},
  num_epochs=1,
  shuffle=False)

predictions = list(classifier.predict(input_fn=predict_input_fn))
predicted_classes = [p["classes"] for p in predictions]

print("New Samples, Class Predictions:    {}\n"
      .format(predicted_classes))

INFO:tensorflow:Calling model_fn.


ValueError: Feature deep is not in features dictionary.

In [19]:
os.getcwd()

'/Users/pyu18/Documents/ML_TF/Final_Project'