In [9]:
import os
import sys 
import urllib.request

import tensorflow as tf

In [10]:
import pandas as pd

# Remote locations of the Iris training and test CSV files.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

In [31]:
# Default directory in which downloaded files are cached.
TEMP_DIR = '/tmp/tensorflow_tutorials'


def download_and_cache(url, fname=None, dest=TEMP_DIR):
    """Download ``url`` into ``dest`` unless a cached copy already exists.

    Args:
        url: Address of the resource to fetch.
        fname: File name to store the download under. When ``None`` the last
            path segment of ``url`` is used.
        dest: Directory that holds the cached files; created if missing.

    Returns:
        Path of the cached file inside ``dest``.

    Raises:
        ValueError: If no file name was given and none can be derived from
            ``url`` (for example when it ends with ``/``).
        OSError: Propagated unchanged from the download (``URLError`` and
            ``ContentTooShortError`` are subclasses) or from the file system.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dest, exist_ok=True)
    if fname is None:
        fname = url.split('/')[-1]
        print("Using fname:", fname)
    if not fname:
        # An empty name would make fpath point at ``dest`` itself, which
        # always exists and would be returned as if it were the cached file.
        raise ValueError(
            'Cannot derive a file name from {!r}; pass fname explicitly.'.format(url))

    fpath = os.path.join(dest, fname)
    if os.path.exists(fpath):
        return fpath

    def _progress(count, block_size, total_size):
        received = count * block_size
        if total_size > 0:
            # The final block is usually short, so clamp instead of
            # overshooting 100%.
            percentage = min(100.0, received * 100.0 / total_size)
            sys.stdout.write('\r>> Downloading {} {:1.1f}%'.format(fname, percentage))
        else:
            # urlretrieve reports -1 when the size is unknown and 0 for an
            # empty resource; neither can be turned into a percentage.
            sys.stdout.write('\r>> Downloading {} {} bytes'.format(fname, received))
        sys.stdout.flush()

    # Download next to the final location and rename only on success, so an
    # interrupted transfer can never be mistaken for a valid cached file.
    partial_path = fpath + '.part'
    try:
        urllib.request.urlretrieve(url, partial_path, _progress)
        os.replace(partial_path, fpath)
    except BaseException:
        # Also covers KeyboardInterrupt, which is common in notebooks.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise

    print()
    statinfo = os.stat(fpath)
    print('Successfully downloaded', fname, statinfo.st_size, 'bytes.')
    return fpath

In [34]:
def load_data(y_name='Species'):
    """Download the Iris CSV files and split each into features and labels.

    Both files are fetched through ``download_and_cache``. The first row of
    each file is consumed as a header and the columns are renamed to the
    names listed below.

    Args:
        y_name: Name of the column to split off as the label.

    Returns:
        ``(train_x, train_y), (test_x, test_y)`` where each ``*_x`` is a
        DataFrame without the label column and each ``*_y`` is the label
        Series.
    """
    column_names = ['SepalLength', 'SepalWidth',
                    'PetalLength', 'PetalWidth', 'Species']

    train_path = download_and_cache(TRAIN_URL)
    train = pd.read_csv(train_path, header=0, names=column_names)
    # pop() removes the label column in place, so ``train`` is left holding
    # only the feature columns.
    train_y = train.pop(y_name)

    test_path = download_and_cache(TEST_URL)
    test = pd.read_csv(test_path, header=0, names=column_names)
    test_y = test.pop(y_name)

    return (train, train_y), (test, test_y)

In [26]:
def load_data(y_name='Species'):
    """Read the train and test CSV files and separate features from labels.

    The file paths come from ``maybe_download()`` and the column names from
    ``CSV_COLUMN_NAMES``; both are defined outside this cell.

    Args:
        y_name: Name of the column to split off as the label.

    Returns:
        ``(train_x, train_y), (test_x, test_y)``.
    """
    train_path, test_path = maybe_download()

    def _features_and_labels(csv_path):
        # header=0 consumes the file's first row; ``names`` replaces it.
        frame = pd.read_csv(csv_path, names=CSV_COLUMN_NAMES, header=0)
        # pop() drops the label column from ``frame`` in place.
        labels = frame.pop(y_name)
        return frame, labels

    train_pair = _features_and_labels(train_path)
    test_pair = _features_and_labels(test_path)
    return train_pair, test_pair

In [28]:
# NOTE(review): in the cells visible here `train_y` is only ever bound as a
# local inside `load_data`, so on a fresh kernel this cell presumably raises
# NameError -- confirm where the module-level `train_y` is meant to come from.
train_y

0      2
1      1
2      2
3      0
4      0
5      0
6      0
7      2
8      1
9      0
10     1
11     1
12     0
13     0
14     2
15     1
16     2
17     2
18     2
19     0
20     2
21     2
22     0
23     2
24     2
25     0
26     1
27     2
28     1
29     1
      ..
90     2
91     1
92     0
93     0
94     2
95     0
96     0
97     2
98     1
99     0
100    0
101    1
102    0
103    1
104    0
105    0
106    0
107    0
108    1
109    0
110    2
111    1
112    0
113    2
114    0
115    1
116    1
117    0
118    0
119    1
Name: Species, Length: 120, dtype: int64

In [29]:
# Per-column defaults handed to the CSV decoder: four float columns followed
# by one integer column.
CSV_TYPES = [[0.0], [0.0], [0.0], [0.0], [0]]


def _parse_line(line):
    """Decode one CSV text line into a ``(features, label)`` pair.

    Args:
        line: A single CSV record, as produced by a text-line dataset.

    Returns:
        A tuple ``(features, label)``. ``features`` maps each name in
        ``CSV_COLUMN_NAMES`` (defined outside this cell) except ``'Species'``
        to its decoded field; ``label`` is the ``'Species'`` field.
    """
    # TensorFlow 2.x removed the top-level ``tf.decode_csv``; the same op is
    # available as ``tf.io.decode_csv``. Prefer the new location and fall
    # back to the old one so early 1.x releases without ``tf.io`` still work.
    decode_csv = getattr(getattr(tf, 'io', None), 'decode_csv', None)
    if decode_csv is None:
        decode_csv = tf.decode_csv

    fields = decode_csv(line, record_defaults=CSV_TYPES)

    # Pair every decoded field with its column name.
    features = dict(zip(CSV_COLUMN_NAMES, fields))

    # Split the label off so that only the feature columns remain.
    label = features.pop('Species')

    return features, label


def csv_input_fn(csv_path, batch_size):
    """Build a shuffled, endlessly repeating, batched dataset from a CSV file.

    Args:
        csv_path: Path of the CSV file to read.
        batch_size: Number of parsed records per batch.

    Returns:
        The dataset produced by the pipeline below.
    """
    # Read the file line by line, dropping the first line.
    lines = tf.data.TextLineDataset(csv_path)
    records = lines.skip(1)

    # Turn every remaining line into a (features, label) pair.
    parsed = records.map(_parse_line)

    # Shuffle with a 1000-element buffer, repeat indefinitely, then batch.
    shuffled = parsed.shuffle(1000)
    repeated = shuffled.repeat()
    return repeated.batch(batch_size)