In [2]:
import numpy as np
import os
import pandas as pd
import shutil
import tarfile
import time
import zipfile
import cv2
from PIL import Image
from io import BytesIO
from collections import namedtuple

try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve


import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.contrib.slim.nets import resnet_v2
from pyspark.sql.functions import col, pandas_udf, udf, array
from pyspark.sql.types import LongType, IntegerType, BinaryType, StringType, StructField, StructType, to_arrow_type, to_arrow_schema
tf.__version__

'1.14.0'

In [3]:
from pyspark.sql.functions import col, pandas_udf, PandasUDFType

In [4]:
spark.conf.set("spark.sql.execution.arrow.enabled", "true")
spark.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "6400") # Set a large batch size in practice.

In [5]:
# Internet URL for the tar-file with the Inception model.
# Note that this might change in the future and will need to be updated.
data_url = "http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz"
# Directory to store the downloaded data.
data_dir = "resnet/"

In [6]:
def maybe_download_and_extract(url, download_dir):
    filename = url.split('/')[-1]
    file_path = os.path.join(download_dir, filename)
    if not os.path.exists(file_path):
        if not os.path.exists(download_dir):
            os.makedirs(download_dir)

        file_path, _ = urlretrieve(url=url, filename=file_path)

        print()
        print("Download finished. Extracting files.")

        if file_path.endswith(".zip"):
            # Unpack the zip-file.
            zipfile.ZipFile(file=file_path, mode="r").extractall(download_dir)
        elif file_path.endswith((".tar.gz", ".tgz")):
            # Unpack the tar-ball.
            tarfile.open(name=file_path, mode="r:gz").extractall(download_dir)

        print("Done.")
    else:
        print("Data has apparently already been downloaded and unpacked.")

In [7]:
maybe_download_and_extract(url=data_url, download_dir=data_dir)

Data has apparently already been downloaded and unpacked.


In [8]:
height = width = 224
channels = 3
num_classes = 1001

sess = tf.Session()

inputs = tf.placeholder(tf.float32, shape=[None, height, width, channels])
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
  _logits, _endpoints = resnet_v2.resnet_v2_50(inputs, num_classes=num_classes, is_training=False, reuse=tf.AUTO_REUSE)


saver = tf.train.Saver()
saver.restore(sess, './resnet/resnet_v2_50.ckpt')

bc_model = sc.broadcast(sess.graph_def)
sess.close()



























Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./resnet/resnet_v2_50.ckpt


In [8]:
# %%sh
# # git clone https://github.com/tensorflow/models/
# cd models/research/slim/
# DATA_DIR=/tmp/data/flowers/databricks/python/bin/python download_and_convert_data.py \
#     --dataset_name=flowers \
#     --dataset_dir="${DATA_DIR}"
# cp -r /tmp/data/flowers /dbfs/tmp

In [9]:
# input_local_dir = "/temp/flowers"

In [10]:
# from pyspark.sql.types import *

# schema = StructType([StructField('image/class/label', IntegerType(), True),
#                      StructField('image/width', IntegerType(), True),
#                      StructField('image/height', IntegerType(), True),
#                      StructField('image/format', StringType(), True),
#                      StructField('image/encoded', BinaryType(), True)])

# df = spark.read.format("tfrecords").schema(schema).load(input_local_dir+'/flowers_train*.tfrecord')
# df = df.limit(3200)

Things I need to get S3 files from my local machine

In [9]:
import boto3
import configparser
config = configparser.ConfigParser()
config.read(os.path.expanduser("~/.aws/credentials"))
access_id = config.get('fohr_derrick', "aws_access_key_id") 
access_key = config.get('fohr_derrick', "aws_secret_access_key")
hadoop_conf=spark._jsc.hadoopConfiguration()
hadoop_conf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem")
hadoop_conf.set("fs.s3a.awsAccessKeyId", access_id)
hadoop_conf.set("fs.s3a.awsSecretAccessKey", access_key)

In [10]:
IMAGES_PATH = 's3a://social-system-test/instagram_graph_image_store/1/*/'

In [11]:
df = spark.read.format("image").option("dropInvalid", True).load(IMAGES_PATH)

In [12]:
df.printSchema()

root
 |-- image: struct (nullable = true)
 |    |-- origin: string (nullable = true)
 |    |-- height: integer (nullable = true)
 |    |-- width: integer (nullable = true)
 |    |-- nChannels: integer (nullable = true)
 |    |-- mode: integer (nullable = true)
 |    |-- data: binary (nullable = true)



In [13]:
#From Databricks ImageIO source code:
_OcvType = namedtuple("OcvType", ["name", "ord", "nChannels", "dtype"])

_SUPPORTED_OCV_TYPES = (
    _OcvType(name="CV_8UC1", ord=0, nChannels=1, dtype="uint8"),
    _OcvType(name="CV_32FC1", ord=5, nChannels=1, dtype="float32"),
    _OcvType(name="CV_8UC3", ord=16, nChannels=3, dtype="uint8"),
    _OcvType(name="CV_32FC3", ord=21, nChannels=3, dtype="float32"),
    _OcvType(name="CV_8UC4", ord=24, nChannels=4, dtype="uint8"),
    _OcvType(name="CV_32FC4", ord=29, nChannels=4, dtype="float32"),
)

#  NOTE: likely to be migrated to Spark ImageSchema code in the near future.
_OCV_TYPES_BY_ORDINAL = {m.ord: m for m in _SUPPORTED_OCV_TYPES}


def imageTypeByOrdinal(ordinal):
    if not ordinal in _OCV_TYPES_BY_ORDINAL:
        raise KeyError("unsupported image type with ordinal %d, supported OpenCV types = %s" % (
            ordinal, str(_SUPPORTED_OCV_TYPES)))
    return _OCV_TYPES_BY_ORDINAL[ordinal]

In [24]:
def convert_imageStruct(image_df):
    shape = (image_df['height'], 
             image_df['width'],
             image_df['nChannels'])
    dtype = imageTypeByOrdinal(image_df['mode']).dtype
    image = np.ndarray(shape, dtype, image_df['data'])
#     return Image.fromarray(obj=image, mode='RGB')
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    image = tf.image.central_crop(image, central_fraction=0.875)
    image = tf.expand_dims(image, 0)
    image = tf.image.resize_bilinear(image, [height, width],
                                       align_corners=False)
    image = tf.squeeze(image, [0])
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    return image

In [15]:
def parse_example(image_data):
    
    shape = (image_df['height'], 
             image_df['width'],
             image_df['nChannels'])
    dtype = imageTypeByOrdinal(image_df['mode']).dtype
    image = np.ndarray(shape, dtype, image_df['data'])
#     image = Image.fromarray(obj=image, mode='RGB')
    
#     image = tf.image.decode_image(image_data, channels=channels)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    image = tf.image.central_crop(image, central_fraction=0.875)
    image = tf.expand_dims(image, 0)
    image = tf.image.resize_bilinear(image, [height, width],
                                       align_corners=False)
    image = tf.squeeze(image, [0])
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    return image

In [75]:
def predict_batch(image_batch):
    tf.reset_default_graph()
    sess = tf.Session()

    batch_size = 64
    
    # make this part a pandas data frame to iterate
    #changed from tf.string to tf.uint8
#     image_input = tf.placeholder(dtype=tf.uint8, shape=[None])
#     dataset = tf.data.Dataset(image_batch.values)
#     dataset = tf.data.Dataset.from_tensor_slices(image_input)

#     dataset = image_batch.map(parse_example)
#     dataset = dataset.map(parse_example, num_parallel_calls=16).prefetch(512).batch(batch_size)
#     iterator = dataset.iteritems()
#     iterator = dataset.make_initializable_iterator()
#     image = next(iterator)
#     image = iterator.get_next()

    imagebatch = image_batch.apply(convert_imageStruct, axis=1)
    
    graph_def = bc_model.value
    tf.graph_util.import_graph_def(graph_def)
  
    inputs = tf.placeholder(tf.float32, shape=[None, height, width, channels])
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, endpoints = resnet_v2.resnet_v2_50(inputs, num_classes=num_classes, is_training=False, reuse=tf.AUTO_REUSE)
  
    sess.run(tf.global_variables_initializer())
#     sess.run(iterator.initializer, feed_dict={image_input: image_batch})
  
    softmax_tensor = endpoints["predictions"]
    result = []
#     try:
#         while True:
#             batch = sess.run(image)
#             preds = sess.run(softmax_tensor, {'Placeholder_1:0': batch})
#             result = result + list(preds.reshape(-1, num_classes))
#     except tf.errors.OutOfRangeError:
#         pass
    
    #maybe try this?
#     output_layer = "final_result"
#     output_name = "import/" + output_layer
#     output_operation = graph_def.get_operation_by_name(output_name)
    image_ph = tf.placeholder(tf.float32, shape=[224,224,3], name='image_ph')
    for index, image in imagebatch.items():
        batch = sess.run(image)
        preds = sess.run(softmax_tensor, {image_ph: batch})
        result = result + list(preds.reshape(-1, num_classes))
        

    return pd.Series(result)

In [76]:
# dataset = tf.data.Dataset.from_tensor_slices(np.vstack(image_batch['new_image']))
# dataset = tf.data.Dataset(image_batch['new_image'])

In [77]:
images = predict_batch(image_batch)
# print(images)
# print(images.shape)





























InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [?,224,224,3]
	 [[node Placeholder (defined at <ipython-input-75-66f59040391e>:25) ]]

Original stack trace for 'Placeholder':
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/traitlets/config/application.py", line 664, in launch_instance
    app.start()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 583, in start
    self.io_loop.start()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 149, in start
    self.asyncio_loop.run_forever()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
    self._run_once()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
    handle._run()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 787, in inner
    self.run()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 361, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 268, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 541, in execute_request
    user_expressions, allow_stdin,
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/ipkernel.py", line 300, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2858, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2886, in _run_cell
    return runner(coro)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3063, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3254, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-77-652b8aee33a7>", line 1, in <module>
    images = predict_batch(image_batch)
  File "<ipython-input-75-66f59040391e>", line 25, in predict_batch
    inputs = tf.placeholder(tf.float32, shape=[None, height, width, channels])
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2143, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 6262, in placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 3616, in create_op
    op_def=op_def)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 2005, in __init__
    self._traceback = tf_stack.extract_stack()


In [55]:
image_batch = df.select('image.origin',
                        'image.height',
                        'image.width',
                        'image.mode',
                        'image.nChannels',
                        'image.data').limit(10).toPandas()#.loc[: , "data"]

In [18]:
target = image_batch.pop('origin')

In [25]:
image_batch['new_image'] = image_batch.apply(convert_imageStruct, axis=1)

In [193]:
# convert_batch_udf = udf(convert_imageStruct)
# new_images = df.select(convert_batch_udf).show()

In [20]:
#predict_batch_udf = pandas_udf(ArrayType(FloatType()), PandasUDFType.SCALAR)(predict_batch)
#predictions = df.select(predict_batch_udf(col("image.data")).alias("prediction"))

In [21]:
# predictions.write.mode("overwrite").save("/tmp/predictions")

This could solve color classification

In [22]:
shape = (df.select("image.height").take(1)[0][0],df.select("image.width").take(1)[0][0],df.select("image.nChannels").take(1)[0][0])
dtype = "uint8"
raw = df.select("image.data").take(1)[0][0]

In [23]:
testImage = np.ndarray(shape, dtype, raw)

In [24]:
bgr = testImage[..., ::-1]

In [28]:
bgr.shape

(1440, 1440, 3)

(1440, 1440, 3)

In [177]:
testImage2 = Image.fromarray(obj=testImage, mode='RGB')

In [233]:
testImage2.size

(1440, 1440)

(1440, 1440)

In [234]:
pilTry = BytesIO()
testImage2.save(pilTry, format='jpeg')

In [235]:
type(pilTry)

_io.BytesIO

_io.BytesIO

In [52]:
tf.reset_default_graph()
sess = tf.Session()
graph_def = bc_model.value
tf.graph_util.import_graph_def(graph_def)
  
inputs = tf.placeholder(tf.float32, shape=[None, height, width, channels])
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
    logits, endpoints = resnet_v2.resnet_v2_50(inputs, num_classes=num_classes, is_training=False, reuse=tf.AUTO_REUSE)

sess.run(tf.global_variables_initializer())






























In [54]:
image_batch["new_image"].iloc[1].eval(session=sess)

ValueError: Cannot use the given session to evaluate tensor: the tensor's graph is different from the session's graph.

In [53]:
sess.run(image_batch["new_image"].iloc[1])

ValueError: Fetch argument <tf.Tensor 'Mul_1:0' shape=(224, 224, 3) dtype=float32> cannot be interpreted as a Tensor. (Tensor Tensor("Mul_1:0", shape=(224, 224, 3), dtype=float32) is not an element of this graph.)

In [61]:
imageTypeByOrdinal(image_batch["mode"].iloc[1]).dtype

'uint8'

'uint8'

In [197]:
byte_im = parse_example(image_batch['new_image'].values[1])

In [43]:
batch = sess.run(image_batch['new_image'].values[1])

RuntimeError: Attempted to use a closed Session.

In [146]:
softmax_tensor = endpoints["predictions"]
preds = sess.run(softmax_tensor,)

InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [?,224,224,3]
	 [[node Placeholder (defined at <ipython-input-17-e10d8189e3eb>:6) ]]

Original stack trace for 'Placeholder':
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/traitlets/config/application.py", line 664, in launch_instance
    app.start()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 583, in start
    self.io_loop.start()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 149, in start
    self.asyncio_loop.run_forever()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
    self._run_once()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
    handle._run()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 787, in inner
    self.run()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 361, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 268, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 541, in execute_request
    user_expressions, allow_stdin,
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/ipkernel.py", line 300, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2858, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2886, in _run_cell
    return runner(coro)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3063, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3254, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-17-e10d8189e3eb>", line 6, in <module>
    inputs = tf.placeholder(tf.float32, shape=[None, height, width, channels])
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2143, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 6262, in placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 3616, in create_op
    op_def=op_def)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 2005, in __init__
    self._traceback = tf_stack.extract_stack()


InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [?,224,224,3]
	 [[node Placeholder (defined at <ipython-input-17-e10d8189e3eb>:6) ]]

Original stack trace for 'Placeholder':
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/traitlets/config/application.py", line 664, in launch_instance
    app.start()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 583, in start
    self.io_loop.start()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 149, in start
    self.asyncio_loop.run_forever()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
    self._run_once()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
    handle._run()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 787, in inner
    self.run()
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 361, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 268, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 541, in execute_request
    user_expressions, allow_stdin,
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/ipkernel.py", line 300, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2858, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2886, in _run_cell
    return runner(coro)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3063, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3254, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-17-e10d8189e3eb>", line 6, in <module>
    inputs = tf.placeholder(tf.float32, shape=[None, height, width, channels])
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py", line 2143, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 6262, in placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 3616, in create_op
    op_def=op_def)
  File "/Users/derricklewis/anaconda3/envs/pipeline/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 2005, in __init__
    self._traceback = tf_stack.extract_stack()


In [None]:
bytetest = df.select('image.data').limit(2).toPandas().loc[: , "data"]

In [None]:
bytetest[0]

In [None]:
parse_example(test.values[0].data)

In [None]:
predict_batch_udf = pandas_udf(ArrayType(FloatType()), PandasUDFType.SCALAR)(predict_batch)
predictions = df.select(predict_batch_udf(col("image/encoded")).alias("prediction"))