In [1]:
#start

In [2]:
import re
from bigdl.util.common import * 
from bigdl.transform.vision.image import *
from bigdl.transform.vision import image
from pyspark.sql.functions import col, udf
from pyspark.sql.types import DoubleType, StringType
from bigdl.nn.layer import *
from bigdl.nn.criterion import *
from pyspark import SparkConf
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

from zoo.common.nncontext import *
from zoo.pipeline.nnframes.nn_classifier import *
from zoo.pipeline.nnframes.nn_image_reader import *
from zoo.pipeline.nnframes.nn_image_transformer import *

In [3]:
    # Transfer-learning example: read cat/dog images, derive labels from the
    # file names, embed each image with a pretrained model and train a small
    # classifier on top of the embeddings, then report the validation error.

    sparkConf = SparkConf().setAppName("ImageTransferLearningExample")
    sc = get_nncontext(sparkConf)
    redire_spark_logs()

    # NOTE(review): absolute, machine-specific paths -- adjust for your environment.
    model_path = '/Users/svermoli/sandbox/dogsvscats/model/bigdl_inception-v1_imagenet_0.4.0.model' 
    image_path = '/Users/svermoli/sandbox/dogsvscats/demo' + '/*/*'
    imageDF = NNImageReader.readImages(image_path, sc)

    # Labelling: change the code below to match your own file-naming scheme.
    # File names are expected to look like "cat.123.jpg" / "dog.456.jpg"
    # (matched case-insensitively).

    def _extract_name(row):
        """Return the "<cat|dog>.<digits>.jpg" part of the image location.

        `row` is the value of the "image" column; its first field is searched
        for the file name (the captured output shows it as a "file:/..." URI).
        Raises ValueError with the offending path when nothing matches, instead
        of an opaque AttributeError on None inside the Spark task.
        """
        match = re.search(r'(cat|dog)\.([\d]*)\.jpg', row[0], re.IGNORECASE)
        if match is None:
            raise ValueError("cannot derive a cat/dog name from image path: %r" % (row[0],))
        return match.group(0)

    getName = udf(_extract_name, StringType())
    # The name is lower-cased before the prefix test so that the labelling agrees
    # with the case-insensitive match above ("Cat.1.jpg" must be a cat, not a dog).
    # Labels are 1.0 (cat) and 2.0 (dog) -- presumably because the BigDL
    # criterion expects 1-based class indices; TODO confirm.
    getLabel = udf(lambda name: 1.0 if name.lower().startswith('cat') else 2.0, DoubleType())
    labelDF = imageDF.withColumn("name", getName(col("image"))) \
        .withColumn("label", getLabel(col('name')))

    # A fixed seed keeps the 90/10 train/validation split stable across re-runs.
    (trainingDF, validationDF) = labelDF.randomSplit([0.9, 0.1], seed=42)

    # compose a pipeline that includes feature transform, pretrained model and Logistic Regression
    # Stage 1: resize to 256x256, center-crop to 224x224 and normalize the channels.
    transformer = NNImageTransformer(
        image.Pipeline([Resize(256, 256), CenterCrop(224, 224), ChannelNormalize(123.0, 117.0, 104.0)])
    ).setInputCol("image").setOutputCol("features")

    # Stage 2: the pretrained model, fed [3,224,224] features; its output is
    # written to the "embedding" column.
    preTrainedNNModel = NNModel(Model.loadModel(model_path), [3,224,224]).setPredictionCol("embedding")

    # Stage 3: a single linear layer + log-softmax over the 1000-dimensional
    # embedding, trained with a negative log-likelihood criterion.
    lrModel = Sequential().add(Linear(1000, 2)).add(LogSoftMax())
    classifier = NNClassifier(lrModel, ClassNLLCriterion(), [1000]) \
        .setLearningRate(0.003).setBatchSize(40).setMaxEpoch(20).setFeaturesCol("embedding")

    pipeline = Pipeline(stages=[transformer, preTrainedNNModel, classifier])

    catdogModel = pipeline.fit(trainingDF)
    # Cached because the predictions are used twice: shown, then evaluated.
    predictionDF = catdogModel.transform(validationDF).cache()
    predictionDF.show()

    evaluator = MulticlassClassificationEvaluator(
        labelCol="label", predictionCol="prediction", metricName="accuracy")
    accuracy = evaluator.evaluate(predictionDF)
    # expected error should be less than 10%
    print("Test Error = %g " % (1.0 - accuracy))

creating: createResize
creating: createCenterCrop
creating: createChannelNormalize
creating: createPipeline
creating: createNNImageTransformer
creating: createNNModel
creating: createSequential
creating: createLinear
creating: createLogSoftMax
creating: createClassNLLCriterion
creating: createNNClassifier
+--------------------+------------+-----+--------------------+--------------------+----------+
|               image|        name|label|            features|           embedding|prediction|
+--------------------+------------+-----+--------------------+--------------------+----------+
|[file:/Users/sver...|cat.5006.jpg|  1.0|[file:/Users/sver...|[3.01570817100582...|       1.0|
|[file:/Users/sver...|cat.5008.jpg|  1.0|[file:/Users/sver...|[2.42088958657404...|       1.0|
|[file:/Users/sver...|cat.5016.jpg|  1.0|[file:/Users/sver...|[3.27992069060201...|       1.0|
|[file:/Users/sver...|cat.5017.jpg|  1.0|[file:/Users/sver...|[4.88881005367147...|       1.0|
|[file:/Users/sver...| cat.5

In [4]:
#end