In [8]:
#start

In [9]:
import re
from bigdl.util.common import *
from bigdl.transform.vision.image import *
from bigdl.transform.vision import image
from pyspark.sql.functions import col, udf
from pyspark.sql.types import DoubleType, StringType
from bigdl.nn.layer import *
from bigdl.nn.criterion import *
from pyspark import SparkConf
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

from zoo.common.nncontext import *
from zoo.pipeline.nnframes.nn_classifier import *
from zoo.pipeline.nnframes.nn_image_reader import *
from zoo.pipeline.nnframes.nn_image_transformer import *

In [10]:
    # Transfer learning for cat-vs-dog classification: a pretrained Inception-v1 model
    # produces an embedding for every image, and a small linear classifier is trained
    # on top of that embedding.
    sparkConf = SparkConf().setAppName("ImageTransferLearningExample")
    sc = get_nncontext(sparkConf)
    #redire_spark_logs()

    # NOTE(review): machine-specific absolute paths -- point these at your own copy of
    # the pretrained model and the image folder before running.
    model_path = '/Users/svermoli/sandbox/dogsvscats/model/bigdl_inception-v1_imagenet_0.4.0.model'
    image_path = '/Users/svermoli/sandbox/dogsvscats/demo' + '/*/*'
    imageDF = NNImageReader.readImages(image_path, sc).repartition(12).cache()

    print("Partition number: ", imageDF.rdd.getNumPartitions())
    print("Image number", imageDF.count())

    #change code for house style here:

    def _extract_name(row):
        """Return the '<cat|dog>.<digits>.jpg' part of the image origin, or None.

        `row` is the value of the "image" column; its first field is searched
        (the rendered output shows it holding the file URI). Returning None instead
        of calling `.group(0)` on a failed match keeps one unexpected file in the
        folder from aborting the whole Spark job.
        """
        match = re.search(r'(cat|dog)\.([\d]*)\.jpg', row[0], re.IGNORECASE)
        return match.group(0) if match else None

    def _label_for(name):
        """Map a file name to its class label: 1.0 for cat, 2.0 for dog.

        The comparison is case-insensitive so that it agrees with the
        case-insensitive regex in `_extract_name` (e.g. 'Cat.1.jpg' is a cat).
        None is passed through for rows without a recognisable name.
        """
        if name is None:
            return None
        return 1.0 if name.lower().startswith('cat') else 2.0

    getName = udf(_extract_name, StringType())
    getLabel = udf(_label_for, DoubleType())

    # Rows whose file name does not look like cat.N.jpg / dog.N.jpg are dropped
    # before labelling, so only labelled images reach training and evaluation.
    labelDF = imageDF.withColumn("name", getName(col("image"))) \
        .filter(col("name").isNotNull()) \
        .withColumn("label", getLabel(col('name')))

    # Fixed seed so the 90/10 split (and therefore the reported error) is repeatable.
    (trainingDF, validationDF) = labelDF.randomSplit([0.9, 0.1], seed=42)

    # compose a pipeline that includes feature transform, pretrained model and Logistic Regression
    transformer = NNImageTransformer(
        image.Pipeline([Resize(256, 256), CenterCrop(224, 224), ChannelNormalize(123.0, 117.0, 104.0)])
    ).setInputCol("image").setOutputCol("features")

    # The pretrained network is used as a fixed feature extractor; its output is
    # written to the "embedding" column that the classifier reads below.
    preTrainedNNModel = NNModel(Model.loadModel(model_path), [3,224,224]).setPredictionCol("embedding")

    print("Loaded pretrained model")

    # Linear(1000, 2) + LogSoftMax trained with ClassNLLCriterion on the embedding.
    lrModel = Sequential().add(Linear(1000, 2)).add(LogSoftMax())
    classifier = NNClassifier(lrModel, ClassNLLCriterion(), [1000]) \
        .setLearningRate(0.003).setBatchSize(40).setMaxEpoch(20).setFeaturesCol("embedding")

    print ("stage 1")
    pipeline = Pipeline(stages=[transformer, preTrainedNNModel, classifier])
    print ("start training")
    catdogModel = pipeline.fit(trainingDF)
    print ("end training")

    # Cached because the predictions are consumed twice: by show() and by the evaluator.
    predictionDF = catdogModel.transform(validationDF).cache()
    predictionDF.show()

    evaluator = MulticlassClassificationEvaluator(
        labelCol="label", predictionCol="prediction", metricName="accuracy")
    accuracy = evaluator.evaluate(predictionDF)
    # expected error should be less than 10%
    print("Test Error = %g " % (1.0 - accuracy))

('Partition number: ', 12)
('Image number', 2222)
creating: createResize
creating: createCenterCrop
creating: createChannelNormalize
creating: createPipeline
creating: createNNImageTransformer
creating: createNNModel
Loaded pretrained model
creating: createSequential
creating: createLinear
creating: createLogSoftMax
creating: createClassNLLCriterion
creating: createNNClassifier
stage 1
start training
end training
+--------------------+------------+-----+--------------------+--------------------+----------+
|               image|        name|label|            features|           embedding|prediction|
+--------------------+------------+-----+--------------------+--------------------+----------+
|[file:/Users/sver...|cat.5116.jpg|  1.0|[file:/Users/sver...|[2.18015699715579...|       1.0|
|[file:/Users/sver...|cat.5170.jpg|  1.0|[file:/Users/sver...|[5.75808542180311...|       2.0|
|[file:/Users/sver...| cat.529.jpg|  1.0|[file:/Users/sver...|[3.48281209880951...|       2.0|
|[file:/Users

In [11]:
#end