## Deep Learning - Deep Vision Classifier

### Environment Setup -- reinstall Horovod against the new version of PyTorch

In [None]:
# Install the SynapseML deep-learning wheel from DBFS. --force-reinstall replaces
# any copy that is already installed; --no-deps leaves its dependencies untouched.
%pip install /dbfs/FileStore/shared_uploads/serenaruan@microsoft.com/synapseml_dl-0.9.5.dev1-py3-none-any.whl --force-reinstall --no-deps

In [None]:
# Shell out to Horovod's build check as a sanity test of the installation
# before any training is attempted.
! horovodrun --check-build

In [None]:
import os
import numpy as np
from PIL import Image
import sys

from pyspark.sql.functions import udf, col
from pyspark.sql.types import DoubleType, IntegerType

from pyspark.sql.functions import udf
import pyspark.sql.types as T
import numpy as np

from pyspark.ml.linalg import DenseVector, VectorUDT
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

import torchvision.transforms as transforms

### Read Dataset

In [None]:
def assign_label(path):
    """Derive an integer class label from an image file path.

    The file name (the part after the last "/") is cut at its first ".",
    the piece after the first "_" is parsed as an integer, and that number
    is floor-divided by 81 to produce the label.

    Args:
        path: Path string whose file name looks like ``<prefix>_<number>.<ext>``.

    Returns:
        The image number floor-divided by 81.

    Raises:
        IndexError: If the file name contains no "_".
        ValueError: If the piece after the first "_" is not an integer.
    """
    file_name = path.rsplit("/", 1)[-1]
    stem = file_name.partition(".")[0]
    image_number = int(stem.split("_")[1])
    # NOTE(review): the divisor 81 is hard-coded; confirm it matches how the
    # dataset's image numbers map onto classes.
    return image_number // 81


# Wrap assign_label as a Spark UDF returning an integer column so it can be
# applied to the "path" column of a DataFrame.
assign_label_udf = udf(f=assign_label, returnType=IntegerType())

In [None]:
# The image files are expected to be uploaded already on the build/test machine.
# Each *.jpg under the train folder becomes one row: "image" carries the file
# path and "label" is derived from that path.
train_df = (
    spark.read.format("binaryFile")
    .option("pathGlobFilter", "*.jpg")
    .load("/tmp/17flowers/train")
    .select(
        col("path").alias("image"),
        assign_label_udf(col("path")).alias("label"),
    )
)

# Preview the first 100 rows in the notebook.
display(train_df.limit(100))

In [None]:
# Build the test DataFrame from every *.jpg under the test folder: "image"
# carries the file path and "label" is derived from that path.
test_df = (
    spark.read.format("binaryFile")
    .option("pathGlobFilter", "*.jpg")
    .load("/tmp/17flowers/test")
    .select(
        col("path").alias("image"),
        assign_label_udf(col("path")).alias("label"),
    )
)

### Training

In [None]:
from horovod.spark.common.store import DBFSLocalStore
from horovod.spark.common.backend import SparkBackend
from pytorch_lightning.callbacks import Callback, ModelCheckpoint
from synapse.ml.dl import *

# Number of training epochs handed to the classifier.
epochs = 10

# Store object rooted at a DBFS path; it is passed to the classifier as `store`.
run_output_dir = "/dbfs/FileStore/test/resnet50"
store = DBFSLocalStore(run_output_dir)

# Checkpoint callback whose file names are templated on epoch and train_loss.
callbacks = [
    ModelCheckpoint(filename="{epoch}-{train_loss:.2f}"),
]

In [None]:
# Configure the classifier: a resnet50 backbone over 17 classes, using the
# store, callbacks and epoch count prepared earlier in the notebook.
# validation=0.1 presumably holds out 10% of the training data -- confirm
# against the DeepVisionClassifier documentation.
deep_vision_classifier = DeepVisionClassifier(
    backbone="resnet50",
    num_classes=17,
    epochs=epochs,
    batch_size=16,
    validation=0.1,
    store=store,
    callbacks=callbacks,
)

# Fit on the training DataFrame, then set the fitted model's output column name.
deep_vision_model = deep_vision_classifier.fit(train_df)
deep_vision_model = deep_vision_model.setOutputCols(["label_prob"])

### Prediction

In [None]:
# Score the held-out test set. The notebook builds this DataFrame as `test_df`;
# the previous reference to `test_df_trans` named a variable that is never
# defined and raised NameError.
pred_df = deep_vision_model.transform(test_df)

# Compare the predicted class against the ground-truth "label" column.
# NOTE(review): this assumes the fitted model emits a "label_pred" column in
# addition to the "label_prob" output column configured at fit time -- confirm
# against the installed synapseml_dl version.
evaluator = MulticlassClassificationEvaluator(
    predictionCol="label_pred", labelCol="label", metricName="accuracy"
)
print("Test accuracy:", evaluator.evaluate(pred_df))