In [0]:
# Install dependencies - you might need to restart the runtime after doing this
# NOTE(review): s3fs is imported by a later cell but is not installed here -
# confirm it is already available in the runtime or pulled in by one of these packages.
!pip install turicreate==5.6
# The turicreate install leaves the wrong version of mxnet in place, so remove it
!pip uninstall -y mxnet
# Install the CUDA 10-compatible build of mxnet in its place
!pip install mxnet-cu100
# Install the Skafos Python SDK
!pip install skafos

In [0]:
# Import necessary libraries
import os
import urllib

import coremltools 
import turicreate as tc
import pandas as pd
import numpy as np
from s3fs.core import S3FileSystem

In [0]:
# Set the AWS credentials as environment variables here before running the cells below.
# The assignments were deleted for now so private keys aren't stored in the notebook;
# a better way to supply them is to load them from Google Drive.

In [0]:
# Download the boot images from a fixed, dated snapshot directory in the S3 bucket.
# For now, this is so I can put together a model to go with the existing data.
# TODO: add logic to find the latest snapshot directory--this hard coding will go away.

# Local directory the boot images are downloaded into
_local_dir = 'boot_images'

if not os.path.exists(_local_dir):
    os.makedirs(_local_dir)

# Connect to S3 (anon=False: authenticate rather than connect anonymously)
s3 = S3FileSystem(anon=False)

# List the boot image keys stored under the snapshot prefix
boots = s3.ls("s3://skafos.bootfinder/20190703202949/boot_images/")

# Loop over the listed keys, downloading each image into the local directory
for b in boots:
    _local_file = os.path.basename(b)
    if not _local_file:
        # A key ending in "/" (e.g. a placeholder object for the prefix itself)
        # has no file name, so there is no image to download for it.
        continue
    _local_path = os.path.join(_local_dir, _local_file)
    s3.get("s3://" + b, _local_path)  # download the image

In [0]:
# Load the boot metadata JSON from S3; with orient="index" each top-level key
# becomes a row label, which is then exposed as the "image_id" column.
json_path = "s3://skafos.bootfinder/20190703202949/boots_meta_data.json"
df = pd.read_json(json_path, orient="index").assign(
    image_id=lambda frame: frame.index
)

# Convert the pandas DataFrame into a Turi Create SFrame
meta_sf = tc.SFrame(data=df)

In [0]:
# Read the downloaded boot images into an SFrame and join them with the metadata
boot_data = tc.image_analysis.load_images('boot_images')

# The image id is the file name, i.e. the last "/"-separated component of the
# image path. rsplit(...)[-1] also works for a path containing no "/" at all,
# where indexing the second of the "last two" components would raise IndexError.
boot_data["image_id"] = boot_data['path'].apply(lambda path: path.rsplit("/", 1)[-1])

# Left join: every image row is kept, even when it has no metadata record
boot_data = boot_data.join(meta_sf, on=["image_id"], how="left")

# Add a row-number column to the joined SFrame
boot_data = boot_data.add_row_number()

In [0]:
# Train the image similarity model on the boot images.
# SqueezeNet is chosen as the feature extractor because it is smaller than ResNet.
model = tc.image_similarity.create(
    dataset=boot_data,
    model="squeezenet_v1.1",
)

In [0]:
# Name of the model; the exported file is this name plus the .mlmodel extension
model_name = 'ImageSimilarity'
coreml_model_name = "{}{}".format(model_name, '.mlmodel')

# Write the trained similarity model out in CoreML format
res = model.export_coreml(filename=coreml_model_name)