<a href="https://colab.research.google.com/github/spatialthoughts/projects/blob/master/tf_classification_local.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Neural Net Classification with TensorFlow and Keras

Adapted from https://developers.google.com/earth-engine/guides/tf_examples#multi-class-prediction-with-a-dnn

In [None]:
# Sign this Colab runtime in with a Google account.
# NOTE(review): presumably required so the gs:// paths used later in the
# notebook can be read and written -- confirm.
from google.colab import auth
auth.authenticate_user()

In [None]:
import tensorflow as tf
from tensorflow import keras

from pprint import pprint


In [None]:
!pip install -U earthengine-api --no-deps


In [None]:
import ee
# Replace the cloud_project with your own project
cloud_project = 'spatialthoughts'

try:
    ee.Initialize(project=cloud_project)
except Exception:
    # Initialization failed (presumably because no credentials are stored
    # yet): run the interactive authentication flow once and retry.
    # `Exception` is caught instead of a bare `except:` so that
    # KeyboardInterrupt / SystemExit still propagate.
    ee.Authenticate()
    ee.Initialize(project=cloud_project)

To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://accounts.google.com/o/oauth2/auth?client_id=517222506229-vsmmajv00ul0bs7p89v5m89qs8eb9359.apps.googleusercontent.com&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fearthengine+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdevstorage.full_control&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&response_type=code&code_challenge=BQ6GuAv-MSd7JH5YXeMxgMDLOTSS261u5YhqYfdjsew&code_challenge_method=S256

The authorization workflow will generate a code, which you should paste in the box below. 
Enter verification code: 4/1AY0e-g6TR0ypUNSaHS9CkoGME3RZYm3tJeR1Aadf788T5YBEHTaxGB3GUIQ

Successfully saved authorization token.


In [None]:
PROJECT = 'deep-learning-287813'
REGION = 'us-central1'

# Predictor properties and the label property stored in each example.
BANDS = [ 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12']
LABEL = 'landcover'

# All properties parsed from the training/testing records: the predictors
# followed by the label. Derived from BANDS so the two lists cannot drift.
FEATURE_NAMES = BANDS + [LABEL]

N_CLASSES = 4

# Cloud Storage bucket holding the inputs and outputs of this notebook.
BUCKET = 'gs://earthengine-tf'
TRAIN_FILE_PATH = BUCKET + '/arkavathy_training.tfrecord.gz'
TEST_FILE_PATH = BUCKET + '/arkavathy_testing.tfrecord.gz'
MODEL_DIR = BUCKET + '/arkavathy_model'
EEIFIED_DIR = BUCKET + '/arkavathy_eeified_model'
MODEL_NAME = 'arkavathy_tf_model4'
VERSION_NAME = 'v1'

In [None]:

# Create a dataset from the gzip-compressed TFRecord file at TRAIN_FILE_PATH.
train_dataset = tf.data.TFRecordDataset(TRAIN_FILE_PATH, compression_type='GZIP')
# Print the first record to check. The built-in next() works with any Python
# iterator, whereas the `.next()` method is a Python 2 era spelling that is
# not part of the iterator protocol.
print(next(iter(train_dataset)))

tf.Tensor(b'\n\x86\x02\n*\n\x0csystem:index\x12\x1a\n\x18\n\x16000000000000000000af_0\n\x15\n\tlandcover\x12\x08\x12\x06\n\x04\x00\x00\x00\x00\n\x0f\n\x03B11\x12\x08\x12\x06\n\x04=\x9b\x05?\n\x0f\n\x03B12\x12\x08\x12\x06\n\x042w\r?\n\x0f\n\x03B8A\x12\x08\x12\x06\n\x04a2\xb5>\n\x0e\n\x02B1\x12\x08\x12\x06\n\x04\xc9\xe5?>\n\x0e\n\x02B2\x12\x08\x12\x06\n\x04\xf6(\x9c>\n\x0e\n\x02B3\x12\x08\x12\x06\n\x04z\xa5\xac>\n\x0e\n\x02B4\x12\x08\x12\x06\n\x04\xc8\x98\xbb>\n\x0e\n\x02B5\x12\x08\x12\x06\n\x04U\xc1\xc8>\n\x0e\n\x02B6\x12\x08\x12\x06\n\x04%u\xc2>\n\x0e\n\x02B7\x12\x08\x12\x06\n\x04\xbf}\xbd>\n\x0e\n\x02B8\x12\x08\x12\x06\n\x04\xd74\xaf>\n\x0e\n\x02B9\x12\x08\x12\x06\n\x04ff\xa6>', shape=(), dtype=string)


In [None]:

# One parsing spec per feature name; every feature is a single float32 value.
columns = [
  tf.io.FixedLenFeature(shape=[1], dtype=tf.float32)
  for _ in range(len(FEATURE_NAMES))
]

# Map each feature name to its parsing spec.
features_dict = {name: spec for name, spec in zip(FEATURE_NAMES, columns)}

pprint(features_dict)

{'B1': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B11': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B12': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B2': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B3': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B4': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B5': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B6': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B7': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B8': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B8A': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'B9': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None),
 'landcover': FixedLenFeature(shape=[1], dtype=tf.float32, default_value=None)}


In [None]:
def parse_tfrecord(example_proto):
  """Parse one serialized Example into predictors and an integer label.

  The record is decoded with the module-level `features_dict`; the entry
  stored under `LABEL` is removed from the decoded dictionary and returned
  separately.

  Args:
    example_proto: a serialized Example.

  Returns:
    A tuple `(predictors, label)`: the dictionary of remaining parsed
    features and the label cast to `int32`.
  """
  record = tf.io.parse_single_example(example_proto, features_dict)
  label = tf.cast(record.pop(LABEL), tf.int32)
  return record, label

# Map the function over the dataset.
parsed_dataset = train_dataset.map(parse_tfrecord, num_parallel_calls=5)

# Print the first parsed record to check. The built-in next() is used because
# `.next()` is a Python 2 era method that is not part of the iterator protocol.
pprint(next(iter(parsed_dataset)))

({'B1': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.1874], dtype=float32)>,
  'B11': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.5219], dtype=float32)>,
  'B12': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.5526], dtype=float32)>,
  'B2': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.305], dtype=float32)>,
  'B3': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.3372], dtype=float32)>,
  'B4': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.3664], dtype=float32)>,
  'B5': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.3921], dtype=float32)>,
  'B6': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.3798], dtype=float32)>,
  'B7': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.3701], dtype=float32)>,
  'B8': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.3422], dtype=float32)>,
  'B8A': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.3539], dtype=float32)>,
  'B9': <tf.Tensor: shape=(1,), dtype=float32, numpy

In [None]:
# Keras consumes (inputs, target) tuples, and the categorical_crossentropy
# loss expects the target as a one-hot vector.
def to_tuple(inputs, label):
  """Convert a parsed record into the `(features, one_hot_label)` pair.

  Args:
    inputs: dictionary of predictor tensors; its values are stacked in the
      dictionary's iteration order and then transposed.
    label: integer class label.

  Returns:
    A tuple of the transposed feature tensor and the label one-hot encoded
    with depth `N_CLASSES`.
  """
  stacked_features = tf.transpose(list(inputs.values()))
  one_hot_label = tf.one_hot(indices=label, depth=N_CLASSES)
  return (stacked_features, one_hot_label)

# Map the to_tuple function, shuffle (buffer of 128 records) and batch (8 per batch).
input_dataset = parsed_dataset.map(to_tuple).shuffle(128).batch(8)
# Show the first batch. The built-in next() is used because `.next()` is a
# Python 2 era method that is not part of the iterator protocol.
pprint(next(iter(input_dataset)))
# Build the network layer by layer: a 64-unit ReLU hidden layer, 20% dropout,
# and a softmax output with one unit per class.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(64, activation=tf.nn.relu))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(N_CLASSES, activation=tf.nn.softmax))

# Configure training: Adam optimizer, categorical cross-entropy loss
# (targets are one-hot vectors), and accuracy as the reported metric.
adam_optimizer = tf.keras.optimizers.Adam()
model.compile(
    optimizer=adam_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the batched training dataset for 50 epochs.
model.fit(x=input_dataset, epochs=50)

(<tf.Tensor: shape=(8, 1, 12), dtype=float32, numpy=
array([[[0.0768 , 0.19875, 0.1872 , 0.0947 , 0.1083 , 0.11695, 0.1479 ,
         0.1519 , 0.1585 , 0.1392 , 0.16135, 0.2015 ]],

       [[0.0768 , 0.1829 , 0.16735, 0.0952 , 0.107  , 0.1183 , 0.13155,
         0.14795, 0.15745, 0.1392 , 0.1601 , 0.2015 ]],

       [[0.0814 , 0.4616 , 0.38165, 0.1101 , 0.1626 , 0.2227 , 0.254  ,
         0.28105, 0.30665, 0.32465, 0.3327 , 0.32335]],

       [[0.09145, 0.5072 , 0.427  , 0.161  , 0.2251 , 0.2937 , 0.325  ,
         0.34165, 0.35945, 0.36825, 0.37225, 0.3373 ]],

       [[0.0694 , 0.3943 , 0.3396 , 0.11   , 0.1606 , 0.2288 , 0.2405 ,
         0.2718 , 0.2882 , 0.3128 , 0.3065 , 0.2868 ]],

       [[0.2121 , 0.392  , 0.4282 , 0.2712 , 0.2762 , 0.2486 , 0.2527 ,
         0.2478 , 0.2336 , 0.2345 , 0.2362 , 0.2942 ]],

       [[0.05175, 0.40585, 0.3452 , 0.0838 , 0.1311 , 0.201  , 0.22905,
         0.24645, 0.26455, 0.2735 , 0.2845 , 0.27645]],

       [[0.0553 , 0.4174 , 0.3491 , 0.079  ,

<tensorflow.python.keras.callbacks.History at 0x7fd577f33110>

In [None]:
# Read the held-out records and push them through the same parsing and
# tuple-conversion steps as the training data, one record per batch.
test_records = tf.data.TFRecordDataset(TEST_FILE_PATH, compression_type='GZIP')
test_parsed = test_records.map(parse_tfrecord, num_parallel_calls=5)
test_dataset = test_parsed.map(to_tuple).batch(1)

# Returns [loss, accuracy] for the compiled model.
model.evaluate(test_dataset)



[0.26062050461769104, 0.9269663095474243]

In [None]:
# Write the trained model to MODEL_DIR using the TensorFlow SavedModel
# format (save_format='tf').
model.save(MODEL_DIR, save_format='tf')

INFO:tensorflow:Assets written to: gs://earthengine-tf/arkavathy_model/assets


In [None]:
# Get a list of all the files in the output bucket.
files_list = !gsutil ls 'gs://earthengine-tf'
# Get only the files generated by the image export.
exported_files_list = [s for s in files_list if 'arkavathy_image' in s]

# Get the list of image files and the JSON mixer file.
image_files_list = []
json_file = None
for f in exported_files_list:
  if f.endswith('.tfrecord.gz'):
    image_files_list.append(f)
  elif f.endswith('.json'):
    json_file = f

# Make sure the files are in the right order.
image_files_list.sort()

pprint(image_files_list)
print(json_file)


['gs://earthengine-tf/arkavathy_image00000.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00001.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00002.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00003.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00004.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00005.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00006.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00007.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00008.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00009.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00010.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00011.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00012.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00013.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00014.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00015.tfrecord.gz',
 'gs://earthengine-tf/arkavathy_image00016.tfrecord.gz',
 'gs://earthengine-tf/arkavathy

In [None]:
import json

# Load the contents of the mixer file to a JSON object.
json_text = !gsutil cat {json_file}
# Get a single string w/ newlines from the IPython.utils.text.SList
mixer = json.loads(json_text.nlstr)
pprint(mixer)

{'patchDimensions': [256, 256],
 'patchesPerRow': 21,
 'projection': {'affine': {'doubleMatrix': [8.983152841195215e-05,
                                            0.0,
                                            77.20409012614085,
                                            0.0,
                                            -8.983152841195215e-05,
                                            13.383370597397867]},
                'crs': 'EPSG:4326'},
 'totalPatches': 1008}


In [None]:
# Pull the patch geometry and patch count out of the JSON mixer file.
patch_dims = mixer['patchDimensions']
patch_width = patch_dims[0]
patch_height = patch_dims[1]
patches = mixer['totalPatches']
# Each band of a patch is parsed as a flat column of width*height values.
patch_dimensions_flat = [patch_width * patch_height, 1]

# One parsing spec per band, each covering a whole flattened patch.
image_columns = [
  tf.io.FixedLenFeature(shape=patch_dimensions_flat, dtype=tf.float32)
  for _ in range(len(BANDS))
]

# Parsing dictionary: band name -> parsing spec.
image_features_dict = {band: spec for band, spec in zip(BANDS, image_columns)}

def parse_image(example_proto):
  """Parse one serialized patch Example using `image_features_dict`."""
  return tf.io.parse_single_example(example_proto, image_features_dict)


# Build the prediction input as a single pipeline:
#   1. read all exported shards as one dataset (a list of files is accepted),
#   2. parse each record into one long tensor per band,
#   3. slice those long tensors into one element per pixel,
#   4. turn each element's dictionary into a 1-tuple (no label),
#   5. regroup the pixels so that each batch is exactly one patch.
image_dataset = (
    tf.data.TFRecordDataset(image_files_list, compression_type='GZIP')
    .map(parse_image, num_parallel_calls=5)
    .flat_map(lambda patch_features: tf.data.Dataset.from_tensor_slices(patch_features))
    .map(lambda data_dict: (tf.transpose(list(data_dict.values())), ))
    .batch(patch_width * patch_height)
)

In [None]:
# Predict patch by patch: every batch is one patch, so the number of steps
# equals the number of patches.
predictions = model.predict(x=image_dataset, steps=patches, verbose=1)

# predict() hands back a numpy array; inspect its first entry as a sanity check.
first_prediction = predictions[0]
print(first_prediction)

[[7.3796171e-03 5.0105405e-04 9.7446072e-01 1.7658679e-02]]


In [None]:
OUTPUT_IMAGE_FILE = 'gs://earthengine-tf/arkavathy_classified_full.TFRecord'

# Number of pixels (and therefore predictions) that make up one patch.
patch_size = patch_width * patch_height
# Only complete patches are written; a trailing partial patch is skipped.
full_patches = len(predictions) // patch_size

# Every patch-worth of predictions we'll dump an example into the output
# file with a single feature that holds our predictions. Since our predictions
# are already in the order of the exported data, the patches we create here
# will also be in the right order.
# The writer is used as a context manager so the file is closed even if the
# loop raises part-way through.
with tf.io.TFRecordWriter(OUTPUT_IMAGE_FILE) as writer:
  for patch_index in range(full_patches):
    start = patch_index * patch_size
    # Class id = index of the highest probability along the last axis,
    # computed for the whole patch at once instead of one TensorFlow op per
    # pixel. Assumes one class-probability vector per pixel, as in the
    # printed first prediction.
    class_ids = (
        predictions[start:start + patch_size].argmax(axis=-1).ravel().tolist())

    print('Done with patch ' + str(patch_index + 1) + ' of ' + str(patches) + '...')
    # Create an example holding the class ids of this patch.
    example = tf.train.Example(
      features=tf.train.Features(
        feature={
          'prediction': tf.train.Feature(
              int64_list=tf.train.Int64List(value=class_ids))
        }
      )
    )
    # Write the example to the file.
    writer.write(example.SerializeToString())

Done with patch 1 of 1008...
Done with patch 2 of 1008...
Done with patch 3 of 1008...
Done with patch 4 of 1008...
Done with patch 5 of 1008...
Done with patch 6 of 1008...
Done with patch 7 of 1008...
Done with patch 8 of 1008...
Done with patch 9 of 1008...
Done with patch 10 of 1008...
Done with patch 11 of 1008...
Done with patch 12 of 1008...
Done with patch 13 of 1008...
Done with patch 14 of 1008...
Done with patch 15 of 1008...
Done with patch 16 of 1008...
Done with patch 17 of 1008...
Done with patch 18 of 1008...
Done with patch 19 of 1008...
Done with patch 20 of 1008...
Done with patch 21 of 1008...
Done with patch 22 of 1008...
Done with patch 23 of 1008...
Done with patch 24 of 1008...
Done with patch 25 of 1008...
Done with patch 26 of 1008...
Done with patch 27 of 1008...
Done with patch 28 of 1008...
Done with patch 29 of 1008...
Done with patch 30 of 1008...
Done with patch 31 of 1008...
Done with patch 32 of 1008...
Done with patch 33 of 1008...
Done with patch 34 

In [None]:
# Earth Engine asset ID that the classified image is uploaded to.
OUTPUT_ASSET_ID = 'users/ujavalgandhi/temp/arkavathy_classified_tf_full'
# Start an Earth Engine image upload from the predictions TFRecord and the
# JSON mixer file, with the pyramiding policy set to "mode".
!earthengine upload image --asset_id={OUTPUT_ASSET_ID} --pyramiding_policy=mode {OUTPUT_IMAGE_FILE} {json_file}



Instructions for updating:
non-resource variables are not supported in the long term
Started upload task with ID: PXQ75OZQMVPM3WMZGOZ5S3LW
