# Code to ensure data type consistency of image chips between model build and model deployment workflows 

## Import Libraries

In [1]:
!pip install fastai==1.0.61



In [2]:
import os
import shutil
from pathlib import Path

import boto3
import numpy as np
import pandas as pd

import rasterio

from fastai import *
from fastai.vision import *
from fastai.widgets import ClassConfusion

## Model Build

### Download .tar File of Chips From S3 Bucket

In [3]:
AWS_SOURCE_PATH = 'L8-TIR-macro-localization-model-build3'
TARGET_PATH = '/scratch/ALD_L8_TIR_chips_v4p1_train3'
IMG_DIR = 'ALD_L8_TIR_steel_chips_v4p1_2020_train3'

!mkdir -p {TARGET_PATH}

In [4]:
s3 = boto3.resource('s3')
bucket = s3.Bucket('sfi-shared-assets')

bucket.download_file(str(Path(AWS_SOURCE_PATH, IMG_DIR+'.tar')), 
                     str(Path(TARGET_PATH, IMG_DIR+'.tar')))

In [5]:
!cd {TARGET_PATH} && tar xf {str(Path(IMG_DIR+'.tar'))}

### Select Sample GeoTIFF, Open with Rasterio, Check Type

In [6]:
tif_list = os.listdir(Path(TARGET_PATH, IMG_DIR))
tif_file = tif_list[0]
tif_file

'CHN0104_steel_v4p1_2020_L8_TIR.tif'

In [7]:
infile = rasterio.open(Path(TARGET_PATH, IMG_DIR, tif_file))
raster = infile.read()
print('GeoTIFF')
print('-------')
print('Numpy object: ', type(raster))
print('Data type: ', raster.dtype)
print('Size of numpy object: ', raster.shape)
print('Minimum of numpy array: ', raster.min(axis=(1,2)))
print('Maximum of numpy array: ', raster.max(axis=(1,2)))

GeoTIFF
-------
Numpy object:  <class 'numpy.ndarray'>
Data type:  uint16
Size of numpy object:  (3, 35, 35)
Minimum of numpy array:  [0 0 0]
Maximum of numpy array:  [65535 65535 65535]


### Write GeoTiff to PNG, Open PNG, Check Type

In [8]:
png_file = tif_file.replace('.tif', '.png')

In [9]:
profile = infile.profile
profile['driver'] = 'PNG'

with rasterio.open(Path(TARGET_PATH, IMG_DIR, png_file), 'w', **profile) as dst:
    dst.write(raster)

In [10]:
infile2 = rasterio.open(Path(TARGET_PATH, IMG_DIR, png_file))
raster2 = infile2.read()
print('PNG')
print('-------')
print('Numpy object: ', type(raster2))
print('Data type: ', raster2.dtype)
print('Size of numpy object: ', raster2.shape)
print('Minimum of numpy array: ', raster2.min(axis=(1,2)))
print('Maximum of numpy array: ', raster2.max(axis=(1,2)))

PNG
-------
Numpy object:  <class 'numpy.ndarray'>
Data type:  uint16
Size of numpy object:  (3, 35, 35)
Minimum of numpy array:  [0 0 0]
Maximum of numpy array:  [65535 65535 65535]


### Fastai

In [11]:
os.mkdir(Path(TARGET_PATH, IMG_DIR, 'train'))
shutil.move(Path(TARGET_PATH, IMG_DIR, png_file), Path(TARGET_PATH, IMG_DIR, 'train', png_file))

PosixPath('/scratch/ALD_L8_TIR_chips_v4p1_train3/ALD_L8_TIR_steel_chips_v4p1_2020_train3/train/CHN0104_steel_v4p1_2020_L8_TIR.png')

In [24]:
tfms = get_transforms(do_flip=True,
                      flip_vert=True, 
                      max_lighting=None, 
                      max_zoom=1.5, 
                      max_warp=0.2)

data = (ImageDataBunch.from_folder(Path(TARGET_PATH, IMG_DIR), train='train', valid='validate', 
                                   ds_tfms=tfms, bs=16, num_workers=0, seed=42)
        .normalize(imagenet_stats))
raster3 = data.x[0].data



In [40]:
print('Fastai')
print('------')
print('Object type: ', type(raster3))
print('Data type:', raster3.dtype)
print('Size: ', raster3.shape)
print('Image minimum:', raster3.min(0))
print('Image maximum:', raster3.max(0))
print('Image mean: ', raster3.mean(0))
print('Image standard deviation: ', np.sqrt(raster3.var(0)))
print('Data: ', raster3)

Fastai
------
Object type:  <class 'torch.Tensor'>
Data type: torch.float32
Size:  torch.Size([3, 35, 35])
Image minimum: torch.return_types.min(
values=tensor([[0.2039, 0.2235, 0.2353,  ..., 0.0314, 0.0235, 0.0902],
        [0.1882, 0.2078, 0.2235,  ..., 0.0275, 0.0353, 0.1373],
        [0.1804, 0.2000, 0.2196,  ..., 0.0431, 0.0667, 0.1843],
        ...,
        [0.3882, 0.3451, 0.2941,  ..., 0.4627, 0.3882, 0.3176],
        [0.2745, 0.2314, 0.2000,  ..., 0.3686, 0.3176, 0.2627],
        [0.2196, 0.1882, 0.1843,  ..., 0.2667, 0.2353, 0.1961]]),
indices=tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [2, 1, 1,  ..., 2, 2, 2],
        [1, 1, 1,  ..., 2, 2, 2],
        [1, 0, 1,  ..., 2, 0, 0]]))
Image maximum: torch.return_types.max(
values=tensor([[0.3804, 0.3843, 0.3725,  ..., 0.1804, 0.1686, 0.2235],
        [0.3647, 0.3725, 0.3569,  ..., 0.1804, 0.2078, 0.2784],
        [0.3529, 0.3608, 0.3490,  ..., 0.1686, 

## Conversion test

In [27]:
raster4 = torch.from_numpy(raster2.astype('float32'))

In [34]:
print('Conversion test')
print('------')
print('Object type: ', type(raster4))
print('Data type:', raster4.dtype)
print('Size: ', raster4.shape)
print('Image minimum:', raster4.min())
print('Image maximum:', raster4.max())
print('Image mean: ', raster4.mean())
print('Image standard deviation: ', np.sqrt(raster4.var()))
print('Data: ', raster4)

Conversion test
------
Object type:  <class 'torch.Tensor'>
Data type: torch.float32
Size:  torch.Size([3, 35, 35])
Image minimum: tensor(0.)
Image maximum: tensor(65535.)
Image mean:  tensor(23198.9355)
Image standard deviation:  tensor(15612.5215)
Data:  tensor([[[16104., 17539., 18735.,  ...,  2232.,  1753.,  6019.],
         [15945., 17300., 17818.,  ...,  1833.,  2351.,  9009.],
         [16383., 17978., 18257.,  ...,  2910.,  4504., 12198.],
         ...,
         [27505., 25552., 24515.,  ..., 34601., 30415., 26269.],
         [20649., 17101., 18616.,  ..., 26668., 23559., 20330.],
         [15028., 12357., 15387.,  ..., 18416., 15586., 12995.]],

        [[13350., 14679., 15471.,  ...,  6929., 11257., 14679.],
         [12445., 13802., 14792.,  ...,  7071., 13576., 18300.],
         [11822., 13208., 14566.,  ...,  8428., 16998., 21948.],
         ...,
         [25682., 22684., 19431.,  ..., 38381., 34082., 30179.],
         [18130., 15217., 13265.,  ..., 31056., 26276., 22429.]

In [29]:
raster5 = raster4 / (raster4.max() - raster4.min())

In [36]:
print('Conversion test 2')
print('------')
print('Object type: ', type(raster5))
print('Data type:', raster5.dtype)
print('Size: ', raster5.shape)
print('Image minimum:', raster5.min())
print('Image maximum:', raster5.max())
print('Image mean: ', raster5.mean())
print('Image standard deviation: ', np.sqrt(raster5.var()))
print('Data: ', raster5)

Conversion test
------
Object type:  <class 'torch.Tensor'>
Data type: torch.float32
Size:  torch.Size([3, 35, 35])
Image minimum: tensor(0.)
Image maximum: tensor(1.)
Image mean:  tensor(0.3540)
Image standard deviation:  tensor(0.2382)
Data:  tensor([[[0.2457, 0.2676, 0.2859,  ..., 0.0341, 0.0267, 0.0918],
         [0.2433, 0.2640, 0.2719,  ..., 0.0280, 0.0359, 0.1375],
         [0.2500, 0.2743, 0.2786,  ..., 0.0444, 0.0687, 0.1861],
         ...,
         [0.4197, 0.3899, 0.3741,  ..., 0.5280, 0.4641, 0.4008],
         [0.3151, 0.2609, 0.2841,  ..., 0.4069, 0.3595, 0.3102],
         [0.2293, 0.1886, 0.2348,  ..., 0.2810, 0.2378, 0.1983]],

        [[0.2037, 0.2240, 0.2361,  ..., 0.1057, 0.1718, 0.2240],
         [0.1899, 0.2106, 0.2257,  ..., 0.1079, 0.2072, 0.2792],
         [0.1804, 0.2015, 0.2223,  ..., 0.1286, 0.2594, 0.3349],
         ...,
         [0.3919, 0.3461, 0.2965,  ..., 0.5857, 0.5201, 0.4605],
         [0.2766, 0.2322, 0.2024,  ..., 0.4739, 0.4009, 0.3422],
         [

In [31]:
imagenet_stats

([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])