# Add Predictions to Unlabelled Data

In [None]:
import numpy as np
import pickle
import sys
import keras.preprocessing.image 
from keras.models import Sequential,Model,load_model
from keras.preprocessing.image import img_to_array
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from keras.layers import Activation, Dropout, Flatten, Dense,  GlobalMaxPooling2D, Lambda, Concatenate
from keras.applications.imagenet_utils import decode_predictions
import matplotlib.pyplot as plt
from keras.layers import Input
import csv
import pandas as pd   
import os
import h5py
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split

In [2]:
# The model path below is built from sys.argv[1], so pin the user name here.
# Append when argv has no slot 1 (e.g. run as a bare script) rather than
# raising IndexError.
if len(sys.argv) > 1:
    sys.argv[1] = "mhelabd"
else:
    sys.argv.append("mhelabd")

In [28]:
# Global configuration
WANTED_BANDS = [3, 2, 1]  # band indices kept out of the NUM_OG_BANDS stored per image
IMAGE_HEIGHT = 64
IMAGE_WIDTH = 64
NUM_BANDS = len(WANTED_BANDS)
NUM_OG_BANDS = 13

# The model name encodes the architecture, input shape and selected bands.
MODEL_NAME = (
    "Augmented-drop-neighbors-offset-CNN-(Resnet50)"
    f"-input-({IMAGE_HEIGHT}, {IMAGE_WIDTH}, {NUM_BANDS})"
    f"-bands-({WANTED_BANDS})"
)
PATH = f"/atlas/u/{sys.argv[1]}/data/kiln-scaling/models/{MODEL_NAME}/"
MODEL_WEIGHTS_PATH = f"{PATH}weights/trial_6_epoch_55.h5"

# Dataset to score; the alternative used previously is "bangladesh_2020_2021".
DATASET = "bangladesh_2019-2020"
DATA_PATH = f"/atlas/u/mliu356/data/kiln-scaling/{DATASET}/"
# The output directory uses underscores where the dataset name has hyphens.
LABELLED_DATA_PATH = (
    f"/atlas/u/mhelabd/data/kiln-scaling/labelled_{DATASET.replace('-', '_')}/"
)

In [20]:
def mkdirs(names):
    """Create every directory in ``names``, including missing parents.

    Directories that already exist are left untouched.

    Args:
        names: Iterable of directory paths.

    Raises:
        FileExistsError: If a path exists but is not a directory.
    """
    for name in names:
        # exist_ok avoids the race between a separate os.path.exists() check
        # and the creation call.
        os.makedirs(name, exist_ok=True)
# Make sure the directory that save_h5_file writes into exists.
mkdirs([LABELLED_DATA_PATH])

In [21]:
def load_data_from_h5(filename):
    """Read one HDF5 example file from ``DATA_PATH``.

    Prints the file name and the shapes of the image and label arrays.

    Args:
        filename: File name relative to ``DATA_PATH``.

    Returns:
        Tuple ``(X, y, bounds, indices)`` with the full contents of the
        "images", "labels", "bounds" and "indices" datasets.
    """
    with h5py.File(DATA_PATH + filename, "r") as f:
        print("extracting: ", filename)
        # dataset[()] already reads the data into memory; np.asarray keeps the
        # ndarray return type without making a second full copy.
        X = np.asarray(f["images"][()])
        y = np.asarray(f["labels"][()])
        bounds = np.asarray(f["bounds"][()])
        indices = np.asarray(f["indices"][()])
    print("x shape: ", X.shape)
    print("y shape: ", y.shape)
    return X, y, bounds, indices

In [22]:
def make_model(weights="imagenet",
               include_top=False,
               load_weights=None,  # path of weights
               loss=keras.losses.binary_crossentropy,
               optimizer=None,
               metrics=None):
    """Build and compile the ResNet50-based binary classifier.

    The network is a ResNet50 backbone on an
    ``(IMAGE_HEIGHT, IMAGE_WIDTH, NUM_BANDS)`` input, followed by global max
    pooling, three ReLU dense layers (1024, 1024, 512) and a single sigmoid
    output unit.

    Args:
        weights: Passed to ``ResNet50`` as its ``weights`` argument.
        include_top: Passed to ``ResNet50`` as its ``include_top`` argument.
        load_weights: Optional path to a weights file loaded into the full
            model after compilation.
        loss: Loss passed to ``model.compile``.
        optimizer: Optimizer passed to ``model.compile``. ``None`` creates a
            fresh ``keras.optimizers.Adam()`` for this call.
        metrics: Metrics passed to ``model.compile``. ``None`` creates a fresh
            ``[BinaryAccuracy(threshold=0.5)]`` for this call.

    Returns:
        The compiled model.
    """
    # Defaults are built per call: an optimizer or metric object created in
    # the signature would be evaluated once and shared by every model.
    if optimizer is None:
        optimizer = keras.optimizers.Adam()
    if metrics is None:
        metrics = [tf.keras.metrics.BinaryAccuracy(threshold=0.5)]

    image_input = Input(shape=(IMAGE_HEIGHT, IMAGE_WIDTH, NUM_BANDS))
    # NOTE(review): `classes` is presumably ignored when include_top is False;
    # confirm against the ResNet50 documentation.
    base_model = ResNet50(include_top=include_top, weights=weights, input_tensor=image_input, classes=2)
    x = base_model.output
    x = GlobalMaxPooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    x = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=base_model.input, outputs=x)

    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    if load_weights:
        model.load_weights(load_weights)
    return model


In [23]:
def save_h5_file(filename, X, y, bounds, indices, y_pred):
    """Write one example file plus its predictions to ``LABELLED_DATA_PATH``.

    The output file holds the datasets "bounds", "images", "labels",
    "pred_labels" and "indices". An existing file of the same name is
    overwritten (mode ``'w'``).

    Args:
        filename: File name relative to ``LABELLED_DATA_PATH``.
        X: Data stored as "images".
        y: Data stored as "labels".
        bounds: Data stored as "bounds".
        indices: Data stored as "indices".
        y_pred: Data stored as "pred_labels".
    """
    filepath = LABELLED_DATA_PATH + filename
    print("Saving file", filepath)
    # The context manager closes the file even if a dataset write fails.
    with h5py.File(filepath, 'w') as f:
        f.create_dataset("bounds", data=bounds)
        f.create_dataset("images", data=X)
        f.create_dataset("labels", data=y)
        f.create_dataset("pred_labels", data=y_pred)
        f.create_dataset("indices", data=indices)

In [24]:
# Score every file in DATA_PATH with the trained model and write a copy that
# also carries the thresholded predictions.
model = make_model(load_weights=MODEL_WEIGHTS_PATH)
for filename in os.listdir(DATA_PATH):
    X, y, bounds, indices = load_data_from_h5(filename)
    # Move the band axis last, then keep only the wanted bands. reshape and
    # moveaxis do not need a copy of X up front; the band selection produces
    # a new array, so X is saved unchanged below.
    X_prime = np.moveaxis(
        X.reshape((-1, NUM_OG_BANDS, IMAGE_HEIGHT, IMAGE_WIDTH)), 1, -1
    )[:, :, :, WANTED_BANDS]
    # NOTE(review): the raw band values go straight into the model with no
    # preprocess_input/normalisation step; confirm this matches training.
    # Predictions are stored as booleans (sigmoid output > 0.5).
    y_pred = model.predict(X_prime) > 0.5
    save_h5_file(filename, X, y, bounds, indices, y_pred)

extracting:  examples_77.hdf5
x shape:  (999, 13, 64, 64)
y shape:  (999, 1)
Saving file /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_2020_2021/examples_77.hdf5
extracting:  examples_3.hdf5
x shape:  (999, 13, 64, 64)
y shape:  (999, 1)
Saving file /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_2020_2021/examples_3.hdf5
extracting:  examples_46.hdf5
x shape:  (999, 13, 64, 64)
y shape:  (999, 1)
Saving file /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_2020_2021/examples_46.hdf5
extracting:  examples_24.hdf5
x shape:  (999, 13, 64, 64)
y shape:  (999, 1)
Saving file /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_2020_2021/examples_24.hdf5
extracting:  examples_86.hdf5
x shape:  (999, 13, 64, 64)
y shape:  (999, 1)
Saving file /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_2020_2021/examples_86.hdf5
extracting:  examples_15.hdf5
x shape:  (999, 13, 64, 64)
y shape:  (999, 1)
Saving file /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_202

extracting:  examples_49.hdf5
x shape:  (999, 13, 64, 64)
y shape:  (999, 1)
Saving file /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_2020_2021/examples_49.hdf5
extracting:  examples_33.hdf5
x shape:  (999, 13, 64, 64)
y shape:  (999, 1)
Saving file /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_2020_2021/examples_33.hdf5
extracting:  examples_78.hdf5
x shape:  (999, 13, 64, 64)
y shape:  (999, 1)
Saving file /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_2020_2021/examples_78.hdf5
extracting:  examples_74.hdf5
x shape:  (999, 13, 64, 64)
y shape:  (999, 1)
Saving file /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_2020_2021/examples_74.hdf5
extracting:  examples_0.hdf5
x shape:  (999, 13, 64, 64)
y shape:  (999, 1)
Saving file /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_2020_2021/examples_0.hdf5
extracting:  examples_45.hdf5
x shape:  (999, 13, 64, 64)
y shape:  (999, 1)
Saving file /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_202

# Testing

In [30]:
# Spot-check a single output file. The path is hard-coded to the 2020_2021
# output directory rather than derived from LABELLED_DATA_PATH.
filename = "/atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_2020_2021/examples_32.hdf5"

In [31]:
 with h5py.File(filename, "r") as f:
    # Re-open a saved file and compare the shapes of the stored labels and
    # the stored predictions.
    print("extracting: ", filename)
    X, y, bounds, y_pred = (
        np.array(f[key][()])
        for key in ("images", "labels", "bounds", "pred_labels")
    )
    print(y.shape, y_pred.shape, sep="\n")

extracting:  /atlas/u/mhelabd/data/kiln-scaling/labelled_bangladesh_2020_2021/examples_32.hdf5
(999, 1)
(999, 1)


In [29]:
# Compare the number of entries in the input and output directories.
len(os.listdir(DATA_PATH)), len(os.listdir(LABELLED_DATA_PATH))

(87, 87)