In [None]:
import os
import shutil
import random
from tqdm import tqdm

import numpy as np
import pandas as pd

import PIL
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
# Root folder of the dataset; its directory entries are used as candidate class labels.
DATASET = "../input/eurosat-dataset/EuroSATallBands"

# os.listdir returns every entry in DATASET (files as well as folders).
LABELS = os.listdir(DATASET)
print(LABELS)

In [None]:

# Drop metadata files (names ending in csv/json) so only class entries remain.
# A new list is built instead of calling LABELS.remove() inside a loop over
# LABELS: removing while iterating skips the element that follows each removal,
# so two consecutive metadata files would leave one behind.
LABELS = [entry for entry in LABELS
          if not entry.lower().endswith(('csv', 'json'))]

print(LABELS)

In [None]:
import re
from sklearn.model_selection import StratifiedShuffleSplit
from keras.preprocessing.image import ImageDataGenerator

# Output folders for the train/test split and basic model constants.
TRAIN_DIR = './training'
TEST_DIR = './testing'
BATCH_SIZE = 64
NUM_CLASSES=len(LABELS)
INPUT_SHAPE = (64, 64, 3)
CLASS_MODE = 'categorical'

# Create the training and testing directories, each with one sub-directory
# per class label. makedirs(exist_ok=True) is a no-op for directories that
# already exist, so the cell can be re-run safely.
for split_dir in (TRAIN_DIR, TEST_DIR):
    os.makedirs(split_dir, exist_ok=True)
    for label in LABELS:
        os.makedirs(os.path.join(split_dir, label), exist_ok=True)

In [None]:
# Map every source image path to its class label.
data = {}
for l in LABELS:
    class_dir = os.path.join(DATASET, l)
    for img in os.listdir(class_dir):
        data[os.path.join(class_dir, img)] = l

X = pd.Series(list(data.keys()))
y = pd.get_dummies(pd.Series(list(data.values())))

# Single stratified 80/20 split with a fixed random_state.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=69)
train_idx, test_idx = next(split.split(X, y))

# The splitter yields positional indices, hence .iloc.
train_paths = X.iloc[train_idx]
test_paths = X.iloc[test_idx]


def _copy_split(src_paths, dest_root, other_root):
    """Copy each source image into ``dest_root``, keeping its class sub-folder.

    A file is skipped when it already exists in ``dest_root`` or under the
    same relative path in ``other_root``, so an image never ends up in both
    splits when the cell is re-run.
    """
    for src in tqdm(src_paths):
        # Path relative to the dataset root, i.e. "<label>/<file name>".
        rel = os.path.relpath(src, DATASET)
        dest = os.path.join(dest_root, rel)
        twin = os.path.join(other_root, rel)
        if not os.path.exists(dest) and not os.path.exists(twin):
            shutil.copy(src, dest)


# Files are copied (not moved); the source dataset is left untouched.
print("moving training files..")
_copy_split(train_paths, TRAIN_DIR, TEST_DIR)

print("moving testing files..")
_copy_split(test_paths, TEST_DIR, TRAIN_DIR)

In [None]:
import warnings
warnings.filterwarnings('ignore')
import os
import numpy as np
import rasterio
from rasterio.plot import show, show_hist
import matplotlib.pyplot as plt
import seaborn as sns

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's "white" style to subsequent plots.
sns.set(style="white")

In [None]:
# Count the entries in each class folder of the source dataset.
path = '../input/eurosat-dataset/EuroSATallBands/'
len_categories = {}
for category in LABELS:
    # `path` ends with a slash, so plain concatenation yields the folder path.
    len_categories[category] = len(os.listdir(path + category))
total = sum(len_categories.values())
print(len_categories)

In [None]:
# Bar chart of the per-category counts collected in len_categories.
x = list(range(len(len_categories)))
y = list(len_categories.values())
# Data vectors must be passed by keyword: positional x/y are rejected by
# current seaborn releases.
ax = sns.barplot(x=x, y=y)
plt.title("Number of Photos in Each Category", fontsize=14)
plt.ylabel("Number")
# Label each bar with its category name.
plt.xticks(x, list(len_categories.keys()), rotation=90)
plt.tight_layout()
plt.show()

In [None]:
# Open one sample image from the training split and print its basic geometry.
# The dataset handle stays open because later cells read bands from `raster`.
fp = './training/Forest/Forest_100.tif'
raster = rasterio.open(fp)

print('Height:', raster.height)
print('Width:', raster.width)
print('Number of bands:', raster.count)

In [None]:
# Show the red, green and blue bands of the sample raster side by side.
fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, nrows=1, figsize=(10, 4), sharey=True)

# Band indices match the composite cell, which reads red/green/blue from
# bands 4/3/2; the previous 3/2/1 indices put each title on the wrong band.
show((raster, 4), cmap='Reds', ax=ax1)
show((raster, 3), cmap='Greens', ax=ax2)
show((raster, 2), cmap='Blues', ax=ax3)

ax1.set_title("Red")
ax2.set_title("Green")
ax3.set_title("Blue")

In [None]:
# Read the three bands used for the colour composite (1-based band indices).
# NOTE(review): presumably bands 4/3/2 are the Sentinel-2 red/green/blue bands — confirm against the dataset docs.
red = raster.read(4)
green = raster.read(3)
blue = raster.read(2)

def normalize(array):
    """Min-max scale ``array`` to the range [0, 1].

    Parameters
    ----------
    array : numpy.ndarray
        Band values to rescale.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape. A constant input (max == min) yields
        all zeros instead of the NaNs a division by zero would produce.
    """
    array_min, array_max = array.min(), array.max()
    value_range = array_max - array_min
    if value_range == 0:
        # Constant band: there is no contrast to stretch.
        return np.zeros(array.shape, dtype=float)
    return (array - array_min) / value_range
    
# Rescale each band, report its range, then stack into an RGB image.
redn, greenn, bluen = (normalize(band) for band in (red, green, blue))

print("Normalized bands")
for scaled in (redn, greenn, bluen):
    print(scaled.min(), '-', scaled.max(), 'mean:', scaled.mean())

# Stack along a third axis so the result is (rows, cols, 3).
rgb = np.dstack((redn, greenn, bluen))
# Raw (un-normalised) range of the red band, for comparison.
print(np.max(red),np.min(red))
rgb = np.array(rgb)
plt.imshow(rgb)

In [None]:
# Histogram of the sample raster's pixel values, drawn as unstacked filled steps with 50 bins.
show_hist(raster, bins=50, lw=0.0, stacked=False,
      histtype='stepfilled', title="Histogram")

In [None]:
import os
import shutil
import itertools
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

import keras
from keras import models
from keras import backend as K
from keras.applications import imagenet_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.metrics import categorical_crossentropy
from keras.optimizers import Adam
from keras.models import Sequential, Model
from keras.layers import Activation
from keras.layers.core import Dense, Flatten
from keras.layers.convolutional import *
from sklearn.metrics import confusion_matrix

%matplotlib inline

In [None]:
# Seed every random number source used in this notebook.
os.environ['PYTHONHASHSEED'] = '0'
# np.random.seed must be *called*; assigning to it (`np.random.seed = 123`)
# replaces the function and leaves NumPy unseeded.
np.random.seed(123)
random.seed(456)
tf.random.set_seed(789)

# Limit TensorFlow to single-threaded op execution and install that session as the Keras backend session.
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)

In [None]:
# Source dataset root and the train/test folders under the working directory.
base_path = '../input/eurosat-dataset/EuroSATallBands/'
out_path='./'
train_path = os.path.join(out_path, 'training/')
test_path = os.path.join(out_path, 'testing/')

In [None]:
# Class names are the immediate sub-directories of base_path.
# Only the first os.walk() entry is used, so nested folders are not picked
# up as classes, and the list is sorted so class indices do not depend on
# the OS directory order. A missing base_path yields an empty list, matching
# the behaviour of a plain os.walk loop.
_, top_level_dirs, _ = next(os.walk(base_path), (base_path, [], []))
categories = sorted(top_level_dirs)
print(categories)

In [None]:
# Report how many files each class has in the training and testing folders.
for category in categories:
    split_counts = []
    for split_root in (train_path, test_path):
        class_dir = os.path.join(split_root, category)
        split_counts.append(len(os.listdir(class_dir)))
    len_tr, len_test = split_counts

    print('Train images for ' + category + ': {}'.format(len_tr))
    print('Test images for ' + category + ': {}'.format(len_test))
    print('')

In [None]:
# Arguments shared by the training and testing directory iterators.
_flow_kwargs = dict(target_size=(64, 64), classes=categories)

# Training iterator: batches of 20.
train_batches = ImageDataGenerator().flow_from_directory(
    train_path, batch_size=20, **_flow_kwargs)

# Testing iterator: batches of 10, with shuffling disabled.
test_batches = ImageDataGenerator().flow_from_directory(
    test_path, batch_size=10, shuffle=False, **_flow_kwargs)

In [None]:
def plots(ims, figsize=(30, 15), rows=1, interp=False, titles=None):
    """Display a sequence of images in a grid of ``rows`` rows.

    Parameters
    ----------
    ims : sequence of images
        If the first element is a numpy array, the whole batch is converted
        to uint8 and, when the last axis is not of size 3, transposed from
        (N, C, H, W) to (N, H, W, C).
    figsize : tuple
        Figure size passed to ``plt.figure``.
    rows : int
        Number of grid rows.
    interp : bool
        When false, images are drawn with ``interpolation='none'``; when
        true, ``interpolation=None`` is passed.
    titles : sequence, optional
        One title per image.
    """
    if type(ims[0]) is np.ndarray:
        ims = np.array(ims).astype(np.uint8)
        if ims.shape[-1] != 3:
            ims = ims.transpose((0, 2, 3, 1))
    f = plt.figure(figsize=figsize)
    n_images = len(ims)
    # Ceiling division: enough columns for every image. The previous test on
    # `len(ims) % 2` ignored `rows` and could allocate too few grid slots
    # (e.g. 10 images with rows=3 gave a 3x3 grid).
    cols = -(-n_images // rows)
    for i in range(n_images):
        sp = f.add_subplot(rows, cols, i+1)
        sp.axis('Off')
        if titles is not None:
            sp.set_title(titles[i], fontsize=16)
        sp.imshow(ims[i], interpolation=None if interp else 'none')
    # Lay out once after all subplots exist instead of on every iteration.
    plt.tight_layout()

In [None]:
# Print the training iterator object itself (its repr, not a batch).
print(train_batches)

In [None]:
# Pull one batch of images and labels from the training iterator.
imgs, labels = next(train_batches)

In [None]:
# VGG16 without its fully connected top, sized for 64x64 three-channel inputs.
vgg16_model = keras.applications.vgg16.VGG16(include_top=False, input_shape=(64,64,3))

In [None]:
# Print the layer-by-layer summary of the VGG16 base.
vgg16_model.summary()

In [None]:
# Show the class of the loaded base model.
type(vgg16_model)

In [None]:
# Rebuild the VGG16 base as a Sequential model, adding its layers one by one in order.
model = Sequential()
for layer in vgg16_model.layers:
    model.add(layer)

In [None]:
# Freeze every layer except the last NUM_TRAINABLE_LAYERS of the model as it stands at this point.
NUM_TRAINABLE_LAYERS = 10
for layer in model.layers[:-NUM_TRAINABLE_LAYERS]:
    layer.trainable = False

In [None]:
# Classifier head: flatten, two 512-unit ReLU layers, then a softmax with one
# unit per category.
head_layers = (
    Flatten(),
    Dense(512, activation='relu'),
    Dense(512, activation='relu'),
    Dense(len(categories), activation='softmax'),
)
for head_layer in head_layers:
    model.add(head_layer)

In [None]:
# Print the summary of the assembled model (base layers plus the added head).
model.summary()

In [None]:
# `learning_rate` replaces the deprecated `lr` keyword of the Adam optimizer.
model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train for 10 epochs, validating on the test iterator after each epoch.
# Model.fit accepts the directory iterators directly (fit_generator is
# deprecated). Step counts come from the iterators' own lengths instead of
# the previous fixed 1080/270, so each epoch covers every batch regardless
# of how many images were found.
history = model.fit(train_batches,
                    steps_per_epoch=len(train_batches),
                    validation_data=test_batches,
                    validation_steps=len(test_batches),
                    epochs=10,
                    verbose=2)

In [None]:
# Walk every file of every class folder in the source dataset and call
# map_uint16_to_uint8 on it.
# NOTE(review): map_uint16_to_uint8 is only defined in a later cell, so on a
# top-to-bottom run from a fresh kernel this cell raises NameError.
for category in LABELS:
    path=base_path+category+'/'
    all_files=os.listdir(path)
    for file in all_files:
        # NOTE(review): `file` is a bare file name from os.listdir, not an
        # image array, and the return value is discarded — presumably the
        # image should be loaded first and the result kept; confirm intent.
        map_uint16_to_uint8(file)

In [None]:
import numpy as np


def map_uint16_to_uint8(img, lower_bound=None, upper_bound=None):
    '''
    Map a 16-bit image through a lookup table to convert it to 8-bit.

    Parameters
    ----------
    img: numpy.ndarray[np.uint16]
        image that should be mapped
    lower_bound: int, optional
        lower bound of the range that should be mapped to ``[0, 255]``,
        value must be in the range ``[0, 65535]`` and smaller than `upper_bound`
        (defaults to ``numpy.min(img)``)
    upper_bound: int, optional
       upper bound of the range that should be mapped to ``[0, 255]``,
       value must be in the range ``[0, 65535]`` and larger than `lower_bound`
       (defaults to ``numpy.max(img)``)

    Returns
    -------
    numpy.ndarray[uint8]

    Raises
    ------
    ValueError
        if a bound lies outside ``[0, 65535]`` or `lower_bound` is not
        smaller than `upper_bound`
    '''
    # Test for None first: comparing None with an int raises TypeError, which
    # previously made the documented defaults unusable.
    if lower_bound is not None and not (0 <= lower_bound < 2**16):
        raise ValueError(
            '"lower_bound" must be in the range [0, 65535]')
    if upper_bound is not None and not (0 <= upper_bound < 2**16):
        raise ValueError(
            '"upper_bound" must be in the range [0, 65535]')
    if lower_bound is None:
        lower_bound = np.min(img)
    if upper_bound is None:
        upper_bound = np.max(img)
    # Work with plain Python ints so `2**16 - upper_bound` cannot overflow a
    # NumPy uint16 scalar.
    lower_bound = int(lower_bound)
    upper_bound = int(upper_bound)
    if lower_bound >= upper_bound:
        raise ValueError(
            '"lower_bound" must be smaller than "upper_bound"')
    # Lookup table with 2**16 entries: 0 below the range, a linear ramp to
    # 255 inside it, 255 from upper_bound upwards.
    lut = np.concatenate([
        np.zeros(lower_bound, dtype=np.uint16),
        np.linspace(0, 255, upper_bound - lower_bound).astype(np.uint16),
        np.ones(2**16 - upper_bound, dtype=np.uint16) * 255
    ])
    return lut[img].astype(np.uint8)


# Let's generate an example image (normally you would load the 16-bit image: cv2.imread(filename, cv2.IMREAD_UNCHANGED))
# The random values in [0, 1) are scaled by 2**16 and cast to uint16.
img = (np.random.random((100, 100)) * 2**16).astype(np.uint16)

# Convert it to 8-bit
# NOTE(review): no conversion call follows in this cell — presumably map_uint16_to_uint8(img) was intended; confirm.

In [None]:
# List the files copied into the Highway class folder of the training split.
t_path='./training/Highway'
import os
os.listdir(t_path)

In [None]:
# Count the entries in each class folder of the training split.
path = './training/'
len_categories = {}
for category in LABELS:
    # `path` ends with a slash, so plain concatenation yields the folder path.
    len_categories[category] = len(os.listdir(path + category))
total = sum(len_categories.values())
print(len_categories)

In [None]:
# Count the entries in each class folder under `path`.
# NOTE(review): this cell is identical to the previous one — presumably it was
# meant to count './testing/' instead; confirm or delete.
path='./training/'
total = 0
len_categories = {}
for category in LABELS:
    len_files = len(os.listdir(path + category))
    total += len_files
    len_categories[category] = len_files
print(len_categories)

In [None]:
import os
from tqdm import tqdm


def _load_rgb_split(split_dir):
    """Load every raster under ``split_dir`` as a normalised RGB array.

    Each sub-folder of ``split_dir`` is treated as one class. Bands 4, 3 and
    2 of every file are min-max normalised and stacked into one array.

    Parameters
    ----------
    split_dir : str
        Folder containing one sub-folder per class.

    Returns
    -------
    (list, list)
        The stacked images and, in the same order, the index of each image's
        class in ``categories``.
    """
    images, labels = [], []
    for folder in os.listdir(split_dir):
        # Resolve the label before reading anything, so an unknown folder
        # can never add an image without a matching label.
        if folder not in categories:
            print("Skipping folder not listed in categories:", folder)
            continue
        label = categories.index(folder)
        folder_path = os.path.join(split_dir, folder)
        for file in tqdm(os.listdir(folder_path)):
            # The context manager closes the dataset after the bands are read.
            with rasterio.open(os.path.join(folder_path, file)) as src:
                bands = [normalize(src.read(band)) for band in (4, 3, 2)]
            images.append(np.dstack(bands))
            labels.append(label)
    return images, labels


train_X, train_Y = _load_rgb_split("./training/")
test_X, test_Y = _load_rgb_split("./testing/")