In [1]:
from keras.applications.resnet import ResNet50, preprocess_input
from keras.applications.resnet import ResNet101, preprocess_input
from keras.models import Model
from keras.preprocessing import image
from keras import models
from keras import layers
from keras import optimizers
from keras.models import model_from_json

Using TensorFlow backend.


In [2]:
from matplotlib import pyplot as plt
%matplotlib inline

import numpy as np
import pandas as pd
import sys
import os

from PIL import Image

from dotenv import load_dotenv

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [3]:
# Load variables from a local .env file (if one exists) into the process
# environment before reading them. `load_dotenv` is imported above but was
# never called, so GDRIVE_FOLDER was only visible when exported in the shell.
load_dotenv()
os.getenv('GDRIVE_FOLDER')

In [4]:
sys.path.append("../Library/")
import deep_learning as dl
import image_manipulation as ima
import machine_learning as ml

In [5]:
%load_ext autoreload
%autoreload 2

# Load Image Batch

### Define degraded resolutions and image sizes for later use

In [6]:
# Map each degraded resolution (base_res scaled by an integer factor and
# rounded to one decimal) to the (size, size) tuple obtained by dividing
# base_size by the same factor.
base_res, base_size = 0.3, 512
sizes = {}

for factor in range(2, 17):
    size = round(base_size / factor)
    res = round(base_res * factor, 1)
    sizes[res] = (size,) * 2

print("\nSizes dictionary:\n", sizes)


Sizes dictionary:
 {0.6: (256, 256), 0.9: (171, 171), 1.2: (128, 128), 1.5: (102, 102), 1.8: (85, 85), 2.1: (73, 73), 2.4: (64, 64), 2.7: (57, 57), 3.0: (51, 51), 3.3: (47, 47), 3.6: (43, 43), 3.9: (39, 39), 4.2: (37, 37), 4.5: (34, 34), 4.8: (32, 32)}


### Load Image Batch into DataFrame

In [7]:
# NOTE(review): both folders below are absolute paths on one developer's
# machine; consider deriving them from an environment variable or a config
# cell so the notebook runs elsewhere.
base_folder = "/Users/peterweber/Google Drive/MFP - Satellogic/images/usgs_512_res0.3m"
categories = ["forest-woodland", "agriculture", "shrubland-grassland", "semi-desert"]
data_folder_colab = "/Users/peterweber/Google Drive/Colab/MasterThesis/Data"
labels = [0, 1, 2]

# Collect one frame per (category, label) pair and concatenate a single time.
# The previous `df_images.append(df)` inside the loop re-copied the growing
# frame on every iteration, and DataFrame.append no longer exists in pandas 2.x.
# The list starts with an empty frame carrying the expected columns so the
# column order, and the result when nothing is loaded, stay as before.
frames = [pd.DataFrame(columns = ['filename', 'image', 'resolution', 'label', 'category'])]
for category in categories:
    for label in labels:
        frames.append(ima.load_images_into_df_by_category_and_label(base_folder, category, label))

# Like the original append, the index is not reset, so index values may repeat.
df_images = pd.concat(frames)

In [8]:
# Report the size of the loaded batch, then the number of rows per
# (category, label) pair and per label.
print("Shape of df_images:", df_images.shape)

counts_per_category_label = df_images.groupby(['category', 'label']).size().reset_index(name='counts')
print("Distribution of categories and labels:\n", counts_per_category_label)

counts_per_label = df_images.groupby(['label']).size().reset_index(name='counts')
print("\nDistribution of labels:\n", counts_per_label)

Shape of df_images: (2220, 5)
Distribution of categories and labels:
                category  label  counts
0           agriculture      0       1
1           agriculture      1       1
2           agriculture      2     417
3       forest-woodland      0     335
4       forest-woodland      1      44
5       forest-woodland      2     107
6           semi-desert      0     304
7           semi-desert      1      67
8           semi-desert      2     231
9   shrubland-grassland      0     415
10  shrubland-grassland      1      84
11  shrubland-grassland      2     214

Distribution of labels:
    label  counts
0      0    1055
1      1     196
2      2     969


### Extend the Images DataFrame with Degraded Images (currently disabled)

In [9]:
# Named switch replacing the former bare `if False:` guard. It stays off by
# default, so running the notebook top to bottom behaves as before; set it to
# True to add the frames returned by ima.load_degraded_images_into_df.
INCLUDE_DEGRADED_IMAGES = False

if INCLUDE_DEGRADED_IMAGES:

    df_degraded = ima.load_degraded_images_into_df(df_images, sizes, label = True)

    # pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
    df_images = pd.concat([df_images, df_degraded])
    print("Shape of df_images:", df_images.shape)
    # Drop the temporary frame; its rows now live in df_images.
    del df_degraded

### Generate the dataset (X, y) and save it for Colab

In [10]:
# Turn the image DataFrame into the arrays X and y using the project's
# deep-learning helper. The recorded run printed an image-array shape of
# (2220, 512, 512, 3) during this call — presumably the shape of X, with one
# entry per row of df_images; confirm against the helper.
X, y = dl.generate_X_y_from_df(df_images)

Shape of image array is: (2220, 512, 512, 3)


In [12]:
from random import seed, shuffle

# Seed the shuffle so the dataset written to disk has the same sample order
# on every run; the original shuffle was unseeded and therefore unrepeatable.
SHUFFLE_SEED = 42
seed(SHUFFLE_SEED)

random_order = list(range(len(X)))
shuffle(random_order)

# Shuffle the labels, then mark every sample whose label is not 1.
y = y[random_order]
not_label = y != 1

# Apply the permutation and the label filter to X in a single indexing step.
# This selects the same rows as X[random_order][not_label] but copies the
# image array once instead of twice. The explicit integer dtype keeps the
# index valid even when X is empty.
X = X[np.asarray(random_order, dtype=np.intp)[not_label]]
y = y[not_label]

# Re-encode the remaining labels with the project's zero_encoding mapping.
y = np.array(ml.convert_encoding(y, ml.zero_encoding))

In [135]:
# File names under which the image array and the label array are stored.
X_name, y_name = 'X_images_res0.3.npy', 'y_images_res0.3.npy'

# Write both arrays into the Colab data folder, X first and then y.
for target_name, payload in ((X_name, X), (y_name, y)):
    np.save(os.path.join(data_folder_colab, target_name), payload)