In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import copy
from environmental_raster_glc import PatchExtractor
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import StandardScaler

In [39]:
# CNN classifier over 8x8 inputs with 13 channels. The final softmax layer
# emits one probability per output class (1348 of them), and the
# 'categorical_crossentropy' loss is paired with one-hot encoded targets.
# NOTE(review): the summary output stored with this cell lists Dense layers of
# 2000 units, which does not match the 256-unit layers defined here -- the
# output looks stale; re-run the cell to refresh it.
model = Sequential([
    # Two stacked 2x2 convolutions followed by an overlapping (stride 1) max-pool.
    Conv2D(128, kernel_size=2, input_shape=(8, 8, 13), activation='relu'),
    Conv2D(128, kernel_size=2, activation='relu'),
    MaxPooling2D(pool_size=2, strides=1),
    # Narrower convolution followed by a down-sampling (stride 2) max-pool.
    Conv2D(64, kernel_size=2, activation='relu'),
    MaxPooling2D(pool_size=2, strides=2),
    # Fully connected classification head.
    Flatten(),
    Dense(units=256, activation='relu'),
    Dense(units=256, activation='relu'),
    Dense(units=1348, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 7, 7, 128)         6784      
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 6, 6, 128)         65664     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 5, 5, 128)         0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 4, 4, 64)          32832     
_________________________________________________________________
flatten_2 (Flatten)          (None, 1024)              0         
_________________________________________________________________
dense_10 (Dense)             (None, 2000)              2050000   
_________________________________________________________________
dense_11 (Dense)             (None, 2000)              4002000   
__________

In [25]:
def fit_and_predict(X_train, y_train, Xtest, y_test, epochs=1, batch_size=2000):
    """Train the notebook-level ``model`` and print its score on held-out data.

    The model is read from the enclosing module/notebook namespace (it is not
    rebound here, so no ``global`` declaration is needed).

    Parameters
    ----------
    X_train, y_train
        Training inputs and targets, forwarded to ``model.fit``.
    Xtest, y_test
        Held-out inputs and targets, forwarded to ``model.evaluate``.
    epochs : int, default 1
        Number of training epochs.
    batch_size : int, default 2000
        Mini-batch size used for training.

    Returns
    -------
    None
        The loss and the accuracy (as a percentage) are printed.
    """
    model.fit(x=X_train, y=y_train, epochs=epochs, batch_size=batch_size)
    # Evaluate on the *argument*; the previous body referenced ``X_test``,
    # which is not a parameter, so it silently picked up whatever global of
    # that name happened to exist (or raised NameError on a fresh kernel).
    loss, accuracy = model.evaluate(x=Xtest, y=y_test)
    print('Loss: {} Accuracy: {}'.format(loss, accuracy * 100))
    
class GeoLifeClefDataset:
    """Index-addressable pairing of extractor lookups with their labels.

    ``dataset[idx]`` is used as the key into ``extractor``; the value found
    there is returned together with ``labels[idx]``.
    """

    def __init__(self, extractor, dataset, labels):
        """Store the extractor, the sequence of lookup keys and the labels."""
        self.extractor, self.dataset, self.labels = extractor, dataset, labels

    def __len__(self):
        """Return the number of labels (not the number of lookup keys)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(extractor[dataset[idx]], labels[idx])``."""
        key = self.dataset[idx]
        return self.extractor[key], self.labels[idx]

In [20]:
# Patch extractor configured with size=8; the rasters are registered in the
# order listed, which matches the 13 input channels the model is built for.
patch_extractor = PatchExtractor('../rasters GLC19', size=8, verbose=True)
RASTER_NAMES = [
    "alti", "awc_top", "bs_top",
    "chbio_1", "chbio_10", "chbio_11", "chbio_17", "chbio_18", "chbio_19",
    "chbio_2", "chbio_3",
    "erodi", "etp",
]
for raster_name in RASTER_NAMES:
    patch_extractor.append(raster_name)

# dataset
df = pd.read_csv("../PL_trusted.csv", sep=';')
classes = set(df['glc19SpId'])
# Replace the species-id column with one indicator column per distinct id,
# appended after the remaining original columns.
df = pd.concat([df.drop('glc19SpId', axis=1), pd.get_dummies(df['glc19SpId'], dtype=int)], axis=1)
dataset_list = list(zip(df["Latitude"], df["Longitude"]))
# NOTE(review): the slice assumes exactly 10 non-label columns remain in front
# of the indicator columns -- confirm against the CSV schema.
labels_list = (df.iloc[:, 10:]).values

# Sequential split: the first N_TRAIN rows train the model, the rest evaluate it.
# NOTE(review): rows are not shuffled before splitting -- confirm the CSV order
# does not bias the hold-out set.
N_TRAIN = 230000
train_ds = GeoLifeClefDataset(patch_extractor, dataset_list[:N_TRAIN], labels_list[:N_TRAIN])
test_ds = GeoLifeClefDataset(patch_extractor, dataset_list[N_TRAIN:], labels_list[N_TRAIN:])
datasets = {"train": train_ds, "val": test_ds}

# Materialise every (patch, label) pair. Each sample is indexed exactly once:
# indexing the dataset triggers an extractor lookup, so fetching the patch and
# the label through two separate ``ds[i]`` calls would double the extraction work.
X_train, y_train, X_test, y_test = [], [], [], []
for i in range(len(train_ds)):
    patch, label = train_ds[i]
    X_train.append(patch)
    y_train.append(label)
for i in range(len(test_ds)):
    patch, label = test_ds[i]
    X_test.append(patch)
    y_test.append(label)

Adding: alti
Adding: awc_top
Adding: bs_top
Adding: chbio_1
Adding: chbio_10
Adding: chbio_11
Adding: chbio_17
Adding: chbio_18
Adding: chbio_19
Adding: chbio_2
Adding: chbio_3
Adding: erodi
Adding: etp


In [21]:
# Stack the per-sample lists into arrays.
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

# Give the inputs the (samples, 8, 8, 13) shape declared as the model's input.
# NOTE(review): reshape only reinterprets the flat element order; it does not
# move axes. If the extractor yields channel-first patches (13, 8, 8), this
# mixes raster channels with spatial positions and an axis move
# (e.g. np.moveaxis(X, 1, -1)) is needed instead -- confirm the extractor's
# output layout, and keep the prediction cell consistent with this one.
X_train = X_train.reshape((len(X_train), 8, 8, 13))
X_test = X_test.reshape((len(X_test), 8, 8, 13))

In [26]:
# Train the model, print the hold-out loss/accuracy, then persist the trained
# model to disk.
MODEL_PATH = 'CNN_Model.h5'
fit_and_predict(X_train, y_train, X_test, y_test)
model.save(MODEL_PATH)

Loss: 15.868909969239327 Accuracy: 1.545997243374586


In [28]:
class GeoLifeClefDatasetP:
    """Label-free, index-addressable view over extractor lookups.

    ``dataset[idx]`` is used as the key into ``extractor`` and the value found
    there is returned on its own (no label is attached).
    """

    def __init__(self, extractor, dataset):
        """Store the extractor and the sequence of lookup keys."""
        self.extractor, self.dataset = extractor, dataset

    def __len__(self):
        """Return the number of lookup keys."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``extractor[dataset[idx]]``."""
        key = self.dataset[idx]
        return self.extractor[key]
# Read the test-set locations (this rebinds ``df`` and ``dataset_list``) and
# extract one patch per (Latitude, Longitude) pair.
df = pd.read_csv("../testSet.csv", sep=';')
dataset_list = list(zip(df["Latitude"], df["Longitude"]))
pred_ds = GeoLifeClefDatasetP(patch_extractor, dataset_list)
X_pred = np.array([pred_ds[i] for i in range(len(pred_ds))])
# Same (samples, 8, 8, 13) shaping as applied to the training inputs.
# NOTE(review): reshape does not reorder axes -- if the extractor output is
# channel-first this scrambles the patches; confirm the layout and change it
# together with the training-side reshape.
X_pred = X_pred.reshape((len(X_pred), 8, 8, 13))
# One row of softmax outputs per test location.
pred = model.predict(X_pred)

In [29]:
# Sanity check: every prediction row is a softmax output and should sum to
# about 1, so the grand total should be close to the number of predicted rows.
sum(sum(row) for row in pred)

25000.0

In [30]:
# Inspect the softmax output row predicted for the first test location.
# NOTE(review): if the displayed entries are all 0 the softmax has probably
# saturated onto very few classes -- worth checking input scaling.
pred[0]

array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)