In [1]:
# Make IPython display the value of every top-level expression statement in a
# cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
from __future__ import print_function

import os

import matplotlib.pyplot as plt
import rasterio
from rasterio.plot import show
import numpy as np
import pandas as pd
import time


# The deep-learning stack is imported defensively so the data-preparation cells
# can still run on a machine without Keras/TensorFlow/imbalanced-learn.
try:
    import keras
    from imblearn.keras import balanced_batch_generator
    from imblearn.under_sampling import NearMiss
    import keras.backend as K
    from keras.models import Sequential
    from keras.models import model_from_json
    from keras.layers import Dense, Dropout, Activation
    from keras.optimizers import RMSprop
    from keras.optimizers import Adam
    from keras.optimizers import Adagrad
    from keras.optimizers import SGD
    from keras.callbacks import LambdaCallback, ReduceLROnPlateau, ModelCheckpoint
    from keras.layers.core import Lambda
    from keras.losses import categorical_crossentropy
    import tensorflow as tf
    from keras import regularizers

except ImportError as import_error:
    # Only a missing or unloadable package is tolerated here. Any other error
    # raised while importing is a real problem and is allowed to propagate
    # instead of being reported as "Keras not found".
    print("Keras not found")
    print("  reason:", import_error)
    
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(42)


def my_basename(path):
    """Return the last component of ``path`` with its final extension removed.

    Only the last extension is stripped (``a.tar.gz`` -> ``a.tar``); a path
    that ends in a separator yields an empty string.
    """
    _, file_name = os.path.split(path)
    stem, _ = os.path.splitext(file_name)
    return stem

# Project root that every data/result path in this notebook is built from.
# The default is the original hard-coded checkout location; set the
# TRAIT_GEO_DIVERSE_DIR environment variable to run the notebook from another
# machine or directory without editing the code.
file_dir=os.environ.get(
    'TRAIT_GEO_DIVERSE_DIR',
    r'C:/Users/Mark.Rademaker/PycharmProjects/InternshipNaturalis/trait-geo-diverse-dl/concept proof',
)

Using TensorFlow backend.


#### Trial alternative dataset construction for faster execution

Create the input dataset for the entire globe

In [3]:
# Recent GDAL builds only expose the bindings as `osgeo.gdal`; older installs
# still provide the top-level `gdal` module, so fall back to it.
try:
    from osgeo import gdal
except ImportError:
    import gdal

##opening raster as 3d numpy array
stack_path=file_dir+'/data/GIS/env_stacked/ENVIREM_BIOCLIM_stacked.tif'
inRas=gdal.Open(stack_path)
# gdal.Open returns None instead of raising when the file cannot be opened
# (unless gdal.UseExceptions() is active); fail here with a clear message
# rather than with an AttributeError on the next line.
if inRas is None:
    raise IOError("GDAL could not open raster: %s" % stack_path)
myarray=inRas.ReadAsArray()
print(myarray.shape)
print(type(myarray))


(41, 1800, 4320)
<class 'numpy.ndarray'>


In [None]:
# Translate the longitude/latitude of every location in the CSV into the
# (row, col) pixel position it falls on in the stacked raster.
df = pd.read_csv(file_dir+'/data/GIS/world_locations_to_predict.csv')
len_pd = np.arange(len(df))
print(len_pd)
lon = df["decimal_longitude"].values
lat = df["decimal_latitude"].values

src = rasterio.open(file_dir+'/data/GIS/env_stacked/ENVIREM_BIOCLIM_stacked.tif')

row, col = [], []
for cell_lon, cell_lat in zip(lon, lat):
    # spatial --> image coordinates
    cell_row, cell_col = src.index(cell_lon, cell_lat)
    row.append(cell_row)
    col.append(cell_col)


In [None]:
#collect file with mean and std_dev for each band
# Column 1 is used as the per-band offset and column 2 as the per-band divisor.
mean_std=pd.read_csv(file_dir+'/data/GIS/env_bio_mean_std.txt',sep="\t")
mean_std=mean_std.to_numpy()


#################################
#extract the values for all bands and prepare input data
#################################
# Number of stacked bands used as features (the stack read above has 41).
n_bands=41

# Cast to integer arrays once. `row`/`col` start as lists of ints, but this
# cell (and its step-by-step twin further down) rebinds them, so casting keeps
# the cell re-runnable and allows whole-band NumPy indexing.
cell_rows=np.asarray(row).astype(int)
cell_cols=np.asarray(col).astype(int)

# scaled[j, i] = standardised value of band j at location i, NaN where the raw
# value is below -1000 (treated as "no data").
scaled=np.empty((n_bands,len(cell_rows)),dtype=np.float64)
for j in range(n_bands):
    # One indexed read per band replaces the per-pixel Python loop.
    raw=myarray[j][cell_rows,cell_cols].astype(np.float64)
    scaled[j]=np.where(raw<-1000,np.nan,(raw-mean_std.item((j,1)))/mean_std.item((j,2)))

# Keep only locations that have a value in every band.
keep=~np.isnan(scaled).any(axis=0)

# Feature matrix: one row per kept location, one column per band.
input_X=np.ascontiguousarray(scaled[:,keep].T)

# Pixel positions aligned 1:1 with the rows of input_X (integer arrays).
row=cell_rows[keep]
col=cell_cols[keep]

#save
# The array holds environmental values only and does not depend on a species,
# and `spec` is not defined yet when this cell runs top-to-bottom, so it is
# written to the species-independent file that the later save cell also uses.
# np.save returns None, so its result is not bound to a name.
np.save(file_dir+'/data/prediction_array.npy',input_X)


Apply each species-specific model to the global input dataset

In [None]:
# Species names are taken from the "Species" column of the tab-separated
# DNN evaluation table.
taxa = pd.read_csv(file_dir+"/results/DNN_eval.txt", sep="\t")["Species"]

In [None]:
# Work that is identical for every species is done once, before the loop.
# row/col may be float arrays after the NaN-filtering step, so cast them
# before using them as indices.
pixel_rows=np.asarray(row).astype(int)
pixel_cols=np.asarray(col).astype(int)

# Read the georeferencing profile of the stack once and close the dataset
# immediately; the output maps are single-band versions of it.
with rasterio.open(file_dir+'/data/GIS/env_stacked/ENVIREM_BIOCLIM_stacked.tif') as src:
    profile=src.profile
profile.update(count=1)

for species in taxa[:]:
    # time.time must be *called*; storing the function object made the final
    # subtraction raise TypeError after the first species.
    start=time.time()
    #set the species
    spec=species
    print("processing", spec)
    spec=spec.replace(" ","_")

    ################################################################################################
    ################################################################################################
    #   run the predictions
    ################################################################################################
    ################################################################################################

    # Use the per-species array when it exists; otherwise fall back to the
    # shared, species-independent array written by the save cells.
    prediction_array_path=file_dir+'/data/prediction_arrays/%s_prediction_array.npy'%spec
    if not os.path.exists(prediction_array_path):
        prediction_array_path=file_dir+'/data/prediction_array.npy'
    input_X=np.load(prediction_array_path)

    # Predictions are written back by position, so the array loaded from disk
    # must line up with the in-memory row/col arrays.
    if len(input_X)!=len(pixel_rows):
        raise ValueError(
            "%s has %d rows but there are %d row/col positions"
            % (prediction_array_path, len(input_X), len(pixel_rows))
        )

    #create copy of band to later subset values in
    # NOTE(review): pixels that are not overwritten below keep the values of
    # band index 1 of the stack -- confirm that is the intended background.
    new_band=myarray[1].copy()

    ### Load DNN model for the species and predict values:
    #load json and create model
    with open(file_dir+'/results/DNN_model/%s_model.json'%spec,'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)

    #load weights into new model
    loaded_model.load_weights(file_dir+'/results/DNN_model/%s_model.h5'%spec)
    #print('Loaded model from disk')

    #compile model
    loaded_model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])

    #predict values
    new_values = loaded_model.predict(x=input_X,batch_size=500,verbose=0) ###predict output value

    ##take the prob. of presence (second output column of every prediction)
    new_band_values=np.array(np.asarray(new_values)[:,1])

    ###############################
    ##subset into image##
    ###############################
    # Single indexed assignment instead of a per-pixel Python loop.
    new_band[pixel_rows,pixel_cols]=new_band_values

    #write to file
    with rasterio.open(file_dir+'/results/DNN_global_predictions/%s_predicted_map.tif'%spec, 'w', **profile) as dst:
        dst.write(new_band, 1)

    end=time.time()
    # Elapsed seconds for this species.
    print(end-start)


In [None]:
# Best-effort variant of the species loop: a failure for one species is
# reported and the loop moves on to the next one.

# row/col may be float arrays after the NaN-filtering step, so cast them
# before using them as indices.
pixel_rows=np.asarray(row).astype(int)
pixel_cols=np.asarray(col).astype(int)

# `taxa` is already the Series of species names (the "Species" column), so it
# is iterated directly; indexing it with "taxon" raises KeyError.
for species in taxa:
    try:
        print("process species %s"%species)
        spec=species
        spec=spec.replace(" ","_")

        # Use the per-species array when it exists; otherwise fall back to the
        # shared, species-independent array written by the save cells.
        prediction_array_path=file_dir+'/data/prediction_arrays/%s_prediction_array.npy'%spec
        if not os.path.exists(prediction_array_path):
            prediction_array_path=file_dir+'/data/prediction_array.npy'
        input_X=np.load(prediction_array_path)

        # Predictions are written back by position, so the loaded array must
        # line up with the in-memory row/col arrays.
        if len(input_X)!=len(pixel_rows):
            raise ValueError(
                "%s has %d rows but there are %d row/col positions"
                % (prediction_array_path, len(input_X), len(pixel_rows))
            )

        #create copy of band to later subset values in
        new_band=myarray[1].copy()

        ### Load DNN model for the species and predict values:
        #load json and create model
        with open(file_dir+'/results/DNN_model/%s_model.json'%spec,'r') as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)

        #load weights into new model
        loaded_model.load_weights(file_dir+'/results/DNN_model/%s_model.h5'%spec)
        print('Loaded model from disk')

        #compile model
        loaded_model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])

        #predict values
        new_values = loaded_model.predict(x=input_X,batch_size=500,verbose=0) ###predict output value

        ##take the prob. of presence (second output column of every prediction)
        new_band_values=np.array(np.asarray(new_values)[:,1])

        ###############################
        ##subset into image##
        ###############################
        # Single indexed assignment instead of a per-pixel Python loop.
        new_band[pixel_rows,pixel_cols]=new_band_values

        # Read the stack's profile and close the dataset straight away.
        with rasterio.open(file_dir+'/data/GIS/env_stacked/ENVIREM_BIOCLIM_stacked.tif') as src:
            profile=src.profile

        profile.update(count=1)

        #write to file
        with rasterio.open(file_dir+'/results/DNN_global_predictions/%s_predicted_map.tif'%spec, 'w', **profile) as dst:
            dst.write(new_band, 1)

    except Exception as error:
        # Keep going with the remaining species, but say which one failed and
        # why instead of discarding the error silently. KeyboardInterrupt is
        # deliberately not caught so the loop can still be stopped.
        print("skipping species %s: %r" % (species, error))

In [None]:
#extract the values for all bands
# X collects one array per band; each holds the standardised value of that
# band at every (row, col) location, with NaN where the raw value is < -1000.
X=[]

# Cast once: row/col are float arrays after the NaN-filtering cell has run,
# and floats are not valid array indices.
cell_rows=np.asarray(row).astype(int)
cell_cols=np.asarray(col).astype(int)

for j in range(0,41):
    band=myarray[j]
    # One indexed read per band replaces the per-pixel loop (and the unused
    # time.time() calls that ran on every pixel).
    raw=band[cell_rows,cell_cols].astype(np.float64)
    # column 1 of mean_std is subtracted, column 2 divides  #scale values
    x=np.where(raw<-1000,np.nan,(raw-mean_std.item((j,1)))/mean_std.item((j,2)))
    X.append(x)
    
   

In [None]:
# Add the pixel row and column indices as two extra entries after the bands.
# Running this cell again appends them a second time.
X.extend([row, col])

In [None]:
# Convert every entry of X to an array and stack them into one 2-D array
# (one row per band, followed by the row-index and col-index rows).
X = np.array(list(map(np.array, X)))

In [None]:
# One DataFrame row per location; drop every location with a NaN in any column.
df = pd.DataFrame(X).T.dropna(axis=0, how='any')

# Columns 0..40 are the band features, converted back to a NumPy array.
input_X = df.loc[:, 0:40].values

# Columns 41 and 42 hold the pixel row and column indices, also as arrays.
row = df[41].values
col = df[42].values

In [None]:
#save
# np.save returns None, so its result is no longer bound to a variable
# (`prediction_array` was always None).
prediction_array_path=file_dir+'/data/prediction_array.npy'
np.save(prediction_array_path,input_X)

# Read the file back and show its shape to confirm what was written.
pred=np.load(prediction_array_path)
pred.shape

In [None]:
# Show the shapes of the feature matrix and of the row/col index arrays.
# Each line is displayed separately because ast_node_interactivity is "all".
input_X.shape
row.shape
col.shape

In [None]:
#for species in ....:
# Single-species version of the prediction step. It relies on `spec`,
# `input_X`, `row` and `col` already being set by earlier cells.

#create copy of band to later subset values in
# NOTE(review): pixels that are not overwritten below keep the values of band
# index 1 of the stack -- confirm that is the intended background.
new_band=myarray[1].copy()
new_band.shape


### Load DNN model for the species and predict values:
#load json and create model
with open(file_dir+'/results/DNN_model/%s_model.json'%spec,'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

#load weights into new model
loaded_model.load_weights(file_dir+'/results/DNN_model/%s_model.h5'%spec)
print('Loaded model from disk')

#compile model
loaded_model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])

#predict values
new_values = loaded_model.predict(x=input_X,batch_size=500,verbose=0) ###predict output value

##take the prob. of presence (second output column of every prediction)
new_band_values=np.array(np.asarray(new_values)[:,1])


###############################
##subset into image##
###############################
# row/col may be float arrays, so cast before indexing; one indexed assignment
# replaces the per-pixel Python loop. Only the first len(row) predictions are
# used, as in the loop it replaces.
pixel_rows=np.asarray(row).astype(int)
pixel_cols=np.asarray(col).astype(int)
new_band[pixel_rows,pixel_cols]=new_band_values[:len(pixel_rows)]

# Read the stack's profile and close the dataset straight away.
with rasterio.open(file_dir+'/data/GIS/env_stacked/ENVIREM_BIOCLIM_stacked.tif') as src:
    profile=src.profile


profile.update(count=1)

#write to file
with rasterio.open(file_dir+'/results/DNN_global_predictions/%s_predicted_map.tif'%spec, 'w', **profile) as dst:
    dst.write(new_band, 1)


In [None]:
# Display the raw model output for inspection.
new_values

In [None]:
##take the prob. of presence (new_value.item((0,1)))
# Element 1 of every prediction is kept; the two lengths printed afterwards
# let the reader compare the number of predictions with the number of
# row positions.
new_band_values = np.array([prediction[1] for prediction in new_values])
print(len(new_band_values))
print(len(row))

In [None]:
# Quick look at the extracted values: the largest one, then a box plot of
# their distribution.
np.amax(new_band_values)
plt.boxplot(new_band_values)

In [None]:
##subset into image
# row/col may be float arrays, so cast before indexing. One indexed assignment
# replaces the per-pixel Python loop; only the first len(row) values are used,
# as in the loop it replaces.
pixel_rows=np.asarray(row).astype(int)
pixel_cols=np.asarray(col).astype(int)
new_band[pixel_rows,pixel_cols]=np.asarray(new_band_values)[:len(pixel_rows)]


In [None]:
# Copy the stack's profile and close the source dataset straight away
# (it was previously left open).
with rasterio.open(file_dir+'/data/GIS/env_stacked/ENVIREM_BIOCLIM_stacked.tif') as src:
    profile=src.profile
print(profile)

# The output holds a single band.
profile.update(count=1)

#write to file
with rasterio.open(file_dir+'/results/DNN_global_predictions/%s_predicted_map.tif'%spec, 'w', **profile) as dst:
    dst.write(new_band, 1)


In [None]:
# Open the written map for the current `spec` with Pillow and display it via
# Image.show(). Relies on `spec` having been set by an earlier cell.
from PIL import Image
with Image.open(file_dir+'/results/DNN_global_predictions/%s_predicted_map.tif'%spec) as img:
    img.show()

In [None]:
import matplotlib.pyplot as plt
from rasterio.mask import mask
from rasterio.plot import show
from rasterio.plot import plotting_extent
from PIL import Image

In [None]:
# Read band 1 of the written map as a masked array and close the dataset
# straight away (it was previously left open), then plot it.
with rasterio.open(file_dir+'/results/DNN_global_predictions/%s_predicted_map.tif'%spec) as clipped:
    array = clipped.read(1,masked=True)
plt.imshow(array,interpolation='none', vmin=0)
plt.show()

In [None]:
# NOTE(review): neither `Pool` nor `global_pred` is imported or defined in any
# cell visible in this notebook, so this cell raises NameError on a fresh
# kernel. `Pool` is presumably multiprocessing.Pool -- confirm.
# NOTE(review): the first argument to p.map is the *result* of calling
# global_pred.spec_img(...), not the function itself -- verify this is intended.
if __name__=='__main__':
    num_processors=100
    p=Pool(processes=num_processors)
    output=p.map(global_pred.spec_img("Alcelaphus buselaphus"),[i for i in range(0,100)])
    print(output)

In [None]:
# NOTE(review): `global_pred` and `len_b` are not defined in any cell visible
# in this notebook, so this call raises NameError on a fresh kernel. The call
# also passes six arguments, whereas the cell above passes a single species
# name to the same function -- confirm the intended signature.
output=global_pred.spec_img(len_pd,len_b,lon,lat,src,mean_std)