In [105]:
import numpy as np
import pandas as pd
import geopandas as gp
import os
import ee
import matplotlib
import matplotlib.pyplot as plt
import datetime
import warnings

from scipy import stats

import sklearn
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.tree import export_graphviz

from keras.models import Sequential
from keras.layers import Dense

warnings.filterwarnings('ignore')

# Get the data

In [106]:
# Locate the input CSV in the parent of the current working directory.
cwd = os.getcwd()
up1 = os.path.split(cwd)[0]

# os.listdir returns entries in arbitrary order, so sort the matches before
# picking one; otherwise the chosen file can differ between machines or runs.
csv_paths = sorted(os.path.join(up1, x) for x in os.listdir(up1) if x.endswith('.csv'))
if not csv_paths:
    # Fail with a readable message instead of an IndexError on an empty list.
    raise FileNotFoundError("No .csv file found in {}".format(up1))
data = csv_paths[0]

# Define NN model

In [122]:
def baseline_model(X_train):
    """Build and compile a small fully connected network with a sigmoid output.

    The network stacks four ReLU Dense layers (28, 14, 7 and 3 units) followed
    by a single sigmoid unit, and is compiled with binary cross-entropy loss,
    the Adam optimizer and MSE as a reported metric.

    Parameters
    ----------
    X_train : array-like
        Training feature matrix. When it exposes a ``shape`` with at least two
        dimensions, the number of columns sets the width of the input layer.
        Otherwise the width falls back to 56, the value that was previously
        hard-coded here.

    Returns
    -------
    The compiled (untrained) ``Sequential`` model.
    """
    # Derive the input width from the data rather than hard-coding it, so the
    # model keeps working when feature columns are added or removed.
    shape = getattr(X_train, 'shape', None)
    n_features = shape[1] if shape is not None and len(shape) > 1 else 56

    model = Sequential()
    model.add(Dense(28, input_dim=n_features, activation='relu'))
    model.add(Dense(14, activation='relu'))
    model.add(Dense(7, activation='relu'))
    model.add(Dense(3, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['mse'])

    return model

# Set up the data for the NN

In [123]:
# Read the CSV located earlier in the notebook.
df = pd.read_csv(data)

# Target vector: the 'dry' column.
labels = np.array(df['dry'])

# Every remaining column except 'name' is a model input, rescaled with
# sklearn's MinMaxScaler.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics influence the scaling - confirm this is acceptable.
scaler = MinMaxScaler()
feature_frame = df.drop(columns=['dry', 'name'])
features = np.array(scaler.fit_transform(feature_frame))

In [124]:
# Hold out 25% of the samples for testing; the fixed random_state keeps the
# split repeatable.
split_parts = train_test_split(features, labels, test_size=0.25, random_state=10)
train_features, test_features, train_labels, test_labels = split_parts

# Array copies under the X / y names used by the network code below.
X_train, X_test = np.array(train_features), np.array(test_features)
y_train, y_test = np.array(train_labels), np.array(test_labels)

# Build the network, train it for 15 epochs (verbose=2), then predict on the
# held-out samples.
model = baseline_model(X_train)
model.fit(X_train, y_train, verbose=2, epochs=15)
predictions = model.predict(X_test)

Epoch 1/15
 - 2s - loss: 0.2559 - mean_squared_error: 0.0697
Epoch 2/15
 - 1s - loss: 0.2001 - mean_squared_error: 0.0512
Epoch 3/15
 - 1s - loss: 0.1973 - mean_squared_error: 0.0509
Epoch 4/15
 - 1s - loss: 0.1969 - mean_squared_error: 0.0508
Epoch 5/15
 - 1s - loss: 0.1969 - mean_squared_error: 0.0509
Epoch 6/15
 - 1s - loss: 0.1953 - mean_squared_error: 0.0506
Epoch 7/15
 - 1s - loss: 0.1950 - mean_squared_error: 0.0506
Epoch 8/15
 - 1s - loss: 0.1948 - mean_squared_error: 0.0507
Epoch 9/15
 - 1s - loss: 0.1947 - mean_squared_error: 0.0506
Epoch 10/15
 - 1s - loss: 0.1942 - mean_squared_error: 0.0506
Epoch 11/15
 - 1s - loss: 0.1935 - mean_squared_error: 0.0505
Epoch 12/15
 - 1s - loss: 0.1929 - mean_squared_error: 0.0504
Epoch 13/15
 - 1s - loss: 0.1925 - mean_squared_error: 0.0504
Epoch 14/15
 - 1s - loss: 0.1918 - mean_squared_error: 0.0502
Epoch 15/15
 - 1s - loss: 0.1921 - mean_squared_error: 0.0502


In [125]:
# Flatten the model output to a 1-D vector with one value per test sample.
predictions = predictions.reshape(predictions.shape[0])

# Binarize the predicted probabilities: values strictly above the 95th
# percentile become 1 (failure), everything else 0 (non-failure).
# The cutoff is computed once instead of once per element; it does not change
# while iterating. An empty prediction vector yields an empty list.
# NOTE(review): the cutoff comes from the test-set predictions themselves, so
# roughly 5% of samples are flagged regardless of model quality - confirm this
# is the intended decision rule.
cutoff = np.percentile(predictions, 95) if predictions.size else np.nan
preds = (predictions > cutoff).astype(int).tolist()

In [127]:
# Per-sample absolute percentage error between predicted and true labels.
# A true label of 0 makes the relative error undefined: dividing by it yields
# inf/nan and poisons the mean. For those samples the absolute error is used
# directly (denominator 1); samples with a non-zero label are unchanged.
preds_arr = np.asarray(preds)
denom = np.where(y_test == 0, 1, y_test)
mape = 100. * np.abs((preds_arr - y_test) / denom)
np.mean(mape)

4.4442017907840139

In [128]:
# Accuracy in percent: 100 minus the mean per-sample percentage error.
mean_pct_error = np.mean(mape)
accuracy = 100. - mean_pct_error

In [129]:
# Display the accuracy (in percent) computed in the previous cell.
accuracy

95.555798209215993