# Final Project: Deep Neutral Network
## Model 4: Predict Glacier Retreat with Tensor Flow's Keras

In [20]:
# Imports
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from pathlib import Path

---

Step 1: Prepare the data to be used on a neural network model

Note:  The raw data files have been cleaned and prepared for use in the

Jupyter Source File "Data_Cleaning_Glacier_Retreat".  The output for this file

is in the "resources" folder and includes data sets for each of the following:
1.  Environmental Parameters:  Global Temperature, Global Sea Rise, and Global CO2
        Filename:  env_parameters_1.csv

2.  Population, Economic, and Farming Parameters: World Population, Urban Population,
        investment, and cereal output production by acre.
        filename: pop_farm_parameters_2.csv

3.  Change in Temperature by Country:  Average delta T by country.
        filename: dT_Country_parameters_3.csv

4.  Change in Forestation by Country:  Percent change in forestation by country.
        filename: deforest_parameters_4.csv

---
### Fourth Model: Glacier Retreat with Change in Forestation by Country

In [21]:
# Review and load the dataset for Percent Change in Forestation by Country
file_path = "resources\deforest_parameters_4.csv"
df_parameters_1 = pd.read_csv(file_path)

# Review the DataFrame
df_parameters_1.head()

Unnamed: 0,year,glacier_retreat,AFE,AFW,AGO,ALB,ARB,ARE,ARG,ARM,...,USA,UZB,VCT,VEN,VGB,VIR,VNM,WLD,WSM,ZMB
0,1991,1,-0.151253,-0.118162,-0.124663,-0.071168,-0.018069,0.090721,-0.066723,0.0,...,0.011857,0.0,0.25641,-0.325945,-0.026667,-1.162857,0.739851,0.0,-0.170318,-0.048158
1,1992,1,-0.151253,-0.118162,-0.124663,-0.071168,-0.018074,0.090721,-0.066723,11.742606,...,0.011857,6.186621,0.25641,-0.325945,-0.026667,-1.162857,0.739851,32.511019,-0.170318,-0.048158
2,1993,0,-0.151253,-0.118162,-0.124663,-0.071168,-0.018069,0.090721,-0.066723,-0.007341,...,0.011857,0.096871,0.25641,-0.325945,-0.026667,-1.162857,0.739851,-0.056167,-0.170318,-0.048158
3,1994,1,-0.151253,-0.118162,-0.124663,-0.071168,-0.018069,0.090721,-0.066723,-0.007341,...,0.011857,0.096871,0.25641,-0.325945,-0.026667,-1.162857,0.739851,-0.057699,-0.170318,-0.048158
4,1995,0,-0.151253,-0.118162,-0.124663,-0.071168,-0.018069,0.090721,-0.066723,-0.007341,...,0.011857,0.096871,0.25641,-0.325945,-0.026667,-1.162857,0.739851,-0.059895,-0.170318,-0.048158


In [22]:
# Check the glacier retreat value counts
df_parameters_1["glacier_retreat"].value_counts()
# low quantity but evenly split

glacier_retreat
0    18
1    13
Name: count, dtype: int64

Step 2: Using the preprocessed data, create the features (`X`) and target (`y`) datasets. The target dataset should be defined by the preprocessed DataFrame column “glacier_retreat”. The remaining columns should define the features dataset.

In [23]:
# Define the target set y using the glacier_retreat column
# Remember that .values creates a numpy array
y = df_parameters_1["glacier_retreat"].values
y

array([1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 1], dtype=int64)

In [24]:
# Define features set X by selecting remaining columns, drop year and 
# glacier_retreat
X = df_parameters_1.drop(["year","glacier_retreat"],axis=1)

# Review the features DataFrame
display(X.head(3),X.tail(3))

Unnamed: 0,AFE,AFW,AGO,ALB,ARB,ARE,ARG,ARM,ASM,ATG,...,USA,UZB,VCT,VEN,VGB,VIR,VNM,WLD,WSM,ZMB
0,-0.151253,-0.118162,-0.124663,-0.071168,-0.018069,0.090721,-0.066723,0.0,-0.17,-0.15,...,0.011857,0.0,0.25641,-0.325945,-0.026667,-1.162857,0.739851,0.0,-0.170318,-0.048158
1,-0.151253,-0.118162,-0.124663,-0.071168,-0.018074,0.090721,-0.066723,11.742606,-0.17,-0.15,...,0.011857,6.186621,0.25641,-0.325945,-0.026667,-1.162857,0.739851,32.511019,-0.170318,-0.048158
2,-0.151253,-0.118162,-0.124663,-0.071168,-0.018069,0.090721,-0.066723,-0.007341,-0.17,-0.15,...,0.011857,0.096871,0.25641,-0.325945,-0.026667,-1.162857,0.739851,-0.056167,-0.170318,-0.048158


Unnamed: 0,AFE,AFW,AGO,ALB,ARB,ARE,ARG,ARM,ASM,ATG,...,USA,UZB,VCT,VEN,VGB,VIR,VNM,WLD,WSM,ZMB
28,-0.219886,-0.084885,-0.445231,0.0,-0.0178,0.0,-0.040195,-0.007376,-0.15,-0.159091,...,0.0,0.059253,0.0,-0.074554,0.0,0.428571,0.24216,-0.038881,-0.172662,-0.253191
29,-0.217373,-0.084437,-0.445223,0.0,-0.017726,0.0,-0.039464,-0.007376,-0.15,-0.136364,...,0.0,0.056848,0.0,-0.046596,0.0,0.428571,0.24216,-0.03685,-0.172662,-0.253178
30,-0.219059,-0.084741,-0.445229,0.0,-0.017774,0.0,-0.039951,-0.007376,-0.15,-0.151591,...,0.0,0.058583,0.0,-0.065234,0.0,0.428571,0.24216,0.000144,-0.172662,-0.253187


### Step 3: Split the features and target sets into training and testing datasets.


In [25]:
# Split the preprocessed data into a training and testing dataset
# First Pass: test_size=0.3  
# Third Pass: .2 Loss: 10.720776557922363, Accuracy: 0.2857142984867096
# Assign the function a random_state equal to 13
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=13)

### Step 4: Use scikit-learn's `StandardScaler` to scale the features data.

In [26]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the scaler to the features training dataset
X_scaler = scaler.fit(X_train)

# Scale the Data <<< added by student
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


---

## Compile and Evaluate a Model Using a Neural Network

### Step 1: Create a deep neural network by assigning the number of input features, the number of layers, and the number of neurons on each layer using Tensorflow’s Keras.



In [27]:
# Define the the number of inputs (features) to the model (old value was 11)
number_input_features = len(X_train.columns)
# Review the number of features
number_input_features

228

In [28]:
#Note: first pass effort 220 / 100 / 1 Loss: 300.2790222167969, Accuracy: 0.5 testsize .3
# Second Pass   100 / 50 / 1  Loss: 91.33601379394531, Accuracy: 0.6000000238418579
# Third Pass  200 / 25/ 1  Loss: 1818.3104248046875, Accuracy: 0.375


# Define the number of hidden nodes for the first hidden layer
hidden_nodes_layer1 =  100
# Define the number of hidden nodes for the second hidden layer
hidden_nodes_layer2 = 50
# Define the number of neurons in the output layer
nn_output_layer = 1

In [29]:
# Create the Sequential model instance
nn = tf.keras.models.Sequential()

# Add the first hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1,
                             input_dim=number_input_features,activation="relu"))

# Add the second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Add the output layer to the model specifying the number of output neurons
# and activation function
nn.add(tf.keras.layers.Dense(units=nn_output_layer, activation="sigmoid"))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [30]:
# Display the Sequential model summary
nn.summary()

### Step 2: Compile and fit the model using the `binary_crossentropy` loss function, the `adam` optimizer, and the `accuracy` evaluation metric.


In [31]:
# Compile the Sequential model
nn.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

In [32]:
# Fit the model using xxx epochs and the training data
# Note: Varied results with 50 to 200 Epochs
# 
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 678ms/step - accuracy: 0.5714 - loss: 0.6769
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.6190 - loss: 0.5623
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.7619 - loss: 0.4901
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8571 - loss: 0.4397
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.9048 - loss: 0.3999
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.9524 - loss: 0.3665
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.9524 - loss: 0.3373
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 1.0000 - loss: 0.3134
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

Step 3: Evaluate the model using the test data to determine the model’s loss and accuracy.


In [33]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)

# Display the model loss and accuracy results
# Note:  
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1/1 - 0s - 100ms/step - accuracy: 0.6000 - loss: 91.3360
Loss: 91.33601379394531, Accuracy: 0.6000000238418579


Note:  This model does not provide much assistance in predicting outcome.

Future work:  The glacier retreat response could be slow to respond.  Try
shifting the data.

Future work:  Vary the X data, removing some columns and leaving others




Step 4: Save and export the model to the file '`model_04.keras`.

Second Pass:  'model_04a.keras', Third Pass" 'model_04b.keras' etc.



In [34]:
# Set the model's file path
file_path = Path('saved_models/model_4a.keras')

# Export your model to a keras file
nn.save(file_path)

---
### Predict Glacier Retreat by Using your Neural Network Model

Step 1: Reload the saved model.

In [35]:
# Set the model's file path
file_path = Path('saved_models/model_4a.keras')

# Load the model to a new object
nn = tf.keras.models.load_model(file_path)

Step 2: Make predictions on the testing data and save the predictions to a DataFrame.

In [36]:
# Make predictions with the test data
predictions = nn.predict(X_test_scaled,verbose=2)

# Display a sample of the predictions
predictions[0:5]

1/1 - 0s - 41ms/step


array([[1.4659476e-01],
       [2.7745934e-11],
       [3.8412756e-01],
       [0.0000000e+00],
       [6.8043137e-01]], dtype=float32)

In [37]:
# Save the predictions to a DataFrame and round the predictions to binary results
predictions_df = pd.DataFrame(columns=["predictions"], data=predictions)
predictions_df["predictions"] = round(predictions_df["predictions"],0)
predictions_df

Unnamed: 0,predictions
0,0.0
1,0.0
2,0.0
3,0.0
4,1.0
5,1.0
6,1.0
7,0.0
8,0.0
9,0.0


### Step 4: Display a classification report with the y test data and predictions

In [38]:
# Print the classification report with the y test data and predictions
print(classification_report(y_test, predictions_df["predictions"].values))

              precision    recall  f1-score   support

           0       0.57      0.80      0.67         5
           1       0.67      0.40      0.50         5

    accuracy                           0.60        10
   macro avg       0.62      0.60      0.58        10
weighted avg       0.62      0.60      0.58        10

