Forest Cover Classification

A deep learning model that predicts forest cover type from cartographic features.

In [2]:
#Importing necessary libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sweetviz as sv

import tensorflow as tf
from tensorflow import keras;
from keras import Sequential;
from keras import layers
from keras.callbacks import EarlyStopping

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Load the raw data; the path is relative to the notebook's working directory.
dataset = pd.read_csv('cover_data.csv')
print(dataset.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly: wrapping it in print() appended a stray "None" line.
dataset.info()

   Elevation  Aspect  Slope  Horizontal_Distance_To_Hydrology  \
0       2596      51      3                               258   
1       2590      56      2                               212   
2       2804     139      9                               268   
3       2785     155     18                               242   
4       2595      45      2                               153   

   Vertical_Distance_To_Hydrology  Horizontal_Distance_To_Roadways  \
0                               0                              510   
1                              -6                              390   
2                              65                             3180   
3                             118                             3090   
4                              -1                              391   

   Hillshade_9am  Hillshade_Noon  Hillshade_3pm  \
0            221             232            148   
1            220             235            151   
2            234             238   

Defining some helper functions to build and evaluate the model

In [None]:
def build_model(num_features, num_classes=8):
    '''
    Build and compile a small feed-forward classifier.

    Architecture: Input(num_features) -> Dense(64, relu) -> Dropout(0.3)
    -> Dense(32, relu) -> Dropout(0.3) -> Dense(num_classes, softmax).

    Parameters
    ----------
    num_features : int
        Number of input features (width of one input row).
    num_classes : int, optional
        Number of units in the softmax output layer. Defaults to 8, the
        value that was previously hard-coded.
        NOTE(review): presumably 8 so that integer labels 1..7 can be used
        directly as class indices (index 0 unused) -- confirm against the data.

    Returns
    -------
    A compiled keras Sequential model using the Adam optimizer and
    sparse categorical cross-entropy loss (integer class labels), tracking
    accuracy as a metric.
    '''
    classifier = Sequential()
    # Declare the input shape with an explicit Input layer rather than the
    # legacy `input_dim=` keyword on the first Dense layer.
    classifier.add(layers.Input(shape=(num_features,)))
    classifier.add(layers.Dense(64, activation='relu'))
    classifier.add(layers.Dropout(0.3))
    classifier.add(layers.Dense(32, activation='relu'))
    classifier.add(layers.Dropout(0.3))
    classifier.add(layers.Dense(num_classes, activation='softmax'))

    # Track accuracy so the training History contains the 'accuracy' and
    # 'val_accuracy' series that plot_history() reads; without it only the
    # loss is recorded and the accuracy plot raises KeyError.
    classifier.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

In [None]:
def plot_history(history, param):
    '''
    Plot one training metric, and its validation counterpart, per epoch.

    Parameters
    ----------
    history : object exposing a ``history`` dict
        Maps metric names to per-epoch value sequences (for example the
        object returned by ``model.fit``).
    param : str
        Name of the metric to plot, e.g. 'accuracy' or 'loss'. Any key
        present in ``history.history`` is accepted.

    Raises
    ------
    KeyError
        If ``param`` was not recorded in ``history.history``. Nothing is
        plotted in that case.

    Notes
    -----
    The validation curve (``'val_' + param``) is drawn only when it was
    recorded, so histories from training without validation data still plot.
    '''
    recorded = history.history
    # Validate before touching pyplot so a bad name never leaves a
    # half-drawn figure behind (and never passes silently).
    if param not in recorded:
        raise KeyError(
            f"metric {param!r} was not recorded; available: {sorted(recorded)}"
        )

    legend_labels = ['train']
    plt.plot(recorded[param])

    val_key = 'val_' + param
    if val_key in recorded:
        plt.plot(recorded[val_key])
        legend_labels.append('val')

    plt.title('model ' + param)
    plt.ylabel(param)
    plt.xlabel('epoch')
    # Keep the original placement: loss legend top-right, others top-left.
    legend_loc = 'upper right' if param == 'loss' else 'upper left'
    plt.legend(legend_labels, loc=legend_loc)
    plt.show()

In [None]:
def plot_heatmap(class_names, y_pred, y_test):
    '''
    Computes a confusion matrix an plots a heatmap based on the matrix
    '''
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(15,15))
    heatmap = sns.heatmap(cm, fmt='g', cmap='Blues', annot=True, ax=ax)
    ax.set_xlabel('Predicted Class')
    ax.set_ylabel('True Class')
    ax.set_title('Confusion Matrix')
    ax.xaxis.set_ticklabels(class_names)
    ax.yaxis.set_ticklabels(class_names)
    