Forest Cover Classification

A deep learning model that predicts forest cover type from cartographic features.

In [4]:
#Importing necessary libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sweetviz as sv

import tensorflow as tf
from tensorflow import keras;
from keras import Sequential;
from keras import layers
from keras.callbacks import EarlyStopping

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

  from .autonotebook import tqdm as notebook_tqdm


First, we define some helper functions to build the model and visualize its performance.

In [5]:
def build_model(num_features, num_classes=8):
    '''
    Build and compile a small feed-forward classifier.

    Parameters
    ----------
    num_features : int
        Number of input features fed to the first Dense layer.
    num_classes : int, optional
        Number of units in the softmax output layer. Defaults to 8, the
        width this notebook has always used.
        NOTE(review): main() reports 7 class names, so 8 units presumably
        leaves room for 1-based integer labels (1..7) under sparse
        categorical cross-entropy -- confirm against the dataset.

    Returns
    -------
    A compiled keras Sequential model that tracks accuracy alongside the loss.
    '''
    classifier = Sequential()
    classifier.add(layers.Dense(64, input_dim=num_features, activation='relu'))
    classifier.add(layers.Dropout(0.3))
    classifier.add(layers.Dense(32, activation='relu'))
    classifier.add(layers.Dropout(0.3))
    classifier.add(layers.Dense(num_classes, activation='softmax'))

    # Accuracy has to be requested explicitly. Without it the training history
    # has no 'accuracy'/'val_accuracy' entries (which the early-stopping
    # callback and plot_history rely on) and evaluate() returns only the loss.
    classifier.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

In [6]:
def plot_history(history, param):
    '''
    Draw the training and validation curves of one tracked quantity.

    `param` picks the quantity: 'accuracy' or 'loss'. For any other
    value nothing is drawn and the function simply returns.
    '''
    # Resolve the quantity to plot and where its legend goes; bail out early
    # for anything that is not one of the two supported choices.
    if param == 'accuracy':
        metric, legend_loc = 'accuracy', 'upper left'
    elif param == 'loss':
        metric, legend_loc = 'loss', 'upper right'
    else:
        return

    recorded = history.history
    plt.plot(recorded[metric])
    plt.plot(recorded['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc=legend_loc)
    plt.show()

In [None]:
def plot_heatmap(class_names, y_pred, y_test):
    '''
    Compute the confusion matrix of y_test against y_pred and draw it as
    an annotated heatmap whose ticks are labelled with class_names.
    '''
    matrix = confusion_matrix(y_test, y_pred)
    _, axes = plt.subplots(figsize=(15, 15))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='g',
        cmap='Blues',
        ax=axes,
    )

    # Columns of the matrix are predictions, rows are the true classes.
    axes.set_xlabel('Predicted Class')
    axes.set_ylabel('True Class')
    axes.set_title('Confusion Matrix')
    for axis in (axes.xaxis, axes.yaxis):
        axis.set_ticklabels(class_names)
    

With the helper functions defined, we can analyze the data, then build and evaluate the model.

In [7]:
def main():
    '''
    Run the end-to-end workflow: load the data, generate an EDA report,
    train the classifier and report its performance on a held-out test set.

    Reads 'cover_data.csv' from the working directory. The last column is
    used as the label and every other column as a feature.
    '''
    seed = 42
    # Seed TensorFlow as well as the train/test split so that repeated runs
    # are more comparable.
    tf.random.set_seed(seed)

    dataset = pd.read_csv('cover_data.csv')

    #EDA
    report = sv.analyze(dataset)
    report.show_html()

    raw_data = dataset.values
    X, y = raw_data[:, :-1], raw_data[:, -1]

    # Labels actually present in the data, in sorted order; used below so the
    # report rows line up with class_names.
    class_labels = np.unique(y)

    # Split into train and test set
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=seed, stratify=y
    )

    # normalize data (statistics are fitted on the training split only)
    scaler = StandardScaler()
    X_train_normalized = scaler.fit_transform(X_train)
    X_test_normalized = scaler.transform(X_test)

    #building model
    num_features = X.shape[1]
    model = build_model(num_features)

    #model summary
    print("Model Summary:")
    model.summary()

    num_epochs = 100
    batch_size = 1024
    earlystop_callback = EarlyStopping(monitor='val_accuracy', min_delta=0.0001, patience=3)
    history = model.fit(
        X_train_normalized,
        y_train,
        epochs=num_epochs,
        batch_size=batch_size,
        callbacks=[earlystop_callback],
        validation_split=0.1,
        verbose=1,
    )

    plot_history(history, 'accuracy')
    plot_history(history, 'loss')

    score = model.evaluate(X_test_normalized, y_test, verbose=0)
    print(f'Test loss: {score[0]}')
    print(f'Test accuracy: {score[1]}')

    y_pred = model.predict(X_test_normalized)

    # Collapse the per-class probabilities to the index of the most likely class.
    y_pred = np.argmax(y_pred, axis=1)
    class_names = ['Spruce/Fir', 'Lodgepole Pine',
                   'Ponderosa Pine', 'Cottonwood/Willow',
                   'Aspen', 'Douglas-fir', 'Krummholz']
    # Pin the report to the labels found in the data. The network has more
    # output units than there are class names, so a single stray prediction
    # outside those labels would otherwise make classification_report reject
    # target_names for having the wrong length.
    print(classification_report(y_test, y_pred, labels=class_labels, target_names=class_names))
    plot_heatmap(class_names, y_pred, y_test)

In [8]:
# Entry point: run the full pipeline when this code is executed directly
# rather than imported as a module.
if __name__ == '__main__':
    main()

  all_source_names = [cur_name for cur_name, cur_series in source_df.iteritems()]
  filtered_series_names_in_source = [cur_name for cur_name, cur_series in source_df.iteritems()
  stats["mad"] = series.mad()
  stats["mad"] = series.mad()
  stats["mad"] = series.mad()
  stats["mad"] = series.mad()
  stats["mad"] = series.mad()
  stats["mad"] = series.mad()
  stats["mad"] = series.mad()
  stats["mad"] = series.mad()
  stats["mad"] = series.mad()
  stats["mad"] = series.mad()
  for item in category_counts.iteritems():
  for item in category_counts.iteritems():
  for item in category_counts.iteritems():
  for item in category_counts.iteritems():
  for item in category_counts.iteritems():
  for item in category_counts.iteritems():
  for item in category_counts.iteritems():
  for item in category_counts.iteritems():
  for item in category_counts.iteritems():
  for item in category_counts.iteritems():
  for item in category_counts.iteritems():
  for item in category_counts.iteritems():
  for 