In [22]:
%matplotlib inline
import numpy as np 
import seaborn as sns
# for reproducibility
from numpy.random import seed
seed(42)
import tensorflow as tf
tf.random.set_seed(42)
rseed = np.random.RandomState(0)

import matplotlib.pyplot as plt      # MATLAB like plotting routines
import random                        # for generating random numbers
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split # some helper from scikit for data split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Activation, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


from sklearn.preprocessing import MinMaxScaler,StandardScaler



#get rid of annoying GPU warnings (and others)
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'


In [23]:
# Load the Covertype dataset
data = fetch_covtype()

# Extract the feature data (excluding target labels)
features = data.data

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the scaler to the features and transform the data
cov_type = scaler.fit_transform(features)

X_train, X_test, y_train, y_test = train_test_split(cov_type, data.target, test_size=0.2, random_state=rseed)

print(f"X_train's shape is {X_train.shape}")
print(f"y_train's shape is {y_train.shape}")
print(f"X_test's shape is {X_test.shape}")
print(f"y_test's shape is {y_test.shape}")



X_train's shape is (464809, 54)
y_train's shape is (464809,)
X_test's shape is (116203, 54)
y_test's shape is (116203,)


In [24]:
# I got tis information online about coverType
# I will include the files when i Submit

""" 10.	Class distribution:

           Number of records of Spruce-Fir:                211840 
           Number of records of Lodgepole Pine:            283301 
           Number of records of Ponderosa Pine:             35754 
           Number of records of Cottonwood/Willow:           2747 
           Number of records of Aspen:                       9493 
           Number of records of Douglas-fir:                17367 
           Number of records of Krummholz:                  20510  
           Number of records of other:                          0  
		
           Total records:                                  581012

===================================================================== 

"""

class_names = [
    "Spruce/Fir",
    "Lodgepole Pine",
    "Ponderosa Pine",
    "Cottonwood/Willow",
    "Aspen",
    "Douglas-fir",
    "Krummholz"
]


In [25]:
#since we will be using a sequencial, we will have to add a channel
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=rseed)

#One hot encoding the labels
y_train = tf.one_hot(y_train - 1, depth=7, dtype=tf.int32)
y_test = tf.one_hot(y_test - 1, depth=7, dtype=tf.int32)
y_val = tf.one_hot(y_val - 1, depth=7, dtype=tf.int32)

print("New shapes\n")
print("X_train shape", X_train.shape)
print("y_train shape", y_train.shape)
print("X_val shape", X_val.shape)
print("y_val shape", y_val.shape)
print("X_test shape", X_test.shape)
print("y_test shape", y_test.shape)




New shapes

X_train shape (371847, 54)
y_train shape (371847, 7)
X_val shape (92962, 54)
y_val shape (92962, 7)
X_test shape (116203, 54)
y_test shape (116203, 7)


In [26]:
# Define the model
model = Sequential()

# Input layer
# Convolutional layer
model.add(Conv1D(filters=64, kernel_size=(3), padding='same', strides=(1), input_shape=(54,1)))
model.add(Activation('relu'))

# Hidden layers 1
model.add(Conv1D(filters=32, kernel_size=(3), padding='same', strides=(1)))
model.add(MaxPooling1D(pool_size=2))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))

#Fully connected layer
model.add(Flatten())
model.add(Dense(512))       
model.add(Activation('relu'))

# Output layer
model.add(Dense(7))  # we have 7 classes
model.add(Activation('softmax'))  # Softmax activation for multi-class classification

# Compile the model
optimizer=Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()


  super().__init__(


In [27]:
# we will also perform this in batch
batch_sz = 128
num_epochs = 10

model.fit(X_train, y_train, 
          batch_size=batch_sz, 
          epochs=num_epochs, 
          validation_data=(X_val, y_val), verbose=1)

print('------------------------------------------------score----------------------------------------')
score = model.evaluate(X_val, y_val)
print('Val score:', score[0])
print('Val accuracy:', score[1])


Epoch 1/10
[1m2906/2906[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.7377 - loss: 0.6180 - val_accuracy: 0.8010 - val_loss: 0.4653
Epoch 2/10
[1m2906/2906[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.8103 - loss: 0.4448 - val_accuracy: 0.8194 - val_loss: 0.4217
Epoch 3/10
[1m2906/2906[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.8353 - loss: 0.3895 - val_accuracy: 0.8453 - val_loss: 0.3687
Epoch 4/10
[1m2906/2906[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.8501 - loss: 0.3565 - val_accuracy: 0.8573 - val_loss: 0.3427
Epoch 5/10
[1m2906/2906[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.8593 - loss: 0.3340 - val_accuracy: 0.8639 - val_loss: 0.3269
Epoch 6/10
[1m2906/2906[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.8679 - loss: 0.3160 - val_accuracy: 0.8703 - val_loss: 0.3147
Epoch 7/10
[1m

### Maximize accuracy on the unseen test set

In [28]:
# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6)

# Train the model
history = model.fit(X_train, y_train, 
                    batch_size=batch_sz, 
                    epochs=num_epochs, 
                    validation_data=(X_val, y_val), 
                    callbacks=[early_stopping, reduce_lr],
                    verbose=1)



Epoch 1/10
[1m2906/2906[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.8905 - loss: 0.2653 - val_accuracy: 0.8857 - val_loss: 0.2816 - learning_rate: 0.0010
Epoch 2/10
[1m2906/2906[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.8936 - loss: 0.2586 - val_accuracy: 0.8873 - val_loss: 0.2787 - learning_rate: 0.0010
Epoch 3/10
[1m2906/2906[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.8959 - loss: 0.2530 - val_accuracy: 0.8902 - val_loss: 0.2739 - learning_rate: 0.0010
Epoch 4/10
[1m2906/2906[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.8985 - loss: 0.2475 - val_accuracy: 0.8914 - val_loss: 0.2701 - learning_rate: 0.0010
Epoch 5/10
[1m2906/2906[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.9005 - loss: 0.2427 - val_accuracy: 0.8938 - val_loss: 0.2654 - learning_rate: 0.0010
Epoch 6/10
[1m2906/2906[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [29]:
#This is the score for the model on the test data after Maximisation
score = model.evaluate(X_test, y_test)
print('Test score:', score[0])
print('Test accuracy:', score[1])

[1m3632/3632[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 909us/step - accuracy: 0.8996 - loss: 0.2498
Test score: 0.2521664798259735
Test accuracy: 0.8983761072158813
