<a href="https://colab.research.google.com/github/ssegovba/identifying-deforestation/blob/main/notebooks/cnn_custom_model_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CNN Model on Amazon Rainforest Data for Deforestation Tracking
*Santiago Segovia*

In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.preprocessing import image
import os

from google.colab import drive

In [2]:
# Attach Google Drive so the shared-drive dataset is reachable under /content/drive
drive.mount(mountpoint="/content/drive")

Mounted at /content/drive


## I. Data Pre-processing

### I.a Handling the Labels

We begin by loading `train_classes.csv`, the metadata file that lists each training image together with its space-separated label tags:



In [3]:
# Root folder of the unzipped Planet dataset on the shared drive. The image
# folders are derived from it so the data location is defined in one place only.
data_path = "/content/drive/Shareddrives/computer-vision-project/Data/Unzipped/planet/planet/"
train_path = os.path.join(data_path, "train-jpg")  # folder with the training images
test_path = os.path.join(data_path, "test-jpg")    # folder with the test images

In [4]:
# Read the training metadata and normalise it in one pass:
#   - image_name: append the '.jpg' extension so names match the files on disk
#   - tags: split the space-separated tag string into a list of tags
labels_df = pd.read_csv(data_path + 'train_classes.csv').assign(
    image_name=lambda frame: frame['image_name'].map(lambda name: name + '.jpg'),
    tags=lambda frame: frame['tags'].map(lambda tag_string: tag_string.split()),
)

# Multi-hot encode the tag lists (one vector per image). The fitted binarizer
# is kept around because its `classes_` attribute gives the label ordering.
mlb = MultiLabelBinarizer()
labels_df['encoded_tags'] = [label_vector for label_vector in mlb.fit_transform(labels_df['tags'])]

In [6]:
# Small working subset: the first ten rows of the training metadata.
# NOTE: despite its name, `test_df` contains training images, not test-set images.
test_df = labels_df.iloc[:10]
test_df

Unnamed: 0,image_name,tags,encoded_tags
0,train_0.jpg,"[haze, primary]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, ..."
1,train_1.jpg,"[agriculture, clear, primary, water]","[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
2,train_2.jpg,"[clear, primary]","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
3,train_3.jpg,"[clear, primary]","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
4,train_4.jpg,"[agriculture, clear, habitation, primary, road]","[1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, ..."
5,train_5.jpg,"[haze, primary, water]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, ..."
6,train_6.jpg,"[agriculture, clear, cultivation, primary, water]","[1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, ..."
7,train_7.jpg,"[haze, primary]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, ..."
8,train_8.jpg,"[agriculture, clear, cultivation, primary]","[1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, ..."
9,train_9.jpg,"[agriculture, clear, cultivation, primary, road]","[1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, ..."


### I.b Setting up `ImageDataGenerator`

Keras provides `ImageDataGenerator` for data augmentation and to streamline the process of loading and preprocessing images. Here, we will use it to normalize images and set up the training and test data generators.

In [None]:
# Augmentation pipeline applied to every training batch
train_datagen = ImageDataGenerator(
    # Pixel scaling: multiply raw values by 1/255
    rescale=1.0 / 255,
    # Random geometric transformations
    rotation_range=20,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    # How pixels created by the transformations above are filled in
    fill_mode='nearest',
)

We define a generator function that loads the images listed in a labels DataFrame (such as `labels_df`) in batches and applies the augmentations defined above:

In [12]:
def fetch_images_and_labels(dataset, directory, batch_size=32, target_size=(128, 128)):
    """Endlessly yield augmented ``(images, labels)`` batches built from ``dataset``.

    The rows of ``dataset`` are walked in order in chunks of ``batch_size``.
    After the last chunk has been yielded the walk starts over, so the
    generator never terminates on its own.

    Args:
        dataset: DataFrame with an ``image_name`` column (file name inside
            ``directory``) and an ``encoded_tags`` column (one label vector
            per row).
        directory: Folder that contains the image files.
        batch_size: Maximum number of rows per batch. The final batch of a
            pass is smaller when ``len(dataset)`` is not a multiple of it.
        target_size: ``(height, width)`` every image is resized to when it is
            loaded. Defaults to the previously hard-coded 128x128.

    Yields:
        The ``(images, labels)`` pair produced by ``train_datagen.flow`` for
        the current chunk, i.e. the chunk after the transformations of the
        module-level ``train_datagen`` have been applied.

    Raises:
        ValueError: On first iteration, if ``batch_size`` is not positive or
            ``dataset`` is empty. An empty dataset would otherwise loop
            forever without ever yielding a batch.
        FileNotFoundError: Propagated from ``image.load_img`` when a listed
            image does not exist in ``directory``.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    n = len(dataset)
    if n == 0:
        raise ValueError("dataset is empty: there are no images to yield")

    while True:
        for start in range(0, n, batch_size):
            # iloc slicing clips at the end of the frame, so the last chunk
            # of a pass is simply shorter.
            chunk = dataset.iloc[start:start + batch_size]

            batch_images = np.array([
                image.img_to_array(
                    image.load_img(os.path.join(directory, name), target_size=target_size)
                )
                for name in chunk['image_name']
            ])
            batch_labels = np.array(list(chunk['encoded_tags']))

            # The chunk holds at most `batch_size` samples, so the first batch
            # drawn from flow() already contains the whole chunk.
            augmented = train_datagen.flow(batch_images, batch_labels, batch_size=batch_size)

            # Use the iterator protocol rather than the iterator's `.next()`
            # method, which is not available in every Keras release.
            yield next(augmented)

# Build the endless batch generator that feeds the model during training
train_generator = fetch_images_and_labels(
    dataset=test_df,
    directory=train_path,
    batch_size=32,
)

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Assemble a small CNN layer by layer: two convolution/pooling stages
# followed by a dense classifier head.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
# One sigmoid unit per label known to the binarizer (multi-label output)
model.add(Dense(len(mlb.classes_), activation='sigmoid'))

# Configure training: Adam optimizer with a per-label binary cross-entropy loss
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train from the generator; it never runs out, so the epoch length is set explicitly
model.fit(train_generator, epochs=10, steps_per_epoch=100)


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/Shareddrives/computer-vision-project/Data/Unzipped/planet/planet/train-jpg/train_8.jpg'