# Welcome  

Notebook Author: Samuel Alter  
Notebook Subject: Capstone Project - Image Analysis

BrainStation Winter 2023: Data Science

This notebook is for running my satellite images through deep learning networks. The images have been pre-processed in a separate notebook.

The goal is to find a pre-trained CNN to adapt to our specific use-case of finding insights about wildfire-prone landscapes.

## Imports

In [38]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Split all 7760 images into `train` and `validation` splits

In [146]:
# Reproducible train/validation split over the image numbers.
# There are 7760 images each in the fire and nofire datasets.
SEED = 42        # fixed seed: the split is identical after Restart & Run All
N_IMAGES = 7760  # single source of truth for the dataset size

# create array with each image's number (0 .. N_IMAGES-1)
file_nums = np.arange(N_IMAGES)

# fraction of the images assigned to training; the remainder is validation
train_size = 0.7
# test_size=1-train_size

# draw the training numbers without replacement from a seeded generator,
# so files already copied to disk stay consistent with `trains`/`valids`
rng = np.random.default_rng(SEED)
trains = rng.choice(file_nums,
                    size=int(len(file_nums) * train_size),
                    replace=False)

# every number not drawn for training; sorted so the order is stable
valids = sorted(set(file_nums) - set(trains))

# sanity checks: print the sizes, their sum, and the (empty) overlap
print(len(trains))
print(len(valids))
print(f'The size of train and valids together is {len(trains)+len(valids)}')
print(set(trains) & set(valids))

# fail loudly instead of relying on reading the printed values
assert len(trains) + len(valids) == len(file_nums)
assert not set(trains) & set(valids)

5432
2328
The size of train and tests together is 7760
set()


In [147]:
# The file-copy cells compare digit strings taken from filenames against
# these collections, so every image number is turned into a string here.
trains = list(map(str, trains))
valids = list(map(str, valids))

# show the element type to confirm the conversion
type(trains[0])

str

#### Uncomment and run these cells to copy the images to their corresponding training and validation locations

In [143]:
# # Disabled cell: every line is commented out, so running it does nothing.
# # Uncomment to copy the `fire` patches whose number is in `trains`
# # from patch_fire into train/fire (files are copied, not moved).
# # fire
# # train
# # items to move: 5432

# import os
# import shutil

# def copy_files_by_number(source, dest, trains):
#     """
#     Copies files from `source` to `dest` that have numbers in their filename
#     that match any element in `trains` list.
#
#     The number is built by joining every digit character found in the
#     filename, and it is compared as a string, so `trains` must hold strings.
#     """
#     for filename in os.listdir(source):
#         # Get the number in the filename (all digit characters, concatenated)
#         file_num = "".join(filter(str.isdigit, filename))
#         # Check if the number is in the trains list
#         if file_num in trains:
#             # Copy the file to the destination folder
#             shutil.copy(os.path.join(source, filename), dest)

# source = '/Users/sra/Desktop/Data_Science_2023/_capstone/00_capstone_data/orthoimagery/patches/patch_fire'
# dest = '/Users/sra/Desktop/Data_Science_2023/_capstone/00_capstone_data/orthoimagery/patches/train/fire'

# copy_files_by_number(source=source,dest=dest,trains=trains)

In [148]:
# # Disabled cell: every line is commented out, so running it does nothing.
# # Uncomment to copy the `fire` patches whose number is in `valids`
# # from patch_fire into validation/fire (files are copied, not moved).
# # NOTE(review): this re-defines `copy_files_by_number`, which other cells
# # in this notebook also define with an identical body.
# # fire
# # valids
# # items to move: 2328

# import os
# import shutil

# def copy_files_by_number(source, dest, valids):
#     """
#     Copies files from `source` to `dest` that have numbers in their filename
#     that match any element in `valids` list.
#
#     The number is built by joining every digit character found in the
#     filename, and it is compared as a string, so `valids` must hold strings.
#     """
#     for filename in os.listdir(source):
#         # Get the number in the filename (all digit characters, concatenated)
#         file_num = "".join(filter(str.isdigit, filename))
#         # Check if the number is in the valids list
#         if file_num in valids:
#             # Copy the file to the destination folder
#             shutil.copy(os.path.join(source, filename), dest)

# source = '/Users/sra/Desktop/Data_Science_2023/_capstone/00_capstone_data/orthoimagery/patches/patch_fire'
# dest = '/Users/sra/Desktop/Data_Science_2023/_capstone/00_capstone_data/orthoimagery/patches/validation/fire'

# copy_files_by_number(source=source,dest=dest,valids=valids)

In [144]:
# # Disabled cell: every line is commented out, so running it does nothing.
# # Uncomment to copy the `nofire` patches whose number is in `trains`
# # from patch_nofire into train/nofire (files are copied, not moved).
# # NOTE(review): this re-defines `copy_files_by_number`, which other cells
# # in this notebook also define with an identical body.
# # nofire
# # train
# # items to move: 5432

# import os
# import shutil

# def copy_files_by_number(source, dest, trains):
#     """
#     Copies files from `source` to `dest` that have numbers in their filename
#     that match any element in `trains` list.
#
#     The number is built by joining every digit character found in the
#     filename, and it is compared as a string, so `trains` must hold strings.
#     """
#     for filename in os.listdir(source):
#         # Get the number in the filename (all digit characters, concatenated)
#         file_num = "".join(filter(str.isdigit, filename))
#         # Check if the number is in the trains list
#         if file_num in trains:
#             # Copy the file to the destination folder
#             shutil.copy(os.path.join(source, filename), dest)

# source = '/Users/sra/Desktop/Data_Science_2023/_capstone/00_capstone_data/orthoimagery/patches/patch_nofire'
# dest = '/Users/sra/Desktop/Data_Science_2023/_capstone/00_capstone_data/orthoimagery/patches/train/nofire'

# copy_files_by_number(source=source,dest=dest,trains=trains)

In [149]:
# # Disabled cell: every line is commented out, so running it does nothing.
# # Uncomment to copy the `nofire` patches whose number is in `valids`
# # from patch_nofire into validation/nofire (files are copied, not moved).
# # NOTE(review): this re-defines `copy_files_by_number`, which other cells
# # in this notebook also define with an identical body.
# # nofire
# # valids
# # items to move: 2328

# import os
# import shutil

# def copy_files_by_number(source, dest, valids):
#     """
#     Copies files from `source` to `dest` that have numbers in their filename
#     that match any element in `valids` list.
#
#     The number is built by joining every digit character found in the
#     filename, and it is compared as a string, so `valids` must hold strings.
#     """
#     for filename in os.listdir(source):
#         # Get the number in the filename (all digit characters, concatenated)
#         file_num = "".join(filter(str.isdigit, filename))
#         # Check if the number is in the valids list
#         if file_num in valids:
#             # Copy the file to the destination folder
#             shutil.copy(os.path.join(source, filename), dest)

# source = '/Users/sra/Desktop/Data_Science_2023/_capstone/00_capstone_data/orthoimagery/patches/patch_nofire'
# dest = '/Users/sra/Desktop/Data_Science_2023/_capstone/00_capstone_data/orthoimagery/patches/validation/nofire'

# copy_files_by_number(source=source,dest=dest,valids=valids)

### Basic `ImageDataGenerator`

In [4]:
# adapted from
# https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator

# import `ImageDataGenerator` to help facilitate
# loading images directly from our computer
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Specify the dimensions for the target preprocess image size
height = 128
width = 128
channels = 3

# number of images per batch (same value as the commented-out generator cell
# in this notebook)
batch_size = 32

# Root folder holding one sub-folder per class (`fire`, `nofire`); this is the
# parent of the `train/fire` and `train/nofire` destinations used by the copy
# cells in this notebook.
train_data_dir = '/Users/sra/Desktop/Data_Science_2023/_capstone/00_capstone_data/orthoimagery/patches/train'

# instantiate training image data generator:
# rescale pixel values to [0, 1], apply light augmentation, and reserve
# 20% of the images in `train_data_dir` as a validation subset
# NOTE(review): this notebook also prepares a separate `validation` folder;
# confirm whether `validation_split` or that folder is the intended source.
# NOTE(review): both subsets come from this one generator, so the validation
# subset presumably receives the same augmentation — confirm this is intended.
train_datagen = ImageDataGenerator(rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2) # set validation split

# the original cell passed `img_height`/`img_width`, which are not defined in
# any cell above; the dimensions defined in this cell are `height`/`width`
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(height, width),
    batch_size=batch_size,
    class_mode='binary',
    subset='training') # set as training data

validation_generator = train_datagen.flow_from_directory(
    train_data_dir, # same directory as training data
    target_size=(height, width),
    batch_size=batch_size,
    class_mode='binary',
    subset='validation') # set as validation data

# TODO: no cell above this one defines `model` or `nb_epochs`, so the
# training call is parked here. Re-enable it once a compiled model exists.
# `Model.fit` accepts generators directly; `fit_generator` is deprecated.
# model.fit(
#     train_generator,
#     steps_per_epoch=train_generator.samples // batch_size,
#     validation_data=validation_generator,
#     validation_steps=validation_generator.samples // batch_size,
#     epochs=nb_epochs)

In [4]:
# # Disabled cell: every line is commented out, so running it does nothing.
# # NOTE(review): the directories below ('/Users/psmith/.../monkeys/') and
# # class_mode='categorical' differ from the fire/nofire folders and the
# # class_mode='binary' used elsewhere in this notebook — presumably left over
# # from a template; confirm and update both before re-enabling.
# # Create validation image data generator
# # Only apply rescaling to our validation data
# validation_datagen = ImageDataGenerator(rescale=1./255)

# # Specify where the images should be loaded from,
# # as well as some additional attributes:
# train_generator=train_datagen.flow_from_directory('/Users/psmith/data_sources/images/monkeys/train/',
#                                                  target_size=(height,width),
#                                                  color_mode='rgb',
#                                                  batch_size=32,
#                                                  class_mode='categorical')

# validation_generator=validation_datagen.flow_from_directory('/Users/psmith/data_sources/images/monkeys/validation/',
#                                                  target_size=(height,width),
#                                                  color_mode='rgb',
#                                                  batch_size=32,
#                                                  class_mode='categorical')

FileNotFoundError: [Errno 2] No such file or directory: '/Users/psmith/data_sources/images/monkeys/train/'

### `VGG19`

In [5]:
from tensorflow.keras.applications import VGG19

# input image dimensions: height, width, colour channels
height, width, channels = 128, 128, 3

# Load VGG19 with ImageNet weights and without its top (classifier) layers.
# The variable is called `res_model` even though it holds VGG19; the name is
# kept because the layer-freezing cell below refers to it.
res_model = VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=(height, width, channels),
)

# print the layer-by-layer summary
res_model.summary()

Model: "vgg19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 128, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 128, 128, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 128, 128, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 64, 64, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 64, 64, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 64, 64, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 32, 32, 128)       0     

In [6]:
# Mark every layer of the pre-trained base as non-trainable
for base_layer in res_model.layers:
    base_layer.trainable = False

### `ResNet50`