In [24]:
import os
from glob import glob
import shutil

from PIL import Image 

from sklearn.model_selection import train_test_split

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image

import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
%matplotlib inline

## Data Preprocessing

### Split Data into Train and Test Sets

In [25]:
# Convert .JPG and .jpg files to .jpeg
# This assumes that image files and xml annotations are in the same folder named "images"

path = "/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/images"
for file in os.listdir(path):
    if file.endswith(".JPG") or file.endswith(".jpg"):
        img = Image.open(path + "/" + file)
        file_name, file_ext = os.path.splitext(file)
        new_name = file_name + ".jpeg"
        img.save(path + "/" + new_name)
        
# Delete the .JPG and.jpg files

for file in os.listdir(path):
    if file.endswith(".JPG") or file.endswith(".jpg"):
        path_to_file = os.path.join("/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/images/", file)
        os.remove(path_to_file)

In [26]:
# Generate a list of the image files
image_files = glob("/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/images/*.jpeg")

# Strip the extensions
image_names = [name.replace(".jpeg","") for name in image_files]

# Split into train and test
train_names, test_names = train_test_split(image_names, test_size=0.2)

def batch_move_files(file_list, source_path, destination_path):
    """Moves the jpeg and xml file pairs to destination path from source path"""
    
    for file in file_list:
        image = file + ".jpeg"
        xml = file + ".xml"
        shutil.move(os.path.join(source_path, image), destination_path)
        shutil.move(os.path.join(source_path, xml), destination_path)
    return

# Move the files - assumes empty train and test directories at the images directory level

source_dir = "/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/images/"
test_dir = "/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/test/"
train_dir = "/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/train/"
batch_move_files(test_names, source_dir, test_dir)
batch_move_files(train_names, source_dir, train_dir)

### Generate CSV Files from XML

In [27]:
%run /Users/johngalvin/Desktop/GitHub/Lucy_Detector/scripts/xml_to_csv.py

Successfully converted xml to csv.


In [28]:
# Change .JPG and .jpg to .jpeg within csv files

train_df = pd.read_csv("/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/train_labels.csv")
train_df["filename"] = train_df["filename"].str.replace(".jpg", ".jpeg")
train_df["filename"] = train_df["filename"].str.replace(".JPG", ".jpeg")
train_df.to_csv("/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/train_labels.csv")

test_df = pd.read_csv("/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/test_labels.csv")
test_df["filename"] = test_df["filename"].str.replace(".jpg", ".jpeg")
test_df["filename"] = test_df["filename"].str.replace(".JPG", ".jpeg")
test_df.to_csv("/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/test_labels.csv")

  train_df["filename"] = train_df["filename"].str.replace(".jpg", ".jpeg")
  train_df["filename"] = train_df["filename"].str.replace(".JPG", ".jpeg")
  test_df["filename"] = test_df["filename"].str.replace(".jpg", ".jpeg")
  test_df["filename"] = test_df["filename"].str.replace(".JPG", ".jpeg")


### Generate the TFRecords

In [29]:
%cd /Users/johngalvin/Desktop/GitHub/Lucy_Detector/
!python generate_tfrecord.py --csv_input=/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/train_labels.csv --output_path=/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/train.record --image_dir=/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/train/
!python generate_tfrecord.py --csv_input=/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/test_labels.csv --output_path=/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/test.record --image_dir=/Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/test/

/Users/johngalvin/Desktop/GitHub/Lucy_Detector
Successfully created the TFRecords: /Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/train.record
Successfully created the TFRecords: /Users/johngalvin/Desktop/GitHub/Lucy_Detector/data/test.record


### Instantiate the Model

In [None]:
# Create the base model

base_model = ResNet50(include_top=False,
                      weights="imagenet",
                      classifier_activation="softmax",
                      input_shape=(IMG_WIDTH, IMG_HEIGHT, 3))

# Freeze layers

for layer in base_model.layers[:]:
    layer.trainable=False

# Create the custom model

input = Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3))
custom_model = base_model(input)
custom_model = GlobalAveragePooling2D()(custom_model)
custom_model = Dense(64, activation="relu")(custom_model)
custom_model = Dropout(0.5)(custom_model)
predictions = Dense(NUM_CLASSES, activation="softmax")(custom_model)
Model(inputs=input, outputs=predictions)

# Compile the model

model = custom_model
model.compile(loss="categorical_crossentropy",
             optimizer=tf.train.Adam(lr=0.01),
             metrics=["IoU"])