In [1]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
from torchvision.models import vgg16, resnet50, ResNet50_Weights, VGG16_Weights
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, v2
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

import os
import pandas as pd
import numpy as np
from PIL import Image

import timm
from timm.data.loader import create_loader

#import torch.optim as optim
#import torch.nn.functional as F
#from torch.utils.data import DataLoader, Dataset, TensorDataset, Subset
#from torch.utils.tensorboard import SummaryWriter



Data Import and Processing

We want to read in the various data files and return a meaningful dataframe that can be fed into our classification model. Creating this dataframe will be done in three steps.

1 - establish all file paths needed

2 - crop images to the boundary of the aircraft and store cropped versions

3 - combine cropped images, image id, and label data into a dataframe

We will process the text files in batches by test, train, and validation. We'll then associate the cropped image files with the image data (labels, ID, etc.). The final step will be to create a dataframe of all the data so it can easily be used in our model. The input to this work is the various .txt and image files. The output is a dataframe with these columns: image, image id, manufacturer, family, variant. The manufacturer, family, and variant columns will be used as the labels for classification.

In [2]:
full_dataset_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data"
file_names = os.listdir(full_dataset_path)
#print(file_names)

In [3]:
#image paths
original_images_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images"
image_boxes_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_box.txt" 

#train data paths
train_ids_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_train.txt"
train_variant_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_variant_train.txt"
train_manufacturer_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_manufacturer_train.txt"
train_family_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_family_train.txt"

#test data paths
test_ids_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_test.txt"
test_variant_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_variant_test.txt"
test_manufacturer_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_manufacturer_test.txt"
test_family_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_family_test.txt"

#validate data paths
val_ids_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_val.txt"
val_variant_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_variant_val.txt"
val_manufacturer_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_manufacturer_val.txt"
val_family_path = "/Users/paigevauter/Documents/UM_MADS/SIADS699/fgvc-aircraft-2013b/data/images_family_val.txt"


In [4]:
def get_data_items(path): 
    """
    takes in a data path to a .txt file, reads in lines, and returns a list of lines
    
    args:
        path - file path to your .txt file
    """
    with open(path, "r") as file:
        lines = file.readlines()
        image_info = [line.strip() for line in lines]
        
        return image_info

In [5]:
def crop_images(original_image_folder, boundary_list):
    """
    opens images and crops them,
    returns a dict with image file name as the key, image as the value

    args:
        original_image_folder - folder where images are stored
        boundary_list - a list of the boundary box coordinates to crop images
    """
    img_boxes_list = get_data_items(boundary_list)
    all_img_box_dict = {}
    for img_box in img_boxes_list:
        box = img_box.split()[1:]
        img_id = img_box.split()[0]
        all_img_box_dict[img_id + ".jpg"] = tuple(map(int, box))
    #print(all_img_box_dict)

    images_dict = {}
    for image_file in os.listdir(original_image_folder):
        img_file_dict = {}
        img = Image.open(os.path.join(original_image_folder ,image_file))
        cropped_img = img.crop(all_img_box_dict[image_file])
        img_file_dict["file name"] = image_file
        img_file_dict["image"] = cropped_img
        
        images_dict[image_file.rstrip(".jpg")] = img_file_dict

    return images_dict

In [None]:
#Running this takes ~40 - 60s to get a dict of image dicts {'12345': {'file name': 12345.jpg, 'image': image}}
final_image_dict = crop_images(original_images_path, image_boxes_path)

print(len(final_image_dict))

10000


In [7]:
def add_image_labels(final_image_dict, label_list, split_label):
    """
    add additional data labels to the full image dictionary

    args: 
        final_image_dict - the final dict of all images we're building
        label_list - list of tuples for additional labels per image
        split_label - test, train, or validate
    """
    for img_tuple in label_list:
        if img_tuple[0] in final_image_dict:
            _entry = final_image_dict[img_tuple[0]]
            _entry.update({'split': split_label, 'manufacturer': img_tuple[1], 'family': img_tuple[2], 'variant': img_tuple[3]})

            final_image_dict[img_tuple[0]] = _entry

    return final_image_dict

In [8]:
def remove_leading_id(list_of_strings):
    """
    This function takes in a list of strings and removes the first word from each. For our dataset this is helpful because
    many of the labels come with the image id pre-apprended at the start and we don't need this. This function returns
    an updated list of strings without the first word (or image id).

    args:
        list_of_strings - a list of strings you want to edit
    """
    new_strings = []
    for st in list_of_strings:
        new_strings.append(" ".join(st.split(" ")[1:]))
    
    return(new_strings)

In [9]:
# training data: grab image info across each doc and return in a list of tuples
# each list item is a tuple of (image ID, manufacturer, family, variant)
train_ids = get_data_items(train_ids_path)
train_mans = remove_leading_id(get_data_items(train_manufacturer_path))
train_fams = remove_leading_id(get_data_items(train_family_path))
train_vars = remove_leading_id(get_data_items(train_variant_path))

zipped_train_images = zip(train_ids, train_mans, train_fams, train_vars)

train_all = list(zipped_train_images)
print(train_all)

[('1025794', 'Boeing', 'Boeing 707', '707-320'), ('1340192', 'Boeing', 'Boeing 707', '707-320'), ('0056978', 'Boeing', 'Boeing 707', '707-320'), ('0698580', 'Boeing', 'Boeing 707', '707-320'), ('0450014', 'Boeing', 'Boeing 707', '707-320'), ('1042824', 'Boeing', 'Boeing 707', '707-320'), ('0894380', 'Boeing', 'Boeing 707', '707-320'), ('1427680', 'Boeing', 'Boeing 707', '707-320'), ('0817494', 'Boeing', 'Boeing 707', '707-320'), ('0716386', 'Boeing', 'Boeing 707', '707-320'), ('0951982', 'Boeing', 'Boeing 707', '707-320'), ('0731614', 'Boeing', 'Boeing 707', '707-320'), ('0582363', 'Boeing', 'Boeing 707', '707-320'), ('1082409', 'Boeing', 'Boeing 707', '707-320'), ('2031775', 'Boeing', 'Boeing 707', '707-320'), ('0950991', 'Boeing', 'Boeing 707', '707-320'), ('0869722', 'Boeing', 'Boeing 707', '707-320'), ('0979376', 'Boeing', 'Boeing 707', '707-320'), ('1002439', 'Boeing', 'Boeing 707', '707-320'), ('0864665', 'Boeing', 'Boeing 707', '707-320'), ('1207591', 'Boeing', 'Boeing 707', '70

In [10]:
# testing data: grab image info across each doc and return in a list of tuples
# each list item is a tuple of (image ID, manufacturer, family, variant)
test_ids = get_data_items(test_ids_path)
test_mans = remove_leading_id(get_data_items(test_manufacturer_path))
test_fams = remove_leading_id(get_data_items(test_family_path))
test_vars = remove_leading_id(get_data_items(test_variant_path))

zipped_test_images = zip(test_ids, test_mans, test_fams, test_vars)

test_all = list(zipped_test_images)
#print(test_all)

In [11]:
# validation data: grab image info across each doc and return in a list of tuples
# each list item is a tuple of (image ID, manufacturer, family, variant)

val_ids = get_data_items(val_ids_path)
val_mans = remove_leading_id(get_data_items(val_manufacturer_path))
val_fams = remove_leading_id(get_data_items(val_family_path))
val_vars = remove_leading_id(get_data_items(val_variant_path))

zipped_val_images = zip(val_ids, val_mans, val_fams, val_vars)

val_all = list(zipped_val_images)
#print(val_all)

In [12]:
#update the dataframe with necessary data labels we created for each data type
update_with_train = add_image_labels(final_image_dict, train_all, 'train')
update_with_test = add_image_labels(update_with_train, test_all, 'test')
update_with_validate = add_image_labels(update_with_test, val_all, 'validate')

print(len(update_with_validate))

10000


In [13]:
#dataframe creation
image_df = pd.DataFrame.from_dict(update_with_validate, orient='index')
image_df.dtypes
image_df.head()

Unnamed: 0,file name,image,split,manufacturer,family,variant
1376762,1376762.jpg,<PIL.Image.Image image mode=RGB size=1183x312 ...,test,Embraer,Embraer Legacy 600,Embraer Legacy 600
813373,0813373.jpg,<PIL.Image.Image image mode=RGB size=989x227 a...,test,McDonnell Douglas,DC-9,DC-9-30
1398863,1398863.jpg,<PIL.Image.Image image mode=RGB size=1263x396 ...,train,Boeing,Boeing 737,737-400
2188154,2188154.jpg,<PIL.Image.Image image mode=RGB size=1179x453 ...,train,Boeing,Boeing 737,737-700
1668973,1668973.jpg,<PIL.Image.Image image mode=RGB size=995x250 a...,validate,Airbus,A330,A330-200


In [14]:
#check for any nan values
image_df.isnull().sum()

file name       0
image           0
split           0
manufacturer    0
family          0
variant         0
dtype: int64

In [15]:
#verify counts of data split
image_df['split'].value_counts()

split
train       3334
test        3333
validate    3333
Name: count, dtype: int64

Now that we have our dataframe, we need it in the right format for torch. The next few cells will prep the data.
First we'll encode the labels. Then we'll build a function that returns each image and it's label. Finally, we'll create data transformations and create the PyTorch tensors and data loader.

In [16]:
label_encoder = LabelEncoder()
image_df['manu_encoded'] = label_encoder.fit_transform(image_df['manufacturer'])
image_df['fam_encoded'] = label_encoder.fit_transform(image_df['family'])
image_df['var_encoded'] = label_encoder.fit_transform(image_df['variant'])

image_df.head()
#test - 0, train - 1, validate - 2

Unnamed: 0,file name,image,split,manufacturer,family,variant,manu_encoded,fam_encoded,var_encoded
1376762,1376762.jpg,<PIL.Image.Image image mode=RGB size=1183x312 ...,test,Embraer,Embraer Legacy 600,Embraer Legacy 600,13,42,70
813373,0813373.jpg,<PIL.Image.Image image mode=RGB size=989x227 a...,test,McDonnell Douglas,DC-9,DC-9-30,21,32,56
1398863,1398863.jpg,<PIL.Image.Image image mode=RGB size=1263x396 ...,train,Boeing,Boeing 737,737-400,4,15,4
2188154,2188154.jpg,<PIL.Image.Image image mode=RGB size=1179x453 ...,train,Boeing,Boeing 737,737-700,4,15,7
1668973,1668973.jpg,<PIL.Image.Image image mode=RGB size=995x250 a...,validate,Airbus,A330,A330-200,1,3,27


In [23]:
class ImageDataset(Dataset):
    """
    User defined class to build a datset using Pytorch class Dataset.

    args:
        Dataset - dataframe of images
    """
    def __init__(self, dataframe, transform=None):
        self.df = dataframe.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)
    
    def __getitem(self, idx):
        image = self.df.loc[idx, "image"]
        label = self.df.loc[idx, "manu_encoded"]

        if self.transform:
            image = self.transform(image)

        return image, label

In [26]:
transform_train = transforms.Compose([transforms.Resize((224, 224)),
                                       transforms.ToTensor()])

transform_testval = transforms.Compose([transforms.Resize((224, 224)),
                                       transforms.ToTensor()])

In [27]:
batch_size=256

train_df = image_df.loc[image_df["split"] == "train"]
test_df = image_df.loc[image_df["split"] == "test"]
val_df = image_df.loc[image_df["split"] == "validate"]

train_set = ImageDataset(train_df, transform=transform_train)
test_set = ImageDataset(test_df, transform=transform_testval)
val_set = ImageDataset(val_df, transform=transform_testval)

train_loader = DataLoader(train_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)
val_loader = DataLoader(val_set, batch_size=batch_size)