In [3]:
import os
import requests
from io import BytesIO
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Step 1: Define GitHub Repo & Folder Path
# The dataset lives in a GitHub repository; the "contents" endpoint of the
# GitHub REST API is used to list the folder that holds the class directories.
# NOTE(review): the recorded run of this cell ended with a GitHub "Not Found"
# (404) response for this URL -- confirm the repository name, that it is
# public, and that the folder exists on the default branch.
GITHUB_REPO = "prattapong/Commercial-Airplane-Model-Image-Classification"
GITHUB_FOLDER = "images"
GITHUB_API_URL = "/".join(
    ("https://api.github.com/repos", GITHUB_REPO, "contents", GITHUB_FOLDER)
)

# Step 2: Fetch Image URLs Automatically
def get_image_urls():
    """Collect image download URLs from the GitHub folder, grouped by class.

    Lists ``GITHUB_API_URL`` through the GitHub contents API and treats every
    sub-directory found there as one class. Each class directory is listed in
    turn and the download URLs of its ``.jpg`` / ``.jpeg`` / ``.png`` files
    are collected.

    If the ``GITHUB_TOKEN`` environment variable is set, it is sent as a
    bearer token. GitHub answers 404 for private repositories when the
    request is unauthenticated, and unauthenticated requests are subject to
    a much lower rate limit.

    Returns:
        dict: class (folder) name -> list of download URLs. A class whose
        folder listing could not be fetched maps to an empty list.

    Raises:
        Exception: if the top-level folder listing does not return HTTP 200.
        requests.RequestException: on connection failures or timeouts.
    """
    request_timeout = 30  # seconds; without a timeout a stalled call hangs forever
    image_extensions = (".jpg", ".jpeg", ".png")

    headers = {}
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    response = requests.get(GITHUB_API_URL, headers=headers, timeout=request_timeout)
    if response.status_code != 200:
        # Error bodies are normally JSON, but proxies/gateways may return
        # HTML or nothing; fall back to the raw text so the real HTTP failure
        # is not masked by a JSON decoding error.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        raise Exception(f"Error fetching images: {detail}")

    image_urls = {}
    for folder in response.json():
        if folder["type"] != "dir":  # Only class folders (A350, B787, A320)
            continue

        class_name = folder["name"]

        # Fetch image files in each class folder
        folder_response = requests.get(
            folder["url"], headers=headers, timeout=request_timeout
        )
        if folder_response.status_code != 200:
            # Keep the best-effort behaviour (the class stays in the result
            # with no images) but make the failure visible.
            print(
                f"Skipping folder {class_name}: "
                f"HTTP {folder_response.status_code} while listing its contents"
            )
            image_urls[class_name] = []
            continue

        image_urls[class_name] = [
            file["download_url"]
            for file in folder_response.json()
            if file["type"] == "file"
            and file["name"].lower().endswith(image_extensions)
        ]

    return image_urls

# Step 3: Load Images Using Image.open()
def load_images(image_urls):
    """Download, decode and normalise every image listed in ``image_urls``.

    Each image is converted to RGB, resized to 224x224 and scaled to the
    range [0, 1] as ``float32``. Images that fail to download or decode are
    reported with ``print`` and skipped, so one bad file does not abort the
    whole load.

    Args:
        image_urls: mapping of class label -> iterable of image URLs.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a NumPy array stacking the loaded
        images and ``y`` holds one integer code per image, produced by
        ``pd.Categorical(labels).codes``. The label-name -> code mapping is
        not returned. Both are empty when nothing could be loaded.
    """
    IMG_SIZE = (224, 224)  # Resize all images to 224x224
    request_timeout = 30  # seconds; avoids hanging forever on a stalled download
    X, y = [], []

    for label, urls in image_urls.items():
        for url in urls:
            try:
                response = requests.get(url, timeout=request_timeout)
                # Surface HTTP errors as such, instead of letting PIL fail
                # with "cannot identify image file" on an error page.
                response.raise_for_status()
                img = Image.open(BytesIO(response.content)).convert("RGB")  # Load image
                img = img.resize(IMG_SIZE)  # Resize
                # float32 halves memory compared with the float64 that plain
                # division would produce.
                X.append(np.asarray(img, dtype=np.float32) / 255.0)  # Normalize
                y.append(label)
            except Exception as e:
                # Deliberately broad: loading is best-effort per image.
                print(f"Error loading {url}: {e}")

    return np.array(X), pd.Categorical(y).codes  # Convert labels to numeric

# Step 4: Data Augmentation
def augment_data(X_train):
    """Build the image-augmentation generator used for the training batches.

    The generator applies random rotations, horizontal/vertical shifts,
    shear, zoom and horizontal flips, filling newly exposed pixels with the
    nearest existing pixel value.

    Args:
        X_train: training images, passed to the generator's ``fit`` method.

    Returns:
        The configured ``ImageDataGenerator``, after ``fit(X_train)`` has
        been called on it.
    """
    augmentation_options = dict(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode="nearest",
    )
    generator = ImageDataGenerator(**augmentation_options)

    # NOTE(review): per the Keras docs, fit() is only required for the
    # featurewise/ZCA options, none of which are enabled here -- presumably
    # this call computes no statistics; confirm before removing it.
    generator.fit(X_train)

    return generator

# Step 5: Train Model Using ResNet50 with Augmentation
def train_model(X, y, epochs=10, batch_size=32, checkpoint_path='best_airplane_model.h5'):
    """Fine-tune a classification head on a frozen ResNet50 and return the best model.

    The data is split 80/20 (fixed random seed), the training part is fed
    through the augmentation generator from ``augment_data``, and the
    held-out part is used as validation data. The checkpoint with the highest
    validation accuracy is written to ``checkpoint_path`` and reloaded at the
    end.

    Args:
        X: image array of shape ``(n, 224, 224, 3)``.
        y: integer class labels, one per image.
        epochs: number of training epochs.
        batch_size: batch size of the augmented training stream.
        checkpoint_path: file the best model is saved to and reloaded from.

    Returns:
        The Keras model loaded from ``checkpoint_path``.

    Raises:
        ValueError: if ``X`` is empty, does not have shape
            ``(n, 224, 224, 3)``, or ``y`` has a different length than ``X``.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Fail early with a clear message instead of an obscure error from deep
    # inside scikit-learn / Keras (e.g. when no image could be downloaded).
    if X.size == 0:
        raise ValueError("No images to train on: X is empty")
    if X.ndim != 4 or X.shape[1:] != (224, 224, 3):
        raise ValueError(f"X must have shape (n, 224, 224, 3), got {X.shape}")
    if len(y) != X.shape[0]:
        raise ValueError(
            f"X and y must have the same length, got {X.shape[0]} and {len(y)}"
        )

    # NOTE(review): the held-out split is used both for checkpoint selection
    # and as the only evaluation data, so there is no independent test set.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # NOTE(review): the ImageNet weights of ResNet50 are documented to expect
    # inputs processed by keras.applications.resnet50.preprocess_input, while
    # load_images in this file scales pixels to [0, 1] -- confirm whether the
    # preprocessing should be switched.
    base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

    # Freeze pretrained layers
    for layer in base_model.layers:
        layer.trainable = False

    # Add classification layers
    num_classes = len(np.unique(y))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation="relu")(x)
    x = Dense(num_classes, activation="softmax")(x)  # Output layer for classification

    # A functional Model is built on construction; no explicit build() needed.
    model = Model(inputs=base_model.input, outputs=x)

    # Compile model
    model.compile(optimizer=Adam(learning_rate=0.0001),
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

    # Perform data augmentation
    datagen = augment_data(X_train)

    # Set up ModelCheckpoint to save the best model
    checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)

    # Train model using augmented data
    model.fit(
        datagen.flow(X_train, y_train, batch_size=batch_size),
        validation_data=(X_test, y_test),
        epochs=epochs,
        callbacks=[checkpoint],
    )

    # Load and print the best model
    best_model = tf.keras.models.load_model(checkpoint_path)
    print(f"Best model saved as '{checkpoint_path}'")

    return best_model

# Run the process
# Guarded so the network download and the training only start when this file
# is executed directly (as a script or notebook cell), not when it is
# imported for its functions.
if __name__ == "__main__":
    image_urls = get_image_urls()  # Automatically fetch image URLs
    X, y = load_images(image_urls)  # Load images using Image.open()
    best_model = train_model(X, y)  # Train deep learning model with data augmentation and save best model


Exception: Error fetching images: {'message': 'Not Found', 'documentation_url': 'https://docs.github.com/rest/repos/contents#get-repository-content', 'status': '404'}