In [1]:
import tensorflow as tf
import pandas as pd
import os
import matplotlib.pyplot as plt

In [None]:
# -------- CONFIG --------
IMG_SIZE = (1600, 3200)  # (height, width) every image is resized to
BATCH_SIZE = 8  # tune this based on RAM/GPU
EPOCHS = 10

# -------- PATHS -------- (update for local paths)
# Annotation CSVs are resolved relative to the notebook's working directory.
train_csv = "train.csv"
val_csv = "valid.csv"
test_csv = "test.csv"

# Single dataset root so the location only has to be changed in one place.
# Set the USD_DATA_ROOT environment variable to point at another copy of the
# dataset without editing the notebook; the default is the original location.
DATA_ROOT = os.environ.get(
    "USD_DATA_ROOT",
    r"C:\Users\vance\Desktop\project_folder\p3\USD.v3i.coco",
)

train_dir = os.path.join(DATA_ROOT, "train")
val_dir = os.path.join(DATA_ROOT, "valid")
test_dir = os.path.join(DATA_ROOT, "test")

AUTOTUNE = tf.data.AUTOTUNE


# -------- READ DATAFRAME --------
def get_dataset_from_csv(csv_file, image_dir):
    """Read an annotation CSV and return image paths with their labels.

    Args:
        csv_file: Path to a CSV containing ``file_name`` and ``category_id``
            columns.
        image_dir: Directory that each ``file_name`` is joined onto.

    Returns:
        A ``(file_paths, labels)`` tuple of NumPy arrays in CSV row order.
    """
    annotations = pd.read_csv(csv_file)
    full_paths = annotations['file_name'].map(
        lambda name: os.path.join(image_dir, name)
    )
    return full_paths.values, annotations['category_id'].values


# -------- PREPROCESS FUNCTION --------
def process_path(file_path, label):
    """Load one image file and turn it into a model-ready tensor.

    The file is read, decoded as a 3-channel image, resized to ``IMG_SIZE``
    and divided by 255.0. The label is passed through untouched.

    Args:
        file_path: Path of the image file to read.
        label: Label associated with the image.

    Returns:
        An ``(image, label)`` pair.
    """
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, IMG_SIZE)
    return resized / 255.0, label


# -------- BUILD TF.DATA PIPELINE --------
def build_dataset(file_paths, labels, is_training=True):
    """Build a batched, prefetched ``tf.data`` pipeline of (image, label) pairs.

    Args:
        file_paths: Sequence of image file paths.
        labels: Sequence of labels aligned with ``file_paths``.
        is_training: When True, the samples are shuffled.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``BATCH_SIZE`` elements
        produced by ``process_path``.
    """
    ds = tf.data.Dataset.from_tensor_slices((file_paths, labels))
    if is_training:
        # Shuffle the (path, label) pairs *before* decoding. Shuffling after
        # the map would keep the shuffle buffer full of decoded, resized
        # images, which is far more memory than the same number of short path
        # strings. Because the elements are tiny here, the buffer can cover
        # the whole dataset, giving a uniform shuffle.
        ds = ds.shuffle(buffer_size=max(len(file_paths), 1))
    ds = ds.map(process_path, num_parallel_calls=AUTOTUNE)
    return ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)


# -------- LOAD DATASETS --------
train_files, train_labels = get_dataset_from_csv(train_csv, train_dir)
val_files, val_labels = get_dataset_from_csv(val_csv, val_dir)
test_files, test_labels = get_dataset_from_csv(test_csv, test_dir)

# The labels are used directly as sparse class indices, so the output layer
# needs one unit for every index up to the largest label seen in any split.
# Counting distinct training labels is only correct when category_id values
# are exactly 0..n-1; ids that start at 1 or have gaps would otherwise fall
# outside the softmax range. For 0..n-1 labels this yields the same value.
num_classes = int(max(train_labels.max(), val_labels.max(), test_labels.max())) + 1

train_ds = build_dataset(train_files, train_labels, is_training=True)
val_ds = build_dataset(val_files, val_labels, is_training=False)
test_ds = build_dataset(test_files, test_labels, is_training=False)


In [3]:
# Report how many samples each split contains.
for split_name, split_files in (
    ("Training", train_files),
    ("Validation", val_files),
    ("Test", test_files),
):
    print(f"{split_name} samples: {len(split_files)}")

Training samples: 1535
Validation samples: 435
Test samples: 204


In [None]:
# Pull a single batch from the training, validation and test datasets (in
# that order) and print the image tensor shape and the labels.
# The shape should be (BATCH_SIZE, IMG_SIZE[0], IMG_SIZE[1], 3) and the labels
# a vector of up to BATCH_SIZE entries.
for split_ds in (train_ds, val_ds, test_ds):
    for images, labels in split_ds.take(1):  # take(1) yields just one batch
        print(f"Batch shape: {images.shape}")
        print(f"Labels: {labels.numpy()}")


In [None]:
def visualize_samples(dataset, num_samples=5):
    """Display up to ``num_samples`` images from the first batch of ``dataset``.

    Args:
        dataset: Batched dataset yielding ``(images, labels)`` pairs.
        num_samples: Maximum number of images to draw side by side.
    """
    plt.figure(figsize=(10, 10))
    for images, labels in dataset.take(1):  # Only take the first batch
        # Bound the loop by the real batch length rather than the global
        # BATCH_SIZE: a batch can be shorter than BATCH_SIZE (small dataset,
        # or BATCH_SIZE changed after the dataset was built), and indexing
        # past its end would raise.
        shown = min(num_samples, int(images.shape[0]))
        for j in range(shown):
            plt.subplot(1, shown, j + 1)
            plt.imshow(images[j])  # Display the image
            plt.title(f"Label: {labels[j].numpy()}")
            plt.axis("off")
    plt.show()

# Visualize some images from the training, validation and test sets, in that order
for split_ds in (train_ds, val_ds, test_ds):
    visualize_samples(split_ds)


In [None]:
# Inspect the tensor shapes of one training batch.
# Images should be (BATCH_SIZE, 1600, 3200, 3) and labels (BATCH_SIZE,).
first_train_batch = train_ds.take(1)
for images, labels in first_train_batch:
    print(f"Image batch shape: {images.shape}")
    print(f"Labels batch shape: {labels.shape}")


In [None]:
# -------- BUILD MODEL -------- (change the layers!)
# Inputs are the train_ds / val_ds / test_ds pipelines built above; images
# arrive already resized to IMG_SIZE and scaled by process_path.

# Stacked convolution + subsampling (max-pooling) stages, followed by a
# global average pool and a small fully connected classifier head.
model = tf.keras.Sequential([
    # Explicit input layer instead of passing input_shape to the first Conv2D.
    tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    # Final fully connected layer: one softmax unit per class index.
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Labels are integer class indices, hence the sparse cross-entropy loss.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()


# -------- TRAIN --------
# Keep the returned History object so the per-epoch metrics remain available
# for inspection or plotting after training finishes.
history = model.fit(train_ds,
                    validation_data=val_ds,
                    epochs=EPOCHS)

# -------- EVALUATE --------
loss, acc = model.evaluate(test_ds)
print(f"\n Test accuracy: {acc:.4f}")

In [None]:
# lighter-weight, alternative model

# Opt-in switch for the faster experiment. It was previously referenced
# without ever being defined, so this cell failed with a NameError.
# Set it to True to retrain with smaller images and a smaller network;
# leave it False to keep the full-resolution model trained above.
use_fast_model = False

if use_fast_model:
    IMG_SIZE = (400, 800)
    BATCH_SIZE = 32

    # The existing datasets were built with the previous IMG_SIZE and
    # BATCH_SIZE, so rebuild them to make the new settings take effect.
    train_ds = build_dataset(train_files, train_labels, is_training=True)
    val_ds = build_dataset(val_files, val_labels, is_training=False)
    test_ds = build_dataset(test_files, test_labels, is_training=False)

    model = tf.keras.Sequential([
        # No Rescaling layer here: process_path already divides the pixels by
        # 255, so rescaling again would squash the inputs towards zero.
        tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),

        tf.keras.layers.Conv2D(8, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(),

        tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(),

        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.3),

        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])

    # Same compile / train / evaluate steps as the full-size model.
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    model.fit(train_ds,
              validation_data=val_ds,
              epochs=EPOCHS)

    loss, acc = model.evaluate(test_ds)
    print(f"\n Test accuracy: {acc:.4f}")

In [None]:
# `test_eval` was never assigned anywhere in the notebook. Compute it here
# from the current model; with the compiled loss and the single 'accuracy'
# metric, evaluate() returns [loss, accuracy].
test_eval = model.evaluate(test_ds, verbose=0)
print('Test loss:', test_eval[0])
print('Test accuracy:', test_eval[1])


In [None]:
# Plot the per-epoch training and validation curves of the most recent fit().
# Keras attaches the History callback to the model during fit(), so the
# metrics are read from `model.history` rather than from a separately named
# variable (the previous `fashion_train` was never defined in this notebook).
history = model.history.history

# The model is compiled with metrics=['accuracy'], which is logged under
# 'accuracy' / 'val_accuracy'; fall back to the older 'acc' / 'val_acc' keys
# when those are the ones present.
acc_key = 'accuracy' if 'accuracy' in history else 'acc'
train_accuracy = history[acc_key]
val_accuracy = history['val_' + acc_key]
# Named train_loss (not loss) so the test loss from evaluate() is not clobbered.
train_loss = history['loss']
val_loss = history['val_loss']
epochs = range(len(train_accuracy))

plt.plot(epochs, train_accuracy, 'bo', label='Training accuracy')
plt.plot(epochs, val_accuracy, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.figure()
plt.plot(epochs, train_loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
