# W281 Final Project: Intel Image Classification Model #

In [15]:
import numpy as np
import pandas as pd
import os
import warnings
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from collections import Counter

from PIL import Image
from tqdm import tqdm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout,BatchNormalization,MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix

In [2]:
!pwd

/Users/zdbrown13/w281/w281_Final_Project_Brown_Benzoni_Olaya


In [3]:
def load_images(directory, num_files=5):
    images = []
    file_list = os.listdir(directory)[:num_files]  # Load only the first 5 files

    for filename in tqdm(file_list, desc=f"Loading images from {directory}"):
        img_path = os.path.join(directory, filename)
        img = Image.open(img_path)
        img = img.resize((150, 150))  # Resize image to 150 x 150
        images.append(img)

    return images

In [4]:
# Image Paths

buildings_train = './seg_train/buildings'
forest_train = './seg_train/forest'
glacier_train = './seg_train/glacier'
mountain_train = './seg_train/mountain'
sea_train = './seg_train/sea'
street_train = './seg_train/street'

buildings_test = './seg_test/buildings'
forest_test = './seg_test/forest'
glacier_test = './seg_test/glacier'
mountain_test = './seg_test/mountain'
sea_test = './seg_test/sea'
street_test = './seg_test/street'

In [5]:
# Load images from each category
buildings_img = load_images(buildings_train)
forest_img = load_images(forest_train)
glacier_img = load_images(glacier_train)
mountain_img = load_images(mountain_train)
sea_img = load_images(sea_train)
street_img = load_images(street_train)

Loading images from ./seg_train/buildings: 100%|██████████| 5/5 [00:00<00:00, 325.65it/s]
Loading images from ./seg_train/forest: 100%|██████████| 5/5 [00:00<00:00, 745.04it/s]
Loading images from ./seg_train/glacier: 100%|██████████| 5/5 [00:00<00:00, 731.86it/s]
Loading images from ./seg_train/mountain: 100%|██████████| 5/5 [00:00<00:00, 935.60it/s]
Loading images from ./seg_train/sea: 100%|██████████| 5/5 [00:00<00:00, 790.63it/s]
Loading images from ./seg_train/street: 100%|██████████| 5/5 [00:00<00:00, 712.66it/s]


In [6]:
buildings_img

[<PIL.Image.Image image mode=RGB size=150x150>,
 <PIL.Image.Image image mode=RGB size=150x150>,
 <PIL.Image.Image image mode=RGB size=150x150>,
 <PIL.Image.Image image mode=RGB size=150x150>,
 <PIL.Image.Image image mode=RGB size=150x150>]

## Load Data ##

In [9]:
def load_data(datasets):
    
    output = []

    for dataset in datasets:
        images, labels = [], []
        print(f"Loading {dataset}...")
        i = 0
        for folder in os.listdir(dataset):
            label = i # Converting word labels to int (i.e. buildings = 0)
            i = i+1
            folder_path = os.path.join(dataset, folder)

            for file in tqdm(os.listdir(folder_path), desc=f"Processing {folder}"):
                img_path = os.path.join(folder_path, file)

                image = Image.open(img_path).resize((150, 150))
                images.append(image)
                labels.append(label)
                
        images = np.stack(images)
        labels = np.array(labels, dtype='int32')
        output.append((images, labels))
        
    return output

In [10]:
datasets = ['/Users/zdbrown13/w281/w281_Final_Project_Brown_Benzoni_Olaya/seg_train', 
        '/Users/zdbrown13/w281/w281_Final_Project_Brown_Benzoni_Olaya/seg_test']

(train_images, train_labels), (test_images, test_labels) = load_data(datasets)

Loading /Users/zdbrown13/w281/w281_Final_Project_Brown_Benzoni_Olaya/seg_train...


Processing forest: 100%|██████████| 2271/2271 [00:01<00:00, 1179.29it/s]
Processing buildings: 100%|██████████| 2191/2191 [00:01<00:00, 1359.23it/s]
Processing glacier: 100%|██████████| 2404/2404 [00:01<00:00, 1372.44it/s]
Processing street: 100%|██████████| 2382/2382 [00:01<00:00, 1329.97it/s]
Processing mountain: 100%|██████████| 2512/2512 [00:01<00:00, 1470.92it/s]
Processing sea: 100%|██████████| 2274/2274 [00:01<00:00, 1416.80it/s]


Loading /Users/zdbrown13/w281/w281_Final_Project_Brown_Benzoni_Olaya/seg_test...


Processing forest: 100%|██████████| 474/474 [00:00<00:00, 1039.66it/s]
Processing buildings: 100%|██████████| 437/437 [00:00<00:00, 1304.24it/s]
Processing glacier: 100%|██████████| 553/553 [00:00<00:00, 1336.87it/s]
Processing street: 100%|██████████| 501/501 [00:00<00:00, 1220.35it/s]
Processing mountain: 100%|██████████| 525/525 [00:00<00:00, 1446.69it/s]
Processing sea: 100%|██████████| 510/510 [00:00<00:00, 1457.25it/s]


In [11]:
# Normalization
train_images = train_images / 255.0 
test_images = test_images / 255.0

In [30]:
# Training Data Distribution
unique, counts = np.unique(train_labels, return_counts=True)
print(dict(zip(unique, counts)))

{0: 2271, 1: 2191, 2: 2404, 3: 2382, 4: 2512, 5: 2274}


In [31]:
avg = sum(counts)/len(unique)
print(avg)

2339.0


In [32]:
# Test Data Distribution
test_unique, test_counts = np.unique(test_labels, return_counts=True)
print(dict(zip(test_unique, test_counts)))

{0: 474, 1: 437, 2: 553, 3: 501, 4: 525, 5: 510}


In [33]:
test_avg = sum(test_counts)/len(test_unique)
print(test_avg)

500.0
