In [2]:
# Mount Google Drive at /content/drive (requires the Colab runtime, which provides google.colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [92]:
!pip install opencv-python

import os
import cv2

import pandas as pd
from PIL import Image
from typing import Any, Tuple, Optional, Callable
from torch.utils.data import Dataset

def read_csv(path: str) -> pd.DataFrame:
    '''
    Load a csv file into a dataframe.

    Args:
        path (str): Location of the csv file. It must exist and carry a '.csv' extension.

    Returns:
        pd.DataFrame: The parsed contents of the csv file.

    Raises:
        AssertionError: If the path does not exist or its extension is not '.csv'.
    '''
    # The existence check comes first so that a missing file is reported as such.
    file_exists = os.path.exists(path)
    assert file_exists, f'CSV file not found: {path}!'

    extension = os.path.splitext(path)[-1]
    assert extension == '.csv', f'Unsupported file type {os.path.splitext(path)[-1]}!'

    frame = pd.read_csv(path)
    return frame

class ImageDataset(Dataset):
    def __init__(self, dataframe: pd.DataFrame, images_folder: str = '/content/drive/My Drive/images1000', transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None:
        '''
        Image dataset that eagerly loads every image into memory.

        Args:
            dataframe (pd.DataFrame): Dataframe with the image filenames ('Filename' column) and,
                optionally, labels ('Label' column). Without a 'Label' column every target is -1.
            images_folder (str): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on a sample.
            target_transform (callable, optional): Optional transform to be applied on a target.

        Raises:
            AssertionError: If the 'Filename' column is missing or the image folder does not exist.
            OSError: If an image listed in the dataframe cannot be read by OpenCV.
        '''
        assert 'Filename' in dataframe.columns, 'Filename column not found!'
        assert os.path.exists(images_folder), f'Image folder not found: {images_folder}!'

        self.dataframe = dataframe
        self.images_folder = images_folder
        self.transform = transform
        self.target_transform = target_transform

        # Label presence is a property of the whole frame, so check it once
        # instead of once per row.
        has_labels = 'Label' in dataframe.columns
        filenames = dataframe['Filename']
        labels = dataframe['Label'] if has_labels else [-1] * len(dataframe)

        data = []
        targets = []

        for filename, label in zip(filenames, labels):
            image_path = os.path.join(images_folder, filename)
            image = cv2.imread(image_path)

            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than later in __getitem__.
            if image is None:
                raise OSError(f'Image could not be read: {image_path}!')

            # OpenCV decodes to BGR channel order, while Image.fromarray in
            # __getitem__ interprets a 3-channel array as RGB.
            data.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            targets.append(int(label))

        self.data = data
        self.targets = targets

    def __len__(self) -> int:
        '''
        Returns:
            int: Length of the dataset.
        '''
        return len(self.data)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        '''
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is class_index of the target class. For the public test set, target is a class from [0, 1, 2, 3, 4, 5, 6, 7, 8]. For the private test set (before releasing the test set labels), target is -1.
        '''
        img = self.data[index]
        target = self.targets[index]

        # Hand transforms a PIL image, built from the stored RGB array.
        img = Image.fromarray(img)

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target



In [93]:
# Load the public split and build the in-memory dataset from it.
public_dataframe = read_csv('assignment_7_public.csv')
public_dataset = ImageDataset(public_dataframe)

# Fetch the first sample once and reuse it for both sanity prints.
first_image, first_target = public_dataset[0]

print('Image', type(first_image), first_image.size)  # e.g. Image <class 'PIL.Image.Image'> (28, 28)
print('Target', type(first_target))  # e.g. Target <class 'int'>
print('Length', len(public_dataset))  # the original template comment lists 85744; the value depends on the CSV loaded

# Template placeholder; as the last expression of the cell it is echoed as the cell output.
'''
CODE HERE!
'''


Image <class 'PIL.Image.Image'> (28, 28)
Target <class 'int'>
Length 1001


'\nCODE HERE!\n'

In [96]:
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np

# Seed Python, NumPy and TensorFlow so the split, weight initialisation,
# dropout and batch shuffling are repeatable across runs.
SEED = 42
keras.utils.set_random_seed(SEED)

# Labels come from the 'Label' column of public_dataframe, which is the same
# frame (and therefore the same row order) that public_dataset was built from.
labels = np.array(public_dataframe['Label'])

# Extract the image data from the ImageDataset as one stacked array.
images = np.asarray([np.array(sample[0]) for sample in public_dataset])
assert len(images) == len(labels), 'Number of images and labels differ!'

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=SEED)

# Normalize the input data (assuming it is in the range of 0-255)
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Take the per-sample shape from the data instead of hard-coding (28, 28, 3).
input_shape = X_train.shape[1:]

# Targets are class indices 0..8 (see ImageDataset.__getitem__), i.e. 9 classes.
num_classes = 9

# Define the model architecture
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes, activation="softmax"),
    ]
)

# Integer class labels -> sparse categorical cross-entropy.
model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

batch_size = 128
epochs = 100
# Train the model. The held-out split is passed as validation data for
# per-epoch monitoring only; no callback uses it to select a model.
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))

# Evaluate the model on the test data
_, accuracy = model.evaluate(X_test, y_test)

# Print the accuracy
print("Accuracy:", accuracy)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [97]:
import numpy as np

# Load the private split; it has no labels, so every dataset target is -1.
private_dataframe = read_csv('assignment_7_private.csv')
private_dataset = ImageDataset(private_dataframe)

print('Image', type(private_dataset[0][0]))  # e.g. Image <class 'PIL.Image.Image'>
print('Length', len(private_dataset))  # the original template comment lists 21436; the value depends on the CSV loaded

# Convert images to numpy arrays and normalize exactly like the training data.
predict_images = np.asarray([np.array(sample[0]) for sample in private_dataset])
predict_images = predict_images.astype('float32') / 255.0

# The softmax head returns one probability per class for every image.
predicted_probs = model.predict(predict_images)

# The predicted label is the index of the most probable class.
preds = np.argmax(predicted_probs, axis=1)
assert len(preds) == len(private_dataset), 'Expected one prediction per private sample!'

# Write the submission with the row position as the 'Id' column.
submission = pd.DataFrame({'Label': preds})
submission.to_csv('assignment_7.csv', index=True, index_label='Id')

Image <class 'PIL.Image.Image'>
Length 200


In [98]:
# Assuming 'public_dataset' is the ImageDataset object.
# Inspect the stored images and targets directly: indexing the dataset runs
# Image.fromarray on the stored image first, which cannot convert None, so a
# missing image would raise there instead of ever showing up in a sample.
contains_none = any(image is None for image in public_dataset.data) or any(
    target is None for target in public_dataset.targets
)

if contains_none:
    print("The ImageDataset contains None values.")
else:
    print("The ImageDataset does not contain None values.")

The ImageDataset does not contain None values.
