# Deep Learning Homework
Authors: Gergály Anna, Mészáros Péter

## Downloading the datasets

Insert your Kaggle username and API key below so that the datasets can be downloaded through the Kaggle API.
The first block sets the environment variables that the Kaggle API reads its credentials from.
The second block downloads and unzips the two datasets.

In [None]:
import os

# Fill in your Kaggle credentials here. Leave a value empty to keep whatever
# is already set in the environment for that variable.
kaggle_credentials = {
    'KAGGLE_USERNAME': '',
    'KAGGLE_KEY': '',
}
for variable_name, variable_value in kaggle_credentials.items():
    # Writing an empty string would overwrite credentials that are already
    # configured, so only non-empty values are exported.
    if variable_value:
        os.environ[variable_name] = variable_value

In [None]:
from kaggle.api.kaggle_api_extended import KaggleApi

# Create and authenticate the Kaggle API client.
api = KaggleApi()
api.authenticate()

# (dataset slug, extraction folder) pairs for the two datasets.
dataset_targets = (
    ('asdasdasasdas/garbage-classification', './garbage1'),
    ('mostafaabla/garbage-classification', './garbage2'),
)
for dataset_slug, target_folder in dataset_targets:
    # Download the archive into its folder and unzip it there.
    api.dataset_download_files(dataset_slug, path=target_folder, quiet=False, unzip=True)

In [1]:
# Folders that contain one sub-directory per class for each dataset.
dataset1_directory, dataset2_directory = (
    'garbage1/Garbage classification/Garbage classification/',
    'garbage2/garbage_classification/',
)

Removing the classes of the second dataset that are not present in the first dataset (battery, clothes, biological, shoes).
Merging the white-glass, brown-glass and green-glass classes into a single class named glass.

In [None]:
import shutil
import os

# Fail loudly if the second dataset has not been downloaded yet, instead of
# creating an empty folder structure below.
if not os.path.isdir(dataset2_directory):
    raise FileNotFoundError(dataset2_directory)

# Classes that are removed from the second dataset.
removable_classes=['battery', 'clothes', 'biological', 'shoes']
for label in removable_classes:
    class_directory = os.path.join(dataset2_directory, label)
    # Skip folders that are already gone so the cell can be re-run safely.
    if os.path.isdir(class_directory):
        shutil.rmtree(class_directory)

# Merge the three glass colour classes into a single 'glass' class.
merged_glass_directory = os.path.join(dataset2_directory, 'glass')
os.makedirs(merged_glass_directory, exist_ok=True)
glasses = [
    os.path.join(dataset2_directory, glass_class)
    for glass_class in ('brown-glass', 'white-glass', 'green-glass')
]
for glass_directory_name in glasses:
    # A source folder that no longer exists was already merged by an earlier run.
    if not os.path.isdir(glass_directory_name):
        continue
    for filename in os.listdir(glass_directory_name):
        shutil.move(
            os.path.join(glass_directory_name, filename),
            os.path.join(merged_glass_directory, filename),
        )
    # The folder is empty now; remove it so it is not picked up as a class.
    os.rmdir(glass_directory_name)

Importing libraries and setting parameter variables.

In [2]:
import tensorflow.keras as keras
from tensorflow.keras.preprocessing import image as image_utils
import tensorflow as tf
import numpy as np

# The six class labels used in this notebook.
# NOTE(review): image_dataset_from_directory with labels='inferred' orders the
# classes alphanumerically by folder name, which differs from the order of
# this list - confirm before using it to decode label indices.
class_names = [
    'glass',
    'paper',
    'cardboard',
    'trash',
    'metal',
    'plastic',
]

# Size every image is resized to when it is loaded.
image_size = (256, 256)

# Fraction of each dataset reserved for validation.
validation_split = 0.2

# Seed passed to the dataset loader together with validation_split.
seed = 111

# Number of images per batch.
batch_size = 32

## Reading the datasets.
Each dataset is split into a training and a validation subset in a 4:1 ratio.

In [None]:
def _load_subset(directory, subset):
    """Load one subset of an image folder as a batched dataset.

    Args:
        directory: Folder that contains one sub-directory per class.
        subset: Either 'training' or 'validation'.

    Returns:
        The dataset produced by keras.utils.image_dataset_from_directory,
        with labels inferred from the folder names and encoded as
        categorical vectors.
    """
    return keras.utils.image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='categorical',
        batch_size=batch_size,
        image_size=image_size,
        validation_split=validation_split,
        seed=seed,
        subset=subset,
    )


# The same seed and validation_split are used for both subsets of a dataset.
train_1 = _load_subset(dataset1_directory, 'training')
val_1 = _load_subset(dataset1_directory, 'validation')

train_2 = _load_subset(dataset2_directory, 'training')
val_2 = _load_subset(dataset2_directory, 'validation')

Normalizing the pixel values of the images to the [0, 1] range and concatenating the two datasets.

In [4]:
# Layer that multiplies every pixel value by 1/255.
normalization_layer = tf.keras.layers.Rescaling(1./255)


def _rescale_images(images, labels):
    """Rescale the images of a batch and pass the labels through unchanged."""
    return normalization_layer(images), labels


# Apply the rescaling to all four subsets.
train_1, val_1, train_2, val_2 = [
    subset.map(_rescale_images)
    for subset in (train_1, val_1, train_2, val_2)
]

In [5]:
# Append the second dataset's batches after the first dataset's batches,
# separately for the training and the validation subsets.
train, val = (
    train_1.concatenate(train_2),
    val_1.concatenate(val_2),
)

In [None]:
# Disabled: would gather the labels of all training batches into one array.
#y_train = np.concatenate([y for x, y in train], axis=0)
# Collect every training image into one in-memory NumPy array, starting from
# an empty array with zero images of shape 256x256x3.
# NOTE(review): np.empty without a dtype creates a float64 array, so the
# concatenated result is float64 as well - confirm this is intended.
x_train = np.empty(shape=(0,256,256,3))
for image, label in train:
    # `image` holds one batch from the dataset; `label` is not used here.
    # NOTE(review): each np.concatenate call allocates a new array and copies
    # everything gathered so far, so the total copying grows quadratically
    # with the number of batches.
    x_train = np.concatenate((x_train, image.numpy()), axis=0)