### Imports


In [2]:
import numpy as np
from PIL import Image
import glob
import os
import pickle


In [3]:
# Number of classes; load_photos reads one 'A<number>' sub-directory per class
# and uses that number (0 .. NUMBER_OF_CLASSES - 1) as the label.
NUMBER_OF_CLASSES = 10


## Load data from JPG files into NumPy arrays


In [4]:
def load_photos(
    set: str,
    data_dir: str = '/home/matined/Desktop/Sign-Language-Digits-Recognition/data',
) -> tuple:
    """
    Loads photos from train, test or valid set.

    Every ``*.JPG`` file found in ``<data_dir>/<set>/A<number>/`` (for
    ``number`` in ``0 .. NUMBER_OF_CLASSES - 1``) is opened, resized to
    100x100 pixels and labelled with ``number``. The examples are then
    shuffled with ``np.random.permutation`` (global NumPy RNG).

    Parameters
    ----------
    set : string
        A set that you want to load from: 'train', 'test' or 'valid'

    data_dir : string, optional
        Root directory that contains the ``train``/``test``/``valid``
        sub-directories. Defaults to the original project location.

    Returns
    -------
    X : np.ndarray (number_of_photos, 100, 100, channels)
        Resized photos. The pixel data is NOT flattened; for RGB JPGs
        ``channels`` is 3 (i.e. 30000 values per photo).

    y : np.ndarray (number_of_photos, )
        "True" labels, aligned with ``X``
    """
    photos = []  # resized photos as arrays
    labels = []  # class index of each photo, kept in step with `photos`

    for number in range(NUMBER_OF_CLASSES):
        pattern = os.path.join(data_dir, set, f'A{number}', '*.JPG')
        # glob order is filesystem dependent; sorting makes the pre-shuffle
        # order (and therefore a seeded shuffle) reproducible.
        for filename in sorted(glob.glob(pattern)):
            # Context manager closes the underlying file handle right away
            # instead of leaking one descriptor per photo.
            with Image.open(filename) as image:
                photos.append(np.array(image.resize((100, 100))))
            labels.append(number)

    X = np.array(photos)
    # Explicit dtype keeps labels integral even when no photo was found.
    y = np.array(labels, dtype=int)

    # shuffle examples (same permutation for photos and labels)
    shuffler = np.random.permutation(X.shape[0])
    X = X[shuffler]
    y = y[shuffler]

    return X, y
    

### Save all preprocessed data into data dictionary for later usage

In [5]:
# Read every split from disk: training first, then test, then validation.
X_train, y_train = load_photos('train')
X_test, y_test = load_photos('test')
X_valid, y_valid = load_photos('valid')


In [6]:
# Bundle all splits under one dictionary so they can be stored together.
data = dict(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    X_valid=X_valid,
    y_valid=y_valid,
)


### Dump data into file

In [8]:
# Write through a context manager so the file is flushed and closed even if
# pickling raises; the path is resolved against the current working directory.
with open(os.path.abspath('data/data.pkl'), 'wb') as data_file:
    pickle.dump(data, data_file)


### Function for loading the data

In [9]:
def get_data(path: str = '../data/data.pkl') -> dict:
    """
    Loads a dictionary of data.

    Parameters
    ----------
    path : string, optional
        Location of the pickled dictionary. A relative path is resolved
        against the current working directory.

    Returns
    -------
    data : dict
        Dictionary containing:
            X_train, y_train, X_test, y_test, X_valid, y_valid
    """
    # NOTE: unpickling executes arbitrary code embedded in the file, so only
    # load pickle files that come from a trusted source.
    # The context manager closes the file handle instead of leaking it.
    with open(os.path.abspath(path), 'rb') as data_file:
        data = pickle.load(data_file)
    return data
    