## MNIST DATASET PROJECT

### Import libraries and packages

In [2]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

### Data

In [3]:
# Load MNIST through TensorFlow Datasets.
#   with_info=True     -> also return the dataset metadata object (mnist_info)
#   as_supervised=True -> each element is an (input, target) pair
mnist_data, mnist_info = tfds.load(
    name="mnist",
    with_info=True,
    as_supervised=True,
)

### Train-Test Split

In [4]:
# Pull the two predefined splits out of the loaded dataset mapping.
mnist_train = mnist_data["train"]
mnist_test = mnist_data["test"]

### Validation Data
The MNIST dataset does not ship with a validation set: it has 60,000 training examples and 10,000 test examples. The training set is large enough that we can hold out part of it (10%) as a validation set.

In [5]:
# Validation set size: 10% of the training split. The product is a float,
# so it is cast to an integer tensor before being used as an element count.
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits["train"].num_examples,
    tf.int64,
)

# Test set size, cast to the same integer type.
num_test_samples = tf.cast(mnist_info.splits["test"].num_examples, tf.int64)

### Data Scaling

In [6]:
def scale(image, label):
    """Convert an image to float32 and divide it by 255; pass the label through.

    MNIST images are 28x28 pixels with intensities from 0 (black) to
    255 (white), so the division maps them into the range [0, 1].

    Args:
        image: image tensor from the dataset.
        label: target associated with the image; returned unchanged.

    Returns:
        A ``(scaled_image, label)`` tuple.
    """
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label


# Rescale every (image, label) pair of the training split; the validation
# set is carved out of this dataset later.
scaled_train_and_val_data = mnist_train.map(scale)

In [7]:
# The test split gets the same pixel rescaling as the training data.
test_data = mnist_test.map(map_func=scale)

### Shuffling Data

In [8]:
# Number of elements held in the shuffle buffer. It is smaller than the
# training split, so the shuffle is approximate rather than fully uniform.
BUFFER_SIZE = 10000

# Shuffle once and keep that order fixed for every later pass over the data.
# By default tf.data reshuffles on each iteration; the take()/skip() split
# applied to this dataset afterwards would then select different elements on
# every epoch, letting validation examples leak into the training set.
shuffled_train_and_val_data = scaled_train_and_val_data.shuffle(
    BUFFER_SIZE,
    reshuffle_each_iteration=False,
)

### Validation and train data

In [9]:
# The first `num_validation_samples` elements form the validation set ...
validation_data = shuffled_train_and_val_data.take(count=num_validation_samples)
# ... and everything after them is kept for training.
train_data = shuffled_train_and_val_data.skip(count=num_validation_samples)

### Batching Data

In [11]:
BATCH_SIZE = 100

# Training data is consumed in mini-batches of BATCH_SIZE elements
# (the name `train_data` is deliberately rebound to the batched dataset).
train_data = train_data.batch(batch_size=BATCH_SIZE)

# Validation and test data are not split into mini-batches, but they still
# need the leading batch dimension that the training data has. Each one is
# therefore packed into a single batch containing the whole set.
validation_data = validation_data.batch(batch_size=num_validation_samples)
test_data = test_data.batch(batch_size=num_test_samples)

In [14]:
# The validation dataset holds a single batch with every validation example.
# Fetch that batch and unpack it into its inputs and targets.
first_validation_batch = next(iter(validation_data))
validation_inputs, validation_targets = first_validation_batch