# Project Description

## Scene Classification

> ### The Goal


  The goal is to classify each scene and predict what it is

> ### Dataset info

This dataset contains about 25k images from a wide range of natural scenes from all around the world. The task is to identify which kind of scene each image can be categorized into.


### *There are 6 possible labels*

* Buildings
* Forests
* Mountains
* Glacier
* Street
* Sea



> Dataset Source

Scene Classification | Kaggle - https://www.kaggle.com/nitishabharathi/scene-classification


# Importing tools

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import os
import sklearn
from sklearn.model_selection import  train_test_split

In [None]:
arr = pd.read_csv('../input/scene-classification/train-scene classification/train.csv')
arr

In [None]:
# Create filepaths
filepaths = ['../input/scene-classification/train-scene classification/train/' + name for name in arr['image_name']]
filepaths[:10]

In [None]:
# Instances per each label
arr['label'].value_counts()

In [None]:
# Turn the label column into a NumPy array
labels = arr['label'].to_numpy()

# Extract just the unique labels (np.unique returns them in sorted order)
unique_labels = np.unique(labels)

In [None]:
# Turn each label into a boolean array: comparing a label against
# unique_labels yields True only at the position of the matching class
boolean_labels = [label == unique_labels for label in labels]
# Inspect the encoding of a single sample
boolean_labels[17033]

In [None]:
np.argmax([False, False,  True, False, False, False])

In [None]:
# Number of images used for the quick experimentation run
NUM_IMAGES = 1000

In [None]:
# Name the inputs (image file paths) and targets (boolean label arrays)
X = filepaths
y = boolean_labels

In [None]:
# Split the experimentation data (the first NUM_IMAGES samples) into training
# and validation sets; test_size=0.2 holds out 20% for validation and
# random_state makes the split reproducible
X_train, X_val, y_train, y_val = train_test_split(X[:NUM_IMAGES],
                                                  y[:NUM_IMAGES],
                                                  test_size=0.2,
                                                  random_state=42)

In [None]:
X_train[:10], y_train[:10]

# Preprocessing Images (Turning them into Tensors)

The steps that we have to follow to preprocess images



1.   Take an image filepath
2.   Use TensorFlow to read the file and save it to a variable
3.   Turn the images into Tensors
4.   Normalize the image
5.   Resize the image
6.   Return modified image



In [None]:
# Image Size
IMG_SIZE = 224

# processing images
def process_images(image_path, img_size=IMG_SIZE):
  """
  Reads a JPEG image from disk and turns it into a preprocessed Tensor.

  Args:
    image_path: path of the image file to read.
    img_size: height and width (in pixels) the image is resized to.
      Defaults to IMG_SIZE.

  Returns:
    A float32 Tensor of shape (img_size, img_size, 3).
  """
  # Read the raw bytes of the image file
  image = tf.io.read_file(image_path)
  # Decode the JPEG into 3 color channels (RGB)
  image = tf.image.decode_jpeg(image, channels=3)
  # Convert color values from 0-255 integers to 0-1 floats
  image = tf.image.convert_image_dtype(image, tf.float32)
  # Resize to the requested size (honours the img_size argument rather than
  # always using the module-level IMG_SIZE)
  image = tf.image.resize(image, size=[img_size, img_size])

  return image

In [None]:
# Simple function that returns a tuple of (image, label)
def get_image_label(image_path, label):
  """
  Preprocesses the image stored at `image_path` with process_images and
  pairs the result with its `label`, returning the tuple (image, label).
  The label is passed through unchanged.
  """
  return process_images(image_path), label

In [None]:
# Demo
(process_images(X[10]), tf.constant(y[10]))

# Turning the data into Batches


In [None]:
# Batch size
BATCH_SIZE = 32

# Turn data into batches
def create_data_batches(X, y=None, batch_size=BATCH_SIZE, valid_data=False, test_data=False):
  """
  Create batches of data out of image (X) and label (y) pairs.

  Training data is shuffled before the images are loaded; validation data is
  not shuffled. Test data has no labels, so only the images are batched.

  Args:
    X: sequence of image file paths.
    y: sequence of labels matching X. Required unless test_data is True.
    batch_size: number of samples per batch. Defaults to BATCH_SIZE.
    valid_data: set to True for validation data (no shuffling).
    test_data: set to True for unlabelled test data; takes precedence over
      valid_data.

  Returns:
    A batched tf.data.Dataset yielding images (test data) or
    (image, label) tuples (training and validation data).

  Raises:
    ValueError: if labels are missing for training or validation data.
  """
  # Test data has no labels: build the dataset from the file paths only
  if test_data:
    print('Creating test data batches.........')
    data = tf.data.Dataset.from_tensor_slices(tf.constant(X))
    return data.map(process_images).batch(batch_size)

  # Fail with a clear message instead of an obscure tensor conversion error
  if y is None:
    raise ValueError('Labels (y) are required for training and validation data.')

  if valid_data:
    print('Creating validation data batches......')
  else:
    print('Creating training data batches.......')

  # Turn file paths and labels into a dataset of (filepath, label) pairs
  data = tf.data.Dataset.from_tensor_slices((tf.constant(X),   # filepaths
                                             tf.constant(y)))  # labels

  # Only training data is shuffled; shuffling the path names before mapping
  # is cheaper than shuffling the decoded images
  if not valid_data:
    data = data.shuffle(buffer_size=len(X))

  # Load each image and group the (image, label) tuples into batches, using
  # the batch_size argument rather than always the module-level BATCH_SIZE
  return data.map(get_image_label).batch(batch_size)

In [None]:
# Create a training and validation batches
train_data = create_data_batches(X_train, y_train)
val_data = create_data_batches(X_val, y_val, valid_data=True)

In [None]:
train_data.element_spec, val_data.element_spec

# Build Model

In [None]:
# Input shape passed to model.build()
INPUT_SHAPE = [None, IMG_SIZE, IMG_SIZE, 3] # batch, height, width, color channels

# Number of output units: one per unique label
OUTPUT_SHAPE = len(unique_labels)

# TensorFlow Hub URL of the pretrained model used as the first layer
MODEL_URL = "https://tfhub.dev/google/imagenet/mobilenet_v1_100_224/classification/4"

**Model**

In [None]:
# A function which builds a Keras model
def create_model(input_shape=INPUT_SHAPE, output_shape=OUTPUT_SHAPE, model_url=MODEL_URL):
  """
  Builds and compiles a Keras Sequential model on top of a TensorFlow Hub layer.

  Args:
    input_shape: shape the model is built with (batch, height, width,
      color channels). Defaults to INPUT_SHAPE.
    output_shape: number of units in the softmax output layer.
      Defaults to OUTPUT_SHAPE.
    model_url: TensorFlow Hub URL of the model used as the first layer.
      Defaults to MODEL_URL.

  Returns:
    The compiled and built tf.keras model.
  """
  # Report the URL actually used (the argument, not the module-level constant)
  print('Building model with:', model_url)

  # Setup the model layers
  model = tf.keras.Sequential([
      hub.KerasLayer(model_url),  # Layer 1 (input layer)
      tf.keras.layers.Dense(units=output_shape,
                            activation='softmax')  # Layer 2 (output layer)
  ])

  # Compile the model
  model.compile(
      loss=tf.keras.losses.CategoricalCrossentropy(),
      optimizer=tf.keras.optimizers.Adam(),
      metrics=['accuracy']
  )

  # Build the model with the requested input shape
  model.build(input_shape)

  # Return the built model
  return model


In [None]:
model = create_model()
model.summary()

## Creating Callbacks

Callbacks are helper functions that assist a model during training: they can save its progress, check its progress, or stop training early if it stops improving.

We will create one for TensorBoard to visualize the progress of the model after it finishes training, and another for early stopping, which helps prevent our model from training for too long.


## Early Stopping Callback

In [None]:
# Early stopping callback: watches the validation accuracy and stops training
# once it has not improved for `patience` (3) epochs
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',
                                                  patience=3)

In [None]:
NUM_EPOCHS = 100

## Train Model

Let's create a function which:



*   Create a model using `create_model()`
*   Setup a TensorBoard callback using
*   Fit the model to the training set and validation set, with the number of epochs to train and the callbacks
*   Return the model



In [None]:
def train_model():
  """
  Builds a new model with create_model(), fits it and returns it.

  The model is fitted on the module-level train_data for up to NUM_EPOCHS
  epochs, validated on val_data after every epoch, with the early_stopping
  callback attached.
  """
  # Start from a freshly built model
  fitted_model = create_model()

  # Collect the fit settings, then run the training
  fit_settings = dict(
      x=train_data,
      epochs=NUM_EPOCHS,
      validation_data=val_data,
      validation_freq=1,
      callbacks=[early_stopping],
  )
  fitted_model.fit(**fit_settings)

  return fitted_model

In [None]:
# Train the model on the experimentation subset (first NUM_IMAGES = 1000 images)
model = train_model()

In [None]:
# Making prediction on the validation data
predictions = model.predict(val_data, verbose=1)
predictions 

## Unbatchifying the validation data

In [None]:
# The labels were not in the right order, so I had to reorder them
# NOTE(review): classes[i] is looked up with the argmax of the boolean label
# arrays, so it must name the class at unique_labels[i] - verify this order
# against the dataset's label encoding
classes = ['Buildings', 'Forests','Glacier','Mountains','Sea','Street']  

In [None]:
# Unbatching a batch dataset
def unbatchify(data):
  """
  Splits a batched dataset of (image, label) Tensors into two lists:
  the images and the class names of their labels.

  Each label array is turned into a class name by taking the position of
  its largest value and looking that position up in `classes`.
  """
  # Walk the dataset one sample at a time and resolve each label to a name
  samples = [
      (sample_image, classes[np.argmax(sample_label)])
      for sample_image, sample_label in data.unbatch().as_numpy_iterator()
  ]

  images = [sample_image for sample_image, _ in samples]
  labels = [class_name for _, class_name in samples]
  return images, labels

# Unbatchify the validation data
val_images, val_labels = unbatchify(val_data)
val_images[2], val_labels[2]

In [None]:
classes[np.argmax(predictions[2])]

In [None]:
# A function that gets the predicted label
def get_pred_label(prediction_probabilities):
  """
  Returns the class name for an array of prediction probabilities: the
  entry of `classes` at the position of the highest probability.
  """
  best_index = np.argmax(prediction_probabilities)
  return classes[best_index]

# Get a predicted label based on an array of prediction probabilites
pred_label = get_pred_label(predictions[20])
pred_label

## Visualizing the predictions

In [None]:
def plot_pred(prediction_probabilities, labels, images, n=1):
  """
  Shows the image of sample n with its predicted label, the highest
  prediction probability (as a percentage) and the true label in the title.
  The title is green for a correct prediction and red otherwise.
  """
  # Pick out everything belonging to sample n
  pred_prob = prediction_probabilities[n]
  true_label = labels[n]
  image = images[n]

  # Resolve the probabilities to a class name
  pred_label = get_pred_label(pred_prob)

  # Draw the image without axis ticks
  plt.imshow(image)
  plt.xticks([])
  plt.yticks([])

  # Green title for a correct prediction, red for a wrong one
  title_color = "green" if pred_label == true_label else "red"
  confidence = np.max(pred_prob)*100
  title_text = "It think it's a {} {:2.0f}% true value: {}".format(pred_label,
                                                                   confidence,
                                                                   true_label)
  plt.title(title_text, color=title_color)

In [None]:
plot_pred(prediction_probabilities=predictions,
          labels=val_labels,
          images=val_images,
          n=36)

In [None]:
predictions[36]

# Train the full model

In [None]:
# Split the full data into training and validation sets (20% held out for
# validation; random_state makes the split reproducible)
X_train, X_val, y_train, y_val = train_test_split(X,
                                                  y,
                                                  test_size=0.2,
                                                  random_state=42)

In [None]:
# Creating batches of data
train_data = create_data_batches(X=X_train, y=y_train)
# Flag the validation data so it is not shuffled: without valid_data=True it
# is built as shuffled training data, and predictions made on it would not
# line up with labels read from it in a separate pass
val_data = create_data_batches(X=X_val, y=y_val, valid_data=True)

In [None]:
# Training on the full model
model = train_model()

In [None]:
test_set = pd.read_csv('../input/scene-classification/test_WyRytb0.csv')

In [None]:
test_set

In [None]:
# Create filepaths for the test images
# NOTE(review): the test images are read from the train/ directory - confirm
# that this is where the dataset stores them
test_filepaths = ['../input/scene-classification/train-scene classification/train/' + name for name in test_set['image_name']]
test_filepaths[:5]

In [None]:
# Turn the test file paths into batches (no labels); note that this rebinds
# test_set from the DataFrame read above to the batched dataset
test_set = create_data_batches(X=test_filepaths, test_data=True)
test_set

In [None]:
test_predictions = model.predict(test_set,verbose=1)

In [None]:
test_predictions