In [None]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras import optimizers
from keras.utils import np_utils
from keras.layers import Dense, Conv2D, Embedding, Activation, MaxPooling2D, Dropout
from keras.layers import Flatten, LSTM, ZeroPadding2D, BatchNormalization, MaxPooling2D

%matplotlib inline
import matplotlib.pyplot as plt

## Part I: Image data preprocessing.

In this part, you will use the popular package “skimage” to preprocess and augment an image before sending it to a neural network coded in Keras.

**Question 1**:  Use skimage to load your “iguana.jpg” and display it in your notebook.

In [None]:
from skimage import io

# Read the iguana photo from disk and draw it on a dedicated axes object.
### START CODE HERE ###
image = io.imread("iguana.jpg")
fig, ax = plt.subplots()
ax.imshow(image)
### END CODE HERE ###

**Question 2**:  Use skimage to zoom on the face of the iguana. Display the image.

In [None]:
# Zoom by cropping: keep rows 10..299 and columns 300..699 of the original image.
### START CODE HERE ###
face_rows, face_cols = slice(10, 300), slice(300, 700)
image_zoom = image[face_rows, face_cols]
fig, ax = plt.subplots()
ax.imshow(image_zoom)
### END CODE HERE ###

**Question 3**:  Use skimage to rescale the image to 20% of the initial size of the image. Display the image. Rescaling means lowering the resolution of the image. Remember that in class we talked about finding the computation/accuracy trade-off by showing different resolutions of the same image to humans and figuring out what is the minimum resolution leading to the maximum human accuracy.

In [None]:
# Rescale image to 20% of the initial size (scale factor 1/5)
# NOTE(review): recent scikit-image releases may need an explicit channel
# argument so the color axis is not rescaled too - confirm for your version.
### START CODE HERE ###
from skimage.transform import rescale

scale_factor = 1.0 / 5.0
image_rescaled = rescale(image, scale_factor)
fig, ax = plt.subplots()
ax.imshow(image_rescaled)
### END CODE HERE ###

**Question 4**:  Use skimage to add random noise to the image. Display the image.

In [None]:
# Corrupt the image with random_noise (library default settings) and show the result.
### START CODE HERE ###
from skimage.util import random_noise

image_random_noise = random_noise(image)
fig, ax = plt.subplots()
ax.imshow(image_random_noise)
### END CODE HERE ###

**Question 5**:  Use skimage to rotate the image by 45 degrees.

In [None]:
# Rotate the image by 45 degrees and show the result.
### START CODE HERE ###
from skimage.transform import rotate

rotation_degrees = 45
image_rotated = rotate(image, rotation_degrees)
fig, ax = plt.subplots()
ax.imshow(image_rotated)
### END CODE HERE ###

**Question 6**:  Use skimage to flip the image horizontally and vertically. Display the image.


In [None]:
# Horizontal flip: mirror left/right by reversing the column axis.
### START CODE HERE ###
image_hflip = np.fliplr(image)
fig, ax = plt.subplots()
ax.imshow(image_hflip)
### END CODE HERE ###

In [None]:
# Vertical flip: mirror top/bottom by reversing the row axis.
### START CODE HERE ###
image_vflip = np.flipud(image)
fig, ax = plt.subplots()
ax.imshow(image_vflip)
### END CODE HERE ###

**Question 7**: (Optional) Use skimage to (i) blur the image, (ii) enhance its contrast, (iii) convert to grayscale, (iv) invert colors…

In [None]:
# Optional transformations. All four live in one cell, so each result gets its
# own figure: consecutive plt.imshow calls on the implicit current axes would
# draw over one another and only the last image would remain visible.

# Blur image
### START CODE HERE ###
from scipy import ndimage
# 15x15 uniform (box) filter over rows/columns; size 1 on the last axis
# leaves that axis unfiltered.
image_blured = ndimage.uniform_filter(image, size=(15, 15, 1))
plt.figure()
plt.imshow(image_blured)
plt.title("Blurred")
### END CODE HERE ###

# Convert to grayscale
### START CODE HERE ###
from skimage.color import rgb2gray
image_grayscale = rgb2gray(image)
plt.figure()
# A 2-D array is rendered through a colormap; request gray explicitly so the
# picture is not shown in matplotlib's default false-color map.
plt.imshow(image_grayscale, cmap="gray")
plt.title("Grayscale")
### END CODE HERE ###

# Enhance contrast
### START CODE HERE ###
from skimage import exposure
# Histogram equalization of the image intensities.
image_equalized = exposure.equalize_hist(image)
plt.figure()
plt.imshow(image_equalized)
plt.title("Contrast enhanced")
### END CODE HERE ###

# Color inversion
### START CODE HERE ###
from skimage import util
import numpy as np
image_color_inv = util.invert(image)
plt.figure()
plt.imshow(image_color_inv)
plt.title("Inverted colors")
### END CODE HERE ###

Skimage is a popular package for customized data preprocessing and augmentation. However, deep learning frameworks such as Keras often incorporate functions to help you preprocess data in a few lines of code. 

**Question 8**: Read and run the Keras code for image preprocessing. It will save augmented images in a folder called “preview” on the notebook’s directory.

In [None]:
# Image preprocessing in Keras
import os

from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Random augmentation policy: rotations, shifts, shear, zoom and horizontal
# flips; pixels uncovered by a transform are filled with the nearest value.
datagen = ImageDataGenerator(
        rotation_range=45,
        width_shift_range=0.3,
        height_shift_range=0.3,
        shear_range=0.3,
        zoom_range=0.3,
        horizontal_flip=True,
        fill_mode='nearest')

img = load_img('iguana.jpg')  # this is a PIL image
x = img_to_array(img)  # convert image to numpy array 
x = x.reshape((1,) + x.shape)  # reshape image to (1, ..,..,..) to fit keras' standard shape

# Create the output directory up front in case it does not exist yet, so that
# saving the generated images cannot fail on a missing folder.
os.makedirs('preview', exist_ok=True)

# Use flow() to apply data augmentation randomly according to the datagenerator
# and save the results to the `preview/` directory.
# The file prefix is the class label (1 = iguana): the label-building code
# later in this notebook parses the label from the first character of each
# file name with float(), so a non-numeric prefix would raise ValueError there.
num_image_generated = 0
for batch in datagen.flow(x, batch_size=1, save_to_dir='preview', save_prefix='1', save_format='jpeg'):
    num_image_generated += 1
    if num_image_generated > 20:
        # The generator never terminates on its own; the counter reaches 21
        # before this fires, so 21 batches (images) are produced in total.
        break

**Question 9** : Observe the images generated in the "preview" folder. As you did for "iguana.jpg", augment the image "lion.jpeg" and save the generated images in a "preview_lion" folder.

**Question 10** : We will now show you a trick to create numpy arrays of images and labels (0 for non-iguana, 1 for iguana). We will then pass these numpy arrays to the CNN and train it in mini-batches of size 5.

In [None]:
# Code that generates the labels.
# Walks the preview/ folder, loads every image file as RGB, resizes it to
# 100x100 and reads its label from the first character of the file name.
import os
import re  # needed here: the notebook's other `import re` only appears in a later cell
from scipy import ndimage, misc

# Raw string so that "\." reaches the regex engine untouched instead of being
# an invalid string escape.
IMAGE_FILE_PATTERN = re.compile(r"\.(jpg|jpeg|png|bmp|tiff)$")

# NOTE(review): ndimage.imread / misc.imresize exist only in older SciPy
# releases - confirm the installed version still provides them.
images = []
labels = []
for root, dirnames, filenames in os.walk("preview/"):
    for filename in filenames:
        if IMAGE_FILE_PATTERN.search(filename):
            filepath = os.path.join(root, filename)
            # Use a dedicated name so the `image` array loaded in Part I is
            # not overwritten by the last preview file.
            preview_image = ndimage.imread(filepath, mode="RGB")
            image_resized = misc.imresize(preview_image, (100, 100))
            images.append(image_resized)
            # The file name must start with the numeric class label (0 or 1);
            # any other first character makes float() raise ValueError.
            labels.append(float(filename[0]))

# Fail early with a clear message instead of an IndexError in a later cell.
assert images, "no image files found under preview/ - run the augmentation cell first"

images = np.array(images)
labels = np.array(labels)

In [None]:
# Sanity check: show the shapes of the image and label arrays, one per line.
print(images.shape, labels.shape, sep="\n")

**Question 11**: (Optional) Evaluate the model.

## Part II: Text data preprocessing.
**Question 1**: Go to any static website. Right-click and select “View Page Source”. Copy a complicated part of the HTML code and paste it into the notebook in the variable “html_txt”.

In [None]:
### START CODE HERE ###
# Raw HTML fragment used as input for the text-preprocessing steps: a CS230
# page header with instructor links, a course description paragraph, a row of
# navigation buttons and one commented-out button.
html_txt = """<div class="jumbotron jumbotron-fluid">
      <div class="container">
      <h1 class="display-5">CS230: Deep Learning</h1>
      <h2 class="display-7">Spring 2018</h2><h3>Instructors</h3>
      <div class="row">
        <div class="instructor">
          <a href="#">
            <img class="headshot" src="static/andrew.jpg" style="text-align:center;">
            <div style="text-align:center;"><a href="http://www.andrewng.org/">Andrew Ng</a></div>
          </a>
        </div>
        <div class="instructor">
          <a href="#">
            <img class="headshot" src="static/kian.jpg" style="text-align:center;">
            <div style="text-align:center;"><a href="https://www.linkedin.com/in/kiankatan/">Kian Katanforoosh</a></div>
          </a>
        </div>
      </div>
      <br />
      <p><strong>Course Description</strong>&nbsp;&nbsp; Deep Learning is one of the most highly sought after skills in AI. We will help you become good at Deep Learning. In this course, you will learn the foundations of Deep Learning, understand how to build neural networks, and learn how to lead successful machine learning projects. You will learn about Convolutional networks, RNNs, LSTM, Adam, Dropout, BatchNorm, Xavier/He initialization, and more. You will work on case studies from healthcare, autonomous driving, sign language reading, music generation, and natural language processing. You will master not only the theory, but also see how it is applied in industry. You will practice all these ideas in Python and in TensorFlow, which we will teach. 
      After this course, you will likely find creative ways to apply it to your work. This class is taught in the flipped-classroom format. You will watch videos and complete in-depth programming assignments and online quizzes at home, then come to class for advanced discussions and work on projects.
      This class will culminate in an open-ended final project, which the teaching team will help you on. </p>
    </div>

  <div style="text-align:center; padding:40px 0px 0px 0px;">
      <a href="./syllabus.html">
      <button type="button" class="btn btn-success btn-lg">Schedule</button>
      </a>
      <a href="http://piazza.com/stanford/spring/cs230">
      <button type="button" class="btn btn-danger btn-lg">Piazza Forum</button>
      </a>
      <a href="./calendar.html">
      <button type="button" class="btn btn-info btn-lg">OH Calendar</button>
      </a>
      <!-- <a href="#">
      <button type="button" class="btn btn-info btn-lg">Calendar</button>
      </a> -->
  </div>
  </div>"""
### END CODE HERE ###

# Show the raw markup before any cleaning is applied.
print(html_txt)

**Question 2**: Use *BeautifulSoup* to parse the html_txt. Print the html_txt.

In [None]:
from bs4 import BeautifulSoup

# Parse the html input
### START CODE HERE ###
def parse_html(html_page):
    """Parse `html_page` with the built-in "html.parser" backend and return
    the text extracted by BeautifulSoup's get_text()."""
    markup_tree = BeautifulSoup(html_page, "html.parser")
    extracted_text = markup_tree.get_text()
    return extracted_text

# Rebind html_txt to its text-only version for the following steps.
html_txt = parse_html(html_txt)
### END CODE HERE ###

print(html_txt)

**Question 3**: Use *re* to remove meta-characters such as square brackets and anything between them. Print the html_txt.

In [None]:
import re, string, unicodedata
# Remove meta characters and things between them.
### START CODE HERE ###
def remove_between_square_brackets(html_page):
    """Delete every square-bracketed span from a string.

    Each "[", the following characters that are not "]", and the closing "]"
    are replaced by the empty string. Text outside brackets, including the
    whitespace around a removed span, is left untouched.

    Args:
        html_page: the text to clean.

    Returns:
        The text with all "[...]" spans removed.
    """
    # Raw string: keeps "\[" and "\]" as regex escapes rather than invalid
    # Python string escapes (which trigger a warning on recent interpreters).
    return re.sub(r'\[[^]]*\]', '', html_page)

# Strip bracketed spans from the text and rebind html_txt to the cleaned string.
html_txt = remove_between_square_brackets(html_txt)
### END CODE HERE ###

print(html_txt)

**Question 4**: Using the Natural Language ToolKit (nltk), separate the text into a list of words.

In [None]:
import nltk
from nltk import word_tokenize, sent_tokenize

# Split the cleaned text into a list of word tokens with nltk's word tokenizer.
### START CODE HERE ###
words = word_tokenize(html_txt)
print(words)
### END CODE HERE ###

**Question 5**: (Optional) Remove non ASCII characters. Convert to Lower case. Remove punctuation, stopwords, …

In [None]:
### START CODE HERE ###

### END CODE HERE ###

A machine cannot read this list of strings directly; you need to build a vocabulary and tokenize your words.

**Question 6**: Build the vocabulary from the list of words.

In [None]:
# Vocabulary = the distinct words of the text, in sorted order.
### START CODE HERE ###
vocabulary = list(set(words))
vocabulary.sort()
# Print the vocabulary, then its size on the next line.
print(vocabulary, len(vocabulary), sep="\n")
### END CODE HERE ###

**Question 7**: Build word to integer mapping in Python. It should be sorted.

In [None]:
# Build word to integer mapping in Python. It should be sorted.
### START CODE HERE ###
# Enumerate the *distinct* sorted words so ids are contiguous (0 .. V-1, with
# V the vocabulary size). Enumerating the raw word list let every repeated
# word consume several indices, leaving gaps and ids larger than V.
word_to_int = {word: index for index, word in enumerate(sorted(set(words)))}
print(word_to_int)
### END CODE HERE ###

**Question 8**: Tokenize your text. 

In [None]:
# Translate every word of the text into its integer id via the mapping.
### START CODE HERE ###
# Plain dictionary lookup per word; an unknown word raises KeyError.
tokens = list(map(word_to_int.__getitem__, words))
print(tokens)
### END CODE HERE ###

**Question 9**: Read and run the Keras code for text preprocessing. It uses the Tokenizer Function.

In [None]:
# Preprocess text with Keras for Sentiment classification
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Five short sentences and their sentiment labels (one label per sentence).
examples = [
    'You are amazing!',
    'It is so bad',
    'Congratulations',
    'You suck bro',
    'Awesome dude!',
]
Y = [1, 0, 1, 0, 1]

# Build the tokenizer and let it learn its word index from the examples.
t = Tokenizer()
t.fit_on_texts(examples)

# Turn each sentence into a sequence of integer ids, then pad the sequences
# at the end ('post') so they all share the same length.
X = pad_sequences(t.texts_to_sequences(examples), padding='post')

# Vocabulary size handed to the embedding layer: number of indexed words + 1.
vocab_size = 1 + len(t.word_index)
print(vocab_size)
print(X)

**Question 10**: (Optional) Train the RNN coded for you in the notebook on the sentiment classification task (with 5 examples). Evaluate the model.

## Appendix: Models and training codes

In [None]:
# CNN
# Binary classifier for the image arrays built earlier: three strided
# Conv -> BatchNorm -> ReLU stages, one max-pooling layer and a single sigmoid
# output unit. For the 100x100 inputs produced by the label cell the spatial
# size shrinks 100 -> 32 -> 10 -> 4 across the convolutions and to 2 after pooling.
model_CNN = Sequential()
# Only the first layer declares the input shape; later layers infer theirs
# from the previous layer, so repeating input_shape on them was redundant.
model_CNN.add(Conv2D(32, (7, 7), strides = (3, 3), name = 'conv0', input_shape = images[0].shape))
model_CNN.add(BatchNormalization(axis = 3, name = 'bn0'))
model_CNN.add(Activation('relu'))
model_CNN.add(Conv2D(32, (5, 5), strides = (3, 3), name = 'conv1'))
model_CNN.add(BatchNormalization(axis = 3, name = 'bn1'))
model_CNN.add(Activation('relu'))
model_CNN.add(Conv2D(32, (3, 3), strides = (2, 2), name = 'conv2'))
model_CNN.add(BatchNormalization(axis = 3, name = 'bn2'))
model_CNN.add(Activation('relu'))
model_CNN.add(MaxPooling2D((2, 2), name='max_pool'))
model_CNN.add(Flatten())
model_CNN.add(Dense(1, activation='sigmoid', name='fc'))

In [None]:
# RNN
# Sentiment model: 128-dimensional word embedding -> LSTM with 128 units ->
# one sigmoid output unit.
model_RNN = Sequential([
    Embedding(vocab_size, 128),
    LSTM(128),
    Dense(1, activation='sigmoid'),
])

In [None]:
# training code for CNN
# SGD with momentum and a small learning-rate decay, binary cross-entropy
# loss, trained for 10 epochs in mini-batches of 5.
sgd = optimizers.SGD(momentum=0.9, decay=1e-6, lr=0.001)
model_CNN.compile(
    optimizer=sgd,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model_CNN.fit(x=images, y=labels, batch_size=5, epochs=10)

In [None]:
# training code for RNN
# Same optimizer family as the CNN but with a larger learning rate; the
# padded sequences and labels are converted to arrays before fitting.
sgd = optimizers.SGD(momentum=0.9, decay=1e-6, lr=0.01)
model_RNN.compile(
    optimizer=sgd,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
rnn_inputs = np.array(X)
rnn_targets = np.array(Y)
model_RNN.fit(rnn_inputs, rnn_targets, epochs=1000)

In [None]:
# testing code for CNN
# Run the CNN on the same image array it was fitted on and print its outputs.
cnn_outputs = model_CNN.predict(images)
print(cnn_outputs)

## Appendix 2: Example of use of a data generator in Keras

Code from the official Keras documentation.

In [None]:
from keras.datasets import cifar10
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Load CIFAR-10 and unpack the (inputs, labels) pairs of the train and test splits.
train_split, test_split = cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# CNN for datagen
# Classifier for the CIFAR-10 arrays loaded above, trained below on one-hot
# labels over 10 classes. Three fixes compared with the binary iguana CNN:
#   * the input shape is taken from the data instead of a hard-coded
#     (100, 100, 3), which does not match the CIFAR-10 images;
#   * 'same' padding keeps the strided convolutions from shrinking small
#     inputs to a non-positive size (for 32x32 inputs: 32 -> 11 -> 4 -> 2,
#     then 1 after pooling);
#   * the head is a 10-way softmax and the model is compiled here, since a
#     freshly built model cannot be fitted before compile().
model_CNN = Sequential()
model_CNN.add(Conv2D(32, (7, 7), strides = (3, 3), padding = 'same', name = 'conv0', input_shape = x_train.shape[1:]))
model_CNN.add(BatchNormalization(axis = 3, name = 'bn0'))
model_CNN.add(Activation('relu'))
model_CNN.add(Conv2D(32, (5, 5), strides = (3, 3), padding = 'same', name = 'conv1'))
model_CNN.add(BatchNormalization(axis = 3, name = 'bn1'))
model_CNN.add(Activation('relu'))
model_CNN.add(Conv2D(32, (3, 3), strides = (2, 2), padding = 'same', name = 'conv2'))
model_CNN.add(BatchNormalization(axis = 3, name = 'bn2'))
model_CNN.add(Activation('relu'))
model_CNN.add(MaxPooling2D((2, 2), name='max_pool'))
model_CNN.add(Flatten())
# 10 output units = number of CIFAR-10 classes.
model_CNN.add(Dense(10, activation='softmax', name='fc'))

# Categorical cross-entropy matches the one-hot encoded labels.
model_CNN.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
num_classes = 10
batch_size = 32

# One-hot encode the labels. The guard makes the cell safe to re-run:
# encoding labels that are already one-hot would produce a wrongly shaped array.
if y_train.shape[-1] != num_classes:
    y_train = np_utils.to_categorical(y_train, num_classes)
if y_test.shape[-1] != num_classes:
    y_test = np_utils.to_categorical(y_test, num_classes)

datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
datagen.fit(x_train)

# Number of batches that make up one pass over the training set. Rounded up
# to a whole number so the last, partial batch is included and the step count
# is an integer rather than a float.
steps_per_epoch = int(np.ceil(len(x_train) / batch_size))

# fits the model on batches with real-time data augmentation:
epochs = 10
model_CNN.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=steps_per_epoch, epochs=epochs)

# here's a more "manual" example
for e in range(epochs):
    print('Epoch', e)
    batches = 0
    for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=batch_size):
        model_CNN.fit(x_batch, y_batch)
        batches += 1
        if batches >= steps_per_epoch:
            # we need to break the loop by hand because
            # the generator loops indefinitely
            break