# Iris Flower classifier

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# SciPy
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import os
import pickle
import requests as req
from io import BytesIO
import boto3

# MachineLearning
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
# from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import GridSearchCV
from keras.utils.np_utils import to_categorical

# DeepLearning
import keras
from keras import models
from keras import layers

# Main modules
import predictions as pr
import aws_utils as au

In [None]:
# S3 bucket name handed to au.upload_to_s3() when artifacts are uploaded
BUCKET_NAME = 'models-in-prod'
# Local folder where the fitted artifacts (pickles, model JSON/weights) are written
FOLDER = 'dist'

In [None]:
iris = sns.load_dataset('iris')

In [None]:
iris.info()

In [None]:
# Raw data: features not normalized yet and target still as text labels
iris.sample(5)

In [None]:
# Pairwise scatter plots per species; `height` replaces the deprecated `size` argument
sns.pairplot(iris, hue='species', height=3.4);

## Primer modelo

In [None]:
def fit_normalizer(input_data):
    """
        Fit a StandardScaler on input_data, pickle it and return it.

        input_data is handed straight to StandardScaler.fit().
        The fitted scaler is written to FOLDER/normalizer.pkl and returned.
    """
    print('Fitting a Normalizer with given input')
    fitted_scaler = StandardScaler().fit(input_data)

    # Persist the scaler so it can be loaded outside this notebook
    file_name = 'normalizer.pkl'
    pickle_path = os.path.join(FOLDER, file_name)
    with open(pickle_path, 'wb') as out_file:
        pickle.dump(fitted_scaler, out_file)

    # TODO Part 2
    # au.upload_to_s3(BUCKET_NAME, FOLDER, file_name)

    print('Normalizer saved')
    return fitted_scaler

In [None]:
normalizer = fit_normalizer(iris.drop("species", axis=1))

In [None]:
X = normalizer.transform(iris.drop("species", axis=1))

In [None]:
def fit_encoder(target_list):
    """
        Fit a LabelEncoder on target_list, pickle it and return it.

        Prints the classes found and their encoded values as a sanity check.
        The fitted encoder is written to FOLDER/encoder.pkl and returned.
    """
    print('Fitting a LabelEncoder with given target')
    label_encoder = LabelEncoder().fit(target_list)

    # Sanity check: show every class next to the integer it maps to
    known_classes = label_encoder.classes_
    print('Found classes', known_classes)
    print('Testing encoder', label_encoder.transform(known_classes))

    # Persist the encoder so it can be loaded outside this notebook
    file_name = 'encoder.pkl'
    pickle_path = os.path.join(FOLDER, file_name)
    with open(pickle_path, 'wb') as out_file:
        pickle.dump(label_encoder, out_file)

    # TODO Part 2
    # au.upload_to_s3(BUCKET_NAME, FOLDER, file_name)

    print('Encoder saved')
    return label_encoder

In [None]:
# All the target column
target_list = iris['species'].tolist()
encoder = fit_encoder(target_list)

In [None]:
def encode_and_one_hot_target(target_list, encoder):
    """
        Turn target labels into a one-hot matrix.

        target_list is first mapped through encoder.transform() and the
        result is expanded with to_categorical(). Returns the one-hot array.
    """
    print('Encoding target with given encoder')
    class_ids = encoder.transform(target_list)

    print('Convert encoded classes integers to dummy variables')
    one_hot = to_categorical(class_ids)

    print('Target final shape', one_hot.shape)
    return one_hot

In [None]:
Y = encode_and_one_hot_target(target_list, encoder)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=1, test_size = .33)

In [None]:
X_train.shape

In [None]:
X_test.shape

In [None]:
y_train.shape

In [None]:
y_test.shape

In [None]:
def fit_knn(X_train, y_train):
    """
        Train a k-nearest-neighbours classifier (k=3), pickle it and return it.

        The fitted classifier is written to FOLDER/model.pkl and returned.
    """
    # Classifier that votes among the 3 nearest neighbours
    classifier = KNeighborsClassifier(n_neighbors=3)

    print('Fitting model')
    fitted_model = classifier.fit(X_train, y_train)

    # Persist the classifier so it can be loaded outside this notebook
    file_name = 'model.pkl'
    pickle_path = os.path.join(FOLDER, file_name)
    with open(pickle_path, 'wb') as out_file:
        pickle.dump(fitted_model, out_file)

    # TODO Part 2
    # au.upload_to_s3(BUCKET_NAME, FOLDER, file_name)

    print('Model saved')
    return fitted_model

In [None]:
model = fit_knn(X_train, y_train)

In [None]:
Y_pred = model.predict(X_test)

In [None]:
accuracy_score(y_test, Y_pred)

## Modelo exportado

![Exported models](img/exported-models.png)

## flask: predictions.predict()

In [None]:
# From package
versicolor = [6.3, 2.5, 4.9, 1.5]
virginica = [7.9, 3.8, 6.4, 2.0]
pr.predict(versicolor)

## flask: app.classify()

```bash
# Run flask!
python app.py
```

In [None]:
# From Browser
req.get('http://localhost:5000/classify?sepal_length=5.1&sepal_width=3.5&petal_length=1.4&petal_width=0.2').json()

In [None]:
# Show server log

## Packaging & Deploy

- Exportar todas las dependencias utilizadas a un archivo requirements.txt (con versión)  
![requirements.txt](img/requirements.txt.png)
- Armar y probar la aplicación en un virtual environment
- ¿Qué es un virtual environment? Permite empaquetar el ejecutable de python y los módulos instalados.
- Todo esto prepara la aplicación para correr en la nube.

```bash
# Install virtual environments
pip install virtualenv

# Create virtual environment with new python executable
virtualenv env --python=python3.7

# Activate it, prompt changes
source env/bin/activate

# Install all dependencies in requirements.txt
pip install -r requirements.txt

# Test the app inside the virtual environment (stop the previously started app first)
python app.py

# Zip the app, without virtual environment
zip -r models-in-production.zip app.py predictions.py aws_utils.py config.py requirements.txt dist
```

## EC2 Tutorial

![ec2-step-1](img/ec2-step-1.png)  
![ec2-step-2](img/ec2-step-2.png)  
![ec2-step-3](img/ec2-step-3.png)  
![ec2-step-4](img/ec2-step-4.png)  
![ec2-step-5](img/ec2-step-5.png)  
![ec2-step-6](img/ec2-step-6.png)  
![ec2-step-7](img/ec2-step-7.png)  
![ec2-step-8](img/ec2-step-8.png)  
![ec2-step-9](img/ec2-step-9.png)  
![ec2-step-10](img/ec2-step-10.png)  

## Obtener el DNS público de la instancia

![Public DNS](img/public-dns.png)

![connect-to-instance](img/connect-to-instance.png)

```bash
# Cloud: Connect to instance
ssh -i ~/.ssh/ramiro.savoie.pem ubuntu@ec2-35-167-97-63.us-west-2.compute.amazonaws.com

# Cloud: Install python 3.7, pip, virtualenv and unzip
sudo add-apt-repository ppa:deadsnakes/ppa && sudo apt-get update && sudo apt-get install -y unzip python3.7 python3-pip virtualenv

# Upload the packaged app
scp models-in-production.zip ubuntu@ec2-35-167-97-63.us-west-2.compute.amazonaws.com:/home/ubuntu

# Cloud
unzip -o models-in-production.zip

# Cloud, same steps as local
virtualenv env --python=python3.7
source env/bin/activate
pip install -r requirements.txt
python app.py
```

```bash
# Extra
# nohup means: do not terminate this process even when the stty is cut off.
# & at the end means: run this command as a background task
nohup python app.py &
ps -ef | grep app.py
# https://stackoverflow.com/questions/17385794/how-to-get-the-process-id-to-kill-a-nohup-process
# https://stackoverflow.com/questions/23029443/run-python-flask-on-ec2-in-the-backgroud
```

In [None]:
# Browser with Public DNS.
req.get('http://ec2-35-167-97-63.us-west-2.compute.amazonaws.com:5000/classify?sepal_length=5.1&sepal_width=3.5&petal_length=1.4&petal_width=0.2').json()

## Tutorial de S3

![s3-overview](img/s3-overview.png)  

## Modelo dinámico

¿Qué es más probable que cambie, el modelo o la API?

In [None]:
## Create a bucket named `models-in-prod`

In [None]:
## Upload model with S3 UI

In [None]:
## Present aws_utils.py

In [None]:
## Uncomment TODO Part 2 in predictions.fetch_pickle()

In [None]:
## Delete dist content

```bash
# Stop service in instance

# Zip the new app
zip -r models-in-production.zip app.py predictions.py aws_utils.py config.py requirements.txt dist

# Upload the new packaged app
scp models-in-production.zip ubuntu@ec2-3-16-22-139.us-east-2.compute.amazonaws.com:/home/ubuntu

# Cloud
unzip -o models-in-production.zip

# Cloud, restart service
python app.py
```

In [None]:
# Browser with Public DNS.
req.get('http://ec2-35-167-97-63.us-west-2.compute.amazonaws.com:5000/classify?sepal_length=5.1&sepal_width=3.5&petal_length=1.4&petal_width=0.2').json()

## Segundo modelo

In [None]:
def fit_random_forest(X_train, y_train):
    """
        Grid-search a Random Forest, pickle the fitted search and return it.

        X_train and y_train are passed to GridSearchCV.fit() (5-fold CV over
        n_estimators, max_depth and min_samples_leaf).
        The fitted GridSearchCV object is written to FOLDER/model.pkl and
        returned. The S3 upload is still disabled (see TODO Part 2 below).
    """
    rfc = RandomForestClassifier(n_jobs=-1, criterion='entropy', random_state=0, max_features='sqrt')

    param_grid = {
        "n_estimators": [5, 8, 10],
        "max_depth": [4, 5, 6],
        "min_samples_leaf": [8, 10, 12],
    }

    CV_rf = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=5)

    CV_rf.fit(X_train, y_train)
    print(f'best_params: {CV_rf.best_params_}')

    file_name = 'model.pkl'
    with open(os.path.join(FOLDER, file_name), 'wb') as f:
        # Persist the search fitted here (the same object that is returned),
        # not the notebook-global `model`, which still holds the previous model.
        pickle.dump(CV_rf, f)

    # TODO Part 2, uncomment all Part 2 above in fit_normalizer() and fit_encoder() and run
    # au.upload_to_s3(BUCKET_NAME, FOLDER, file_name)

    print('Model saved')
    return CV_rf

In [None]:
model = fit_random_forest(X_train, y_train)

In [None]:
# Updated in S3 UI

In [None]:
y_pred = model.predict(X_test)

In [None]:
# Score the random forest predictions (y_pred), not the earlier KNN ones (Y_pred)
accuracy_score(y_test, y_pred)

In [None]:
# Browser with Public DNS.
req.get('http://ec2-35-167-97-63.us-west-2.compute.amazonaws.com:5000/classify?sepal_length=5.1&sepal_width=3.5&petal_length=1.4&petal_width=0.2').json()

## Tercer Modelo

In [None]:
x_train, x_test, Y_train, Y_test = train_test_split(X, Y, random_state=1, test_size = .33)

In [None]:
def print_figure(training_values, validation_values, metric):
    """
        Plot a training and a validation series against the epoch number.

        training_values is drawn with the 'bo' format and validation_values
        with the 'b' format; metric is used for labels, title and y axis.
    """
    epoch_axis = range(1, len(training_values) + 1)
    curves = (
        (training_values, 'bo', 'Training '),
        (validation_values, 'b', 'Validation '),
    )

    # Start from a clean figure so repeated calls do not overlap
    plt.clf()
    for series, fmt, prefix in curves:
        plt.plot(epoch_axis, series, fmt, label=prefix + metric)

    plt.title('Training and validation ' + metric)
    plt.xlabel('Epochs')
    plt.ylabel(metric)
    plt.legend()

    plt.show()

In [None]:
def print_loss(history):
    """
        Plot training vs. validation loss stored in history.history.
    """
    recorded = history.history
    print_figure(recorded['loss'], recorded['val_loss'], 'Loss')

In [None]:
def print_acc(history):
    """
        Plot training vs. validation accuracy stored in history.history.
    """
    recorded = history.history
    print_figure(recorded['accuracy'], recorded['val_accuracy'], 'Accuracy')

In [None]:
# Simple Neural Network: 4 inputs -> two 32-unit ReLU layers -> 3-way softmax
model = models.Sequential([
    layers.Dense(32, activation='relu', input_shape=(4,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(3, activation='softmax'),
])

In [None]:
# Categorical cross-entropy loss with RMSprop, tracking accuracy
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [None]:
# Hold out the first 50 training rows for validation; train on the remainder
x_val, partial_x_train = x_train[:50], x_train[50:]
y_val, partial_y_train = Y_train[:50], Y_train[50:]

In [None]:
partial_x_train.shape

In [None]:
partial_y_train.shape

In [None]:
%time history = model.fit(partial_x_train, partial_y_train, epochs=30, batch_size=10, validation_data=(x_val, y_val))

In [None]:
# Serialize model to JSON
file_name = 'model.json'
model_json = model.to_json()

In [None]:
# Network architecture
model_json

In [None]:
# Write the architecture JSON (model_json) to FOLDER/<file_name>
with open(os.path.join(FOLDER, file_name), "w") as json_file:
    json_file.write(model_json)
# Upload the file just written via au.upload_to_s3()
au.upload_to_s3(BUCKET_NAME, FOLDER, file_name) 
print("Saved model definition to disk")

In [None]:
# Serialize weights to HDF5
file_name = 'model.h5'
# Weights are stored separately from the architecture JSON
model.save_weights(os.path.join(FOLDER, file_name))
# Upload the weights file via au.upload_to_s3()
au.upload_to_s3(BUCKET_NAME, FOLDER, file_name) 
print("Saved model weights to disk")

In [None]:
# Check in S3 UI

In [None]:
print_loss(history)

In [None]:
print_acc(history)

In [None]:
# TODO Part 3, uncomment all Part 3 in flask predictions.predict()

In [None]:
# Browser
req.get('http://localhost:5000/classify?sepal_length=5.1&sepal_width=3.5&petal_length=1.4&petal_width=0.2').json()

```bash
# Stop service in instance

# Local: Zip the new app
zip -r models-in-production.zip app.py predictions.py aws_utils.py config.py requirements.txt dist

# Local: Upload the new packaged app
scp models-in-production.zip ubuntu@ec2-3-16-22-139.us-east-2.compute.amazonaws.com:/home/ubuntu

# Cloud
unzip -o models-in-production.zip

# Cloud: restart service
python app.py
```

In [None]:
# Browser with Public DNS.
req.get('http://ec2-35-167-97-63.us-west-2.compute.amazonaws.com:5000/classify?sepal_length=5.1&sepal_width=3.5&petal_length=1.4&petal_width=0.2').json()

## Serverless

```bash
# Clean Virtual Environment
rm -rf env

# Sk-learn version
git reset --hard
git checkout zappa

# New  Virtual Environment
virtualenv env --python=python3.7
source env/bin/activate
pip3 install -r requirements.txt

# Inside virtual environment
pip install zappa

# Generate config file
zappa init

# Add
# "s3_bucket": "models-in-prod",
# "slim_handler": true

# First deploy
zappa deploy dev

# Later deploys
zappa update dev
```

![zappa-live](img/zappa-live-white.png)

In [None]:
%%time
req.get('https://4j4x4rgjkd.execute-api.us-east-2.amazonaws.com/dev/classify?sepal_length=5.1&sepal_width=3.5&petal_length=1.4&petal_width=0.2').json()