# Neural Nets and Deep Learning
#### Slides:
http://bit.ly/NeuralNets_Dojo_ML

## 1. Installation Section
### 1.1 TensorFlow 

In [None]:
%%sh
# Print the installed pip version to confirm pip is available in this shell.
pip -V

In [None]:
%%sh
# Optional, left disabled: upgrade pip and six via easy_install.
# NOTE(review): both commands are commented out, so this cell currently does nothing.
# easy_install --upgrade pip
# easy_install --upgrade six

In [None]:
%%sh
# Install TensorFlow with pip; no version is pinned in this command.
pip install tensorflow

In [None]:
# Validate your installation: build a constant string tensor, evaluate it and print it.
import tensorflow as tf
hello = tf.constant('Hello, TensorFlow!')
if hasattr(tf, 'Session'):
    # TensorFlow 1.x: tensors are graph nodes and need a Session to be evaluated.
    sess = tf.Session()
    print(sess.run(hello))
else:
    # TensorFlow 2.x: there is no top-level tf.Session; with eager execution
    # (the default) the tensor already holds its value.
    print(hello.numpy())


### 1.2 Keras

In [None]:
%%sh
# Install Keras with pip; no version is pinned in this command.
pip install keras

In [None]:
# By default, Keras will use TensorFlow as its tensor manipulation library :) 

### 1.3 scikit-learn

In [None]:
%%sh
# The PyPI distribution is named "scikit-learn"; the Python package it provides
# is imported as `sklearn`. The "sklearn" name on PyPI is a deprecated placeholder.
pip install scikit-learn

### 1.4 matplotlib

In [None]:
%%sh
# Install matplotlib with pip; no version is pinned in this command.
pip install matplotlib

## 2. Neural Nets Examples
### 2.1 Diabetes - Pima Dataset

Dataset Description:

5. Number of Instances: 768

6. Number of Attributes: 8 plus class 

7. For Each Attribute: (all numeric-valued)
   1. Number of times pregnant
   2. Plasma glucose concentration at 2 hours in an oral glucose tolerance test
   3. Diastolic blood pressure (mm Hg)
   4. Triceps skin fold thickness (mm)
   5. 2-Hour serum insulin (mu U/ml)
   6. Body mass index (weight in kg/(height in m)^2)
   7. Diabetes pedigree function
   8. Age (years)
   9. Class variable (0 or 1)

8. Missing Attribute Values: Yes

9. Class Distribution: (class value 1 is interpreted as "tested positive for
   diabetes")

   Class Value  Number of instances
   0            500
   1            268


In [None]:
# Fixed seed value; it is passed to numpy.random.seed further down so runs are repeatable.
seed = 10

(=>) Download the dataset from this URL and save the file as "pima.csv"
http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data

In [None]:
from keras.models import Sequential
from keras.layers import Dense  #Just Dense or fully connected neurons
import numpy

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_auc_score, f1_score
import matplotlib.pyplot as plt

In [None]:
# Seed NumPy's global random generator with the notebook-wide seed.
# NOTE(review): only NumPy is seeded here; the Keras backend's own RNG is not seeded in this cell.
numpy.random.seed(seed)

In [None]:
def plotPerformance(history):
    """Plot per-epoch training and validation curves for accuracy and loss.

    Two figures are shown, one after the other: accuracy first, then loss.

    Parameters
    ----------
    history : object
        Any object exposing a ``history`` mapping of metric name to a
        per-epoch sequence of values (for example the value returned by
        ``model.fit``). It must contain ``'loss'`` and ``'val_loss'`` plus
        either ``'acc'``/``'val_acc'`` or ``'accuracy'``/``'val_accuracy'``.

    Raises
    ------
    KeyError
        If a required metric (or its ``val_`` counterpart) is missing.
    """
    metrics = history.history

    # Keras records the accuracy metric as 'acc' in older releases and as
    # 'accuracy' in newer ones; accept whichever spelling is present.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'

    for key, label in ((acc_key, 'accuracy'), ('loss', 'loss')):
        plt.plot(metrics[key])
        plt.plot(metrics['val_' + key])
        plt.title('model ' + label)
        plt.ylabel(label)
        plt.xlabel('epoch')
        # The 'val_*' series are validation metrics, so label them as such.
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()

In [None]:
# Read the comma-separated Pima Indians file into a NumPy array,
# then show its dimensions as the cell output.
dataset = numpy.loadtxt(fname="pima.csv", delimiter=",")
dataset.shape

In [None]:
# Print the loaded array for a quick visual check of the raw values.
print(dataset)

In [None]:
# Columns 0-7 are the eight input attributes (X); column 8 is the class label (Y).
X, Y = dataset[:, :8], dataset[:, 8]


In [None]:
# Fully connected network: 8 inputs -> 12 ReLU -> 8 ReLU -> 1 sigmoid output.
model = Sequential([
    Dense(12, input_dim=8, kernel_initializer="uniform", activation='relu'),
    Dense(8, kernel_initializer='uniform', activation='relu'),
    Dense(1, kernel_initializer='uniform', activation='sigmoid'),
])

In [None]:
# Configure training: binary cross-entropy loss with accuracy tracked per epoch.
optimizer = 'sgd'  # swap for 'adam' to compare optimizers
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train on X and Y, with a validation_split of 0.20 so validation metrics
# are recorded in `history` alongside the training metrics.
history = model.fit(
    X,
    Y,
    validation_split=0.20,
    epochs=140,
    batch_size=10,
    verbose=2,
)


In [None]:
# evaluate the model
# NOTE(review): this scores the same X, Y arrays that were passed to model.fit,
# so the figure is not a held-out estimate.
loss, acc_t = model.evaluate(X, Y)
# metrics_names[1] is the name of the second value returned by evaluate (acc_t here).
print("\n%s: %.2f%%" % (model.metrics_names[1], acc_t*100))

In [None]:
# Round each predicted probability to a hard 0.0/1.0 label and score the labels against Y.
probabilities = model.predict(X)
predictions = list(map(lambda row: float(round(row[0])), probabilities))
accuracy = numpy.mean(predictions == Y)
print("Prediction Accuracy: %.2f%%" % (accuracy*100))

In [None]:
 # List the metric names recorded in the training history.
 print(history.history.keys())  

In [None]:
# Report the optimizer object and the accuracy measured above, then draw the learning curves.
print(model.optimizer)
print("Accuracy: %.2f%%" % (acc_t * 100))
plotPerformance(history)

##### Let's try a new optimizer 
##### Let's play with optimization algorithms and epoch numbers! 

### 2.2 MNIST

In [None]:
import numpy as np
import keras
import pandas as pd
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import matplotlib.pyplot as plt

In [None]:
# Training configuration for the MNIST example.
num_classes = 10   # ten digit classes to predict
batch_size = 128   # passed to model.fit as the batch size
epochs = 2         # passed to model.fit as the number of epochs

In [None]:
# Height and width of each input image.
img_rows = 28
img_cols = 28

In [None]:
# Train and test dataset
# The result of mnist.load_data() is unpacked into two (images, labels) pairs:
# one for training and one for testing.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
# Explore the data: draw four training images in a 2x2 grid, in grayscale.
for grid_position, sample_index in zip((221, 222, 223, 224), (80, 89, 14, 3)):
    plt.subplot(grid_position)
    plt.imshow(x_train[sample_index], cmap=plt.get_cmap('gray'))
# Render the assembled figure.
plt.show()

In [None]:
# Lift pandas' row/column display limits so the whole pixel grid is shown,
# then display one training image as a table of raw values.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.DataFrame(x_train[14])

In [None]:
# Reduce the dataset
# numberOfExamplesTrain = 5000
# numberOfExamplesTest = 1000
# x_train = x_train[0:numberOfExamplesTrain]
# y_train = y_train[0:numberOfExamplesTrain]
# x_test = x_test[0:numberOfExamplesTest]
# y_test = y_test[0:numberOfExamplesTest]

In [None]:
# Add the single grayscale channel axis where the backend expects it:
# before the spatial axes for 'channels_first', after them otherwise.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)


In [None]:
# Cast the pixel values to float32 and divide them by 255.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Report the resulting array shape and the number of samples in each split.
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')


In [None]:
# For each split, print a two-column table: class label and how often it occurs.
for heading, split_labels in (
    ("Train Set classes frequency", y_train),
    ("Test Set classes frequency", y_test),
):
    print(heading)
    unique, counts = np.unique(split_labels, return_counts=True)
    print(np.asarray((unique, counts)).T)

In [None]:
# Keep copies of the integer labels, then replace y_train / y_test with
# their num_classes-wide categorical (binary matrix) encoding.
y_train_bk, y_test_bk = y_train.copy(), y_test.copy()

y_train, y_test = (
    keras.utils.to_categorical(y_train, num_classes),
    keras.utils.to_categorical(y_test, num_classes),
)

In [None]:
# Display the encoded training labels as the cell output.
# NOTE(review): assumes y_train is a NumPy array, in which case .view() with no
# arguments is just a view of the same data — confirm.
y_train.view()

In [None]:
# Convolutional network: two 3x3 conv layers, 2x2 max-pooling, dropout,
# then a 128-unit dense layer and a softmax output over num_classes.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

In [None]:
# Multi-class setup: categorical cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])

In [None]:
# Train the network with the batch size and epoch count configured above.
# NOTE(review): the test split is passed as validation_data, so the validation
# metrics reported during training are computed on x_test / y_test.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

In [None]:
# Score the trained model on the test split; score[0] is printed as the loss
# and score[1] as the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_auc_score, f1_score

# Sequential.predict_classes / predict_proba are not available in newer Keras
# releases. For a softmax output, model.predict already yields the class
# probabilities and the predicted class is the arg-max over the last axis.
# Running predict once also avoids a second forward pass over the test set.
pred_probs = model.predict(x_test)
predictions = np.argmax(pred_probs, axis=-1)


# Compare against the integer labels saved before one-hot encoding (y_test_bk).
print("\nMatriz de confusion")
print(confusion_matrix(y_test_bk, predictions, labels=range(0,10)))
print("\nAccuracy Score")
print(accuracy_score(y_test_bk, predictions))