<a href="https://colab.research.google.com/github/sherna90/inteligencia_artificial/blob/master/7.-redes_neuronales_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf

In [None]:
# Show which TensorFlow version this notebook is running against.
print(tf.__version__)

In [None]:
import numpy as np
import random 
import matplotlib.pyplot as plt


# Training data: 100 evenly spaced inputs on [0, 11] with noise-free quadratic targets.
X = np.linspace(0, 11, num=100)
# Evaluated as one vectorized array expression instead of a Python-level loop.
y = 6 * X**2 + 8 * X + 2  # y = 6x^2 + 8x + 2
# Alternative target (disabled): noisy sine, y = sin(x) + N(0, 0.2)
#y=np.asarray([np.sin(x)+np.random.normal(0,0.2) for x in X])

# Line plot of the target curve y against the inputs X.
plt.plot(X,y)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing. The fixed random_state makes the
# split, and every number computed from it, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# Number of samples in the held-out test split.
len(X_test)

In [None]:
# Scatter plot of the training split.
plt.scatter(X_train,y_train)

In [None]:
# Scatter plot of the held-out test split.
plt.scatter(X_test,y_test,c='blue')

In [None]:
# Three trainable float64 coefficients, each initialised from N(0, 0.1).
# They are passed to polyval, so index 0 is the highest-degree coefficient.
theta = [
    tf.Variable(np.random.normal(0, 0.1), trainable=True, dtype=tf.float64)
    for _ in range(3)
]
# Loss function

def loss(real_y, pred_y,theta):
    """Mean absolute error between targets and predictions.

    Args:
        real_y: observed target values.
        pred_y: model predictions, broadcastable against ``real_y``.
        theta: unused; kept so existing callers keep working.

    Returns:
        Scalar tensor holding ``mean(|real_y - pred_y|)``.
    """
    # tf.abs gives the same value as sqrt((.)**2), but sqrt has an infinite
    # derivative at 0, so one exactly-fitted sample made the whole gradient NaN.
    return tf.reduce_mean(tf.abs(real_y - pred_y))

# Step function
def step(real_x, real_y, learning_rate=0.001):
    """Run one full-batch gradient-descent update on the global ``theta``.

    Args:
        real_x: input values at which the polynomial is evaluated.
        real_y: target values matching ``real_x``.
        learning_rate: step size applied to every coefficient gradient
            (defaults to the previously hard-coded 0.001).

    Returns:
        The loss computed before the update, as a NumPy scalar.
    """
    # A non-persistent tape is enough: only one gradient is requested, and the
    # tape's resources are released as soon as that call returns.
    with tf.GradientTape() as tape:
        # Forward pass: evaluate the polynomial with the current coefficients
        pred_y = tf.math.polyval(theta, real_x)
        poly_loss = loss(real_y, pred_y, theta)

    # Backward pass: d(loss)/d(theta_k) for each coefficient
    grads = tape.gradient(poly_loss, theta)
    # In-place gradient-descent update of each variable
    for par, grad in zip(theta, grads):
        par.assign_sub(grad * learning_rate)
    return poly_loss.numpy()

# Training loop
for i in range(10000):
    iter_loss = step(X_train, y_train)
    # Report the loss on every 1000th iteration (including the first one)
    if not i % 1000:
        print('iteration : {0}, loss : {1} '.format(i, iter_loss))

# Summary: separator line followed by the fitted polynomial
print('------------------------------------')
print(f'y ≈ {theta[0].numpy()}x^2 + {theta[1].numpy()}x + {theta[2].numpy()}')


La solución del problema de estimación de coeficientes puede calcularse en forma cerrada mediante mínimos cuadrados (https://mathworld.wolfram.com/LeastSquaresFittingPolynomial.html)

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Design matrix with one ROW per power of x, ordered [x^2, x, 1]:
# PolynomialFeatures yields columns [1, x, x^2]; transposing turns them into
# rows and the flip along axis 0 puts the highest degree first (polyval order).
X_mat = np.flip(PolynomialFeatures(degree=2).fit_transform(X_train.reshape(-1,1)).transpose(),0)
# Solve the least-squares problem directly instead of explicitly inverting
# X X^T: forming and inverting the normal-equations matrix squares the
# condition number and loses precision as the polynomial degree grows.
theta_hat, *_ = np.linalg.lstsq(X_mat.T, y_train, rcond=None)
print('y ≈ {0:0.2f}x^2 + {1:0.2f}x + {2:0.2f}'.format(*theta_hat.tolist()))

Podemos utilizar los coeficientes obtenidos mediante mínimos cuadrados para predecir sobre el conjunto de prueba

In [None]:
# Evaluate the closed-form polynomial at each test input, one value per sample
y_pred_exact = [np.polyval(theta_hat, x) for x in X_test]

Se compara con las predicciones obtenidas mediante descenso del gradiente

In [None]:
# Evaluate the gradient-descent polynomial at each test input and unwrap
# every resulting tensor into a NumPy scalar
y_pred = [tf.math.polyval(theta, x).numpy() for x in X_test]

In [None]:
# Side-by-side test-set predictions: closed-form fit (left) vs. gradient descent (right)
fig, (ax_exact, ax_gd) = plt.subplots(1, 2, figsize=(15, 8))
ax_exact.scatter(X_test, y_pred_exact, c='green')
ax_exact.set_title('forma cerrada')
ax_gd.scatter(X_test, y_pred, c='blue')
ax_gd.set_title('descenso del gradiente')

El método del descenso del gradiente es numéricamente inestable cuando crece el orden del polinomio. Por lo tanto, se necesitan mecanismos de regularización o bien mecanismos de optimización más robustos (Nesterov).

https://stats.stackexchange.com/questions/350130/why-is-gradient-descent-so-bad-at-optimizing-polynomial-regression

In [None]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout,Input
from tensorflow.keras.optimizers import RMSprop,SGD
from tensorflow.keras import regularizers

# Single linear unit on one scalar feature: y = w*x + b.
model = Sequential()
# Input expects a shape tuple (batch dimension excluded); a bare int is not
# the documented form and is not accepted consistently across Keras versions.
model.add(Input(shape=(1,)))
model.add(Dense(1,activation='linear'))
model.summary()

In [None]:
# Optimisation settings for the linear Keras model
epochs = 500
opt = RMSprop(1e-3)

# Mean squared error is used both as the training loss and as the reported metric
model.compile(
    loss='mean_squared_error',
    optimizer=opt,
    metrics=['mean_squared_error'],
)

# Silent training on the training split, then evaluation on the test split
history = model.fit(X_train, y_train, epochs=epochs, verbose=0)
score = model.evaluate(X_test, y_test, verbose=0)

In [None]:
# Training loss recorded by fit() for each epoch.
plt.plot(history.history["loss"])

In [None]:
# Predict on the test inputs with the Keras model.
# NOTE: this rebinds `y_pred`, which an earlier cell filled with the manual
# gradient-descent predictions.
y_pred=model.predict(X_test)

In [None]:
# Side-by-side test-set predictions: closed-form fit (left) vs. the Keras model (right)
fig, (ax_exact, ax_keras) = plt.subplots(1, 2, figsize=(15, 8))
ax_exact.scatter(X_test, y_pred_exact, c='green')
ax_exact.set_title('forma cerrada')
ax_keras.scatter(X_test, y_pred, c='blue')
ax_keras.set_title('descenso del gradiente keras')

# Redes Neuronales Tensorflow

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop,SGD

# Dataset constants: number of digit classes and the per-image shape tuple
num_classes = 10
input_shape = (28, 28, 1)

# Load MNIST, which arrives already split into train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Rescale pixels to [0, 1] as float32 and flatten every image to 784 values
x_train = (x_train.astype("float32") / 255).reshape((-1, 784))
x_test = (x_test.astype("float32") / 255).reshape((-1, 784))
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# One-hot encode the integer class labels of both splits
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)



In [None]:
from tensorflow.keras import regularizers

# One hidden ReLU layer followed by a softmax output over the 10 classes;
# the output kernel carries a combined L1/L2 penalty.
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(784,)))
model.add(Dense(10, activation='softmax',kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-2)))
model.summary()

epochs=100
# `learning_rate` is the supported keyword (`lr` is a deprecated alias), and
# the no-op `decay=0` is dropped because current Keras optimizers reject `decay`.
sgd = SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])
# NOTE: the test split doubles as validation data here, so the val_* curves
# are not an estimate independent of the final evaluation below.
history = model.fit(x_train, y_train,
                    batch_size=128,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)

In [None]:
# Result of model.evaluate on the test split; with the compiled 'accuracy'
# metric this is presumably [loss, accuracy] -- confirm against the output.
print(score)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Derive the epoch axis from the recorded history rather than the global
# `epochs`: another cell also assigns `epochs`, so after out-of-order
# execution the two lengths can disagree and plt.plot would raise.
N = np.arange(len(history.history["loss"]))
plt.style.use("ggplot")

fig = plt.figure(figsize=(15,7))
fig.subplots_adjust(hspace=0.4, wspace=0.4)

# Left panel: training vs. validation loss per epoch
plt.subplot(1, 2, 1)
plt.plot(N, history.history["loss"], label="train_loss")
plt.plot(N, history.history["val_loss"], label="val_loss")
plt.title("Train/Val Loss (Simple NN)")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.legend()
# Right panel: training vs. validation accuracy per epoch
plt.subplot(1, 2, 2)
plt.plot(N, history.history["accuracy"], label="train_acc")
plt.plot(N, history.history["val_accuracy"], label="val_acc")
plt.title("Train/Val Accuracy (Simple NN)")
plt.xlabel("Epoch #")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """Print a status line and plot the confusion matrix as an annotated heat map.

    Args:
        y_true: true class labels, one per sample.
        y_pred: predicted class labels, one per sample.
        classes: sequence of display names indexed by label value; any
            array-like is accepted (list, tuple or ndarray).
        normalize: when True, every row is divided by its sum so that cells
            show per-true-class fractions instead of raw counts.
        title: plot title; when falsy a default depending on ``normalize``
            is used.
        cmap: matplotlib colormap used for the heat map.

    Returns:
        The matplotlib ``Axes`` the matrix was drawn on.
    """
    if not title:
        title = ('Normalized confusion matrix' if normalize
                 else 'Confusion matrix, without normalization')

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data. np.asarray lets callers
    # pass a plain list/tuple: indexing with a label array needs an ndarray.
    classes = np.asarray(classes)[unique_labels(y_true, y_pred)]
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A label that occurs only in y_pred has an all-zero row; keep it at 0
        # instead of dividing by zero and filling the plot with NaN.
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; text switches to white on cells darker than the
    # mid-point of the colour scale so it stays readable.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax

In [None]:
# classification_report was never imported in this notebook, so the cell
# raised NameError on a fresh kernel; import it where it is used.
from sklearn.metrics import classification_report

# Class probabilities for the test images; argmax converts both the one-hot
# targets and the softmax outputs back to integer class labels.
y_pred_nn=model.predict(x_test)
print(classification_report(y_test.argmax(axis=1), y_pred_nn.argmax(axis=1)))

In [None]:
# Confusion matrix of raw counts on the test set; argmax converts the one-hot
# targets and the predicted class probabilities into integer labels.
plot_confusion_matrix(y_test.argmax(axis=1), y_pred_nn.argmax(axis=1),classes=np.arange(num_classes))