In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

In [1]:
# Synthetic spiral data set: K interleaved arms in the plane, N points per arm.
N = 200  # observations per class
D = 2    # features (the two plane coordinates)
K = 3    # classes (one per spiral arm)

X = np.zeros((N * K, D))
y = np.zeros(N * K, dtype='uint8')
radius = np.linspace(0, 1, N)  # identical for every arm, so it is built once
for label in range(K):
    np.random.seed(label)  # per-arm seed keeps the angular noise reproducible
    noise = np.random.randn(N) * 0.25
    angle = np.linspace(label * 4, (label + 1) * 5, N) + noise
    rows = slice(N * label, N * (label + 1))  # contiguous rows owned by this arm
    X[rows] = np.column_stack((radius * np.sin(angle), radius * np.cos(angle)))
    y[rows] = label

# Scatter plot of the full data set, one colour per class.
cdict = {0: 'red', 1: 'blue', 2: 'green'}
plt.figure(figsize=(8, 8))
for cls in np.unique(y):
    members = y == cls  # boolean mask selecting the rows of this class
    plt.scatter(X[members, 0], X[members, 1], c=cdict[cls], label=cls, marker="o", alpha=0.7)
plt.legend()

In [1]:
# Sanity check: X was allocated as (N * K, D), i.e. one row per observation.
X.shape

# Divide your dataset into train (80%) and test (20%).


In [1]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the observations for testing; random_state is fixed so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)
for split_name, split in (('X_train', X_train), ('y_train', y_train),
                          ('X_test', X_test), ('y_test', y_test)):
    print(f'The shape of {split_name} is: {split.shape}')

In [1]:
# Number of training observations per class.
pd.Series(y_train).value_counts()
# Author's observation from the output: the classes appear to be balanced.

In [1]:
# Number of test observations per class.
pd.Series(y_test).value_counts()
# Author's observation from the output: the classes appear to be balanced.

In [1]:
# Scatter plot of the training split, coloured by class.
cdict = {0: 'red', 1: 'blue', 2: 'green'}
plt.figure(figsize=(8, 8))
for cls in np.unique(y_train):
    members = y_train == cls  # boolean mask selecting the rows of this class
    plt.scatter(X_train[members, 0], X_train[members, 1], c=cdict[cls],
                label=cls, marker="o", alpha=0.7)
plt.legend()
plt.title('X_train', size=20)

In [1]:
# Scatter plot of the test split, coloured by class.
cdict = {0: 'red', 1: 'blue', 2: 'green'}
plt.figure(figsize=(8, 8))
for cls in np.unique(y_test):
    members = y_test == cls  # boolean mask selecting the rows of this class
    plt.scatter(X_test[members, 0], X_test[members, 1], c=cdict[cls],
                label=cls, marker="o", alpha=0.7)
plt.legend()
plt.title('X_test', size=20)

### Q1: Create a softmax regression model to classify the observations in the train dataset.
###    Use the model to predict on the test dataset. Report the accuracy of your model.
###    Visualize your results.

# Softmax Regression

In [1]:
# Check the container type of the training features before converting them to a tensor.
type(X_train)

In [1]:
# Training features as a float32 TensorFlow tensor (input to the hand-written softmax model).
features_train = tf.convert_to_tensor(X_train, dtype = tf.float32)
features_train

In [1]:
# Training labels as a float32 TensorFlow tensor (class ids, not yet one-hot encoded).
response_train = tf.convert_to_tensor(y_train, dtype = tf.float32)
response_train

In [1]:
# Model dimensions, read off the training tensors.
n_features = features_train.shape[1]             # columns of the feature matrix
n_outcomes = np.unique(response_train).size      # number of distinct class labels
print(f'The Number of Features : {n_features}\nThe Number of Outcomes : {n_outcomes}')


In [1]:
# One-hot encode the training labels for the cross-entropy loss.
# tf.one_hot expects integer indices, but response_train is stored as float32,
# so the labels are cast explicitly rather than relying on an implicit
# conversion of the NumPy copy.
response_one_hot = tf.one_hot(tf.cast(response_train, tf.int32), depth = n_outcomes)
response_one_hot

In [1]:
# Reading Data in Batch
def read_batch(batch_size, X, y):
    """Yield shuffled mini-batches ``(X_batch, y_batch)`` that together cover every row once.

    The row order is shuffled with NumPy's global random generator on each
    call. The final batch is shorter when the number of rows is not a multiple
    of ``batch_size``. Because this is a generator, only one batch is built at
    a time instead of storing the whole sequence in memory.
    """
    n_rows = X.shape[0]
    order = list(range(n_rows))
    np.random.shuffle(order)  # visit the rows in random order
    for start in range(0, n_rows, batch_size):
        # Slicing clamps at the end of the list, so the last (partial) batch
        # needs no special handling.
        picked = tf.constant(order[start : start + batch_size])
        yield tf.gather(X, picked), tf.gather(y, picked)

In [1]:
#Initialize Model Parameters
# Seed TensorFlow's global generator first: the starting weights (and therefore
# the losses and accuracies reported later) become reproducible, and re-running
# this cell restarts training from the same initial point.
tf.random.set_seed(123)
# Small Gaussian weights, one column per class; biases start at zero.
w = tf.Variable(tf.random.normal(shape = (n_features, n_outcomes), mean = 0, stddev = 0.01))
b = tf.Variable(tf.zeros(n_outcomes))

In [1]:
# Softmax Function
def softmax(X):
    """Row-wise softmax of a 2-D tensor of logits.

    The maximum of each row is subtracted before exponentiating. This does not
    change the mathematical result (the factor cancels in the ratio) but keeps
    ``tf.exp`` from overflowing to ``inf`` -- and the output from turning into
    NaN -- when logits are large.

    Returns a tensor of the same shape whose rows sum to 1.
    """
    shifted = X - tf.reduce_max(X, axis = 1, keepdims = True)
    exp_scores = tf.exp(shifted)  # computed once, reused for the normaliser
    return exp_scores / tf.reduce_sum(exp_scores, axis = 1, keepdims = True)  # reduction over rows

In [1]:
# Prediction Function
def pred_func(w, b , X):
    """Return ``softmax(X @ w + b)``: one row of class probabilities per observation.

    ``X`` is reshaped to ``(-1, w.shape[0])`` first, so every row carries one
    value per row of the weight matrix.
    """
    flat_inputs = tf.reshape(X, (-1, w.shape[0]))
    logits = tf.matmul(flat_inputs, w) + b
    return softmax(logits)

In [1]:
# Define the Optimization Algorithm
def sgd(params, grads, learning_rate, batch_size):
    """Apply one mini-batch gradient-descent step to every parameter, in place.

    Each parameter is decreased by ``learning_rate * grad / batch_size`` through
    its ``assign_sub`` method. ``params`` and ``grads`` are paired positionally;
    pairing stops at the shorter of the two.
    """
    for variable, gradient in zip(params, grads):
        step = learning_rate * gradient / batch_size
        variable.assign_sub(step)

In [1]:
# Hyper-parameters of the mini-batch training run.
batch_size = 30
learning_rate = 0.3
num_epochs = 100

losses = []  # mean training loss recorded after every epoch
for epoch in range(num_epochs):
    # The batch variables have their own names so the data-set arrays X and y
    # defined at the top of the notebook are not overwritten by the loop.
    # The configured batch_size is passed on instead of a second hard-coded value.
    for X_batch, y_batch in read_batch(batch_size, features_train, response_train):
        #Compute Gradients and Update Parameters
        with tf.GradientTape() as g:
            # Labels are stored as float32; tf.one_hot needs integer indices.
            y_one_hot = tf.one_hot(tf.cast(y_batch, tf.int32), depth = n_outcomes)
            y_pred = pred_func(w, b, X_batch)
            # Per-example losses: their gradient is the sum over the batch.
            loss   = tf.keras.losses.categorical_crossentropy(y_one_hot, y_pred)
        # The gradient is taken after leaving the tape context so the backward
        # pass itself is not recorded.
        dloss_w, dloss_b = g.gradient(loss, [w, b])
        #Update parameters using their gradients. The summed gradient is divided
        # by the number of rows actually in this batch (the last batch may be
        # shorter), which turns it into a mean gradient.
        sgd([w, b], [dloss_w, dloss_b], learning_rate, int(X_batch.shape[0]))
    # Full-training-set loss with the parameters reached at the end of the epoch.
    train_l = tf.keras.losses.categorical_crossentropy(response_one_hot, pred_func(w, b, features_train))
    epoch_loss = float(tf.reduce_mean(train_l))
    losses.append(epoch_loss)
    print(f'epoch {epoch + 1}, loss {epoch_loss: 0.4f}')

In [1]:
# Training-loss curve: one value per epoch, as collected in `losses`.
# Axis labels and a title let the figure stand on its own when skimmed.
plt.plot(losses)
plt.xlabel('epoch')
plt.ylabel('mean training loss')
plt.title('Softmax regression: training loss');

In [1]:
# Class probabilities predicted by the softmax model for the training observations.
y_pred_train = pred_func(w, b, features_train)
y_pred_train

In [1]:
# Predicted class = column index of the largest probability in each row.
# The probabilities are recomputed from the model instead of being read from
# y_pred_train itself: once this cell has run, y_pred_train holds 1-D labels, and
# taking argmax over axis 1 of those on a re-run would raise an axis error.
y_pred_train = np.argmax(pred_func(w, b, features_train), axis = 1)
y_pred_train

In [1]:
from sklearn.metrics import accuracy_score, confusion_matrix
# scikit-learn metrics take (y_true, y_pred). With that order the rows of the
# confusion matrix are the true classes and the columns the predicted classes;
# passing the arguments the other way round yields the transposed matrix.
print(f'The accuracy is : {accuracy_score(y_train, y_pred_train)} \n\n The confusion matrix is :\n\n {confusion_matrix(y_train, y_pred_train)} ')

## Prediction On Test

In [1]:
# Test features as a float32 TensorFlow tensor, matching the dtype used for training.
features_test = tf.convert_to_tensor(X_test, dtype = tf.float32)
features_test

In [1]:
# Test labels as a float32 TensorFlow tensor (class ids).
response_test = tf.convert_to_tensor(y_test, dtype = tf.float32)
response_test

In [1]:
# Class probabilities predicted by the softmax model for the test observations.
y_pred_test = pred_func(w, b, features_test)
y_pred_test

In [1]:
# Predicted class = column index of the largest probability in each row.
# The probabilities are recomputed from the model instead of being read from
# y_pred_test itself: once this cell has run, y_pred_test holds 1-D labels, and
# taking argmax over axis 1 of those on a re-run would raise an axis error.
y_pred_test = np.argmax(pred_func(w, b, features_test), axis = 1)
y_pred_test

In [1]:
# accuracy_score and confusion_matrix are already imported by the training-set
# evaluation cell, so they are not imported a second time here.
# scikit-learn metrics take (y_true, y_pred). With that order the rows of the
# confusion matrix are the true classes and the columns the predicted classes;
# passing the arguments the other way round yields the transposed matrix.
print(f'The accuracy is : {accuracy_score(y_test, y_pred_test)} \n\n The confusion matrix is :\n\n {confusion_matrix(y_test, y_pred_test)} ')

In [1]:
#Plot Data
# Test observations drawn on top of the decision regions of the softmax model.
# Only the three classes that exist in the data get a colour.
cdict = {0: 'red', 1: 'blue', 2: 'green'}
test_points = features_test.numpy()               # converted once and reused below
test_labels = response_test.numpy().astype(int)   # integer ids for colour lookup and legend

plt.figure(figsize = (6, 6))
for i in np.unique(test_labels):
    members = test_labels == i  # boolean mask selecting the rows of this class
    plt.scatter(x = test_points[members, 0], y = test_points[members, 1],
                c = cdict[i], label = i,
                marker = "o", alpha = 0.7)
plt.legend()

#Plot Regions
# Classify every node of a dense square grid that extends one unit beyond the
# smallest and largest test coordinate.
grid_axis = np.linspace(test_points.min() - 1, test_points.max() + 1, 500)
x1, x2 = np.meshgrid(grid_axis, grid_axis)
grids = tf.convert_to_tensor(np.c_[x1.ravel(), x2.ravel()], dtype = tf.float32)
region_pred = pred_func(w = w, b = b, X = grids)
region_color = np.argmax(region_pred, axis = 1).reshape(x1.shape)

# Level boundaries sit halfway between the integer class ids, so class k always
# falls strictly inside band k and is filled with the same colour as its scatter
# points, regardless of how contourf assigns values lying exactly on a boundary.
class_ids = sorted(cdict)
plt.contourf(x1, x2, region_color, alpha = 0.1,
             levels = [k - 0.5 for k in class_ids] + [class_ids[-1] + 0.5],
             colors = [cdict[k] for k in class_ids])
plt.xlabel('feature 1')
plt.ylabel('feature 2')
plt.title('Softmax regression: decision regions and test data');

# Q2: Create a multi-layer perceptron to classify the observations in the train dataset.
* **Use the model to predict on the test dataset. Report the accuracy of your model.**
* **Visualize your results.**

# Multi-layer perceptron

In [1]:
from keras import Sequential
from keras.layers import Dense

In [1]:
#Scale Features
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features, then standardise them with it.
# NOTE: X_train is overwritten with its scaled version, so re-running this cell
# refits the scaler on data that has already been scaled.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)

In [1]:
# One-hot encode the training labels for the categorical cross-entropy loss.
# The encoding is applied only while y_train is still the 1-D vector of class ids:
# re-running this cell would otherwise encode the one-hot matrix a second time
# and change its shape.
if np.ndim(y_train) == 1:
    y_train = keras.utils.to_categorical(y_train)
y_train

In [1]:
# Multi-layer perceptron: four tanh hidden layers of 6 units each, followed by a
# softmax output layer with one unit per class.
# The model and layers are taken from the same `keras` namespace (imported from
# tensorflow at the top of the notebook) as the optimizer below, so objects from
# two different Keras packages are never mixed.
model = keras.Sequential()
# Input width is read from the feature matrix instead of being hard-coded.
model.add(keras.layers.Dense(6, activation = 'tanh', input_shape = (X_train.shape[1], )))
for _hidden in range(3):
    model.add(keras.layers.Dense(6, activation = 'tanh'))
# y_train is one-hot encoded at this point, so its column count is the number of classes.
model.add(keras.layers.Dense(y_train.shape[1], activation = 'softmax'))

#Configure the Model
opt = keras.optimizers.RMSprop()
model.compile(optimizer = opt, loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [1]:
# Train the model; validation_split = 0.2 sets aside 20% of the training data
# for the validation metrics reported after every epoch.
history = model.fit(
    X_train,
    y_train,
    epochs = 500,
    batch_size = 32,
    verbose = 1,
    validation_split = 0.2,
)

In [1]:
#Loss - Epochs
# The curves are read from the History object returned by model.fit.
# The second curve is measured on the validation split that Keras takes from the
# training data -- not on the held-out test set -- so it is labelled 'validation'.
plt.figure(figsize = (8, 6))
plt.plot(history.history['loss'], label = 'train')
plt.plot(history.history['val_loss'], alpha = 0.7, label = 'validation')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(loc = 'upper left')  # uses the per-line labels set above
plt.grid()

In [1]:
#Accuracy - Epochs
# The curves are read from the History object returned by model.fit.
# The second curve is measured on the validation split that Keras takes from the
# training data -- not on the held-out test set -- so it is labelled 'validation'.
plt.figure(figsize = (8, 6))
plt.plot(history.history['accuracy'], label = 'train')
plt.plot(history.history['val_accuracy'], alpha = 0.7, label = 'validation')
plt.ylabel('Accuracy')
plt.xlabel('epoch')
plt.legend(loc = 'upper left')  # uses the per-line labels set above
plt.grid()

In [1]:
# Class probabilities predicted by the MLP for the (scaled) training features.
y_pred = model.predict(X_train)
y_pred

In [1]:
# Predicted class = column index of the largest probability in each row.
# The probabilities are recomputed from the model instead of being read from
# y_pred itself: once this cell has run, y_pred holds 1-D labels, and taking
# argmax over axis 1 of those on a re-run would raise an axis error.
y_pred = np.argmax(model.predict(X_train), axis = 1)
y_pred

In [1]:
# Training accuracy of the MLP; y_train is one-hot here, so argmax recovers the class ids.
# Arguments follow scikit-learn's (y_true, y_pred) order; accuracy is the same either way.
accuracy_score(np.argmax(y_train, axis = 1), y_pred)

In [1]:
#Scale Features
# Scale the test features with the scaler that was fitted on the training data.
# A new scaler must not be fitted on X_test: that would standardise the test set
# with its own mean and standard deviation, so the model would receive inputs on
# a different scale from the one it was trained on, and test-set statistics
# would leak into the preprocessing.
# NOTE: X_test is overwritten with its scaled version; running this cell twice
# would scale the data twice.
X_test = scaler.transform(X_test)

In [1]:
# Predicted test classes: index of the largest MLP output probability per row.
y_pred = np.argmax(model.predict(X_test), axis = 1)

In [1]:
# Test accuracy of the MLP.
# Arguments follow scikit-learn's (y_true, y_pred) order; accuracy is the same either way.
accuracy_score(y_test, y_pred)

In [1]:
# Confusion matrix of the MLP on the test set.
# scikit-learn expects (y_true, y_pred): rows are then the true classes and
# columns the predicted classes. The reversed order gives the transposed matrix.
confusion_matrix(y_test, y_pred)