# Assignment 3

You are given code that creates a dataset of two moons surrounded by a circle.
Each moon and the circle represent a different class.
You are also given the lecture code that finds a linear separation of the two moons.

* **You are required to** modify this code to find a non-linear separation between the three classes.

* Submissions with maximum validation accuracy **higher than 50%** will be eligible to receive **bonus credits**.

In [None]:
from sklearn.datasets import make_moons
from math import floor
import numpy as np
from matplotlib.pyplot import figure
def create_dataset(n=100, noise=0.05, random_state=5, center=(0.5, 0), radius=1.5):
    """Build a shuffled three-class 2-D dataset: two moons plus a circle.

    The moons come from ``make_moons`` (``2 * n`` samples in total) and the
    circle contributes ``n`` evenly spaced points labelled as class 2.

    Args:
        n: Number of circle points; ``make_moons`` is asked for ``2 * n``.
        noise: Noise level forwarded to ``make_moons``.
        random_state: Seed passed to ``make_moons`` and used for the final
            shuffle. The global NumPy RNG is re-seeded with this value.
        center: ``(cx, cy)`` centre of the circle.
        radius: Radius of the circle.

    Returns:
        Tuple ``(x, y)`` where ``x`` holds the 2-D points (one row per
        sample) and ``y`` is a column vector of labels with shape
        ``(num_samples, 1)``; both are permuted with the same random order.
    """
    def make_circle(center, radius, num_points):
        """Return ``num_points`` points on a circle and an all-zero label vector."""
        # linspace includes both endpoints, so the first and last points coincide.
        angles = np.linspace(0, 2*np.pi, num_points)
        radii = np.full_like(angles, radius)
        origin_x, origin_y = center
        points = np.column_stack((
            origin_x + radii * np.cos(angles),
            origin_y + radii * np.sin(angles),
        ))
        return points, np.array([0] * len(points))

    moon_points, moon_labels = make_moons(
        n_samples=n*2, shuffle=True, noise=noise, random_state=random_state
    )
    circle_points, circle_labels = make_circle(center=center, radius=radius, num_points=n)
    # Every circle label is 0 at this point; relabel the whole vector as class 2.
    circle_labels.fill(2)

    features = np.vstack((moon_points, circle_points))
    labels = np.concatenate((moon_labels, circle_labels)).reshape(-1, 1)

    # Shuffle moons and circle together so a prefix/suffix split mixes classes.
    np.random.seed(random_state)
    order = np.random.permutation(len(labels))
    return features[order], labels[order]
# Dataset configuration.
N = 100  # circle points; make_moons is asked for 2 * N, so 3 * N samples overall
NOISE = 0  # noise level forwarded to make_moons
TRAIN_PERCENT = 0.8  # fraction of the 3 * N samples used for training
SEED = 5
CENTER = (0.5, 0)
RADIUS = 1.5

x, y = create_dataset(n=N, noise=NOISE, random_state=SEED, center=CENTER, radius=RADIUS)

# create_dataset already shuffles the samples, so a prefix/suffix split is enough.
n_train = floor(TRAIN_PERCENT*N*3)
x_train, y_train = x[:n_train, :], y[:n_train]
x_valid, y_valid = x[n_train:, :], y[n_train:]

fig = figure(figsize = (10, 10))
ax = fig.add_subplot(1, 1, 1)
# y_train is a column vector, so np.argwhere would return (row, col) pairs;
# indexing rows with those pairs also selects row 0 for every match and draws
# sample 0 in all three colours. np.flatnonzero yields plain 1-D row indices.
train_zero_indices = np.flatnonzero(y_train == 0)
train_one_indices = np.flatnonzero(y_train == 1)
train_two_indices = np.flatnonzero(y_train == 2)
ax.plot(x_train[train_zero_indices, 0], x_train[train_zero_indices, 1], "g.")
ax.plot(x_train[train_one_indices, 0], x_train[train_one_indices, 1], "k.")
ax.plot(x_train[train_two_indices, 0], x_train[train_two_indices, 1], "r.")

### General notes:
* Make sure you tag the cell that contains your solution with the "solution" tag.
* Make sure this cell's result is independent of any other cell. In other words, it must work even if it is the only cell in the notebook.

In [None]:
def solution(x_train, y_train, x_valid, y_valid, EPOCHS=50):
    """Train a one-hidden-layer network to separate the three classes.

    The model is ``softmax(tanh(x @ w[0] + b[0]) @ w[1] + b[1])`` and is
    trained with full-batch gradient descent on the cross-entropy loss.
    Loss and accuracy for an epoch are measured before that epoch's weight
    update is applied.

    Args:
        x_train, x_valid: Arrays (NumPy or tensor) with one sample per row;
            expected shape ``(N, 2)``.
        y_train, y_valid: Integer class labels in ``{0, 1, 2}`` (two moons
            and the circle). Shapes ``(N,)`` and ``(N, 1)`` are both
            accepted; labels are flattened before use.
        EPOCHS: Number of gradient-descent steps.

    Returns:
        Tuple ``(w, b, train_loss, valid_loss, train_accuracy,
        valid_accuracy, predict)``:

        * ``w`` and ``b`` are lists holding the weight and bias tensors of
          each layer, in the order ``predict`` consumes them.
        * The four metric values are Python lists with one float per epoch.
        * ``predict(x, w, b)`` returns the class probabilities, one row per
          sample, so it can be used with ``w`` and ``b`` on new datapoints.
    """
    # Don't change the input parameters
    import torch

    NUM_CLASSES = 3
    PROJECTION_DIM = 16  # width of the hidden layer
    DELTA = 0.1  # gradient-descent step size

    def to_features(x):
        # Force float64 so the inputs match the dtype of the weights.
        return torch.as_tensor(x, dtype=torch.float64)

    def to_labels(y):
        # Flatten so that (N, 1) label columns one-hot encode to (N, classes)
        # instead of (N, 1, classes).
        return torch.as_tensor(y, dtype=torch.long).reshape(-1)

    def init_weight(fan_in, fan_out):
        # Scale by 1/sqrt(fan_in) to keep the initial pre-activations small.
        weight = torch.randn(size = (fan_in, fan_out)).to(torch.float64) / fan_in ** 0.5
        weight.requires_grad = True
        return weight

    def init_bias(width):
        return torch.zeros((1, width), dtype = torch.float64, requires_grad = True)

    def predict(x, w, b):
        x = torch.as_tensor(x, dtype = torch.float64)
        # The tanh hidden layer is what makes the decision boundary non-linear.
        hidden = torch.tanh(x @ w[0] + b[0])
        a = hidden @ w[1] + b[1]
        return torch.softmax(a, dim = 1)

    def loss_fn(y_hat, y):
        # Clamp before the log: a probability that underflows to 0 would
        # otherwise give 0 * log(0) = nan for the non-target classes.
        return torch.mean(-(y*torch.log(y_hat.clamp_min(1e-12))).sum(dim = 1))

    def accuracy(y_hat, labels):
        return (y_hat.argmax(dim = 1) == labels).to(torch.float64).mean().item()

    x_train, x_valid = to_features(x_train), to_features(x_valid)
    train_labels, valid_labels = to_labels(y_train), to_labels(y_valid)
    y_train = torch.nn.functional.one_hot(train_labels, num_classes = NUM_CLASSES)
    y_valid = torch.nn.functional.one_hot(valid_labels, num_classes = NUM_CLASSES)

    torch.manual_seed(5)
    num_features = x_train.shape[1]
    w = [init_weight(num_features, PROJECTION_DIM), init_weight(PROJECTION_DIM, NUM_CLASSES)]
    b = [init_bias(PROJECTION_DIM), init_bias(NUM_CLASSES)]
    params = [*w, *b]

    train_loss, valid_loss = [], []
    train_accuracy, valid_accuracy = [], []
    for _ in range(EPOCHS):
        train_hat = predict(x_train, w, b)
        curr_train_loss = loss_fn(train_hat, y_train)
        curr_train_loss.backward()
        train_loss.append(curr_train_loss.item())
        train_accuracy.append(accuracy(train_hat, train_labels))
        with torch.no_grad():
            valid_hat = predict(x_valid, w, b)
            valid_loss.append(loss_fn(valid_hat, y_valid).item())
            valid_accuracy.append(accuracy(valid_hat, valid_labels))
            # Plain gradient-descent step; gradients are reset in place so
            # they do not accumulate across epochs.
            for param in params:
                param -= DELTA * param.grad
                param.grad.zero_()

    return w, b, train_loss, valid_loss, train_accuracy, valid_accuracy, predict