# Goal:

- Provide a plaintext interface for analyzing, step by step, what is happening in the encrypted code.

- Used as a Python sanity check because I'm not that familiar with R.

In [None]:
import numpy as np

np.random.seed(42)
import pandas as pd
from sklearn.metrics import log_loss


# Load and Process the Data

In [None]:

def sigmoid(z):
    """Apply the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Scalar or NumPy array of logits.

    Returns:
        The logistic transform of ``z``, with the same shape as ``z``.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator


def fwd(train_x, betas, dbg=False):
    """Run the logistic-regression forward pass.

    Args:
        train_x: Feature matrix; must support ``train_x @ betas``.
        betas: Weight vector.
        dbg: When true, print the raw logits before the sigmoid is applied.

    Returns:
        Predicted probabilities with a trailing axis of length 1 appended.
    """
    logits = train_x @ betas
    if dbg:
        print(f"Logits: {logits}")
    probabilities = sigmoid(logits)
    return np.expand_dims(probabilities, axis=-1)


def calculate_gradient(train_x, train_y, betas, fwd, dbg):
    """Compute ``-X^T (y - fwd(X, betas)) / n``.

    When ``fwd`` returns sigmoid probabilities this is the gradient of the
    mean log-loss with respect to ``betas``.

    Args:
        train_x: Feature matrix with one row per sample.
        train_y: Labels; must broadcast against the output of ``fwd``.
        betas: Current weights, forwarded to ``fwd``.
        fwd: Callable ``fwd(train_x, betas, dbg)`` returning predictions.
        dbg: Debug flag forwarded to ``fwd``.

    Returns:
        The gradient, averaged over ``len(train_y)`` samples.
    """
    residual = train_y - fwd(train_x, betas, dbg)
    return -train_x.T @ residual / len(train_y)


def cost(x, y, theta):
    """Mean binary cross-entropy of a logistic model.

    Computes ``-(y^T log(h) + (1 - y)^T log(1 - h)) / m`` where
    ``h = sigmoid(x @ theta)`` and ``m`` is the number of rows of ``x``.

    Both ``h`` and ``1 - h`` are floored at ``1e-15`` before the log is
    taken, so a saturated sigmoid (exactly 0 or 1) cannot yield ``-inf`` or
    ``nan``. For non-saturated probabilities the floor is a no-op.

    Args:
        x: Feature matrix with one row per sample.
        y: Labels; transposed and matrix-multiplied against the log terms.
            Assumes a 2-D column of shape ``(m, 1)`` -- TODO confirm against
            the caller.
        theta: Weight vector.

    Returns:
        The first element of the ``(t1 - t2) / m`` array.
    """
    eps = 1e-15  # smallest probability allowed inside a log
    m = x.shape[0]
    h = sigmoid(np.matmul(x, theta))

    # Floor each log argument from below only; an upper bound is unnecessary
    # because probabilities never exceed 1.
    log_h = np.log(np.maximum(h, eps))
    log_one_minus_h = np.log(np.maximum(1 - h, eps))

    t1 = np.matmul(-y.T, log_h)
    t2 = np.matmul(1 - y.T, log_one_minus_h)

    return ((t1 - t2) / m)[0]


def load_data(
    x_file="../train_data/X_norm_1024.csv",
    y_file="../train_data/y_1024.csv",
):
    """Load the training data used to compare against the reference Python.

    The reference Python implementation was itself validated against R.

    Args:
        x_file: Path to the CSV holding the feature matrix.
        y_file: Path to the CSV holding the labels.

    Returns:
        A ``(train_x, train_y)`` tuple. ``train_x`` is returned as the
        ``pandas.DataFrame`` produced by ``read_csv``; ``train_y`` is
        converted to a NumPy array.
    """
    train_x = pd.read_csv(x_file)
    train_y = pd.read_csv(y_file).to_numpy()
    return train_x, train_y


def nesterov(betas, epochs, lr, mu, train_x, train_y, breakout=True):
    """Train logistic regression with Nesterov accelerated gradient.

    Each epoch takes a gradient step from the current weights and then,
    except on the very first epoch, adds a momentum term based on the
    difference between this step and the previous one. The loss is recorded
    after the weights have been updated.

    Args:
        betas: Initial weights; copied, never modified.
        epochs: Number of iterations to run.
        lr: Learning rate.
        mu: Momentum coefficient.
        train_x: Feature matrix.
        train_y: Labels.
        breakout: Accepted but not used by this function.

    Returns:
        List with one loss value per epoch.
    """
    from copy import deepcopy

    previous_step = deepcopy(betas)
    weights = deepcopy(betas)

    losses = []
    for epoch in range(epochs):
        grad = calculate_gradient(train_x, train_y, weights, fwd, dbg=False)
        step = weights - lr * np.squeeze(grad)

        # The first epoch is a plain gradient step: no momentum is applied.
        weights = step if epoch == 0 else step + mu * (step - previous_step)

        previous_step = step
        losses.append(cost(train_x, train_y, weights))
    return losses



In [None]:
# Hyper-parameters for the Nesterov run: learning rate and momentum.
lr = 0.1
mu = 0.1

# Initial weights; same shape as Marcelo's reference code
betas = np.zeros(10)

train_x, train_y = load_data()

# Analysis

1) We got the `loss_exact` and `loss_estimates` by modifying the relevant parts of Marcelo's code, specifically the `cheb.gr` function in `chebyshev_approx.R`, where we switched between the following two lines:

```r
p = cheb.sigmoid_approx(z, degree, range)
# -- or --
p = cheb.sigmoid(z)
```

where the first line (`cheb.sigmoid_approx`) gave us the approximate NAG losses and the second (`cheb.sigmoid`) gave us the exact NAG losses.

In [None]:
class Config:
    """A named reference run whose per-epoch losses are stored in a CSV file.

    Attributes are set in ``__init__``:
        name: Human-readable label for the run.
        degree: Degree used to locate the loss file ``nag_<degree>_loss.csv``.
        data: ``None`` until ``load_data`` is called, then a NumPy array of
            losses.
    """

    def __init__(self, name, degree):
        self.name = name
        self.degree = degree
        self.data = None

    def load_data(self, data_dir="raw_data"):
        """Read the losses for this run into ``self.data``.

        The first line of the file is treated as a header and skipped. Blank
        lines are ignored, so the last row is kept whether or not the file
        ends with a newline. The loss is taken from the second
        comma-separated column of each row.

        Args:
            data_dir: Directory containing ``nag_<degree>_loss.csv``.
        """
        with open(f"{data_dir}/nag_{self.degree}_loss.csv", "r") as f:
            next(f, None)  # skip the header row
            train_losses = [
                float(line.split(",")[1]) for line in f if line.strip()
            ]
        self.data = np.asarray(train_losses)

# Reference runs to compare against, one per approximation degree.
# (An earlier variant was: Config("Degree16", -16, 16, 32))
data_list = [Config(f"Degree{degree}", degree) for degree in (128, 119)]

# Read each run's loss file from disk.
for el in data_list:
    el.load_data()

In [None]:
# Train for only as many epochs as the shortest reference run provides.
num_epochs = min(len(cfg.data) for cfg in data_list)

loss_my_nesterov = nesterov(
    betas, num_epochs, lr, mu, train_x, train_y, breakout=False
)

# Plots

## Plot the exact losses

In [None]:
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [15, 10]

xs = range(num_epochs)
plt.plot(xs, loss_my_nesterov, color='r', label='Python Exact NAG Losses')
for data in data_list:
    # num_epochs is the length of the shortest run, so trim longer runs to
    # keep x and y the same length and to annotate the loss at xs[-1].
    losses = data.data[:num_epochs]

    plt.plot(xs, losses, label=f'Train losses for Approx(degree={data.degree})')
    final_loss = losses[-1]
    plt.scatter(xs[-1], final_loss)
    plt.annotate(f"{final_loss: .8f}", (xs[-1]-5, final_loss), annotation_clip=False)

# Naming the x-axis, y-axis and the whole graph
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Loss against Epoch for Various Implementations")
plt.grid()
# Adding legend, which helps us recognize the curve according to its color
plt.legend()

plt.savefig("train_loss_plots/losses")

plt.show()

## Plot the differences

In [None]:
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [15, 10]


xs = range(num_epochs)
# loss_my_nesterov is a plain list; convert once for element-wise arithmetic.
python_losses = np.asarray(loss_my_nesterov)
for data in data_list:
    # num_epochs is the length of the shortest run, so trim longer runs
    # before subtracting; this keeps shapes aligned and compares the same
    # epoch at xs[-1].
    errors = python_losses - data.data[:num_epochs]

    plt.plot(xs, errors, label=f'Python(NAG) - Approx(degree={data.degree})')

    final_loss = errors[-1]
    plt.scatter(xs[-1], final_loss)
    plt.annotate(f"{final_loss: .8f}", (xs[-1]-5, final_loss-0.000025), annotation_clip=False)

# Naming the x-axis, y-axis and the whole graph
plt.xlabel("Epoch")
plt.ylabel("Error")
plt.title("Epoch against Error")
plt.grid()
# Adding legend, which helps us recognize the curve according to its color
plt.legend()

plt.savefig("train_loss_plots/losses_diff")

plt.show()