#### Timing for data prep and NN Prox Op calculation

These timings are compared against the timing of the exact prox op function.

#### System Set-Up

In [None]:
import numpy as np
from matplotlib import pyplot as plt 
import time as time
import sys
import platform, psutil
from numba import jit, prange

import torch
from torch import nn

from prox_op import prox_op
from data_fcns import generate_raw_data, vanilla_scaling, compute_features

In [None]:
# python version -- printed so the timings below can be tied to a specific interpreter build

print(sys.version)

In [None]:
# get CPU info: processor, architecture, OS details, and total RAM

for describe in (
    platform.processor,
    platform.machine,
    platform.version,
    platform.platform,
    platform.uname,
    platform.system,
):
    print(describe())

# total physical memory, converted from bytes to (rounded) gibibytes
total_ram_gb = round(psutil.virtual_memory().total / (1024.0 ** 3))
print(f"{total_ram_gb} GB")


In [None]:
# get GPU info (device details are only queried when CUDA reports as available)

cuda_ok = torch.cuda.is_available()
print(cuda_ok)

if cuda_ok:
    first_gpu = 0
    print(torch.cuda.device_count())
    print(torch.cuda.current_device())
    print(torch.cuda.device(first_gpu))
    print(torch.cuda.get_device_name(first_gpu))

#### Generate Raw Data

In [None]:
# --- experiment configuration ---
nn_type = "feature"          # vanilla or feature
data_dist = "both"           # norm, unif, or both
unif_min, unif_max = 0, 1    # bounds handed to generate_raw_data
min_len = max_len = 100_000  # equal bounds -- presumably every vector then has this length
num_vec = 10_000
seed = 1
num_moments = 10             # the feature matrix has num_moments + 3 columns

X, lengths, alphas, taus = generate_raw_data(
    data_dist, min_len, max_len, num_vec, unif_min, unif_max, seed
)

### Exact Prox Op Times

In [None]:
# SERIAL baseline: the exact prox op applied one row at a time

t1 = time.perf_counter()

PROX_OG = np.zeros(X.shape)
for i, x_row in enumerate(X):
    # prox_op returns several values; only the first (the prox vector) is stored
    PROX_OG[i] = prox_op(x_row, alphas[i])[0]

t2 = time.perf_counter()

serial_elapsed = t2 - t1
print(f"Total Time: {serial_elapsed}")
print(f"Average Time per Vector: {serial_elapsed/X.shape[0]}")

In [None]:
@jit(nopython=True)
def prox_op_jit(x, alpha):
    """Numba-compiled copy of ``prox_op`` from prox_op.py.

    NOTE(review): presumably this is the proximal operator of
    ``alpha * ||.||_inf`` -- confirm against prox_op.py.

    Parameters
    ----------
    x : 1-D array
        Input vector.
    alpha : scalar
        Compared against the 1-norm of ``x`` and subtracted from the running
        sum of sorted magnitudes when searching for the threshold.

    Returns
    -------
    prox : array with the shape of ``x``
        All zeros when ``alpha >= ||x||_1``; otherwise a copy of ``x`` in which
        every entry with ``|x| > tstar`` is replaced by ``sign(x) * tstar``.
    tstar : scalar
        Threshold that was found (0 if none was found or in the all-zero case).
    istar : int
        ``i + 1`` for the index at which the search stopped, or ``len(x)`` if
        the search never stopped early.
    """
    
    # same function as prox_op in prox_op.py -- just with a jit decorator
    
    m = len(x)
    
    if alpha >= np.linalg.norm(x,1):
        # alpha dominates the 1-norm of x: the result is the zero vector
        tstar = 0
        istar = m
        prox = np.zeros(x.shape)
    else:
        # permute x to be in decreasing order in abs value
        s = np.sort(np.abs(x))[::-1]
        # trailing 0 so that s[i+1] is defined for the last element
        s = np.append(s,0)
    
        # find value for minimizer    
        tstar = 0
        istar = m
        s_sum = 0  # running sum of the largest magnitudes seen so far
        i = 0
        while i < m:  # len(x) = m
            s_i = s[i]
            s_sum = s_sum + s_i
            
            # check for repeated elements: fold every following entry equal to
            # s_i into the running sum before testing the candidate
            j = 1
            while (i+j < m) and s[i+j] == s_i:  
                s_sum = s_sum + s_i
                j = j+1
            
            # advance i to the last entry of the run of equal values
            i = i + (j-1)

            # candidate threshold: (sum of the i+1 largest magnitudes - alpha) / (i+1)
            t0 = (s_sum - alpha)/(i+1)  # minimizer

            # accept the candidate only if it falls in the interval (s[i+1], s[i]]
            if (t0 <= s[i]) and (t0 > s[i+1]): 
                tstar = t0
                istar = i+1
                break

            i = i+1
        # end while
        
        # compute proximal operator: clip magnitudes above tstar, keeping signs
        prox = x.copy()
        idx = (np.abs(x) > tstar)
        prox[idx] = np.sign(x[idx])*tstar
            
    return prox, tstar, istar

In [None]:
@jit(nopython=True, parallel=True)
def compute_prox_op_og_jit(X,pred_tau):
    """Apply ``prox_op_jit`` to every row of ``X`` in a parallel loop.

    Parameters
    ----------
    X : 2-D array
        One input vector per row.
    pred_tau : 1-D array
        Per-row second argument forwarded to ``prox_op_jit`` (its ``alpha``).
        The parameter name is kept for backward compatibility; the callers in
        this file pass ``alphas``.

    Returns
    -------
    PROX : array with the shape of ``X``
        Row ``i`` is the first element returned by
        ``prox_op_jit(X[i,:], pred_tau[i])``.
    """

    PROX = np.zeros(X.shape)
    for i in prange(PROX.shape[0]):
        # Read the per-row value from the argument. The previous version read
        # the module-level ``alphas`` instead, which numba freezes as a
        # compile-time constant and which ignores whatever the caller passes.
        PROX[i,:] = prox_op_jit(X[i,:], pred_tau[i])[0]

    return PROX

In [None]:
# first call: its timing is reported below as "+ COMPILATION"
t1 = time.perf_counter()
compute_prox_op_og_jit(X,alphas)
t2 = time.perf_counter()

# second call: this is the timing reported without compilation
t3 = time.perf_counter()
P0 = compute_prox_op_og_jit(X,alphas)
t4 = time.perf_counter()

n_vectors = X.shape[0]
with_compile = t2 - t1
steady_state = t4 - t3

print(f"Total Time + COMPILATION: {with_compile}")
print(f"Average Time per Vector + COMPILATION: {with_compile/n_vectors}")

print(f"Total Time: {steady_state}")
print(f"Average Time per Vector: {steady_state/n_vectors}")

In [None]:
# check that the serial (PROX_OG) and jit-parallel (P0) exact prox op results are identical
print(np.array_equal(PROX_OG, P0))

### Use Features NN

#### Compute Features -- TIMED (see below)

1. Serial version
2. parallel (jit) version -- TIMED
3. Checked that the serial and parallel versions give the same results

In [None]:
# SERIAL feature computation (reference for the jit version)

t1 = time.perf_counter()
M2, yhat2, mus2, zero_idx2 = compute_features(X, lengths, alphas, taus, num_moments)
t2 = time.perf_counter()

serial_feat_elapsed = t2 - t1
print(f"Total Time: {serial_feat_elapsed}")
print(f"Average Time per Vector: {serial_feat_elapsed/M2.shape[0]}")

In [None]:
# total of the zero_idx2 flags returned by compute_features (presumably the number of flagged rows)
sum(zero_idx2)

In [None]:
@jit(nopython=True, parallel=True)
def compute_features_jit(X, len_v, alphas, taus, num_moments):
    """Compute the feature row for every row of ``X`` in a parallel loop.

    For row ``i`` with ``w = |X[i,:]| / alphas[i]``: if ``||w||_1 > 1`` the row
    is centred as ``v = w - mu`` with ``mu = ||w||_1 / len_v`` and the features
    are, in order: min(v), max(v), mean absolute value, root mean square, the
    signed j-th roots of the mean of ``v**j`` for ``j = 3..num_moments``, and
    ``log(len_v)``. Otherwise the row is reported with a print and flagged in
    ``zero_idx``; its row of ``M`` and its ``mus`` entry stay 0.

    Parameters
    ----------
    X : 2-D array
        One input vector per row.
    len_v : scalar
        Vector length used as the divisor for every row (a single value, not
        a per-row length).
    alphas : 1-D array
        Per-row scaling value.
    taus : unused
        Only referenced by the commented-out ``yhat`` computation.
    num_moments : int
        Highest moment order; ``M`` has ``num_moments + 3`` columns.

    Returns
    -------
    M : array, shape (num_obs, num_moments + 3)
    mus : array, shape (num_obs,)
    zero_idx : float array, shape (num_obs,)
        1.0 for flagged rows and 0.0 otherwise (not a boolean array).
    """

    # same function as compute_features in data_fcns.py -- exceptions: 1) uses a jit decorator, 
    # 2) variable vector length not needed, 3) no need to return yhat 
    
    num_obs = X.shape[0]
    M = np.zeros((num_obs, num_moments+3))
    #yhat = np.zeros(num_obs)
    # float flags (the dtype=bool variant is commented out)
    zero_idx = np.zeros(num_obs)#, dtype=bool)
    mus = np.zeros(num_obs)
    
    for i in prange(num_obs):
        
        #if i%1000 == 0:
        #    print(i)
        
        #len_v = lengths[i]
        x = X[i,:]  # X[i,0:len_v]
        alpha = alphas[i]
        
        # magnitudes scaled by alpha, and their 1-norm
        w = np.abs(x)/alpha
        w_1norm = np.linalg.norm(w,1) 

        if w_1norm > 1:

            # centre the scaled magnitudes around their mean
            mu = w_1norm/len_v
            v = w - mu 

            m = np.zeros(num_moments+3) # min, max, moments, length
            m[0] = np.min(v)
            m[1] = np.max(v)
            m[2] = np.linalg.norm(v,1)/len_v  # L1  

            # second moment: sqrt(sum(v^2)/len_v)
            v_power = np.square(v)
            m[3] = np.sqrt( np.sum(v_power)/len_v )

            # jth moment: (sum(v^j)/len_v)^(1/j), stored at m[j+1]
            for j in range(3, num_moments+1): 
                v_power = v_power*v  # v^j
                mom = np.sum(v_power)/len_v
                if j % 2 == 1: # odd moment: can be negative, so take the root of |mom| and restore the sign
                    m[j+1] = np.sign(mom)*np.power(abs(mom), 1/j)
                else: # even moment
                    m[j+1] = np.power(mom, 1/j)

            # last feature: log of the vector length
            m[2+num_moments] = np.log(len_v)

            M[i,:] = m
            
            # transform y (tau) 
            #yhat[i] = (taus[i]/alpha) - mu
            
            mus[i] = mu
        
        else:
            # ||w||_1 <= 1: no features are computed for this row; flag it instead
            print(f'Zero index {i}')
            zero_idx[i] = True

    return M, mus, zero_idx # M, yhat, mus, zero_idx   
                    

In [None]:
# first call: its timing is reported below as "+ COMPILATION"
t1 = time.perf_counter()
compute_features_jit(X, min_len, alphas, taus, num_moments)
t2 = time.perf_counter()

# second call: this timing feeds the total-time calculation
t3 = time.perf_counter()
M, mus, zero_idx = compute_features_jit(X, min_len, alphas, taus, num_moments)
t4 = time.perf_counter()

n_vectors = M.shape[0]
t_feat_with_compile = t2 - t1
t_compute_feat = t4 - t3

print(f"Total Time + COMPILATION: {t_feat_with_compile}")
print(f"Average Time per Vector + COMPILATION: {t_feat_with_compile/n_vectors}")

print(f"Total Time: {t_compute_feat}")
print(f"Average Time per Vector: {t_compute_feat/n_vectors}")

In [None]:
# number of rows flagged by compute_features_jit (zero_idx holds 1.0 for flagged rows, 0.0 otherwise)
sum(zero_idx)

In [None]:
# check if the parallel and serial versions of feature computation are equal - YES
# (yhat is not compared: the jit version does not return it)

# exact equality first ...
for parallel_out, serial_out in ((M, M2), (mus, mus2), (zero_idx, zero_idx2)):
    print(np.array_equal(parallel_out, serial_out))

# ... then equality up to floating-point tolerance
for parallel_out, serial_out in ((M, M2), (mus, mus2)):
    print(np.allclose(parallel_out, serial_out))

In [None]:
# element-wise difference between the parallel (mus) and serial (mus2) results
mus-mus2

In [None]:
# element-wise difference between the parallel (M) and serial (M2) feature matrices
M-M2

In [None]:
# remove any observations from dataset that have tau = 0 -- NOT NEEDED FOR TIMED VERSIONS
# if there are zeros in the dataset, use a different dataset

# compute_features_jit returns zero_idx as a float array (1.0 = flagged row).
# NumPy's ~ operator is not defined for floats, so build a boolean mask first.
# zero_idx itself is left untouched so later cells still see the original flags.
zero_mask = np.asarray(zero_idx).astype(bool)

if zero_mask.any():

    keep = ~zero_mask

    M = M[keep,:]
    #yhat = yhat[keep]
    mus = mus[keep]
    alphas = alphas[keep]
    taus = taus[keep]

    # for compute prox ops
    X = X[keep,:]
    #lengths = lengths[keep]

In [None]:
# True if at least one row was flagged in zero_idx
sum(zero_idx) > 0

#### Load NN

In [None]:
# seed both the NumPy and the PyTorch random number generators with 0
np.random.seed(0)
torch.manual_seed(0)

In [None]:
# the model is placed on the CPU; the first layer takes one input per column of M
device = "cpu"
layer1_size = M.shape[1]
layer1_size

In [None]:
# set NN based on layer1_size

if layer1_size == (num_moments+3):  # features NN

    class NeuralNetwork(nn.Module):
        """Features network: (num_moments+3) -> 25 -> 10 -> 1 with ReLU between layers."""

        def __init__(self):
            super().__init__()
            self.linear_relu_stack = nn.Sequential(
                nn.Linear(num_moments+3, 25),
                nn.ReLU(),
                nn.Linear(25, 10),
                nn.ReLU(),
                nn.Linear(10, 1)
            )

        def forward(self, x):
            """Return the network output for input batch ``x``."""
            tau = self.linear_relu_stack(x)
            return tau

elif (layer1_size == 2000) or (layer1_size == 100000):   # vanilla NN

    class NeuralNetwork(nn.Module):
        """Vanilla network: layer1_size -> 200 -> 100 -> 50 -> 1 with ReLU between layers."""

        def __init__(self):
            super().__init__()
            self.linear_relu_stack = nn.Sequential(
                nn.Linear(layer1_size, 200),
                nn.ReLU(),
                nn.Linear(200, 100),
                nn.ReLU(),
                nn.Linear(100, 50),
                nn.ReLU(),
                nn.Linear(50, 1)
            )

        def forward(self, x):
            """Return the network output for input batch ``x``."""
            tau = self.linear_relu_stack(x)
            return tau

else:
    # Fail here with a clear message. Silently continuing would either raise a
    # NameError on the next line or, in a notebook session, reuse a
    # NeuralNetwork class left over from an earlier run.
    raise ValueError(
        f"Unsupported input width {layer1_size}: expected {num_moments+3} "
        "(features NN) or 2000 / 100000 (vanilla NN)."
    )

model = NeuralNetwork().to(device)
print(model)

In [None]:
# load model

model = NeuralNetwork()
# alternative checkpoints (swap the active line to use a different trained model):
#model.load_state_dict(torch.load("models/features/gaussian/len_1000_2000/epoch_4421_nn.pt"))
#model.load_state_dict(torch.load("models/features/gaussian/len_1000_100000/epoch_4695_nn.pt"))
#model.load_state_dict(torch.load("models/features/uniform_0_1/len_1000_2000/epoch_4897_nn.pt"))
#model.load_state_dict(torch.load("models/features/uniform_0_1/len_1000_100000/epoch_4928_nn.pt"))
#model.load_state_dict(torch.load("models/features/both/len_1000_2000/epoch_4570_nn.pt"))

# map_location remaps the stored tensors onto `device`, so loading also works
# when the checkpoint was saved from a device that is not available here
model.load_state_dict(
    torch.load("models/features/both/len_1000_100000/epoch_4792_nn.pt", map_location=device)
)

# switch to evaluation mode before inference
model.eval()

#### NN Inference -- TIMED

In [None]:
# forward pass with gradient tracking disabled; the tensor conversion of M is inside the timed region
with torch.no_grad():
    t1 = time.perf_counter()
    pred_tau_hat = model(torch.Tensor(M))
    t2 = time.perf_counter()

t_nn_inf = t2 - t1
n_vectors = M.shape[0]
print(f"Total Time: {t_nn_inf}")
print(f"Average Time per Vector: {t_nn_inf/n_vectors}")

In [None]:
# time the conversion of the network output back to tau

t1 = time.perf_counter()

# transform tau back: tau = alpha(tau_hat + mu)
pred_tau_hat = pred_tau_hat.squeeze().numpy()
pred_tau = alphas * (pred_tau_hat + mus)

t2 = time.perf_counter()

t_tau_from_tauhat = t2 - t1
n_vectors = M.shape[0]
print(f"Total Time: {t_tau_from_tauhat}")
print(f"Average Time per Vector: {t_tau_from_tauhat/n_vectors}")

#### Compute Prox Op with Predicted Taus -- TIMED

1. serial version
2. parallel (jit) version -- TIMED
3. Check that serial and parallel versions give the same result

In [None]:
# SERIAL

def compute_prox_op(X,pred_tau):
    """Clip each row of ``X`` to its own threshold, one row at a time (serial).

    Entries of row ``i`` whose magnitude exceeds ``pred_tau[i]`` are replaced
    by ``sign(entry) * pred_tau[i]``; all other entries are kept. ``X`` itself
    is not modified.

    Parameters
    ----------
    X : 2-D array, one vector per row.
    pred_tau : 1-D sequence with one threshold per row of ``X``.

    Returns
    -------
    A new array with the shape of ``X``.
    """
    clipped = np.array(X, copy=True)

    for row in range(clipped.shape[0]):
        tau = pred_tau[row]
        x_row = X[row,:]
        too_large = np.abs(x_row) > tau
        clipped[row,too_large] = np.sign(x_row[too_large])*tau

    return clipped

In [None]:
@jit(nopython=True, parallel=True)
def compute_prox_op_jit(X,pred_tau):
    """Numba-parallel counterpart of ``compute_prox_op``.

    Each row ``r`` of the result is a copy of ``X[r,:]`` in which entries with
    magnitude above ``pred_tau[r]`` are replaced by ``sign(entry) * pred_tau[r]``.
    Rows are processed in a ``prange`` loop.
    """

    n_rows = X.shape[0]
    out = np.zeros(X.shape)

    for r in prange(n_rows):
        tau = pred_tau[r]
        out[r,:] = X[r,:].copy()
        too_large = (np.abs(X[r,:]) > tau)
        out[r,too_large] = np.sign(X[r,too_large])*tau

    return out

In [None]:
# serial reference run
t5 = time.perf_counter()
P2 = compute_prox_op(X,pred_tau)
t6 = time.perf_counter()

# first jit call: its timing is reported below as "+ COMPILATION"
t1 = time.perf_counter()
compute_prox_op_jit(X,pred_tau)
t2 = time.perf_counter()

# second jit call: this timing feeds the total-time calculation
t3 = time.perf_counter()
P = compute_prox_op_jit(X,pred_tau)
t4 = time.perf_counter()

n_vectors = M.shape[0]
t_serial = t6 - t5
t_with_compile = t2 - t1
t_prox_op = t4 - t3

print(f"SERIAL Total Time: {t_serial}")
print(f"SERIAL Average Time per Vector: {t_serial/n_vectors}")

print(f"Total Time + COMPILATION: {t_with_compile}")
print(f"Average Time per Vector + COMPILATION: {t_with_compile/n_vectors}")

print(f"Total Time: {t_prox_op}")
print(f"Average Time per Vector: {t_prox_op/n_vectors}")

In [None]:
# check if the parallel and serial versions of the prox op computation are equal - YES
# (exact comparison first, then comparison up to floating-point tolerance)

for same in (np.array_equal, np.allclose):
    print(same(P, P2))

#### Calculate total prox op time

1. compute tau from tau_hat
2. compute prox op with tau

In [None]:
# prox op cost = recovering tau from tau_hat + applying the threshold
n_vectors = M.shape[0]
t_temp = t_prox_op + t_tau_from_tauhat

print(f"Prox Op Time: {t_temp}")
print(f"Average Time per Vector: {t_temp/n_vectors}")

#### Calculate total time

In [None]:
# end-to-end cost: features + NN inference + tau recovery + prox op
stage_times = (t_compute_feat, t_nn_inf, t_tau_from_tauhat, t_prox_op)
t_total = sum(stage_times)
n_vectors = M.shape[0]

print(f"Total Time: {t_total}")
print(f"Average Time per Vector: {t_total/n_vectors}")

In [None]:
# average feature-computation time per row of M
t_compute_feat/M.shape[0]

In [None]:
# average NN inference time per row of M
t_nn_inf/M.shape[0]