In [1]:
import pandas as pd
import numpy as np
import torch
import math

In [2]:
np.random.seed(1)
exponents = np.arange(1,8,.5)
exponents

array([1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5, 7. ,
       7.5])

In [3]:
num_data_points = []
data_list = []
for i in range(len(exponents)):
    num_data_points.append(math.ceil(10**exponents[i]))
    if num_data_points[i] % 2 != 0:
        num_data_points[i] = num_data_points[i] - 1
    tensor = torch.from_numpy(np.random.standard_normal(num_data_points[i]))
    data_list.append(tensor.type(torch.DoubleTensor))
print(num_data_points)

[10, 32, 100, 316, 1000, 3162, 10000, 31622, 100000, 316228, 1000000, 3162278, 10000000, 31622776]


In [4]:
label_list = []
for j in num_data_points:
    temp = torch.from_numpy(np.repeat([0,1], j/2))
    print(len(temp))
    label_list.append(temp.type(torch.DoubleTensor))

10
32
100
316
1000
3162
10000
31622
100000
316228
1000000
3162278
10000000
31622776


In [5]:
# time_exp = np.arange(-5,0.5,0.5)
# time_exp
# time_limits = []
# for k in time_exp:
#     time_limits.append(10**k)
time_limit = 5

In [6]:
from square_loss import square_loss
from squared_hinge_loss import squared_hinge_loss
from naive_square_loss import naive_square_loss
from naive_square_hinge_loss import naive_square_hinge_loss
from torch.nn import BCEWithLogitsLoss as logisitic_loss

In [7]:
log_loss = logisitic_loss()
loss_dict ={
    "naive_square": naive_square_loss,
    "naive_square_hinge": naive_square_hinge_loss,
    "square_loss":square_loss,
    "squared_hinge_loss": squared_hinge_loss,
    "logistic_loss": log_loss
}

In [8]:
import time

In [10]:
times_dict_list = []
for loss_name, loss_fun in loss_dict.items():
    print(loss_name)
    for x in range(len(data_list)):
        time_vec = np.zeros(1)
        for iter in range(1):
            start_time = time.time()
            if loss_name not in ["logistic_loss"]:
                loss = loss_fun(data_list[x], label_list[x], 1)
            else:
                loss = loss_fun(data_list[x], label_list[x])
            time_vec[iter] = time.time() - start_time
        if (np.median(time_vec) < time_limit):
            max_dict = {
                "loss_name": loss_name,
                "lower_quantile": np.quantile(time_vec, 0.25),
                "upper_quantile": np.quantile(time_vec, 0.75),
                "median_time": np.median(time_vec),
                "data_size": num_data_points[x]
            }
            times_dict_list.append(pd.DataFrame(max_dict,index=[0]))
        else:
            break


naive_square
naive_square_hinge
square_loss
squared_hinge_loss
logistic_loss


In [11]:
full_dt = pd.concat(times_dict_list)

In [12]:
full_dt

Unnamed: 0,loss_name,lower_quantile,upper_quantile,median_time,data_size
0,naive_square,0.003816,0.003816,0.003816,10
0,naive_square,0.014084,0.014084,0.014084,32
0,naive_square,0.043099,0.043099,0.043099,100
0,naive_square,0.340692,0.340692,0.340692,316
0,naive_square,3.30123,3.30123,3.30123,1000
0,naive_square_hinge,0.000748,0.000748,0.000748,10
0,naive_square_hinge,0.004477,0.004477,0.004477,32
0,naive_square_hinge,0.042531,0.042531,0.042531,100
0,naive_square_hinge,0.418707,0.418707,0.418707,316
0,naive_square_hinge,4.122578,4.122578,4.122578,1000


In [13]:
full_dt.to_csv("timing_data.csv")