In [1]:
import sys, os
import utils

sys.path.insert(0, os.path.abspath('../DeepSplines'))
from deepsplines.ds_modules.deepBspline import DeepBSpline
from models import LinearBSpline


opt_params = {
        'size': 3,
        'range_': 1,
        'init': 'relu',
        'save_memory': False
}

bspline = DeepBSpline('fc', 8, **opt_params) # create a single BSpline Layer

locations = bspline.grid_tensor.detach()
coefficients = bspline.coefficients_vect.view(bspline.num_activations, bspline.size).detach()

# print(locations)
# print(coefficients)

# utils.plot_bspline(locs=locations[0].numpy(), coeffs=coefficients[0].numpy)


In [2]:
import linspline
import matplotlib.pyplot as plt
import numpy as np
import torch

import importlib

importlib.reload(linspline)

linspline = linspline.LinearSpline(locations[0], coefficients[0])

(locs, coeffs) = linspline.get_locs_coeffs()

x_plot = x_plot = torch.from_numpy(np.linspace(-1, 1, 400))
y_plot = linspline.forward(x_plot)


plt.figure(figsize=(8, 6))
plt.plot(x_plot, y_plot, label='Linear Spline', color='blue')
plt.scatter(locs, coeffs, color='red', label='Knot Points')
plt.title('Linear Spline Interpolation using NumPy.interp')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.grid(True)
plt.show()


AttributeError: module 'linspline' has no attribute 'LinearSpline'

Now copy model to model

In [3]:
model = LinearBSpline([8], 3, 1, 'relu') # create a BSpline based model

for layer in range(len(model.get_deepspline_activations())):
    locs = model.get_deepspline_activations()[layer]['locations']
    coeffs = model.get_deepspline_activations()[layer]['coefficients']

In [None]:
# ! test timing on single bspline vs. linear spline vs. relu 

In [4]:
import linspline
import importlib

importlib.reload(linspline)

lin_model = linspline.LSplineFromBSpline(model.get_layers())

print("\n----\n")
print(lin_model.get_layers())


----

Sequential(
  (0): Linear(in_features=8, out_features=8, bias=True)
  (1): LinearSplineLayer(locs=Tensor(shape=(8, 3)), coeffs=Tensor(shape=(8, 3)), mode='fc')
  (2): Linear(in_features=8, out_features=1, bias=True)
)


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

from datetime import datetime
import time

from omegaconf import OmegaConf, ListConfig

from models import LinearReLU, LinearBSpline

from torch.utils.data import TensorDataset, DataLoader

import string, random, os

class Config:
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def to_dict(self):
        def convert(value):
            if isinstance(value, ListConfig):
                return OmegaConf.to_object(value)
            return value

        return {key: convert(value) for key, value in self.__dict__.items()}

def training_run(mparams, tparams, X, y):

    # train-test split of the dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.85, shuffle=True)

    X_train = torch.tensor(X_train, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)

    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)

    # using a dataloader to randomize batching
    train_dataset = TensorDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=tparams.batch_size, shuffle=True)
    
    loss_fn = nn.MSELoss()  # mean square error
    cr_fwd_lat = -1

    model_save_code = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
    
    for arch in ["relu", "bspline"]:
        
        # set specifications based on the architecture
        if(arch == "relu"):
            if(tparams.relu_epochs == 0):
                continue
            model = LinearReLU(mparams.layers)
            epochs = tparams.relu_epochs

            optimizer = optim.Adam(model.parameters(), lr=tparams.lr_wb)
            
            if(tparams.lrs == "steplr"):
                scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=tparams.lrs_stepsize, gamma=tparams.lrs_gamma)

        elif(arch == "bspline" or arch == "both"):
            if(arch == "bspline"):
                if(tparams.spline_epochs == 0):
                    continue
                if(tparams.relu_epochs > 0): # if we pretrained on ReLU, load those weights
                    model = LinearBSpline(mparams.layers, mparams.cpoints, mparams.range_)
                    model.load_state_dict(torch.load(f"./temp_models/{model_save_code}.pt", weights_only=True), strict=False)
                else:
                    model = LinearBSpline(mparams.layers, mparams.cpoints, mparams.range_)
                epochs = tparams.spline_epochs

            if(arch == "both"):
                if(tparams.both_epochs == 0):
                    continue
                elif(tparams.spline_epochs == 0):
                    if(tparams.relu_epochs > 0): # if we pretrained on ReLU, load those weights
                        model = LinearBSpline(mparams.layers, mparams.cpoints, mparams.range_)
                        model.load_state_dict(torch.load(f"./temp_models/{model_save_code}.pt", weights_only=True), strict=False)
                    else:
                        model = LinearBSpline(mparams.layers, mparams.cpoints, mparams.range_)
                # else we just continue training on the same model
                epochs = tparams.both_epochs

            
            optimizer = optim.Adam(model.parameters_no_deepspline(), lr=tparams.lr_wb)
            aux_optimizer = optim.Adam(model.parameters_deepspline(), lr=tparams.lr_bs)

            if(tparams.lrs == "steplr"):
                # resetting scheduler
                scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=tparams.lrs_stepsize, gamma=tparams.lrs_gamma)
                aux_scheduler = torch.optim.lr_scheduler.StepLR(aux_optimizer, step_size=tparams.lrs_stepsize, gamma=tparams.lrs_gamma)
            
            lmbda = 1e-4 # regularization weight
            lipschitz = False # lipschitz control
        
        # training loop
        length = epochs + (tparams.comp_relu if arch=="relu" else 0)

        for epoch in range(length):
            
            # train the model
            model.train()
            epoch_loss = 0
            epoch_start = datetime.now()

            # train over batches
            for X_batch, y_batch in train_loader:

                # forward pass + get loss
                y_pred = model(X_batch)
                loss = loss_fn(y_pred, y_batch)
                if(arch == "relu" or arch == "both"):
                    optimizer.zero_grad()
                if(arch == "bspline" or arch == "both"):
                    aux_optimizer.zero_grad()
                    if lipschitz is True:
                        loss = loss + lmbda * model.BV2()
                    else:
                        loss = loss + lmbda * model.TV2()
                epoch_loss += float(loss) * len(X_batch)

                # compute gradient and step on the optimizer
                loss.backward()
                if(arch == "relu" or arch == "both"):
                    optimizer.step()
                if(arch == "bspline" or arch == "both"):
                    aux_optimizer.step()                
            
            # step the LR scheduler
            if(tparams.lrs != "none" and tparams.lrs != "None"):
                if((arch == "bspline" or arch == "both") and scheduler.get_last_lr()[0] * tparams.lrs_gamma > 0.0001):
                    scheduler.step()
                if((arch=="bspline" or arch=="both") and aux_scheduler.get_last_lr()[0] * tparams.lrs_gamma > 0.00001):
                    aux_scheduler.step()

            # validation loss (on the whole val dataset)
            model.eval()
            y_pred = model(X_test) # pass in all the validation data
            loss = float(loss_fn(y_pred, y_test)) #! could move this to after the next chunk to track times more accurately, but would be annoying to refactor for doing val loss after that

            if(epoch % 5 == 0):
                print(f"Val loss on epoch {epoch}: {loss}")

            if(tparams.comp_relu > 0 and arch=="relu"): #
                if(epoch == length - tparams.comp_relu - 1): # if we're at the switch point, save the model
                    torch.save(model.state_dict(), f"./temp_models/{model_save_code}.pt")
        
        if(tparams.comp_relu == 0): # if not comp_relu, save at the end of the round of training
            torch.save(model.state_dict(), f"./temp_models/{model_save_code}.pt")
        
        elif(arch == "relu"): # if it is comp_relu and we're on relu 
            start_time = time.perf_counter()
            _ = model(X_test) # model output is irrelevant
            end_time = time.perf_counter()
            cr_fwd_lat = (end_time - start_time) / len(X_test) * 1000 * 1000 # per sample latency: seconds -> nanoseconds
            cr_fwd_lat = round(cr_fwd_lat, 4)

    # compute forward latency of end model
    start_time = time.perf_counter()
    _ = model(X_test) # model output is irrelevant
    end_time = time.perf_counter()
    fwd_lat = (end_time - start_time) / len(X_test) * 1000 * 1000 # per sample latency: seconds -> nanoseconds
    fwd_lat = round(fwd_lat, 4)

    final_locs = None
    final_coeffs = None
    if(tparams.spline_epochs > 0):
        final_locs = model.get_deepspline_activations()[0]['locations']
        final_coeffs = model.get_deepspline_activations()[0]['coefficients']

    os.remove(f"./temp_models/{model_save_code}.pt")

    return model, fwd_lat, final_locs, final_coeffs, cr_fwd_lat



# By default, PyTorch attempts to use all available CPU cores for intra-op parallelism. Set threads = cpu cores
torch.set_num_threads(1)

# Load the data
housing = fetch_california_housing()
X, y = housing.data, housing.target

mparams = Config(
    layers = [8],
    cpoints = 3,
    range_ = 1,
)
        
tparams = Config(
    relu_epochs = 50,
    spline_epochs = 10,
    both_epochs = 0,
    comp_relu = 0,

    batch_size = 10,
    lr_wb = 0.001,
    lr_bs = 0.0001,
    lrs = 'none',
    lrs_gamma = .9,
    lrs_stepsize = 1,
)

model, fwd_lat, final_locs, final_coeffs, cr_fwd_lat = training_run(mparams, tparams, X, y)

Val loss on epoch 0: 0.9271897673606873
Val loss on epoch 5: 0.9456093311309814
Val loss on epoch 10: 0.6865884065628052
Val loss on epoch 15: 1.0029758214950562
Val loss on epoch 20: 0.6661977767944336
Val loss on epoch 25: 0.6564297080039978
Val loss on epoch 30: 0.859581470489502
Val loss on epoch 35: 0.5596232414245605
Val loss on epoch 40: 0.5278910398483276
Val loss on epoch 45: 0.5143812894821167
Val loss on epoch 0: 0.5405814051628113
Val loss on epoch 5: 0.506903886795044


In [13]:
import linspline
import importlib

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.85, shuffle=True)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)

loss_fn = nn.MSELoss()

model.eval()

start_time = time.perf_counter()
loss = float(loss_fn(model(X_test), y_test))

end_time = time.perf_counter()
fwd_lat = (end_time - start_time) / len(X_test) * 1000 * 1000 #! NOT per sample latency: seconds -> nanoseconds
fwd_lat = round(fwd_lat, 4)

print(f"BSpline loss: {loss}, fwd lat: {fwd_lat}")

# ~ ---------

lin_model = linspline.LSplineFromBSpline(model.get_layers())

start_time = time.perf_counter()
loss = float(loss_fn(lin_model(X_test), y_test))

end_time = time.perf_counter()
fwd_lat = (end_time - start_time) / len(X_test) * 1000 * 1000 #! NOT per sample latency: seconds -> nanoseconds
fwd_lat = round(fwd_lat, 4)
print(f"LSpline loss: {loss}, fwd lat: {fwd_lat}")

# may need to continue training on the linear splines...

BSpline loss: 0.49540719389915466, fwd lat: 0.3711
LSpline loss: 4.261738300323486, fwd lat: 0.4131


In [14]:
sys.path.insert(0, os.path.abspath('../DeepSplines'))
from deepsplines.ds_modules.deepBspline import DeepBSpline
import deepsplines
import utils

layer_locs = []
layer_coeffs = []
for layer in model.get_layers():
    if(type(layer) is deepsplines.ds_modules.deepBspline.DeepBSpline):
        layer_locs.append(layer.grid_tensor.detach())
        layer_coeffs.append(layer.coefficients_vect.view(layer.num_activations, layer.size).detach())

In [15]:
import linspline

layer_locs2 = []
layer_coeffs2 = []
for layer in lin_model.get_layers():
    if(type(layer) is not torch.nn.modules.linear.Linear):
        print(type(layer))
        (l2, c2) = layer.get_locs_coeffs()
        
        layer_locs2.append(l2)
        layer_coeffs2.append(c2)


<class 'linspline.LinearSplineLayer'>


## Here

In [1]:
import time

def run_layer(layer, input):
    print(layer)
    start_time = time.perf_counter()
    _ = layer(input)
    print((time.perf_counter() - start_time) * 1000 * 1000)
    print(_)

In [4]:
from models import LinearReLU, LinearBSpline
# importlib.reload(linspline)
import linspline
import torch


input = torch.tensor([[-5, -1, 0, .5, 1, 5, 8, 10]]) # 1 input per activation function

reluModel = LinearReLU([8])
layer = reluModel.layers[1]
run_layer(layer, input)

print("\n ---- \n")

bsplineModel = LinearBSpline([8], 3, 1, "relu")
layer = bsplineModel.get_layers()[1]
run_layer(layer, input)

print("\n ---- \n")

linModel = linspline.LSplineFromBSpline(bsplineModel.get_layers())
layer = linModel.get_layers()[1]
run_layer(layer, input)

ReLU()
486.4820512011647
tensor([[ 0.0000,  0.0000,  0.0000,  0.5000,  1.0000,  5.0000,  8.0000, 10.0000]])

 ---- 

DeepBSpline(mode=fc, num_activations=8, init=relu, size=3, grid=1.0.)
337.2259670868516
tensor([[ 0.0000,  0.0000,  0.0000,  0.5000,  1.0000,  5.0000,  8.0000, 10.0000]],
       grad_fn=<ViewBackward0>)

 ---- 

LinearSplineLayer(8 locs, 8 coeffs, mode='fc')
133.8960137218237
tensor([[ 0.0000,  0.0000,  0.0000,  0.5000,  1.0000,  5.0000,  8.0000, 10.0000]])


In [11]:
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

from models import LinearBSpline

from torch.utils.data import TensorDataset, DataLoader

# By default, PyTorch attempts to use all available CPU cores for intra-op parallelism. Set threads = cpu cores
torch.set_num_threads(1)

# Load the data
housing = fetch_california_housing()
X, y = housing.data, housing.target

# train-test split of the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.85, shuffle=True)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)

# using a dataloader to randomize batching
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)

loss_fn = nn.MSELoss()  # mean square error

lmbda = 1e-4 # regularization weight

bsm = LinearBSpline([2], 3, 1, "relu")
optimizer = optim.Adam(bsm.parameters_no_deepspline(), lr=0.001)
aux_optimizer = optim.Adam(bsm.parameters_deepspline(), lr=0.0001)

print("Init complete")

bsm.train()
for epoch in range(20):
    rloss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        aux_optimizer.zero_grad()

        y_pred = bsm(X_batch)
        loss = loss_fn(y_pred, y_batch)
        loss2 = loss + lmbda * bsm.TV2()
        loss2.backward()
        optimizer.step()
        aux_optimizer.step()
        
        rloss += loss
    if(epoch % 5 == 0):
        print(f"Epoch {epoch}: {rloss}")


Init complete
Epoch 0: 775293.4375
Epoch 5: 2252.551025390625
Epoch 10: 1283.8199462890625
Epoch 15: 1187.6798095703125


In [12]:
import time, importlib, utils
importlib.reload(linspline)
importlib.reload(utils)

def run_layer(layer, input):
    # print(layer)
    start_time = time.perf_counter()
    _ = layer(input)
    print("time: ", (time.perf_counter() - start_time) * 1000 * 1000)
    print(_)

# 3 inputs: [spline1, spline2]
input = torch.tensor([[-2, -2], [0, 0], [2, 2]])

print("Trained BSpline")
layer = bsm.get_layers()[1]

layer_locs = layer.grid_tensor.detach()[0]
layer_coeffs = layer.coefficients_vect.view(layer.num_activations, layer.size).detach()[0]

# print(layer_locs)
# print(layer_coeffs)

# utils.plot_bspline(layer_locs, layer_coeffs, hide_bases=True)

print(layer)

run_layer(layer, input)

Trained BSpline
DeepBSpline(mode=fc, num_activations=2, init=relu, size=3, grid=1.0.)
time:  439.2910050228238
tensor([[-0.2737, -0.0244],
        [ 0.0382,  0.0082],
        [ 1.8457,  0.0412]], grad_fn=<ViewBackward0>)


In [18]:
importlib.reload(linspline)

print("Transferred LSpline")
linModel = linspline.LSplineFromBSpline(bsm.get_layers())

lin_layer = linModel.get_layers()[1]
(linlocs, lincoeffs) = lin_layer.get_locs_coeffs()

run_layer(lin_layer, input)
# utils.plot_bspline(linlocs[0], lincoeffs[0], hide_bases=True)


Transferred LSpline
time:  293.79199258983135
tensor([[-0.2737, -0.0244],
        [ 0.0382,  0.0082],
        [ 1.8457,  0.0412]])


In [19]:
lin_optimizer = optim.Adam(linModel.parameters(), lr=0.001)

print("Init complete")

linModel.train()
for epoch in range(10):
    rloss = 0
    for X_batch, y_batch in train_loader:
        lin_optimizer.zero_grad()

        y_pred = linModel(X_batch)
        loss = loss_fn(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        rloss += loss
    print(f"Epoch {epoch}: {rloss}")

layer = linModel.get_layers()[1]
run_layer(layer, input)

Init complete
Epoch 0: 1173.4237060546875
Epoch 1: 1279.8961181640625
Epoch 2: 1145.614990234375
Epoch 3: 1148.69677734375
Epoch 4: 1183.909423828125
Epoch 5: 1157.4105224609375
Epoch 6: 1178.8897705078125
Epoch 7: 1164.2030029296875
Epoch 8: 1176.5655517578125
Epoch 9: 1152.094482421875
time:  320.02292573451996
tensor([[-0.2737, -0.0244],
        [ 0.0382,  0.0082],
        [ 1.8457,  0.0412]])


In [None]:
from models import LinearRelu

relu = LinearRelu([8])
optimizer = optim.Adam(bsm.parameters_no_deepspline(), lr=0.001)

print("Init complete")

relu.train()
for epoch in range(100):
    rloss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()

        y_pred = bsm(X_batch)
        loss = loss_fn(y_pred, y_batch)
        loss2 = loss + lmbda * bsm.TV2()
        loss2.backward()
        optimizer.step()
        
        rloss += loss
    if(epoch % 5 == 0):
        print(f"Epoch {epoch}: {rloss}")

In [None]:
import matplotlib.pyplot as plt

x = torch.linspace(-1, 1, 1000, dtype=torch.float)

# Compute y values based on the new function
y1 = layer(x)
y2 = lin_layer(x)

# # Create the plot
plt.figure(figsize=(8, 6))
plt.plot(x, y1, label='BSpline', color='red', alpha = .5)
plt.plot(x, y2, label='LSpline', color='blue', alpha = .5)
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.legend()
plt.show()