In [1]:
import os

# Expose only GPU 0 to this process. This is done in the very first cell, before
# torch is imported further down, so the setting is in place before CUDA is
# initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
import torch
from tensor.node import TensorNode

# Low-level API: create individual nodes and wire them together by label.

a_tensor = torch.randn((1, 4, 4, 10))

# A node can wrap an existing tensor (a) or be declared from a shape alone (b).
a = TensorNode(a_tensor, dim_labels=('r0', 'u1', 'p1', 'r1'), name='a')
b = TensorNode((10, 1, 4, 1), dim_labels=('r1', 'u2', 'p2', 'r2'), name='b')
print("a looks like this:", a, sep="\n")
print("b looks like this:", b, sep="\n")
print()

# Link b to a through the label they have in common, 'r1'.
b.connect(a, 'r1')

# Once connected the two nodes can be contracted; the printed result no longer
# carries the shared 'r1' label.
c = b.contract_with(a)
print("The contraction of a and b looks like this:", c, sep="\n")
print()

# Size-1 (singleton) dimensions can be squeezed away.
print("Squeezing a and b gives us:", a.squeeze(), b.squeeze(), sep="\n")
print()

# Contracting the squeezed nodes leaves only the non-singleton dimensions.
b_squeezed = b.squeeze()
a_squeezed = a.squeeze()
d = b_squeezed.contract_with(a_squeezed)
print("The contraction of a and b after squeezing looks like this:", d, sep="\n")

a looks like this:
TensorNode(name=a, shape=torch.Size([1, 4, 4, 10]), labels=['r0', 'u1', 'p1', 'r1'])
b looks like this:
TensorNode(name=b, shape=torch.Size([10, 1, 4, 1]), labels=['r1', 'u2', 'p2', 'r2'])

The contraction of a and b looks like this:
TensorNode(name=<b-r1-a>, shape=torch.Size([4, 1, 4, 4, 1, 1]), labels=['p1', 'r2', 'p2', 'u1', 'r0', 'u2'])

Squeezing a and b gives us:
TensorNode(name=a, shape=torch.Size([4, 4, 10]), labels=['u1', 'p1', 'r1'])
TensorNode(name=b, shape=torch.Size([10, 4]), labels=['r1', 'p2'])

The contraction of a and b after squeezing looks like this:
TensorNode(name=<b-r1-a>, shape=torch.Size([4, 4, 4]), labels=['p2', 'u1', 'p1'])


In [3]:
from tensor.network import TensorNetwork

# A small tensor train assembled by hand.
p = 10  # Number of features (physical dimension)
r = 6   # Bond dimension

# Input nodes. The leading 1 is only a placeholder for the sample dimension 's',
# which can be any size.
x1, x2, x3 = [
    TensorNode((1, p), dim_labels=('s', 'p'), name=f'x{k}')
    for k in (1, 2, 3)
]

# Train cores. 'o1' on A1 is the output label, which is not connected to anything.
# The l/r arguments give each core's left and right labels; they make the
# computation more efficient.
A1 = TensorNode(
    (1, p, r),
    dim_labels=('o1', 'p', 'r1'),
    name='A1',
    r='r1',
)
A2 = TensorNode(
    (r, p, r),
    dim_labels=('r1', 'p', 'r2'),
    name='A2',
    l='r1',
    r='r2',
)
A3 = TensorNode(
    (r, p),
    dim_labels=('r2', 'p'),
    name='A3',
    l='r2',
)

# Chain the cores along their bond labels ...
A1.connect(A2, 'r1')
A2.connect(A3, 'r2')

# ... and attach one input node to each core through the physical label.
x1.connect(A1, 'p')
x2.connect(A2, 'p')
x3.connect(A3, 'p')

# Wrap the connected nodes in a TensorNetwork.
TN = TensorNetwork(
    input_nodes=[x1, x2, x3],
    main_nodes=[A1, A2, A3],
    sample_dim='s',
)

# The full network can be contracted with any tensor of shape (N, p); N does not
# need to match the placeholder size used for 's' above.
N = 32
X = torch.randn((N, p))
result = TN.forward(X)
print("The result of contracting the tensor network with input X is:", result, sep="\n")
print()
print("With values:", result.tensor, sep="\n")

The result of contracting the tensor network with input X is:
TensorNode(name=<<A1-p-x1>-r1-<<<A3-p-x3>-r2-A2>-p-x2>>, shape=torch.Size([1, 32]), labels=['o1', 's'])

With values:
tensor([[ 0.2584,  0.0174,  0.2231, -0.0284,  0.1558, -0.0350, -0.0469,  0.1758,
          0.1439,  0.1962,  0.0897,  0.1150,  0.5099, -0.0462,  0.0498, -0.0448,
          0.0603,  0.0444,  0.3883,  0.1197, -0.0068,  0.3930,  0.1274,  0.1191,
          0.0280, -0.3200,  0.0582, -0.0361, -0.1270, -0.0379, -0.0327, -0.5584]])


In [None]:
from tensor.bregman import SquareBregFunction

# The main nodes are trained by default; train_nodes lets you pick them explicitly.
TN = TensorNetwork(
    input_nodes=[x1, x2, x3],
    main_nodes=[A1, A2, A3],
    train_nodes=[A1, A2, A3],
    output_labels=('s', 'o1'),
    sample_dim='s',
)

# Training needs a loss function; here it is the squared loss.
loss_fn = SquareBregFunction()

N = 1024  # Number of samples

# Targets are the sum of cubes of the p-1 random features, computed before the
# bias column is appended so the constant does not enter y.
features = torch.randn((N, p - 1))
y = features.pow(3).sum(-1, keepdim=True)  # Target values, shape (N, 1)

# Append a column of ones as the bias term, giving X its p columns.
X = torch.cat((features, torch.ones((N, 1))), dim=-1)

# Fit the network: 1024 samples in batches of 128 gives 8 batches per pass.
TN.accumulating_swipe(
    X,
    y,
    loss_fn,
    batch_size=128,
    num_swipes=3,
    method='exact',
    verbose=2,
)

  0%|          | 0/3 [00:00<?, ?it/s]

Left to right pass (A1):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 0, Left loss (A1): 143.1507167816162  (eps: 1e-12)  (eps_r: 0.0)


Left to right pass (A2):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 0, Left loss (A2): 57.84726285934448  (eps: 1e-12)  (eps_r: 0.0)


Left to right pass (A3):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 0, Left loss (A3): 8.578864991664886  (eps: 1e-12)  (eps_r: 0.0)


Right to left pass (A2):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 1, Right loss (A2): 8.572382152080536  (eps: 1e-12)  (eps_r: 0.0)


Right to left pass (A1):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 1, Right loss (A1): 8.565950512886047  (eps: 1e-12)  (eps_r: 0.0)


Left to right pass (A2):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 2, Left loss (A2): 8.548484206199646  (eps: 1e-12)  (eps_r: 0.0)


Left to right pass (A3):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 2, Left loss (A3): 8.531060516834259  (eps: 1e-12)  (eps_r: 0.0)


Right to left pass (A2):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 3, Right loss (A2): 8.52535492181778  (eps: 1e-12)  (eps_r: 0.0)


Right to left pass (A1):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 3, Right loss (A1): 8.519686102867126  (eps: 1e-12)  (eps_r: 0.0)


Left to right pass (A2):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 4, Left loss (A2): 8.503355205059052  (eps: 1e-12)  (eps_r: 0.0)


Left to right pass (A3):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 4, Left loss (A3): 8.487060964107513  (eps: 1e-12)  (eps_r: 0.0)


Right to left pass (A2):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 5, Right loss (A2): 8.481568813323975  (eps: 1e-12)  (eps_r: 0.0)


Right to left pass (A1):   0%|          | 0/8 [00:00<?, ?it/s]

NS: 5, Right loss (A1): 8.476108610630035  (eps: 1e-12)  (eps_r: 0.0)


True

In [5]:
# Ready-made layers save wiring a network together node by node.
from tensor.layers import TensorTrainLayer

# The default tensor-train layer only needs the number of blocks (carriages),
# the bond dimension, and the input/output sizes.
layer = TensorTrainLayer(
    num_carriages=3,
    bond_dim=12,
    input_features=p,
    output_shape=1,
)

# Fit the network that the layer built. With batch_size=-1 the progress bars show
# a single step per pass, i.e. the data appears to be processed as one batch.
# No eps is passed, so the ridge regularization parameter stays at its default
# (reported as eps: 1e-12 in the log).
layer.tensor_network.accumulating_swipe(
    X,
    y,
    loss_fn,
    batch_size=-1,
    num_swipes=2,
    method='exact',
    verbose=2,
)

  0%|          | 0/2 [00:00<?, ?it/s]

Left to right pass (A1):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 0, Left loss (A1): 142.64816284179688  (eps: 1e-12)  (eps_r: 0.0)


Left to right pass (A2):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 0, Left loss (A2): 37.362281799316406  (eps: 1e-12)  (eps_r: 0.0)


Left to right pass (A3):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 0, Left loss (A3): 1.3799099178868346e-06  (eps: 1e-12)  (eps_r: 0.0)


Right to left pass (A2):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 1, Right loss (A2): 1.3467490589391673e-07  (eps: 1e-12)  (eps_r: 0.0)


Right to left pass (A1):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 1, Right loss (A1): -2.3721628394923755e-07  (eps: 1e-12)  (eps_r: 0.0)


Left to right pass (A2):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 2, Left loss (A2): -6.023663559062697e-07  (eps: 1e-12)  (eps_r: 0.0)


Left to right pass (A3):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 2, Left loss (A3): -4.996932148060296e-07  (eps: 1e-12)  (eps_r: 0.0)


Right to left pass (A2):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 3, Right loss (A2): 1.3276509491788602e-07  (eps: 1e-12)  (eps_r: 0.0)


Right to left pass (A1):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 3, Right loss (A1): -4.5693741412833333e-07  (eps: 1e-12)  (eps_r: 0.0)


True

In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from tensor.bregman import XEAutogradBregman
from torch.nn import functional as F
# Load the iris dataset
iris = load_iris()

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell does not crash on machines without CUDA. With a GPU present the
# placement is the same as the previous hard-coded 'cuda'.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Convert to PyTorch tensors
X_iris = torch.tensor(iris.data, dtype=torch.float32, device=device)
y_iris = torch.tensor(iris.target, dtype=torch.long, device=device)

# One-hot encode labels
# NOTE(review): targets become float64 while the features stay float32 -- confirm
# this mix is what XEAutogradBregman expects.
y_iris = F.one_hot(y_iris, num_classes=3).to(dtype=torch.float64)

# Add a bias term (a trailing column of ones)
X_iris = torch.cat((X_iris, torch.ones((X_iris.shape[0], 1), device=device)), dim=-1)

# Split the dataset into train and test sets (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X_iris, y_iris, test_size=0.2, random_state=42)

# Create a Tensor Train Layer for the iris dataset.
# The layer outputs one column fewer than there are classes; the evaluation code
# appends a zero column for the remaining class before taking the argmax.
layer_iris = TensorTrainLayer(
    num_carriages=8,
    bond_dim=48,
    input_features=X_iris.shape[1],
    output_shape=(y_iris.shape[1]-1,),
)
if device.type == 'cuda':
    # Only move the layer when a GPU was actually selected.
    layer_iris = layer_iris.cuda()

# Here we use the cross-entropy loss function
loss_fn_iris = XEAutogradBregman()

# Define convergence criterion with test accuracy calculation
def convergence_criterion():
    """Print the balanced test accuracy of ``layer_iris`` and never stop training.

    Reads ``layer_iris``, ``X_test`` and ``y_test`` from the notebook namespace.
    The layer output has one column fewer than ``y_test``, so a zero column is
    appended to it before the argmax is taken.

    Returns:
        bool: always ``False``, so this callback only reports and never signals
        convergence.
    """
    partial_scores = layer_iris(X_test)
    # Append the zero column for the class the layer does not output.
    zero_column = torch.zeros_like(partial_scores[:, :1])
    full_scores = torch.cat((partial_scores, zero_column), dim=1)

    true_classes = y_test.argmax(dim=-1).cpu().numpy()
    predicted_classes = full_scores.argmax(dim=-1).cpu().numpy()
    accuracy_test = balanced_accuracy_score(true_classes, predicted_classes)

    print('Test Acc:', accuracy_test)
    return False

# Train the Tensor Train Layer on the iris dataset.
# eps=1.0 is the value the training log reports as "eps: 1.0". The log also shows
# a "Test Acc" line after every node update, i.e. convergence_criterion is
# invoked once per update.
layer_iris.tensor_network.accumulating_swipe(
    X_train,
    y_train,
    loss_fn_iris,
    batch_size=-1,
    num_swipes=1,
    method='ridge_cholesky',
    eps=1.0,
    verbose=2,
    convergence_criterion=convergence_criterion,
)

  0%|          | 0/1 [00:00<?, ?it/s]

Left to right pass (A1):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 0, Left loss (A1): 1.2262459993362427  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 0.6666666666666666


Left to right pass (A2):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 0, Left loss (A2): 0.9178369641304016  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 0.9393939393939394


Left to right pass (A3):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 0, Left loss (A3): 0.5418416261672974  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 0.9393939393939394


Left to right pass (A4):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 0, Left loss (A4): 0.3250484764575958  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 0.9696969696969697


Left to right pass (A5):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 0, Left loss (A5): 0.19925251603126526  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 1.0


Left to right pass (A6):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 0, Left loss (A6): 0.13349130749702454  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 1.0


Left to right pass (A7):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 0, Left loss (A7): 0.10782388597726822  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 0.9326599326599326


Left to right pass (A8):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 0, Left loss (A8): 0.18231026828289032  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 0.9629629629629629


Right to left pass (A7):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 1, Right loss (A7): 0.8034853935241699  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 0.9629629629629629


Right to left pass (A6):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 1, Right loss (A6): 0.5904362797737122  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 0.872053872053872


Right to left pass (A5):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 1, Right loss (A5): 0.3605460524559021  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 0.872053872053872


Right to left pass (A4):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 1, Right loss (A4): 0.221048966050148  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 0.9090909090909092


Right to left pass (A3):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 1, Right loss (A3): 0.143207386136055  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 1.0


Right to left pass (A2):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 1, Right loss (A2): 0.10436733067035675  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 1.0


Right to left pass (A1):   0%|          | 0/1 [00:00<?, ?it/s]

NS: 1, Right loss (A1): 0.11280881613492966  (eps: 1.0)  (eps_r: 0.0)
Test Acc: 0.9090909090909092


True