## Create a StaticEntitySet from a Tensor + Labels

A static entity set is a container for a table of categorical data with many-to-many relationships between categories. Categories are unordered on construction, and their values are assigned system ids. Any pair of categories may be extracted and ordered to generate an incidence matrix and construct a hypergraph. Each such construction is done once, when first needed, and stored in a state dictionary for reuse.

In [29]:
from hypernetx import *
import matplotlib.pyplot as plt
from collections import OrderedDict,defaultdict,namedtuple
from scipy.sparse import coo_matrix
import pandas as pd
import numpy as np
import itertools as it
import random as rand
from untitled_StaticEntity import *

## Create an example tensor given in coordinate format + label dictionary

In [2]:
# alphabet = list('abcdefghijklmnopqrstuvwxyz')
# labels = dict()
# for idx in range(3):
#     n = rand.choice(range(3,8))
#     labels[idx] = ['0'] + rand.sample(alphabet,n)

In [3]:
## one label list per tensor mode (keys 0, 1, 2); a label's position in its
## list is the index used for it along that mode
labels = dict(enumerate([
    ['0', 'n', 'z', 'v', 'a'],
    ['0', 'z', 'p', 'y', 'l', 'i'],
    ['0', 'm', 'w', 'z'],
]))

In [4]:
## size of the tensor along each mode = number of labels for that mode.
## Derived from `labels` rather than spelled out per mode, so adding or
## removing a mode does not require touching this line.
dims = tuple(len(labels[mode]) for mode in range(len(labels)))
## (kept for reference) random generation of nonzero coordinates:
# tot = np.prod(np.array(dims))
# coords = it.product(range(dims[0]),range(dims[1]),range(dims[2]))
# data = np.array(rand.sample(list(coords),int(.40*tot)))

In [5]:
# Coordinate-format tensor: each row is one (mode 0, mode 1, mode 2) index
# triple of a nonzero entry; the values index into labels[0], labels[1], labels[2].
data = np.array([[1, 0, 0],[1, 0, 3],[1, 2, 2],[3, 3, 2],[0, 3, 3],[0, 1, 0],[3, 2, 3],[0, 4, 0],[4, 5, 2],[1, 3, 2],[2, 1, 1],[2, 4, 1],[2, 1, 2],[1, 3, 3],[3, 4, 2],[1, 0, 2],[1, 4, 2],[3, 3, 3],[1, 1, 0],[1, 5, 1],[1, 4, 0],[2, 1, 0],[4, 2, 3],[0, 4, 1],[4, 0, 1],[1, 4, 3],[0, 0, 0],[4, 4, 1],[1, 1, 3],[1, 0, 1],[4, 3, 3],[1, 1, 2],[2, 3, 2],[0, 2, 0],[3, 5, 0],[0, 3, 0],[4, 3, 0],[2, 0, 0],[4, 4, 0],[0, 3, 2],[2, 4, 2],[3, 3, 0],[0, 2, 3],[0, 1, 1],[3, 1, 1],[2, 0, 3],[3, 4, 0],[2, 0, 1]])

In [6]:
# Report how many of the prod(dims) possible cells are nonzero.
# np.prod is used because the np.product alias was removed in NumPy 2.0.
print(f'This tensor has {data.shape[0]} entries out of {np.prod(dims)} possible')

This tensor has 48 entries out of 120 possible


In [7]:
# Echo the full coordinate array (one index triple per row).
data

array([[1, 0, 0],
       [1, 0, 3],
       [1, 2, 2],
       [3, 3, 2],
       [0, 3, 3],
       [0, 1, 0],
       [3, 2, 3],
       [0, 4, 0],
       [4, 5, 2],
       [1, 3, 2],
       [2, 1, 1],
       [2, 4, 1],
       [2, 1, 2],
       [1, 3, 3],
       [3, 4, 2],
       [1, 0, 2],
       [1, 4, 2],
       [3, 3, 3],
       [1, 1, 0],
       [1, 5, 1],
       [1, 4, 0],
       [2, 1, 0],
       [4, 2, 3],
       [0, 4, 1],
       [4, 0, 1],
       [1, 4, 3],
       [0, 0, 0],
       [4, 4, 1],
       [1, 1, 3],
       [1, 0, 1],
       [4, 3, 3],
       [1, 1, 2],
       [2, 3, 2],
       [0, 2, 0],
       [3, 5, 0],
       [0, 3, 0],
       [4, 3, 0],
       [2, 0, 0],
       [4, 4, 0],
       [0, 3, 2],
       [2, 4, 2],
       [3, 3, 0],
       [0, 2, 3],
       [0, 1, 1],
       [3, 1, 1],
       [2, 0, 3],
       [3, 4, 0],
       [2, 0, 1]])

In [8]:
# Build two lookup tables per mode:
#   ldict[mode]: label -> system id
#   rdict[mode]: system id -> label
# NOTE(review): HNXCount comes from hypernetx; presumably each call returns the
# next integer id - confirm against the library.
ctr = [HNXCount() for _ in labels]  # one independent counter per mode
ldict = OrderedDict()
rdict = OrderedDict()
for mode, counter in enumerate(ctr):
    ids = defaultdict(counter)  # first access to a label stores counter() as its id
    ids['0']  # touch '0' before anything else so it is the first label registered
    for label in labels[mode]:
        ids[label]  # lookup only; the side effect registers the label
    ldict[mode] = dict(ids)  # plain dict: later lookups of unknown labels must not create ids
    rdict[mode] = {uid: label for label, uid in ldict[mode].items()}

In [9]:
## labeldata replaces numbers in data with their corresponding labels
m,n = data.shape
# Build the array from the label strings themselves so NumPy sizes the string
# dtype to the longest label. Casting the integer array with dtype=str instead
# fixes the width from the integers' text form, and any longer label would be
# silently truncated when assigned into it.
labeldata = np.array(
    [[rdict[cdx][data[rdx,cdx]] for cdx in range(n)] for rdx in range(m)],
    dtype=str,
).reshape(m,n)  # reshape keeps the (m, n) shape even when there are no rows
# Show the first five rows in label form and in index form, then the mode sizes.
print(f'{labeldata[:,:][:5]}\n\n{data[:,:][:5]}\n\n',dims)


[['n' '0' '0']
 ['n' '0' 'z']
 ['n' 'p' 'w']
 ['v' 'y' 'w']
 ['0' 'y' 'z']]

[[1 0 0]
 [1 0 3]
 [1 2 2]
 [3 3 2]
 [0 3 3]]

 (5, 6, 4)


## Issues
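The problem with setting up a tensor to hold all of the relationships is that projecting it onto two dimensions for a hypergraph is lossy: summing out the remaining modes merges several tensor entries into the same cell, so the projection holds counts rather than a 0/1 incidence matrix.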

For now we only need two dimensions, so stick to 2-tensors.

In [10]:
# Dense 0/1 indicator tensor: 1 at every coordinate listed in data, 0 elsewhere.
tmat = np.zeros(dims,dtype=int)
# data.T yields one index array per mode; indexing with the tuple of them sets
# all listed cells in one step and works for any number of modes, not just 3.
tmat[tuple(data.T)] = 1

In [11]:
# Sum out mode 0: each cell counts how many mode-0 indices co-occur with that
# (mode 1, mode 2) pair.
tmat.sum(axis=0)

array([[3, 3, 1, 2],
       [3, 3, 2, 1],
       [1, 0, 1, 3],
       [3, 0, 4, 4],
       [4, 3, 3, 1],
       [1, 1, 1, 0]])

In [13]:
# Ndx,Edx = 0,1
# flat = 
# incmat = np.zeros((dims[Ndx],dims[Edx]),dtype=int)
# for i in range(dims[Ndx]):
#     for j in range(dims[Edx]):
#         incmat = np.sum(tmat[])
        
    

## StaticEntity = NamedTuple?

In [14]:
# Columns of data to use as the two sides of the incidence matrix.
Ndx = 0; Edx = 1
# mat[x, y] counts how many rows of data pair index x (column Ndx) with index y
# (column Edx).
mat = np.zeros((dims[Ndx],dims[Edx]))
# np.add.at accumulates repeated (x, y) pairs; a plain `mat[rows, cols] += 1`
# would count each distinct pair only once.
np.add.at(mat, (data[:,Ndx], data[:,Edx]), 1)

In [15]:
# 0/1 incidence matrix: drop row 0 and column 0 of the count matrix (index 0 is
# the '0' label on each side) and flag every remaining nonzero count as 1.
imat = (mat[1:, 1:] != 0).astype(int)
imat

array([[1, 1, 1, 1, 1],
       [1, 0, 1, 1, 0],
       [1, 1, 1, 1, 1],
       [0, 1, 1, 1, 1]])

In [16]:
# Show the id -> label table for mode 0.
rdict[0]

{0: '0', 1: 'n', 2: 'z', 3: 'v', 4: 'a'}

In [18]:
# One slice per axis whose start equals the axis number: axis 0 is kept whole
# (start 0) and axis 1 loses its first column (start 1).
s = tuple(slice(axis, size) for axis, size in enumerate(imat.shape[:2]))
imat[s]

array([[1, 1, 1, 1],
       [0, 1, 1, 0],
       [1, 1, 1, 1],
       [1, 1, 1, 1]])

In [19]:
# np.where needs a condition argument; the bare call np.where() raised
# "TypeError: where() missing 1 required positional argument: 'condition'".
# NOTE(review): presumably the intent was to locate the nonzero incidences -
# confirm. With an array as the only argument, np.where returns one index
# array per axis.
t = np.where(imat)

In [20]:
# Scratch check of axis semantics on the first four coordinate rows.
arr = data[:4]
print(arr.shape)
arrd = arr.sum(axis=0)  # one total per column
arr,arrd

(4, 3)


(array([[1, 0, 0],
        [1, 0, 3],
        [1, 2, 2],
        [3, 3, 2]]),
 array([6, 5, 7]))

In [21]:
# 3x3x3 cube holding 0..26, used to see how slicing and axis sums behave.
arr = np.reshape(np.arange(27), (3, 3, 3))
# arrd = np.sum(arr,axis=1)
arr[:, 0, :]  # index 0 along the middle axis, for every leading index  #,arrd

array([[ 0,  1,  2],
       [ 9, 10, 11],
       [18, 19, 20]])

In [22]:
# Echo the full 3x3x3 test cube.
arr

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[ 9, 10, 11],
        [12, 13, 14],
        [15, 16, 17]],

       [[18, 19, 20],
        [21, 22, 23],
        [24, 25, 26]]])

In [23]:
# Collapse the middle axis: entry [i, k] is the sum of arr[i, :, k].
arr.sum(axis=1)

array([[ 9, 12, 15],
       [36, 39, 42],
       [63, 66, 69]])

In [24]:
# Small corner of the indicator tensor (first 3 x 4 x 2 indices), small enough
# to check axis sums by eye.
ttmat = tmat[:3,:4,:2]
ttmat

array([[[1, 0],
        [1, 1],
        [1, 0],
        [1, 0]],

       [[1, 1],
        [1, 0],
        [0, 0],
        [0, 0]],

       [[1, 1],
        [1, 1],
        [0, 0],
        [0, 0]]])

In [25]:
# Sum out axes 1 and 2 together: one total per index along axis 0.
ttmat.sum(axis=(1, 2))

array([5, 3, 4])

In [26]:
# Confirm the shape of the sub-tensor.
ttmat.shape

(3, 4, 2)

In [31]:
# Echo the sub-tensor next to its shape for reference while summing axes below.
ttmat,ttmat.shape

(array([[[1, 0],
         [1, 1],
         [1, 0],
         [1, 0]],
 
        [[1, 1],
         [1, 0],
         [0, 0],
         [0, 0]],
 
        [[1, 1],
         [1, 1],
         [0, 0],
         [0, 0]]]),
 (3, 4, 2))

In [32]:
# Sum out axis 0: entry [j, k] counts the axis-0 indices set at (j, k).
ttmat.sum(axis=0)

array([[3, 2],
       [3, 2],
       [1, 0],
       [1, 0]])

In [33]:
# Sum out axes 0 and 2 together: one total per index along axis 1.
ttmat.sum(axis=(0, 2))

array([5, 5, 1, 1])