## Create a Static EntitySet and then a Static Hypergraph from the Harry Potter character data

In [55]:
from hypernetx import *
import matplotlib.pyplot as plt
from collections import OrderedDict,defaultdict
from scipy.sparse import coo_matrix
import pandas as pd
import numpy as np
import itertools as it
import untitled_StaticEntity as us

### Read Dataset from kaggle

In [56]:
## Read dataset in using pandas. Fix index column or use default pandas index. 
harrydata = pd.read_csv('HarryPotter/datasets/Characters_edit.csv',encoding='unicode_escape').set_index('Id')
harrydata = pd.DataFrame(harrydata)
harrydata

Unnamed: 0_level_0,Name,Gender,Job,House,Wand,Patronus,Species,Blood status,Hair colour,Eye colour,Loyalty,Skills,Birth,Death
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,Harry James Potter,Male,Student,Gryffindor,"11"" Holly phoenix feather",Stag,Human,Half-blood,Black,Bright green,Albus Dumbledore | Dumbledore's Army | Order o...,Parseltongue| Defence Against the Dark Arts | ...,"Thursday, July 31, 1980",
2,Ronald Bilius Weasley,Male,Student,Gryffindor,"12"" Ash unicorn tail hair",Jack Russell terrier,Human,Pure-blood,Red,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Wizard chess | Quidditch goalkeeping,"Saturday, March 1, 1980",
3,Hermione Jean Granger,Female,Student,Gryffindor,"10¾"" vine wood dragon heartstring",Otter,Human,Muggle-born,Brown,Brown,Dumbledore's Army | Order of the Phoenix | Hog...,Almost everything,"19 September, 1979",
4,Albus Percival Wulfric Brian Dumbledore,Male,Headmaster,Gryffindor,"15"" Elder Thestral tail hair core",Phoenix,Human,Half-blood,Silver| formerly auburn,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Considered by many to be one of the most power...,Late August 1881,"30 June, 1997"
5,Rubeus Hagrid,Male,Keeper of Keys and Grounds | Professor of Care...,Gryffindor,"16"" Oak unknown core",,Half-Human/Half-Giant,Part-Human,Black,Black,Albus Dumbledore | Order of the Phoenix | Hogw...,Resistant to stunning spells| above average st...,6 December 1928,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Hogwarts kitchens worker (December 1994 - 1998)""",,,,House elf,,Green,,,"A type of magic specific to house-elves, perfo...",44010,"Late March, 1998",,,
140,Kreacher,Male,,,,,,,,,,,,
"Black family's house-elf (?-1996),",,,,,,,,,,,,,,
"Harry Potter's house-elf,",,,,,,,,,,,,,,


In [57]:
## Choose string to fill NaN. These will be set to 0 in system id = sid
harry = harrydata[['House','Blood status','Species','Hair colour','Eye colour']].fillna("Unknown")
for c in harry.columns:
    harry[c] = harry[c].apply(lambda x : x.replace('\xa0',' '))

In [58]:
harry.iloc[:10]

Unnamed: 0_level_0,House,Blood status,Species,Hair colour,Eye colour
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Gryffindor,Half-blood,Human,Black,Bright green
2,Gryffindor,Pure-blood,Human,Red,Blue
3,Gryffindor,Muggle-born,Human,Brown,Brown
4,Gryffindor,Half-blood,Human,Silver| formerly auburn,Blue
5,Gryffindor,Part-Human,Half-Human/Half-Giant,Black,Black
6,Gryffindor,Pure-blood,Human,Blond,Unknown
7,Gryffindor,Pure-blood,Human,Red,Brown
8,Gryffindor,Pure-blood,Human,Red,Brown
9,Gryffindor,Pure-blood,Human,Red,Bright brown
10,Gryffindor,Muggle-born,Human,Black,Brown


### Generate a counter for each column 
- Assign a sid to each value in that column
- Create a reverse counter to grab name from sid

In [61]:
ctr = [HNXCount() for c in range(5)]
ldict = OrderedDict()
rdict = OrderedDict()
for idx,c in enumerate(harry.columns):
    ldict[c] = defaultdict(ctr[idx])
    rdict[c] = OrderedDict()
    ldict[c]['Unknown']
    rdict[c][0] = 'Unknown'
    for k in harry[c]:
        ldict[c][k]
        rdict[c][ldict[c][k]] = k
    ldict[c] = dict(ldict[c]) 
dims = tuple([len(ldict[c]) for c in harry.columns])



(902880, (8, 11, 10, 38, 27))

In [6]:
rdict['Eye colour'].values()

odict_values(['Unknown', 'Bright green', 'Blue', 'Brown', 'Black', 'Bright brown', 'Hazel', 'Grey', 'Green', 'Dark', 'Pale silvery', 'Silvery', 'Gooseberry', 'Scarlet ', 'Pale, freckled', 'Astonishingly blue', 'Variable', 'One dark, one electric blue', 'Yellowish', 'Ruddy', 'Grey/Blue[', 'Dark blue[', 'Bright Blue', 'Dark Grey', 'Pale', 'Yellow', '44010'])

### Create an array of tuples giving positions of 1's in incidence Tensor
- The tuples indicate one point across the possible node/edge assignments
- The dimensions of the tuple give the number of unique labels in potential nodes/columns

In [7]:
m = len(harry)
n = len(harry.columns)
data = np.zeros((m,n),dtype=int)
for rid in range(m):
    for cid in range(n):
        c = harry.columns[cid]
        data[rid,cid] = ldict[c][harry.iloc[rid][c]]
dims = tuple([len(ldict[c]) for c in harry.columns])
print(f'{data[:5]}\n\nDimensions = {dims}')

[[1 1 1 1 1]
 [1 2 1 2 2]
 [1 3 1 3 3]
 [1 1 1 4 2]
 [1 4 2 1 4]]

Dimensions = (8, 11, 10, 38, 27)


In [8]:
ldict['House'].keys()

dict_keys(['Unknown', 'Gryffindor', 'Ravenclaw', 'Slytherin', 'Hufflepuff', 'Beauxbatons Academy of Magic', 'Durmstrang Institute', 'House elf'])

In [9]:
## labeldata replaces numbers in data with their corresponding labels
labeldata = np.array(data,dtype=str)
for rdx in range(m):
    for cdx in range(n):
        c = harry.columns[cdx]
        labeldata[rdx,cdx] = rdict[c][data[rdx,cdx]]
## Make a hypergraph from the House and Blood Status
labeldata[:,:][:5],harrydata['Name'][:5]

(array([['Gryffindor', 'Half-blood', 'Human', 'Black', 'Bright green'],
        ['Gryffindor', 'Pure-blood', 'Human', 'Red', 'Blue'],
        ['Gryffindor', 'Muggle-born', 'Human', 'Brown', 'Brown'],
        ['Gryffindor', 'Half-blood', 'Human', 'Silver| formerly aubu',
         'Blue'],
        ['Gryffindor', 'Part-Human', 'Half-Human/Half-Giant', 'Black',
         'Black']], dtype='<U21'),
 Id
 1                         Harry James Potter
 2                      Ronald Bilius Weasley
 3                      Hermione Jean Granger
 4    Albus Percival Wulfric Brian Dumbledore
 5                              Rubeus Hagrid
 Name: Name, dtype: object)

## Create Incidence Tensor

In [10]:
imat = np.zeros(dims,dtype=int)
for d in data:
    imat[tuple(d)] +=1
imat.shape

(8, 11, 10, 38, 27)

In [11]:
np.sum(imat)

144

In [12]:
Ndx,Edx = 1,0
nodenames,edgenames = ldict[harry.columns[Ndx]],ldict[harry.columns[Edx]]

In [13]:

names = {idx:harry.columns[idx] for idx in [Ndx,Edx]}
## Project onto single matrix for hypergraph
axes = tuple([x for x in range(len(harry.columns)) if x not in [Ndx,Edx]])
dimat = np.sum(imat,axis=axes).transpose(Ndx,Edx)## this is the weighted incidence matrix
incdict = np.array(dimat,dtype=bool)*1  
incdict,rdict[names[Ndx]],rdict[names[Edx]]

(array([[1, 1, 1, 0, 1, 0, 0, 1],
        [1, 1, 1, 1, 1, 0, 0, 0],
        [1, 1, 0, 1, 1, 0, 0, 0],
        [0, 1, 1, 0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0, 1, 0, 0],
        [1, 1, 1, 1, 1, 0, 1, 0],
        [0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0]]),
 OrderedDict([(0, 'Unknown'),
              (1, 'Half-blood'),
              (2, 'Pure-blood'),
              (3, 'Muggle-born'),
              (4, 'Part-Human'),
              (5, 'Pure-blood or half-blood'),
              (6, 'Part-Goblin'),
              (7, 'Muggle-born or half-blood'),
              (8, 'Quarter-Veela'),
              (9, 'Muggle'),
              (10, 'Squib')]),
 OrderedDict([(0, 'Unknown'),
              (1, 'Gryffindor'),
              (2, 'Ravenclaw'),
              (3, 'Slytherin'),
              (4, 'Hufflepuff'),
              (5, 'Beauxbatons Academy of Magic'),
              (6, 'Durmst

In [14]:
## Logic to retrieve pieces of header
arr=np.array(tuple(ldict.keys())).transpose()
np.where(arr=='House')

(array([0]),)

## Create StaticEntity 

In [15]:
E = us.StaticEntity(imat, rdict)
E.dimensions

(8, 11, 10, 38, 27)

In [16]:
E.elements_by_level(0,3)

OrderedDict([(0,
              array([ 0,  1,  2,  3,  5, 10, 15, 16, 18, 19, 20, 21, 25, 26, 27, 29, 30,
                     31])),
             (1,
              array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 15, 19, 33])),
             (2, array([ 0,  1,  3,  5, 10, 12, 13, 14, 15])),
             (3, array([ 0,  1,  3, 10, 15, 16, 17, 18, 19, 24, 34, 35])),
             (4, array([ 0,  2,  3,  5, 10, 15, 22, 23, 32])),
             (5, array([ 3, 28])),
             (6, array([10])),
             (7, array([36, 37]))])

In [17]:
level1,level2 = 0,3
mat = E.incidence_matrix(level1, level2)
mat

array([[1, 1, 1, 1, 1, 0, 0, 0],
       [1, 1, 1, 1, 0, 0, 0, 0],
       [1, 1, 0, 0, 1, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 0, 1, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 0, 1, 0],
       [0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 0, 0, 0],
       [1, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0],
       [1, 0, 0, 1, 0, 0, 0, 0],
       [1, 1, 0, 1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0],
       [1,

In [18]:
E.incidence_matrix(1,2)

array([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
       [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [19]:
E.incidence_dict(level1=3,level2=0)

OrderedDict([(0, array([0, 1, 2, 3, 4])),
             (1, array([0, 1, 2, 3])),
             (2, array([0, 1, 4])),
             (3, array([0, 1, 2, 3, 4, 5])),
             (4, array([1])),
             (5, array([0, 1, 2, 4])),
             (6, array([1])),
             (7, array([1])),
             (8, array([1])),
             (9, array([1])),
             (10, array([0, 1, 2, 3, 4, 6])),
             (11, array([1])),
             (12, array([2])),
             (13, array([2])),
             (14, array([2])),
             (15, array([0, 1, 2, 3, 4])),
             (16, array([0, 3])),
             (17, array([3])),
             (18, array([0, 3])),
             (19, array([0, 1, 3])),
             (20, array([0])),
             (21, array([0])),
             (22, array([4])),
             (23, array([4])),
             (24, array([3])),
             (25, array([0])),
             (26, array([0])),
             (27, array([0])),
             (28, array([5])),
             (29, arr

In [21]:
E._labs(0)

OrderedDict([(0, 'Unknown'),
             (1, 'Gryffindor'),
             (2, 'Ravenclaw'),
             (3, 'Slytherin'),
             (4, 'Hufflepuff'),
             (5, 'Beauxbatons Academy of Magic'),
             (6, 'Durmstrang Institute'),
             (7, 'House elf')])

## Select columns to use from data and create hypergraph

In [22]:
Edx = 1;Ndx = 2
Ename = harry.columns[Edx];Nname = harry.columns[Ndx]

In [23]:
## Add weights to hypergraph 
thisdata = data[:,[Ndx,Edx]].astype(int)
sp,counts = np.unique(thisdata,axis=0,return_counts=True)
sp,counts

(array([[ 0,  0],
        [ 1,  0],
        [ 1,  1],
        [ 1,  2],
        [ 1,  3],
        [ 1,  5],
        [ 1,  7],
        [ 1,  8],
        [ 2,  4],
        [ 3,  0],
        [ 3,  1],
        [ 4,  2],
        [ 5,  6],
        [ 6,  0],
        [ 6,  3],
        [ 6,  5],
        [ 7,  0],
        [ 8,  0],
        [ 8,  1],
        [ 8,  2],
        [ 8,  3],
        [ 8,  5],
        [ 8,  8],
        [ 8,  9],
        [ 8, 10],
        [ 9,  1]]),
 array([ 6,  7, 19, 32,  5, 40,  1,  1,  2,  1,  1,  1,  1,  3,  1,  2,  1,
         4,  3,  1,  1,  4,  1,  4,  1,  1]))

In [24]:
mat = np.zeros((dims[Ndx],dims[Edx]),dtype=int)
for d in range(len(counts)):
    mat[sp[d][0],sp[d][1]] = counts[d]

In [25]:
## the matrix will generate a hypergraph. Non zero entries will be 1's
print(mat[1:,1:])

[[19 32  5  0 40  0  1  1  0  0]
 [ 0  0  0  2  0  0  0  0  0  0]
 [ 1  0  0  0  0  0  0  0  0  0]
 [ 0  1  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  1  0  0  0  0]
 [ 0  0  1  0  2  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0]
 [ 3  1  1  0  4  0  0  1  4  1]
 [ 1  0  0  0  0  0  0  0  0  0]]


In [26]:
## Generate weights for each of the nodes based on counts relative to the whole
M = mat[1:,1:]
tot = np.sum(M)
radii = dict(zip([rdict[Nname][r] for r in range(1,len(rdict[Nname]))],[0.5+3*np.sum(M[r],axis=0)/tot for r in range(dims[Ndx]-1)]))
radii

{'Human': 2.9098360655737703,
 'Half-Human/Half-Giant': 0.5491803278688525,
 'Werewolf': 0.5245901639344263,
 'Human (Werewolf traits)': 0.5245901639344263,
 'Human(goblin ancestry)': 0.5245901639344263,
 'Ghost': 0.5737704918032787,
 'Centaur': 0.5,
 'Human ': 0.8688524590163935,
 'Human (Metamorphmagus)': 0.5245901639344263}

In [27]:
## Generate weights for each of the edges in the same way
Mt = M.transpose()
tott = np.sum(Mt)
radiit = OrderedDict(zip([rdict[Ename][r] for r in range(1,len(rdict[Ename]))],[0.5+3*np.sum(Mt[r],axis=0)/tot for r in range(dims[Edx]-1)]))
radiit

OrderedDict([('Half-blood', 1.0901639344262295),
             ('Pure-blood', 1.3360655737704918),
             ('Muggle-born', 0.6721311475409836),
             ('Part-Human', 0.5491803278688525),
             ('Pure-blood or half-blood', 1.6311475409836065),
             ('Part-Goblin', 0.5245901639344263),
             ('Muggle-born or half-blood', 0.5245901639344263),
             ('Quarter-Veela', 0.5491803278688525),
             ('Muggle', 0.5983606557377049),
             ('Squib', 0.5245901639344263)])

In [28]:
rdict[Nname],rdict[Ename]

(OrderedDict([(0, 'Unknown'),
              (1, 'Human'),
              (2, 'Half-Human/Half-Giant'),
              (3, 'Werewolf'),
              (4, 'Human (Werewolf traits)'),
              (5, 'Human(goblin ancestry)'),
              (6, 'Ghost'),
              (7, 'Centaur'),
              (8, 'Human '),
              (9, 'Human (Metamorphmagus)')]),
 OrderedDict([(0, 'Unknown'),
              (1, 'Half-blood'),
              (2, 'Pure-blood'),
              (3, 'Muggle-born'),
              (4, 'Part-Human'),
              (5, 'Pure-blood or half-blood'),
              (6, 'Part-Goblin'),
              (7, 'Muggle-born or half-blood'),
              (8, 'Quarter-Veela'),
              (9, 'Muggle'),
              (10, 'Squib')]))

In [29]:
edges = [rdict[Ename][k] for k in range(1,dims[Edx])]
nodes = [rdict[Nname][k] for k in range(1,dims[Ndx])]
hmat = np.where(M >0, 1,0)

In [30]:
hmat

array([[1, 1, 1, 0, 1, 0, 1, 1, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 0, 1, 0, 0, 1, 1, 1],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [31]:
harryshyp = hnx.Hypergraph.from_numpy_array(hmat,node_names=nodes,edge_names=edges)

In [32]:
m,rd,cd = harryshyp.incidence_matrix(index=True)  ## how to force the incidence matrix to agree with order of input?

In [33]:
m.todense()

matrix([[1, 0, 1, 1, 0, 1, 1, 1, 0, 0],
        [1, 0, 0, 1, 0, 1, 1, 1, 1, 1],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]])

In [34]:
rdict['Blood status'],rd

(OrderedDict([(0, 'Unknown'),
              (1, 'Half-blood'),
              (2, 'Pure-blood'),
              (3, 'Muggle-born'),
              (4, 'Part-Human'),
              (5, 'Pure-blood or half-blood'),
              (6, 'Part-Goblin'),
              (7, 'Muggle-born or half-blood'),
              (8, 'Quarter-Veela'),
              (9, 'Muggle'),
              (10, 'Squib')]),
 {0: 'Human',
  1: 'Human ',
  2: 'Human(goblin ancestry)',
  3: 'Ghost',
  4: 'Half-Human/Half-Giant',
  5: 'Human (Werewolf traits)',
  6: 'Werewolf',
  7: 'Human (Metamorphmagus)'})

<img src="HarryPotter/bloodstatus-house.png">

In [35]:
mat = harryshyp.incidence_matrix()
mat

<8x10 sparse matrix of type '<class 'numpy.int64'>'
	with 20 stored elements in Compressed Sparse Row format>

In [36]:
E.level('Bright green')

(4, 1)

In [66]:
E._arr

array([[[[[4, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],

         [[1, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],

         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],

         ...,

         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],

         [[1, 0, 0, ..., 0, 0, 0],
          [1, 0, 0, ..., 0, 0, 0],
       

In [38]:
rlabels = defaultdict(dict)
for e in E.labels:
    for k,v in E.labels[e].items():
        rlabels[e][v] = k
        

In [46]:
harryshyp

Hypergraph({'Half-blood': Entity(Half-blood,['Human', 'Human ', 'Werewolf', 'Human (Metamorphmagus)'],{}), 'Pure-blood': Entity(Pure-blood,['Human (Werewolf traits)', 'Human', 'Human '],{}), 'Muggle-born': Entity(Muggle-born,['Human', 'Ghost', 'Human '],{}), 'Part-Human': Entity(Part-Human,['Half-Human/Half-Giant'],{}), 'Pure-blood or half-blood': Entity(Pure-blood or half-blood,['Human', 'Ghost', 'Human '],{}), 'Part-Goblin': Entity(Part-Goblin,['Human(goblin ancestry)'],{}), 'Muggle-born or half-blood': Entity(Muggle-born or half-blood,['Human'],{}), 'Quarter-Veela': Entity(Quarter-Veela,['Human', 'Human '],{}), 'Muggle': Entity(Muggle,['Human '],{}), 'Squib': Entity(Squib,['Human '],{})},name=_)

In [50]:
E.elements

OrderedDict([(0, array([ 0,  1,  2,  5,  9, 10])),
             (1, array([0, 1, 2, 3, 4, 5])),
             (2, array([0, 1, 3, 5, 6, 7])),
             (3, array([1, 2, 5])),
             (4, array([0, 1, 2, 3, 5])),
             (5, array([4, 8])),
             (6, array([5])),
             (7, array([0]))])

In [51]:
E.dimensions

(8, 11, 10, 38, 27)

In [53]:
reg = dict()
elts = E.elements_by_level(0,1)

In [70]:
E._arr.shape,E.labels.keys()

((8, 11, 10, 38, 27),
 odict_keys(['House', 'Blood status', 'Species', 'Hair colour', 'Eye colour']))

In [73]:
newarray = np.sum(E._arr, axis=(2,3,4))

In [74]:
indices={['Half-blood','Pure-blood'

array([[16,  2,  7,  0,  0, 11,  0,  0,  0,  4,  1],
       [ 2,  7, 14,  5,  1,  9,  0,  0,  0,  0,  0],
       [ 1,  6,  0,  1,  0,  8,  1,  1,  0,  0,  0],
       [ 0,  5, 11,  0,  0, 12,  0,  0,  0,  0,  0],
       [ 1,  4,  2,  1,  0,  5,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  1,  0,  0,  0,  2,  0,  0],
       [ 0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]])

In [97]:
E.labels

OrderedDict([('House',
              OrderedDict([(0, 'Unknown'),
                           (1, 'Gryffindor'),
                           (2, 'Ravenclaw'),
                           (3, 'Slytherin'),
                           (4, 'Hufflepuff'),
                           (5, 'Beauxbatons Academy of Magic'),
                           (6, 'Durmstrang Institute'),
                           (7, 'House elf')])),
             ('Blood status',
              OrderedDict([(0, 'Unknown'),
                           (1, 'Half-blood'),
                           (2, 'Pure-blood'),
                           (3, 'Muggle-born'),
                           (4, 'Part-Human'),
                           (5, 'Pure-blood or half-blood'),
                           (6, 'Part-Goblin'),
                           (7, 'Muggle-born or half-blood'),
                           (8, 'Quarter-Veela'),
                           (9, 'Muggle'),
                           (10, 'Squib')])),
             ('Species

In [98]:
E._rlabels

defaultdict(dict,
            {'House': {'Unknown': 0,
              'Gryffindor': 1,
              'Ravenclaw': 2,
              'Slytherin': 3,
              'Hufflepuff': 4,
              'Beauxbatons Academy of Magic': 5,
              'Durmstrang Institute': 6,
              'House elf': 7},
             'Blood status': {'Unknown': 0,
              'Half-blood': 1,
              'Pure-blood': 2,
              'Muggle-born': 3,
              'Part-Human': 4,
              'Pure-blood or half-blood': 5,
              'Part-Goblin': 6,
              'Muggle-born or half-blood': 7,
              'Quarter-Veela': 8,
              'Muggle': 9,
              'Squib': 10},
             'Species': {'Unknown': 0,
              'Human': 1,
              'Half-Human/Half-Giant': 2,
              'Werewolf': 3,
              'Human (Werewolf traits)': 4,
              'Human(goblin ancestry)': 5,
              'Ghost': 6,
              'Centaur': 7,
              'Human ': 8,
              'Hu

In [None]:
def restrict_to(self, levels=[0, 1], levelnames=None, indices=None, names=None)

    if levelnames:
        levels = [int(np.where(np.array(self._headers) == name)[0]) for name in levelnames]
    axes = tuple([k for k in range(len(self.dimensions)) if k not in levels])
    new_arr = np.sum(self._arr, axis=axes)

    if names:
        ## rewrite names in terms of their 

In [110]:
# def restrict_to(self, labels):
# #     assumption this is a subset of labels dictionary, keyed by header, 
# #with either a list of categorical values or a list of indices

tlabels = {'House': ['Gryffindor','Ravenclaw'], 
           'Blood status': ['Half-blood','Pure-blood', 'Pure-blood or half-blood']}
ilabels = {'House': [1,2], 
           'Blood status': [1,2,5]}

In [138]:
def restrict_to(self, labels):
    
    ## first identify dimensions for restriction
    keys = list(labels.keys())
    if set(keys).issubset(self._headers):
        levels = [int(np.where(np.array(self._headers) == name)[0]) for name in labels]
        names = keys
    else:
        levels = keys  ## numeric positions of chosen dimensions in headers
        names = [self._headers[k] for k in levels]
        
    axes = tuple([k for k in range(len(self.dimensions)) if k not in levels])
    newarr = np.sum(self._arr, axis=axes)
    
#     ## next identify keys to restrict on in each dimension
#     newlabels = OrderedDict()
#     for kdx,key in enumerate(keys):
#         newlabels[self._headers[key]] = {jdx:labels[keys[kdx]] for jdx,j in enumerate(labels)}
#     labels = [k if k in self._headers else self._headers[k] for k in labels]
#     return labels
    return newarr,levels

In [139]:
newarr,levels = restrict_to(E,ilabels)
newarr

array([[16,  2,  7,  0,  0, 11,  0,  0,  0,  4,  1],
       [ 2,  7, 14,  5,  1,  9,  0,  0,  0,  0,  0],
       [ 1,  6,  0,  1,  0,  8,  1,  1,  0,  0,  0],
       [ 0,  5, 11,  0,  0, 12,  0,  0,  0,  0,  0],
       [ 1,  4,  2,  1,  0,  5,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  1,  0,  0,  0,  2,  0,  0],
       [ 0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]])

In [140]:
for k in levels:
    E._headers[k]

In [123]:
newarray[[1,2]][:,[1,2,5]]

array([[ 7, 14,  9],
       [ 6,  0,  8]])

In [120]:
newarray

array([[16,  2,  7,  0,  0, 11,  0,  0,  0,  4,  1],
       [ 2,  7, 14,  5,  1,  9,  0,  0,  0,  0,  0],
       [ 1,  6,  0,  1,  0,  8,  1,  1,  0,  0,  0],
       [ 0,  5, 11,  0,  0, 12,  0,  0,  0,  0,  0],
       [ 1,  4,  2,  1,  0,  5,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  1,  0,  0,  0,  2,  0,  0],
       [ 0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]])

In [124]:
E.dimensions

(8, 11, 10, 38, 27)

In [125]:
dlabels = dict()
for dim,ct in enumerate(E.dimensions):
    dlabels[dim] = np.arange(ct)

In [127]:
OrderedDict([(dim,np.arange(ct)) for dim,ct in enumerate(E.dimensions)])

OrderedDict([(0, array([0, 1, 2, 3, 4, 5, 6, 7])),
             (1, array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])),
             (2, array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])),
             (3,
              array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
                     17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
                     34, 35, 36, 37])),
             (4,
              array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
                     17, 18, 19, 20, 21, 22, 23, 24, 25, 26]))])