## Create a Static EntitySet and then a Static Hypergraph from the Harry Potter character data

In [1]:
from hypernetx import *
import matplotlib.pyplot as plt
from collections import OrderedDict,defaultdict
import scipy
from scipy.sparse import coo_matrix,issparse
import pandas as pd
import numpy as np
import itertools as it
import importlib as imp
import untitled_StaticEntity as us

### Read Dataset from kaggle

In [2]:
## Read dataset in using pandas. Fix index column or use default pandas index. 
harrydata = pd.read_csv('HarryPotter/datasets/Characters_edit.csv',encoding='unicode_escape').set_index('Id')
harrydata = pd.DataFrame(harrydata)
harrydata

Unnamed: 0_level_0,Name,Gender,Job,House,Wand,Patronus,Species,Blood status,Hair colour,Eye colour,Loyalty,Skills,Birth,Death
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,Harry James Potter,Male,Student,Gryffindor,"11"" Holly phoenix feather",Stag,Human,Half-blood,Black,Bright green,Albus Dumbledore | Dumbledore's Army | Order o...,Parseltongue| Defence Against the Dark Arts | ...,"Thursday, July 31, 1980",
2,Ronald Bilius Weasley,Male,Student,Gryffindor,"12"" Ash unicorn tail hair",Jack Russell terrier,Human,Pure-blood,Red,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Wizard chess | Quidditch goalkeeping,"Saturday, March 1, 1980",
3,Hermione Jean Granger,Female,Student,Gryffindor,"10¾"" vine wood dragon heartstring",Otter,Human,Muggle-born,Brown,Brown,Dumbledore's Army | Order of the Phoenix | Hog...,Almost everything,"19 September, 1979",
4,Albus Percival Wulfric Brian Dumbledore,Male,Headmaster,Gryffindor,"15"" Elder Thestral tail hair core",Phoenix,Human,Half-blood,Silver| formerly auburn,Blue,Dumbledore's Army | Order of the Phoenix | Hog...,Considered by many to be one of the most power...,Late August 1881,"30 June, 1997"
5,Rubeus Hagrid,Male,Keeper of Keys and Grounds | Professor of Care...,Gryffindor,"16"" Oak unknown core",,Half-Human/Half-Giant,Part-Human,Black,Black,Albus Dumbledore | Order of the Phoenix | Hogw...,Resistant to stunning spells| above average st...,6 December 1928,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Hogwarts kitchens worker (December 1994 - 1998)""",,,,House elf,,Green,,,"A type of magic specific to house-elves, perfo...",44010,"Late March, 1998",,,
140,Kreacher,Male,,,,,,,,,,,,
"Black family's house-elf (?-1996),",,,,,,,,,,,,,,
"Harry Potter's house-elf,",,,,,,,,,,,,,,


In [3]:
## Choose string to fill NaN. These will be set to 0 in system id = sid
harry = harrydata[['House','Blood status','Species','Hair colour','Eye colour']].fillna("Unknown")
for c in harry.columns:
    harry[c] = harry[c].apply(lambda x : x.replace('\xa0',' '))

In [4]:
harry.iloc[:10]

Unnamed: 0_level_0,House,Blood status,Species,Hair colour,Eye colour
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Gryffindor,Half-blood,Human,Black,Bright green
2,Gryffindor,Pure-blood,Human,Red,Blue
3,Gryffindor,Muggle-born,Human,Brown,Brown
4,Gryffindor,Half-blood,Human,Silver| formerly auburn,Blue
5,Gryffindor,Part-Human,Half-Human/Half-Giant,Black,Black
6,Gryffindor,Pure-blood,Human,Blond,Unknown
7,Gryffindor,Pure-blood,Human,Red,Brown
8,Gryffindor,Pure-blood,Human,Red,Brown
9,Gryffindor,Pure-blood,Human,Red,Bright brown
10,Gryffindor,Muggle-born,Human,Black,Brown


### Generate a counter for each column 
- Assign a sid to each value in that column
- Create a reverse counter to grab name from sid

**Questions for Tony and Cliff**
- how should we index the objects? 
- sids are whole numbers starting with column 0 and running through each column
- ldict and rdict are indexed starting with 0 representing missing values
- would we lose anything if we indexed these as -1 for missing values and then
compute the incidence matrix using only nonnegative indices?

In [5]:
ctr = [HNXCount() for c in range(5)]
ldict = OrderedDict()
rdict = OrderedDict()
for idx,c in enumerate(harry.columns):
    ldict[c] = defaultdict(ctr[idx])
    rdict[c] = OrderedDict()
    ldict[c]['Unknown']
    rdict[c][0] = 'Unknown'
    for k in harry[c]:
        ldict[c][k]
        rdict[c][ldict[c][k]] = k
    ldict[c] = dict(ldict[c]) 
dims = tuple([len(ldict[c]) for c in harry.columns])
# sid = dict()
# sctr = HNXCount()
# for cdx,c in enumerate(harry.columns):
#     for x in range(len(ldict[c])):
#         sid[sctr()] = 

In [6]:
rdict['Eye colour'].values()

odict_values(['Unknown', 'Bright green', 'Blue', 'Brown', 'Black', 'Bright brown', 'Hazel', 'Grey', 'Green', 'Dark', 'Pale silvery', 'Silvery', 'Gooseberry', 'Scarlet ', 'Pale, freckled', 'Astonishingly blue', 'Variable', 'One dark, one electric blue', 'Yellowish', 'Ruddy', 'Grey/Blue[', 'Dark blue[', 'Bright Blue', 'Dark Grey', 'Pale', 'Yellow', '44010'])

### Create an array of tuples giving positions of 1's in incidence Tensor
- The tuples indicate one point across the possible node/edge assignments
- The dimensions of the tuple give the number of unique labels in potential nodes/columns

In [7]:
m = len(harry)
n = len(harry.columns)
data = np.zeros((m,n),dtype=int)
for rid in range(m):
    for cid in range(n):
        c = harry.columns[cid]
        data[rid,cid] = ldict[c][harry.iloc[rid][c]]
dims = tuple([len(ldict[c]) for c in harry.columns])
print(f'{data[:5]}\n\nDimensions = {dims}')

[[1 1 1 1 1]
 [1 2 1 2 2]
 [1 3 1 3 3]
 [1 1 1 4 2]
 [1 4 2 1 4]]

Dimensions = (8, 11, 10, 38, 27)


In [8]:
ldict['House'].keys()

dict_keys(['Unknown', 'Gryffindor', 'Ravenclaw', 'Slytherin', 'Hufflepuff', 'Beauxbatons Academy of Magic', 'Durmstrang Institute', 'House elf'])

In [9]:
## labeldata replaces numbers in data with their corresponding labels
labeldata = np.array(data,dtype=str)
for rdx in range(m):
    for cdx in range(n):
        c = harry.columns[cdx]
        labeldata[rdx,cdx] = rdict[c][data[rdx,cdx]]
## Make a hypergraph from the House and Blood Status
labeldata[:,:][:5],harrydata['Name'][:5]

(array([['Gryffindor', 'Half-blood', 'Human', 'Black', 'Bright green'],
        ['Gryffindor', 'Pure-blood', 'Human', 'Red', 'Blue'],
        ['Gryffindor', 'Muggle-born', 'Human', 'Brown', 'Brown'],
        ['Gryffindor', 'Half-blood', 'Human', 'Silver| formerly aubu',
         'Blue'],
        ['Gryffindor', 'Part-Human', 'Half-Human/Half-Giant', 'Black',
         'Black']], dtype='<U21'),
 Id
 1                         Harry James Potter
 2                      Ronald Bilius Weasley
 3                      Hermione Jean Granger
 4    Albus Percival Wulfric Brian Dumbledore
 5                              Rubeus Hagrid
 Name: Name, dtype: object)

## Create Sparse Incidence "Tensor"
There is no scipy sparse tensor, so instead we choose the data columns first, then create the array

1. First create the incidence matrix - and check method outputs
2. Then create a sparse version and compare


In [10]:
Edx,Ndx = level1,level2 = 0,1

imat = np.zeros((dims[level1],dims[level2]),dtype=int)
tdata = data.transpose()[[level1,level2]]
idata = tdata.transpose()
for i,j in idata:
    imat[i,j] +=1
spmat = coo_matrix(([1]*len(tdata[0]),(tdata[0],tdata[1])))

In [11]:
spmat

<8x11 sparse matrix of type '<class 'numpy.int64'>'
	with 144 stored elements in COOrdinate format>

In [12]:
imat

array([[16,  2,  7,  0,  0, 11,  0,  0,  0,  4,  1],
       [ 2,  7, 14,  5,  1,  9,  0,  0,  0,  0,  0],
       [ 1,  6,  0,  1,  0,  8,  1,  1,  0,  0,  0],
       [ 0,  5, 11,  0,  0, 12,  0,  0,  0,  0,  0],
       [ 1,  4,  2,  1,  0,  5,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  1,  0,  0,  0,  2,  0,  0],
       [ 0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]])

In [13]:
slabels={k:np.array(list(rdict[k].values())) for k in harry.columns[[level1,level2]]}
slabels

{'House': array(['Unknown', 'Gryffindor', 'Ravenclaw', 'Slytherin', 'Hufflepuff',
        'Beauxbatons Academy of Magic', 'Durmstrang Institute',
        'House elf'], dtype='<U28'),
 'Blood status': array(['Unknown', 'Half-blood', 'Pure-blood', 'Muggle-born', 'Part-Human',
        'Pure-blood or half-blood', 'Part-Goblin',
        'Muggle-born or half-blood', 'Quarter-Veela', 'Muggle', 'Squib'],
       dtype='<U25')}

## Create StaticEntity 

In [14]:
imp.reload(us)
E = us.StaticEntity(spmat, slabels)
E.dimensions

(8, 11)

In [15]:
E.elements,E.children

(OrderedDict([(0, array([0, 1, 2, 4, 7])),
              (1, array([0, 1, 2, 3, 4])),
              (2, array([0, 1, 3, 4])),
              (3, array([1, 2, 4])),
              (4, array([1, 5])),
              (5, array([0, 1, 2, 3, 4, 6])),
              (6, array([2])),
              (7, array([2]))]),
 {'Half-blood',
  'Muggle',
  'Muggle-born',
  'Muggle-born or half-blood',
  'Part-Goblin',
  'Part-Human',
  'Pure-blood',
  'Pure-blood or half-blood',
  'Quarter-Veela',
  'Squib',
  'Unknown'})

In [16]:
E._arr.getcol(0).todense()

matrix([[16],
        [ 2],
        [ 1],
        [ 0],
        [ 1],
        [ 0],
        [ 0],
        [ 2]])

In [17]:
np.array(list(E._labels.keys()))[0]
self = E
f = lambda kdx: self._labels.get(list(self._headers[kdx]), {})
self._labels[self._headers[0]], self._headers[0]

(array(['Unknown', 'Gryffindor', 'Ravenclaw', 'Slytherin', 'Hufflepuff',
        'Beauxbatons Academy of Magic', 'Durmstrang Institute',
        'House elf'], dtype='<U28'),
 'House')

In [18]:
E.__dict__

{'_uid': 'test',
 '_dims': (8, 11),
 '_arr': <8x11 sparse matrix of type '<class 'numpy.int64'>'
 	with 144 stored elements in COOrdinate format>,
 '_labels': OrderedDict([('House',
               array(['Unknown', 'Gryffindor', 'Ravenclaw', 'Slytherin', 'Hufflepuff',
                      'Beauxbatons Academy of Magic', 'Durmstrang Institute',
                      'House elf'], dtype='<U28')),
              ('Blood status',
               array(['Unknown', 'Half-blood', 'Pure-blood', 'Muggle-born', 'Part-Human',
                      'Pure-blood or half-blood', 'Part-Goblin',
                      'Muggle-born or half-blood', 'Quarter-Veela', 'Muggle', 'Squib'],
                     dtype='<U25'))]),
 '_keyindex': <function untitled_StaticEntity.StaticEntity.__init__.<locals>.<lambda>(category)>,
 '_headers': array(['House', 'Blood status'], dtype='<U12'),
 '_index': <function untitled_StaticEntity.StaticEntity.__init__.<locals>.<lambda>(category, value)>,
 '_labs': <function untitle

In [19]:
E._labels

OrderedDict([('House',
              array(['Unknown', 'Gryffindor', 'Ravenclaw', 'Slytherin', 'Hufflepuff',
                     'Beauxbatons Academy of Magic', 'Durmstrang Institute',
                     'House elf'], dtype='<U28')),
             ('Blood status',
              array(['Unknown', 'Half-blood', 'Pure-blood', 'Muggle-born', 'Part-Human',
                     'Pure-blood or half-blood', 'Part-Goblin',
                     'Muggle-born or half-blood', 'Quarter-Veela', 'Muggle', 'Squib'],
                    dtype='<U25'))])

In [20]:
level1,level2 = 0,1
elts = E.elements_by_level(level1,level2)
{E.translate(level1,kdx): [E.translate(level2,wdx) for wdx in vdx] for kdx, vdx in elts.items()}

{('House', 'Unknown'): [('Blood status', 'Unknown'),
  ('Blood status', 'Half-blood'),
  ('Blood status', 'Pure-blood'),
  ('Blood status', 'Part-Human'),
  ('Blood status', 'Muggle-born or half-blood')],
 ('House', 'Gryffindor'): [('Blood status', 'Unknown'),
  ('Blood status', 'Half-blood'),
  ('Blood status', 'Pure-blood'),
  ('Blood status', 'Muggle-born'),
  ('Blood status', 'Part-Human')],
 ('House', 'Ravenclaw'): [('Blood status', 'Unknown'),
  ('Blood status', 'Half-blood'),
  ('Blood status', 'Muggle-born'),
  ('Blood status', 'Part-Human')],
 ('House', 'Slytherin'): [('Blood status', 'Half-blood'),
  ('Blood status', 'Pure-blood'),
  ('Blood status', 'Part-Human')],
 ('House', 'Hufflepuff'): [('Blood status', 'Half-blood'),
  ('Blood status', 'Pure-blood or half-blood')],
 ('House', 'Beauxbatons Academy of Magic'): [('Blood status', 'Unknown'),
  ('Blood status', 'Half-blood'),
  ('Blood status', 'Pure-blood'),
  ('Blood status', 'Muggle-born'),
  ('Blood status', 'Part-Human

In [21]:
E._arr

<8x11 sparse matrix of type '<class 'numpy.int64'>'
	with 144 stored elements in COOrdinate format>

## Select columns to use from data and create hypergraph

In [38]:
Edx = 0;Ndx = 1
Ename = harry.columns[Edx];Nname = harry.columns[Ndx]

In [39]:
## Add weights to hypergraph 
thisdata = data[:,[Ndx,Edx]].astype(int)
sp,counts = np.unique(thisdata,axis=0,return_counts=True)
sp,counts

(array([[ 0,  0],
        [ 0,  1],
        [ 0,  2],
        [ 0,  4],
        [ 0,  7],
        [ 1,  0],
        [ 1,  1],
        [ 1,  2],
        [ 1,  3],
        [ 1,  4],
        [ 2,  0],
        [ 2,  1],
        [ 2,  3],
        [ 2,  4],
        [ 3,  1],
        [ 3,  2],
        [ 3,  4],
        [ 4,  1],
        [ 4,  5],
        [ 5,  0],
        [ 5,  1],
        [ 5,  2],
        [ 5,  3],
        [ 5,  4],
        [ 5,  6],
        [ 6,  2],
        [ 7,  2],
        [ 8,  5],
        [ 9,  0],
        [10,  0]]),
 array([16,  2,  1,  1,  2,  2,  7,  6,  5,  4,  7, 14, 11,  2,  5,  1,  1,
         1,  1, 11,  9,  8, 12,  5,  1,  1,  1,  2,  4,  1]))

In [40]:
mat = np.zeros((dims[Ndx],dims[Edx]),dtype=int)
for d in range(len(counts)):
    mat[sp[d][0],sp[d][1]] = counts[d]

In [41]:
## the matrix will generate a hypergraph. Non zero entries will be 1's
print(mat[1:,1:])

[[ 7  6  5  4  0  0  0]
 [14  0 11  2  0  0  0]
 [ 5  1  0  1  0  0  0]
 [ 1  0  0  0  1  0  0]
 [ 9  8 12  5  0  1  0]
 [ 0  1  0  0  0  0  0]
 [ 0  1  0  0  0  0  0]
 [ 0  0  0  0  2  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]]


In [26]:
## Generate weights for each of the nodes based on counts relative to the whole
M = mat[1:,1:]
tot = np.sum(M)
radii = dict(zip([rdict[Nname][r] for r in range(1,len(rdict[Nname]))],[0.5+3*np.sum(M[r],axis=0)/tot for r in range(dims[Ndx]-1)]))
radii

{'Human': 2.9098360655737703,
 'Half-Human/Half-Giant': 0.5491803278688525,
 'Werewolf': 0.5245901639344263,
 'Human (Werewolf traits)': 0.5245901639344263,
 'Human(goblin ancestry)': 0.5245901639344263,
 'Ghost': 0.5737704918032787,
 'Centaur': 0.5,
 'Human ': 0.8688524590163935,
 'Human (Metamorphmagus)': 0.5245901639344263}

In [27]:
## Generate weights for each of the edges in the same way
Mt = M.transpose()
tott = np.sum(Mt)
radiit = OrderedDict(zip([rdict[Ename][r] for r in range(1,len(rdict[Ename]))],[0.5+3*np.sum(Mt[r],axis=0)/tot for r in range(dims[Edx]-1)]))
radiit

OrderedDict([('Half-blood', 1.0901639344262295),
             ('Pure-blood', 1.3360655737704918),
             ('Muggle-born', 0.6721311475409836),
             ('Part-Human', 0.5491803278688525),
             ('Pure-blood or half-blood', 1.6311475409836065),
             ('Part-Goblin', 0.5245901639344263),
             ('Muggle-born or half-blood', 0.5245901639344263),
             ('Quarter-Veela', 0.5491803278688525),
             ('Muggle', 0.5983606557377049),
             ('Squib', 0.5245901639344263)])

In [28]:
rdict[Nname],rdict[Ename]

(OrderedDict([(0, 'Unknown'),
              (1, 'Human'),
              (2, 'Half-Human/Half-Giant'),
              (3, 'Werewolf'),
              (4, 'Human (Werewolf traits)'),
              (5, 'Human(goblin ancestry)'),
              (6, 'Ghost'),
              (7, 'Centaur'),
              (8, 'Human '),
              (9, 'Human (Metamorphmagus)')]),
 OrderedDict([(0, 'Unknown'),
              (1, 'Half-blood'),
              (2, 'Pure-blood'),
              (3, 'Muggle-born'),
              (4, 'Part-Human'),
              (5, 'Pure-blood or half-blood'),
              (6, 'Part-Goblin'),
              (7, 'Muggle-born or half-blood'),
              (8, 'Quarter-Veela'),
              (9, 'Muggle'),
              (10, 'Squib')]))

In [29]:
edges = [rdict[Ename][k] for k in range(1,dims[Edx])]
nodes = [rdict[Nname][k] for k in range(1,dims[Ndx])]
hmat = np.where(M >0, 1,0)

In [30]:
hmat

array([[1, 1, 1, 0, 1, 0, 1, 1, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 0, 1, 0, 0, 1, 1, 1],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [31]:
harryshyp = hnx.Hypergraph.from_numpy_array(hmat,node_names=nodes,edge_names=edges)

In [32]:
m,rd,cd = harryshyp.incidence_matrix(index=True)  ## how to force the incidence matrix to agree with order of input?

## by setting a static flag on construction the hmat will be treated as arr and node_names, edge_names will be turned into labels
## in this case the hmat is already the incidence matrix

In [33]:
m.todense()

matrix([[0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
        [1, 0, 1, 0, 1, 1, 0, 1, 1, 0],
        [0, 0, 0, 0, 0, 1, 0, 1, 0, 0],
        [1, 0, 0, 1, 1, 1, 0, 1, 1, 1],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [34]:
rdict['Blood status'],rd

(OrderedDict([(0, 'Unknown'),
              (1, 'Half-blood'),
              (2, 'Pure-blood'),
              (3, 'Muggle-born'),
              (4, 'Part-Human'),
              (5, 'Pure-blood or half-blood'),
              (6, 'Part-Goblin'),
              (7, 'Muggle-born or half-blood'),
              (8, 'Quarter-Veela'),
              (9, 'Muggle'),
              (10, 'Squib')]),
 {0: 'Werewolf',
  1: 'Half-Human/Half-Giant',
  2: 'Human(goblin ancestry)',
  3: 'Human (Metamorphmagus)',
  4: 'Human',
  5: 'Ghost',
  6: 'Human ',
  7: 'Human (Werewolf traits)'})

<img src="HarryPotter/bloodstatus-house.png">

In [35]:
E.level('Half-blood')


(1, 1)

In [36]:
E._arr.__dict__

{'_shape': (8, 11),
 'maxprint': 50,
 'row': array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2,
        2, 3, 1, 2, 1, 0, 0, 0, 0, 3, 3, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 3, 3, 3, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0,
        0, 0, 0, 4, 0, 0, 5, 5, 6, 0, 0, 0, 0, 1, 3, 3, 4, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7], dtype=int32),
 'col': array([ 1,  2,  3,  1,  4,  2,  2,  2,  2,  3,  1,  3,  2,  2,  1,  5,  2,
         2,  2,  5,  5,  0,  5,  5,  2,  5,  5,  3,  5,  1,  2,  2,  1,  5,
         5,  1,  6,  1,  1,  3,  5,  1,  5,  5,  1,  1,  2,  2,  2,  2,  1,
         5,  2,  2,  2,  5,  5,  1,  2,  2,  1,  5,  5,  5,  7,  0,  5,  2,
         5,  5,  0,  0,  5,  5,  0,  5,  5,  5,  0,  5,  1,  5,  5,  2,  3,
         5,  1,  2,  1,  5,  5,  5,  5,  5,  2,  2,  2,  2,  0,  5

In [37]:
E.elements_by_level(1,0,translate=True)

{('Blood status', 'Unknown'): [('House', 'Unknown'),
  ('House', 'Gryffindor'),
  ('House', 'Ravenclaw'),
  ('House', 'Hufflepuff'),
  ('House', 'House elf')],
 ('Blood status', 'Half-blood'): [('House', 'Unknown'),
  ('House', 'Gryffindor'),
  ('House', 'Ravenclaw'),
  ('House', 'Slytherin'),
  ('House', 'Hufflepuff')],
 ('Blood status', 'Pure-blood'): [('House', 'Unknown'),
  ('House', 'Gryffindor'),
  ('House', 'Slytherin'),
  ('House', 'Hufflepuff')],
 ('Blood status', 'Muggle-born'): [('House', 'Gryffindor'),
  ('House', 'Ravenclaw'),
  ('House', 'Hufflepuff')],
 ('Blood status', 'Part-Human'): [('House', 'Gryffindor'),
  ('House', 'Beauxbatons Academy of Magic')],
 ('Blood status', 'Pure-blood or half-blood'): [('House', 'Unknown'),
  ('House', 'Gryffindor'),
  ('House', 'Ravenclaw'),
  ('House', 'Slytherin'),
  ('House', 'Hufflepuff'),
  ('House', 'Durmstrang Institute')],
 ('Blood status', 'Part-Goblin'): [('House', 'Ravenclaw')],
 ('Blood status', 'Muggle-born or half-blood'):