In [86]:
import codecs
import csv
import io
import json
import locale
import math
import os
import re
import subprocess
import urllib.parse
from collections import Counter
from xml.etree import ElementTree as ET

import networkx as nx
import numpy as np
import pandas as pd
from networkx.algorithms import bipartite
# Collation locale used by locale.strxfrm when ingredient names are sorted.
# The accepted spelling of the German locale differs between platforms, so
# several candidates are tried; if none is installed the default locale is kept
# instead of aborting the whole notebook with locale.Error.
for _locale_name in ('de-DE.utf-8', 'de_DE.UTF-8', 'de_DE.utf8', 'de_DE'):
    try:
        locale.setlocale(locale.LC_ALL, _locale_name)
        break
    except locale.Error:
        continue
else:
    print('German locale not available; keeping the default locale for sorting.')

# Widen the pandas display limits so large tables are shown in full.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

#### Define RecipeCollection class

In [136]:
class RecipeCollection:
    def __init__(self, graphLab_path=None, descript_fn=None, igdtCat_path=None, working_dir=None):
        # check for missing parameters
        if graphLab_path == None:
            print ('Specify path to graphLab.')
            return None
        if descript_fn == None:
            print ('Specify descriptor filename.')
            return None
        if igdtCat_path == None:
            print ('Specify path to ingredients catalogue.')
            return None
        if working_dir == None:
            print ('Specify working directory.')
            return None
        # set namespaces
        ns = {'fr': 'http://fruschtique.de/ns/recipe', 'fe': 'http://fruschtique.de/ns/fe'}
        # set object variables
        self.graphLab_path = graphLab_path
        self.descript_fn   = descript_fn
        self.igdtCat_path  = igdtCat_path
        self.working_dir   = working_dir
        # read descriptor file
        descriptor = graphLab_path + descript_fn
        with open(descriptor, 'r', encoding='utf-8') as d:
            descript = ET.parse(d)
            self.d_root = descript.getroot()
        # read list of recipes in collection
        file_in = graphLab_path + self.d_root.find('fe:experimentPath', ns).text + 'catalogue.xml'
        with open(file_in, 'r', encoding='utf-8') as f:
            list_in = ET.parse(f)
            root_in = list_in.getroot()
        in_files = [urllib.parse.unquote(doc.get("href")[8:], encoding="utf-8") for doc in root_in.findall('doc')]
        #print (in_files)
        # check for subcollection directories
        common = os.path.commonpath(in_files)
        os.chdir (common)
        sub_paths = [p for p in os.listdir() if os.path.isdir(p)]
        if len(sub_paths) > 2 or len(sub_paths) < 1:
            print ('Wrong number of subcollections.')
            return None
        # get subcollection files
        os.chdir (sub_paths[0])
        sub_coll_rcp_A = [rcp[0:-4] for rcp in os.listdir()]      
        os.chdir('..')
        os.chdir (sub_paths[1])
        sub_coll_rcp_B = [rcp[0:-4] for rcp in os.listdir()]
        # get metadata from descriptor
        title = self.d_root.find('fe:fullTitle', ns).text
        coll_A = self.d_root.find('fe:A-collection', ns)
        author_A = coll_A.find('fe:A-author', ns).text
        collName_A = coll_A.find('fe:A-name', ns).text
        coll_B = self.d_root.find('fe:B-collection', ns)
        author_B = coll_B.find('fe:B-author', ns).text
        collName_B = coll_B.find('fe:B-name', ns).text
        # create recipe and subcollection lists
        rcp_list = []
        for fn_rcp in in_files:
            with open(fn_rcp, 'r', encoding='utf-8') as f:
                rcp_in = ET.parse(f)
                rcp_root = rcp_in.getroot()
                rcp_name = rcp_root.find('fr:recipeName', ns).text
                igdts = []
                lists = rcp_root.findall('.//fr:igdtList', ns)
                for li in lists:
                    xx = [entry.get("ref") for entry in li.findall('.//fr:igdtName', ns) if entry.get("ref") != '']
                    #print (xx)
                    igdts.extend(xx)
                rcp = {'recipeName' : rcp_name, 'ingredients' : igdts}
                rcp_list.append(rcp)
        #print (rcp_list)
        rcp_dict = {'title':title, 'collections':{sub_paths[0]:{'name':collName_A, 'author':author_A,'recipes':sub_coll_rcp_A}, sub_paths[1]:{'name':collName_B, 'author':author_B,'recipes':sub_coll_rcp_B}}, 'recipes': rcp_list}
        # write rcp_dict to json file
        file_out = graphLab_path + self.d_root.find('fe:experimentPath', ns).text + 'outout.json'
        with open(file_out, "w", encoding='utf-8') as write_file:
            json.dump(rcp_dict, write_file, ensure_ascii=False)
        # open json file  
        with open(file_out, encoding='utf-8') as file:
            self.coll = json.load(file) 
        # read ingredients catalogue
        
        with open(igdtCat_path, encoding='utf-8') as file:
            self.cat = json.load(file)
            self.catEntries = [entry for entry in self.cat]
        #print (self.cat)
        self.subCollLtrs = [subcoll for subcoll in self.coll.get('collections')]
        self.recipes     = [rcp for rcp in self.coll.get('recipes')]
        self.ingredients = list(set(igt for rcp in self.recipes for igt in rcp.get('ingredients') ))     
        #print ('#distinct_igdt: ', len(self.ingredients))
        return
            
    def __str__(self):
        return f"Collection with {len(self.recipes)} recipes in {len(self.subCollLtrs)} subcollections with {len(self.ingredients)} distinct ingredients\nsupported by an ingredients catalog with {len(self.catEntries)} entries\n"
    
    def infoSubcolls (self):
        xx = [(subcoll,self.coll.get('collections').get(subcoll).get('name'),self.coll.get('collections').get(subcoll).get('author'),len(self.coll.get('collections').get(subcoll).get('recipes'))) for subcoll in self.coll.get('collections')]
        str = ''
        for sc in xx:
            str += f"subcollection {sc[0]} -- name: {sc[1]}, author: {sc[2]}, {sc[3]} recipes\n"
        return str   
        
    def recipes_list (self,coll=None):
        if coll == None:
            return [self.recipes]
        else:
            return self.coll.get('collections').get(coll).get('recipes') 
        
    def ingredients_list (self,coll=None):
        if coll == None:
            return self.ingredients
        else:
            xx = [rcp for rcp in self.coll.get('collections').get(coll).get('recipes')]
            yy = [igt for rcp in self.coll.get("recipes") if rcp.get('recipeName') in xx for igt in rcp.get('ingredients')]
            zz = list(set(yy))
            zz.sort(key=locale.strxfrm)
            return zz
                
    def catalog_list (self, select=None):
        if select == None:
            return [igt for igt in self.cat]
        elif type(select) is str:
            return self.cat.get(select)
        elif type(select) is list:
            return [self.cat.get(s) for s in select]
        
    def toGraph (self, coll=None):
        
        def igtGraph (i2r):
            B = nx.Graph(from_coll=coll,created_by='fruschtique RecipeCollection')
            X = nx.Graph(from_coll=coll,created_by='fruschtique RecipeCollection')
            top = [rcp.get('recipeName') for rcp in i2r]
            bottom = list(set([igt for rcp in i2r for igt in rcp.get('ingredients')]))
            #print ('bottom', len(bottom), 'top', len(top))
            e_list = []
            #print (i2r)
            for rcp in i2r:
                name = rcp.get('recipeName')
                for igt in rcp.get('ingredients'):
                    e_list.append((name,igt))
            #print (e_list)
            B.add_nodes_from(top, bipartite=0)
            B.add_nodes_from(bottom, bipartite=1)
            B.add_edges_from(e_list)
            X = bipartite.weighted_projected_graph(B, bottom)
            attr_dict = {igt: self.cat[igt] for igt in bottom}
            occ_list = [igt for rcp in i2r for igt in rcp.get('ingredients')]
            occ_dict = Counter(occ_list)
            occ_attr = {k:{'occ':occ_dict.get(k)} for k in occ_dict.keys()}
            #print ('occ_attr', len (occ_attr))
            nx.set_node_attributes(X, attr_dict)
            nx.set_node_attributes(X, occ_attr)
            e_attr = {}
            for e in list(X.edges(data=True)):
                x = [e[0],e[1]]
                x.sort(key=locale.strxfrm)
                id = str(x[0]) + '--' + str(x[1])
                xx = (e[0],e[1])
                e_attr[xx] = {'id':id}
            nx.set_edge_attributes(X, e_attr)
            return X
        
        # no parameter
        if coll == None:
            print ('Specify subcollection to be transformed.')
            return None
        # single subcollection
        elif type(coll) is str:
            if len(coll) != 1:
                print('Use a single character for subcollection specification.')
                return None
            elif not(coll in self.subCollLtrs):
                print (f"The subcollection {coll} is not contained in this collection.")
                return None
            else:
                xx = [rcp for rcp in self.coll.get('collections').get(coll).get('recipes')]
                i2r = [rcp for rcp in self.coll.get("recipes") if rcp.get('recipeName') in xx]
                return igtGraph(i2r)
        # two subcollections
        elif type(coll) is list:
            if len(coll) > 2:
                print('Two subcollections is maximum for graph generation.')
                return None
            elif not(coll[0] in self.subCollLtrs):
                print (f"The subcollection {coll[0]} is not contained in this collection.")
                return None
            elif not(coll[1] in self.subCollLtrs):
                print (f"The subcollection {coll[1]} is not contained in this collection.")
                return None
            else:
                i2r = [rcp for rcp in self.coll.get("recipes")]
                #print ('i2r', len(i2r))
                GG = igtGraph(i2r)
                print ('GG nodes', len (GG.nodes(data=True)))
                Arecipes = [rcp for rcp in self.coll.get('collections').get(coll[0]).get('recipes')]
                Brecipes = [rcp for rcp in self.coll.get('collections').get(coll[1]).get('recipes')]
                Aingredients = set([igt for rcp in self.coll.get("recipes") if rcp.get('recipeName') in Arecipes for igt in rcp.get('ingredients')])
                Bingredients = set([igt for rcp in self.coll.get("recipes") if rcp.get('recipeName') in Brecipes for igt in rcp.get('ingredients')])
                print ('B ingredients', Bingredients)
                print('Aigt', len(Aingredients))
                print('Bigt', len(Bingredients))
                ABingredients = Aingredients.intersection(Bingredients)
                Aingredients_pure = Aingredients.difference(ABingredients)
                Bingredients_pure = Bingredients.difference(ABingredients)
                Asub_dict = {igt: {'sub':'A'} for igt in Aingredients_pure}
                Bsub_dict = {igt: {'sub':'B'} for igt in Bingredients_pure}
                ABsub_dict = {igt: {'sub':'AB'} for igt in ABingredients}
                sub_dict = {**Asub_dict, **Bsub_dict, **ABsub_dict}
                nx.set_node_attributes(G, sub_dict)
                A_attr = {(e[0],e[1]):{'sub': 'A'} for e in list(GG.edges(data=True)) if (e[0] in Aingredients_pure and e[1] in Aingredients_pure) or (e[0] in Aingredients_pure and e[1] in ABingredients) or (e[0] in ABingredients and e[1] in Aingredients_pure)}
                B_attr = {(e[0],e[1]):{'sub': 'B'} for e in list(GG.edges(data=True)) if (e[0] in Bingredients_pure and e[1] in Bingredients_pure) or (e[0] in Bingredients_pure and e[1] in ABingredients) or (e[0] in ABingredients and e[1] in Bingredients_pure)}
                AB_attr = {(e[0],e[1]):{'sub': 'AB'} for e in list(GG.edges(data=True)) if e[0] in ABingredients and e[1] in ABingredients}
                e_attr = {**A_attr,**B_attr,**AB_attr}
                #print ('#A edges: ',len(A_attr),'\n#B edges: ',len(B_attr),'\n#AB edges: ',len(AB_attr))
                nx.set_edge_attributes(GG, e_attr)
                #print (B.edges(data=True))
                return GG

    def nodeSets(self,graph=None,coll=None):
        if graph == None:
            print ('Specify graph.')
            return None
        elif coll == None:
            print ('Specify subcollection.')
            return None
        if type(coll) is str:
            if len(coll) != 1:
                print('Use a single character for subcollection specification.')
                return None
            elif not(coll in self.subCollLtrs):
                print(f"Subcollection {coll} does not exist.")
                return None
            else:
                Anodes = set ([n for (n,attr) in graph.nodes(data=True) if attr.get('sub') == coll])
                return list(Anodes)
        elif type(coll) is list:
            if len(coll) > 2:
                print('Two subcollections is maximum for node set generation.')
                return None
            elif not(coll[0] in self.subCollLtrs):
                print (f"The subcollection {coll[0]} is not contained in this collection.")
                return None
            elif not(coll[1] in self.subCollLtrs):
                print (f"The subcollection {coll[1]} is not contained in this collection.")
                return None
            else:
                xx = f"{coll[0]}{coll[1]}"
                ABnodes = [n for (n,attr) in graph.nodes(data=True) if attr.get('sub') == xx]
                return ABnodes
        else:
            return None
    
    def edgeSets(self,graph=None,coll=None):         
        if graph == None:
            print ('Specify graph.')
            return None
        elif coll == None:
            print ('Specify subcollection.')
            return None
        if type(coll) is str:
            if len(coll) != 1:
                print('Use a single character for subcollection specification.')
                return None
            elif not(coll in self.subCollLtrs):
                print(f"Subcollection {coll} does not exist.")
                return None
            else:
                n_A  = [n for n,attr in graph.nodes(data=True) if attr.get('sub') == coll]
                A_e_pure  = [e for e in graph.edges(data=True) if e[0] in n_A and e[1] in n_A]
                return A_e_pure  
        elif type(coll) is list:
            if len(coll) > 2:
                print('Two subcollections is maximum for node set generation.')
                return None
            elif not(coll[0] in self.subCollLtrs):
                print (f"The subcollection {coll[0]} is not contained in this collection.")
                return None
            elif not(coll[1] in self.subCollLtrs):
                print (f"The subcollection {coll[1]} is not contained in this collection.")
                return None
            else:
                n_A  = [n for n,attr in graph.nodes(data=True) if attr.get('sub') == coll[0]]
                n_B  = [n for n,attr in graph.nodes(data=True) if attr.get('sub') == coll[1]]
                n_AB = [n for n,attr in graph.nodes(data=True) if attr.get('sub') == f"{coll[0]}{coll[1]}"]
                A_e_pure       = [e for e in graph.edges(data=True) if e[0] in n_A and e[1] in n_A]
                B_e_pure       = [e for e in graph.edges(data=True) if e[0] in n_B and e[1] in n_B]
                A_e_mixed      = [e for e in graph.edges(data=True) if (e[0] in n_A and e[1] in n_AB) or (e[1] in n_A and e[0] in n_AB)]
                B_e_mixed      = [e for e in graph.edges(data=True) if (e[0] in n_B and e[1] in n_AB) or (e[1] in n_B and e[0] in n_AB)]
                AB_e_intersect = [e for e in graph.edges(data=True) if e[0] in n_AB and e[1] in n_AB]
                return {'A_e_pure' : A_e_pure, 'B_e_pure' : B_e_pure, 'A_e_mixed' : A_e_mixed, 'B_e_mixed' : B_e_mixed, 'AB_e_intersect' : AB_e_intersect}
        else:
            return None
        
    def toDot(self,graph,path,fn):
        dot = 'graph {\ngraph[rankdir="LR", outputorder="edgesfirst"]\nnode[fontname="Arial", fontsize=120, shape=circle, style=filled, fixedsize=shape];\n'
        for u,v,att in graph.edges(data=True):
            dot += u+' -- '+v+' [penwidth='+str(att.get('weight'))
            if att.get('weight') > 1:
                dot += ', color=Red]\n'
            else:
                dot += ']\n'
        for u,att in graph.nodes(data=True):
            dot += u+' [width=' + str(1+3*math.sqrt(att.get('occ'))) + ', label=' + str(att.get('i-name')) + ', class=' + str(att.get('i-class')) + ']\n'
        dot += '}'
        os.chdir(path)
        with codecs.open(fn, 'w', encoding = 'utf8') as file:
            file.write(dot)
        return
    
    def toGephi(self,graph,fn):
        pass
    
    def toCSV(self,graph,path,fn):
        os.chdir(path)
        basename, extension = os.path.splitext(fn)
        nodes_fn = basename + '_nodes.csv'
        with codecs.open(nodes_fn, 'w', encoding = 'utf8') as file:
            file.write('n,i-name,i-class,occ,sub\n')
            for (n,attr) in graph.nodes(data=True):
                file.write(f"{n},{attr.get('i-name')},{attr.get('i-class')},{attr.get('occ')},{attr.get('sub')}\n")
        edges_fn = basename + '_edges.csv'
        with codecs.open(edges_fn, 'w', encoding = 'utf8') as file:
            file.write('n1,n2,id,weight,sub\n')
            for (n1,n2,attr) in graph.edges(data=True):
                file.write(f"{n1},{n2},{attr.get('id')},{attr.get('weight')},{attr.get('sub')}\n")
        return
    
    def previewSVG(self,graph=None,scale=1.0):
        if graph == None:
            print ('Specify graph.')
            return None
        
        subprocess.run (['sfdp', self.working_dir + 'dotdot.dot', '-o' + self.working_dir + 'svgGraph-poor.svg', '-Goverlap=prism', '-Tsvg'])
        
        os.chdir(self.working_dir)
        with open('svgGraph-poor.svg', 'r', encoding='utf-8') as f:
            svg_in = ET.parse(f)
            root_in = svg_in.getroot()
        ns = {'svg': 'http://www.w3.org/2000/svg'}
        
        edges = root_in.findall('svg:g/svg:g[@class="edge"]',ns)
        #e_coor = {e.find('svg:title',ns).text:e.find('svg:path',ns).get('d') for e in edges}
        #print(e_coor)
        nodes = root_in.findall('svg:g/svg:g[@class="node"]',ns)
        #n_coor = {n.find('svg:title',ns).text:{'cx':n.find('svg:ellipse',ns).get('cx'), 'cy':n.find('svg:ellipse',ns).get('cy'),\
        #         'rx':n.find('svg:ellipse',ns).get('rx'), 'ry':n.find('svg:ellipse',ns).get('ry')} for n in nodes}
        #print(n_coor)
        #t_coor = {n.find('svg:title',ns).text:{'x':n.find('svg:text',ns).get('x'), 'y':n.find('svg:text',ns).get('y'),\
        #         'font-size':n.find('svg:text',ns).get('font-size')} for n in nodes}
        #print(t_coor)
        
        xx = max([n.find('svg:ellipse',ns).get('rx') for n in nodes])
        #xx = [x for x in list(nx.get_node_attributes(graph, "occ").values())]
        font_size = math.ceil(float(scale)*20.0*float(max(xx)))
        
        transform = root_in.find('svg:g[@id="graph0"]',ns).get('transform')
        
        preview = ET.Element('html')
        head  = ET.SubElement(preview, 'head')
        style = ET.SubElement(head, 'style')
        style.text = \
        ' .i-alc   {fill: #7087ED; stroke: #7087ED; background-color: #7087ED}' +\
        ' .i-carb  {fill: #C8A98B; stroke: #C8A98B; background-color: #C8A98B}' +\
        ' .i-condi {fill: #D58680; stroke: #D58680; background-color: #D58680}' +\
        ' .i-egg   {fill: #70A287; stroke: #70A287; background-color: #70A287}' +\
        ' .i-etc   {fill: #9AA6BF; stroke: #9AA6BF; background-color: #9AA6BF}' +\
        ' .i-fat   {fill: #81CDD8; stroke: #81CDD8; background-color: #81CDD8}' +\
        ' .i-fish  {fill: #ffdab9; stroke: #ffdab9; background-color: #ffdab9}' +\
        ' .i-fruit {fill: #7FDD46; stroke: #7FDD46; background-color: #7FDD46}' +\
        ' .i-herb  {fill: #95A84E; stroke: #95A84E; background-color: #95A84E}' +\
        ' .i-meat  {fill: #EE5874; stroke: #EE5874; background-color: #EE5874}' +\
        ' .i-milk  {fill: #6EA2DC; stroke: #6EA2DC; background-color: #6EA2DC}' +\
        ' .i-nuts  {fill: #D09E44; stroke: #D09E44; background-color: #D09E44}' +\
        ' .i-onion {fill: #60C667; stroke: #60C667; background-color: #60C667}' +\
        ' .i-spice {fill: #FF7F50; stroke: #FF7F50; background-color: #FF7F50}' +\
        ' .i-sweet {fill: #CDE1A6; stroke: #CDE1A6; background-color: #CDE1A6}' +\
        ' .i-veg   {fill: #65DDB7; stroke: #65DDB7; background-color: #65DDB7}'
        
        script = ET.SubElement(head, 'script')
        script.text = \
        'function show_A()    {' +\
        'let g0 = document.getElementById("graph0");' +\
        'let g1 = g0.cloneNode(false);' +\
        'g0.remove();' +\
        'const n = document.createElementNS("http://www.w3.org/2000/svg","use");' +\
        'n.setAttribute("href","#A_nodes");' +\
        'g1.appendChild (n);' +\
        'let svg = document.getElementsByTagName("svg")[0];' +\
        'svg.appendChild(g1);' +\
        '};' +\
        'function show_B()    {' +\
        'let g0 = document.getElementById("graph0");' +\
        'let g1 = g0.cloneNode(false);' +\
        'g0.remove();' +\
        'const n = document.createElementNS("http://www.w3.org/2000/svg","use");' +\
        'n.setAttribute("href","#B_nodes");' +\
        'g1.appendChild (n);' +\
        'let svg = document.getElementsByTagName("svg")[0];' +\
        'svg.appendChild(g1);' +\
        '};' +\
        'function show_AB()   {' +\
        'let g0 = document.getElementById("graph0");' +\
        'let g1 = g0.cloneNode(false);' +\
        'g0.remove();' +\
        'const n = document.createElementNS("http://www.w3.org/2000/svg","use");' +\
        'n.setAttribute("href","#AB_nodes");' +\
        'g1.appendChild (n);' +\
        'let svg = document.getElementsByTagName("svg")[0];' +\
        'svg.appendChild(g1);' +\
        '};' +\
        'function show_full() {' +\
        'let g0 = document.getElementById("graph0");' +\
        'let g1 = g0.cloneNode(false);' +\
        'g0.remove();' +\
        'const nA = document.createElementNS("http://www.w3.org/2000/svg","use");' +\
        'nA.setAttribute("href","#A_nodes");' +\
        'g1.appendChild (nA);' +\
        'const nB = document.createElementNS("http://www.w3.org/2000/svg","use");' +\
        'nB.setAttribute("href","#B_nodes");' +\
        'g1.appendChild (nB);' +\
        'const nAB = document.createElementNS("http://www.w3.org/2000/svg","use");' +\
        'nAB.setAttribute("href","#AB_nodes");' +\
        'g1.appendChild (nAB);' +\
        'let svg = document.getElementsByTagName("svg")[0];' +\
        'svg.appendChild(g1);' +\
        '};' 
        
        body  = ET.SubElement(preview, 'body')
        div_attr   = {'style':'width:1200px;height:120px;'}
        div_form   = ET.SubElement(body,'div', attrib=div_attr)
        buttonA_attr = {'id':'btn_graph_A', 'type':'button','onclick':'show_A()', 'style':f"cursor:pointer;font-size:16px; margin:24px; padding:12px"}
        buttonA      = ET.SubElement(div_form, 'button', attrib=buttonA_attr)
        buttonA.text = 'subgraph A'
        buttonB_attr = {'id':'btn_graph_B', 'type':'button','onclick':'show_B()', 'style':f"cursor:pointer;font-size:16px; margin:24px; padding:12px"}
        buttonB      = ET.SubElement(div_form, 'button', attrib=buttonB_attr)
        buttonB.text = 'subgraph B'
        buttonAB_attr = {'id':'btn_graph_AB', 'type':'button','onclick':'show_AB()', 'style':f"cursor:pointer;font-size:16px; margin:24px; padding:12px"}
        buttonAB      = ET.SubElement(div_form, 'button', attrib=buttonAB_attr)
        buttonAB.text = 'subgraph A ∩ B'
        buttonfull_attr = {'id':'btn_graph_full', 'type':'button','onclick':'show_full()', 'style':f"cursor:pointer;font-size:16px; margin:24px; padding:12px"}
        buttonAB      = ET.SubElement(div_form, 'button', attrib=buttonfull_attr)
        buttonAB.text = 'full graph'
        
        div   = ET.SubElement(body,'div')
        svg_out_attr = {'xmlns':'http://www.w3.org/2000/svg', 'xmlns:xlink':'http://www.w3.org/1999/xlink', 'version':'1.1', 'viewBox':root_in.get('viewBox')}
        svg_out = ET.SubElement(div, 'svg', attrib=svg_out_attr)
        defs = ET.SubElement(svg_out,'defs')
        #edges_A_pure = ET.SubElement(defs,'g').set('id','A_edges_pure')
        #edges_A_mixed = ET.SubElement(defs,'g').set('id','A_edges_mixed')
        #edges_B_pure = ET.SubElement(defs,'g').set('id','B_edges_pure')
        #edges_B_mixed = ET.SubElement(defs,'g').set('id','B_edges_mixed')
        #edges_AB = ET.SubElement(defs,'g').set('id','AB_edges')
        
        A_n = self.nodeSets(G,'A') 
        B_n = self.nodeSets(G,'B') 
        AB_n = self.nodeSets(G,['A','B'])
        
        nodes_A = ET.SubElement(defs, 'g', id='A_nodes')
        nodes_B = ET.SubElement(defs, 'g', id='B_nodes')
        nodes_AB = ET.SubElement(defs, 'g', id='AB_nodes')
        
        for n in nodes:
            if n.find('svg:title',ns).text in A_n: 
                sub = 'a'
                def_el = nodes_A
            elif n.find('svg:title',ns).text in B_n: 
                sub = 'b'
                def_el = nodes_B
            elif n.find('svg:title',ns).text in AB_n: 
                sub = 'ab'
                def_el = nodes_AB
            node_attr   = {'class':'node', 'id':n.find('svg:title',ns).text, 'data-sub':sub, 'style':'cursor: pointer;'}
            node        = ET.SubElement(def_el, 'g', attrib=node_attr)
            title       = ET.SubElement(node, 'title')
            title.text  = f"{self.cat.get(n.find('svg:title',ns).text).get('i-name')}"
            ellip_class = f"i-{self.cat.get(n.find('svg:title',ns).text).get('i-class')}"            
            ellip_attr  = {'class':ellip_class, 'cx':n.find('svg:ellipse',ns).get('cx'), 'cy':n.find('svg:ellipse',ns).get('cy'), 'rx':n.find('svg:ellipse',ns).get('rx'), 'ry':n.find('svg:ellipse',ns).get('ry')}
            ellipse     = ET.SubElement(node, 'ellipse', attrib=ellip_attr)
            text_attr   = {'x':n.find('svg:text',ns).get('x'), 'y':n.find('svg:text',ns).get('y'), 'style':f"text-anchor: middle; font-family: Arial; font-size: {font_size}px;"}
            text        = ET.SubElement(node, 'text', attrib=text_attr)
            text.text   = n.find('svg:text',ns).text 
        
        graph0_attr = {'transform':transform, 'id':'graph0'}
        graph0 = ET.SubElement(svg_out,'g', attrib=graph0_attr)
        use_attr = {'href':'#A_nodes'}
        ET.SubElement(graph0,'use', use_attr)
        use_attr = {'href':'#B_nodes'}
        ET.SubElement(graph0,'use', use_attr)
        use_attr = {'href':'#AB_nodes'}
        ET.SubElement(graph0,'use', use_attr)
        
        tree = ET.ElementTree(preview)
        ET.indent(tree)
        tree.write(self.working_dir + 'preview.html')
        
        return

#### Instantiate RecipeCollection class

In [141]:
# Locations used by this cell; adjust them here when the data moves.
GRAPHLAB_DIR = 'C:/Users/nlutt/Documents/Websites/graphLab/'
DATA_DIR     = 'C:/Users/nlutt/myPyPro/second/data/'

HD_YO = RecipeCollection(GRAPHLAB_DIR,
                         'currentDescriptor compareHD-YO.xml',
                         DATA_DIR + 'igt_cat.json',
                         DATA_DIR)
print (HD_YO)

print(HD_YO.infoSubcolls())
# number of distinct ingredients in subcollection A
xx = len(HD_YO.ingredients_list('A'))

# ingredient graph over both subcollections
G = HD_YO.toGraph(['A','B'])
print ('G', G)
print ('G attributes: ', G.graph)
HD_YO.toDot(G, DATA_DIR, 'test2dot.dot')

# node sets: exclusive to A, exclusive to B, shared by both
A_n = HD_YO.nodeSets(G,'A')
print (f"nodeSet A: {len(A_n)} nodes")
B_n = HD_YO.nodeSets(G,'B')
# report the size like for the other sets instead of dumping the whole list
print (f"nodeSet B: {len(B_n)} nodes")
# same letter order as used for toGraph, so the shared label matches
AB_n = HD_YO.nodeSets(G, ['A','B'])
print (f"nodeSet AB: {len(AB_n)} nodes")

# edges running inside each subcollection only
edgeSet_A_pure = HD_YO.edgeSets(G,'A')
edgeSet_B_pure = HD_YO.edgeSets(G,'B')

# Optional exports -- enable as needed:
#HD_YO.toDot (G, GRAPHLAB_DIR + 'sampleSpaces/compareHD-YO/graphs/','xxx.dot')
#HD_YO.toCSV (G, GRAPHLAB_DIR + 'sampleSpaces/compareHD-YO/graphs/','xxx.csv')
#HD_YO.previewSVG(G,1.0)

Collection with 187 recipes in 2 subcollections with 281 distinct ingredients
supported by an ingredients catalog with 717 entries

subcollection A -- name: HD-Gemüse, author: Henriette Davidis, 95 recipes
subcollection B -- name: YO-Gemüse, author: Yotam Ottolenghi, 92 recipes

GG nodes 281
B ingredients {'ziegenfrischkäse', 'schnittlauch', 'petersilie', 'sonnenblumenöl', 'granatapfelsirup', 'koriander', 'sojasauce', 'minze', 'salbei', 'hibiskus', 'ingwer', 'mangopickle', 'gurke', 'kokoscrème', 'szechuanpfeffer', 'jalapeno', 'chipotle', 'steinpilz', 'piment', 'zucchini', 'knollensellerie', 'spargel', 'weiße_bohnen', 'cumin', 'hühnerbrühe', 'olivenöl', 'crownprincekürbis', 'pfirsich', 'pecorino', 'curcuma', 'ancho', 'grüner_spargel', 'tofu', 'kardamom', 'radieschen', 'cayennepfeffer', 'kimchi', 'chilipaste', 'sahne', 'mangold', 'babyspinat', 'rote_bete', 'fischsauce', 'ahornsirup', 'hartweizengrieß', 'graupen', 'dijonsenf', 'süßkartoffel', 'stärke', 'shiitake', 'kümmel', 'oregano', 'mi