<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [3]:
def CMAP2DF(cmapID,baseURL = 'https://cmapscloud.ihmc.us/resources/rid='):
    """Reads a CMAP on a remote server and puts its contents into data frames"""
    cxlDoc=getCMAP(cmapID, baseURL)
    #print (cxlDoc)
    parseCMAP(cxlDoc)

def getCMAP(cmapID='1W30W801X-V1NSJ8-3R4',baseURL = 'https://cmapscloud.ihmc.us/resources/rid=', email='ic3foods@gmail.com'):
    """
    Given a CMAP ID and possibly a Server URL, read a CMAP from a CMAP server, the default server being the CMAPs cloud server
    Note we are using this webservice: https://cmap.ihmc.us/xml/Cmapserver-HTTP-API.pdf
    """
    import base64, sys, requests
    import getpass
    from urllib import request
    from urllib.request import urlopen
    from urllib.error import URLError
    #For CMaps GET commands: get.resmeta, get.cmap
    getCMAPmetadata = '/?cmd=get.resmeta' 
    getCMAP = '/?cmd=get.cmap' 
    getPermissions = '/?cmd=get.permissions' 
    url = baseURL + cmapID + getCMAP
    pwd = getpass.getpass(prompt='Password: ', stream=None) 
    requests = requests.get(url, auth=(email, pwd), allow_redirects=True, verify=True)
    cxlDoc = requests.text
    return(cxlDoc)

def parseCMAP(cxlDoc=getCMAP()):
    """
    Given a cmap cxl (xml) document, parse it and shove it into a data frame.
    We'll start with the resource metadata (ResMeta), followed by:
    concepts, link phrases, connections, their appearances, and any associated resources
    """
    import xmltodict
    import pandas as pd
    ###################################
    def dicKeyRenamer (oldDic, dicRenamer):
        """
        A dic key renaming function we'll use for renaming variables from oldDic 
        with a dicRenamer containing oldnames:newnames. Putting the renamed k,v pairs
        in a newDic
        """
        newDic = {}
        for k,v in oldDic.items():
            for k2,v2 in dicRenamer.items():
                if k==k2:
                    newDic[v2] = v
        return(newDic)
    ###################################
    def countDicItems (d):
        """
        takes a (sub) dictionary and returns the number of items (lists or dics)
        """
        count = 0
        for x in enumerate(d.items()): 
            if isinstance(x[1][1], list): 
                count += len(x[1][1]) 
        return (count)
    ###################################
    def subDic2DF (rootDicName, rootDicVal, dicLoc, nodParent, nodFamily, dicColRenames={}):
        """
        takes a parent (sub) dicionary (node) and makes the homogeneous child nodes into a dataframe 
        with ability to rename columns if dicColRenames provided in form of {dicName:colName}
        nodeText and 
        """
        if (isinstance(nodParent, int)) or \
            (nodParent is None) or \
            (nodFamily is None) or \
            (nodParent in dicLoc): 
            if nodParent is None:
                print("nodParent is None!")
                dicParent = dicLoc
            else:
                dicParent = dicLoc[nodParent]
                #print(dicParent)
                nodeCount  = countDicItems(dicParent)
                #print('nodeCount:' + str(nodeCount))
                keyCount = len(dicParent)
            #print('keyCount:' + str(keyCount))
            if nodFamily is None: #make a df for the keys at this level
                print("nodFamily is None!")
                keyCount = len(dicParent)
                if keyCount>0:
                    print('')
                df = pd.DataFrame([dicParent])
            else:
                if nodeCount>1:
                    df = pd.DataFrame(dicParent[nodFamily]) # turn dic into dataframe
                else:
                    df = pd.DataFrame([dicParent[nodFamily]] )#accomodate singleton as a non-scalar with extra '[]'
                df.rename(columns= dicColRenames,inplace=True)    #get rid of the dict names and make more SQL friendly
            for column in df: #rename columns to remove/replace special characters
                ampNum = str(column).find('@')  #eventually update this to find all special characters that may be in a dict name
                if ampNum >-1:
                    newCol = str(column)[ampNum+1:]
                    df.rename(columns= {column:newCol},inplace=True)
                newCol = str(column).replace(':','-')
                df.rename(columns= {column:newCol},inplace=True)
            df.insert(loc=0,column=rootDicName, value=rootDicVal) # add rootID as a column   
            if  dicLoc != dicMapResPeople and nodFamily != 'resource':
                if nodFamily is not None:
                    display(nodFamily + 's')
                    display(df)
            return(df)
    #############################################
    #First transform cxl into dic
    dicDoc = xmltodict.parse (cxlDoc) 
    #set-up the mapID and associated variables
    print(cxlDoc)
    mapID = dicDoc['cmap']['res-meta']['dc:source'].split(':')[-1]
    rootDicName = 'mapID' #we'll use rootDicName as we parse the dics
    rootDicVal = mapID    #we'll use rootDicVal as we parse the dics
    dicLoc = None
    nodParent = None
    nodFamily = None
    dicColRenames = {}
    tupDic2DF = (rootDicName, rootDicVal, dicLoc, nodParent, nodFamily,dicColRenames )
    #############################################
    #Make a df of the map (resource) metadata
    #first make a list people and orgs to pull out their sub-dics 
    listResMetaPeopleOrgs = ['dc:creator', 'dc:contributor', 'dcterms:rightsHolder'] 
    #create new dics with & without people/orgs 
    dicMapResMeta = {key:val for key, val in dict(dicDoc['cmap']['res-meta']).items() if key not in listResMetaPeopleOrgs}
    dicMapResPeople = {key:val for key, val in dict(dicDoc['cmap']['res-meta']).items() if key in listResMetaPeopleOrgs}

    #Now we'll make a DF from the resmeta dic sans people/org references
    dicLoc = dicMapResMeta
    tupDic2DF = (rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
    dfMapResMeta = subDic2DF(*tupDic2DF)
    #############################################
    #make a df of all the people sans metadata
    dicLoc = dicMapResPeople
    dfMapResPeople = None
    for item in listResMetaPeopleOrgs:
        nodParent = item
        tupDic2DF = (rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
        if dfMapResPeople is not None:
            dfMapResPeople = dfMapResPeople.append(subDic2DF(*tupDic2DF))
            dfMapResPeople['role'].iat[-1] =item.split(':')[-1]
        else:
            dfMapResPeople = subDic2DF(*tupDic2DF)
            dfMapResPeople.insert(loc=1, column='role', value=item.split(':')[-1])
        if 'vcard-ORG' in dfMapResPeople.columns:
            dfMapResPeople['vcard-ORG'].iat[-1] = str(dfMapResPeople['vcard-ORG'].iat[-1]).split(", '",1)[-1][:-4] 
    display(dfMapResPeople)
    ###################################
    #Variables for Map-level dict extraction
    dicLoc = dicDoc['cmap']['map'] 
    #############################################
    #Get the Concepts
    nodParent = 'concept-list'
    nodFamily = 'concept'
    dfConcepts = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
    numConcepts = len(dfConcepts.index)
    #############################################
    #Get the Concept-appearances
    nodParent = 'concept-appearance-list'
    nodFamily = 'concept-appearance'
    dfConceptsAprncss = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
    #############################################
    #Get the Linking Phrases 
    nodParent = 'linking-phrase-list'
    nodFamily = 'linking-phrase'
    dfLinkingPhrases = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
    #############################################
    #Get the linking-phrase-appearances
    nodParent = 'linking-phrase-appearance-list'
    nodFamily = 'linking-phrase-appearance'
    dfLinkingPhraseAprncs = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
    #############################################
    #Get the connections
    nodParent = 'connection-list'
    nodFamily = 'connection'
    dfCnxns = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
    #############################################
    #Get the connections-appearances
    nodParent = 'connection-appearance-list'
    nodFamily = 'connection-appearance'
    dfCnxnAprncs = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
    #############################################
    #Get the resource-groups
    nodParent = 'resource-group-list'
    nodFamily = 'resource-group'
    dfResGrps = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
    dfResGrps['resource']
    #############################################
    #Get the resources
    dicLoc = dicDoc['cmap']['map']['resource-group-list']['resource-group']
    rgCount = len(dfResGrps.index)
    rgCounter = 0 #the counter for iterating through each of the ResourceGroups
    dfResources = None
    while rgCounter < rgCount:
        nodParent = rgCounter
        nodFamily = 'resource'
        if dfResources is not None:
            dfResources = dfResources.append(subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily))
            dfResources['parent-id'].iat[-1] = dicLoc[rgCounter].get("@parent-id")
        else:
            dfResources = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
            dfResources.insert(loc=1, column='parent-id', value=dicLoc[rgCounter].get("@parent-id"))
        rgCounter +=1
    display(dfResources)
    print('DONE!!')
    #############################################
    #Get the resource-appearances
    dicLoc = dicDoc['cmap']['map']
    nodParent = 'resource-appearance-list'
    nodFamily = 'resource-appearance'
    dfResAprncs = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
    #############################################
    #Get the stylesheets
    nodParent = 'style-sheet-list'
    nodFamily = 'style-sheet'
    dfStyleSheets = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
    #############################################
    #Get the styles within stylesheets
    listStyles = ['map-style', 'concept-style', 'linking-phrase-style', 'connection-style', 'resource-style']
    dicLoc = dicDoc['cmap']['map']['style-sheet-list']['style-sheet']
    dfStyles = None
    rgCount = len(dfStyleSheets.index)
    rgCounter = 0 #the counter for iterating through each of the style-sheets
    while rgCounter < rgCount:
        nodParent = rgCounter
        for item in listStyles:
            nodFamily = item
            if item in (dicLoc[nodParent]):
                if dfStyles is not None:
                    dfStyles = dfStyles.append(subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily))
                    dfStyles['style-sheet-id'].iat[-1] = dicLoc[rgCounter].get("@id")
                    dfStyles['style-type'].iat[-1] = str(item)
                else:
                    dfStyles = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
                    dfStyles.insert(loc=1, column='style-sheet-id', value = dicLoc[rgCounter].get("@id"))
                    dfStyles.insert(loc=1, column='style-type', value = str(item))
            display(dfStyles)
        rgCounter +=1
    
    #############################################    #Get the extra-properties
    dicLoc = dicDoc['cmap']['map']
    nodParent = 'extra-properties-list'
    nodFamily = 'properties-list'
    dfExtraProperties = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
    display(dfExtraProperties)
    #############################################
    #Get the images
    nodParent = 'image-list'
    nodFamily = 'image'
    dfExtraProperties = subDic2DF(rootDicName, rootDicVal, dicLoc, nodParent, nodFamily)
    display(dfExtraProperties)
    #############################################

Password: ········


In [4]:
CMAP2DF('1W46SJ7J7-CZ2M5Y-3MW')#'1W3PQRS9F-JF73RT-C6K')#,'https://cmapspublic3.ihmc.us:443/resources/rid=')#('1W0WVLFBD-TS6DJK-DSG')#('1W30W801X-V1NSJ8-3R4') #('1SZD25T3-FT7FSN-FZ')#

Password: ········
<?xml version="1.0" encoding="UTF-8"?>
<cmap xmlns:dcterms="http://purl.org/dc/terms/" xmlns="http://cmap.ihmc.us/xml/cmap/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:vcard="http://www.w3.org/2001/vcard-rdf/3.0#">
    <res-meta>
        <dc:title>Knows-Bodies-Domains</dc:title>
        <dc:creator>
            <vcard:FN>Matthew Lange</vcard:FN>
            <vcard:EMAIL>mclange@ucdavis.edu</vcard:EMAIL>
            <vcard:ORG>
                <vcard:Orgname>IC-FOODS at UC Davis</vcard:Orgname>
            </vcard:ORG>
        </dc:creator>
        <dc:contributor>
            <vcard:FN>Matthew Lange</vcard:FN>
            <vcard:EMAIL>matthew@ic-foods.org</vcard:EMAIL>
            <vcard:ORG>
                <vcard:Orgname>IC-FOODS at UC Davis</vcard:Orgname>
            </vcard:ORG>
        </dc:contributor>
        <dcterms:rightsHolder>
            <vcard:FN>Matthew Lange</vcard:FN>
            <vcard:EMAIL>mclange@ucdavis.edu</vcard:EMAIL>
            <vca

Unnamed: 0,mapID,role,vcard-FN,vcard-EMAIL,vcard-ORG
0,1W46SJ7J7-CZ2M5Y-3MW,creator,Matthew Lange,mclange@ucdavis.edu,IC-FOODS at UC Davis
0,1W46SJ7J7-CZ2M5Y-3MW,contributor,Matthew Lange,matthew@ic-foods.org,IC-FOODS at UC Davis
0,1W46SJ7J7-CZ2M5Y-3MW,rightsHolder,Matthew Lange,mclange@ucdavis.edu,IC-FOODS at UC Davis


'concepts'

Unnamed: 0,mapID,id,label
0,1W46SJ7J7-CZ2M5Y-3MW,1W46TM41Y-19FLRVD-4TG,KB
1,1W46SJ7J7-CZ2M5Y-3MW,1W46T9398-1T8V5D2-4JS,KnowledgeBody\nCommunity\n(knowBodCom)
2,1W46SJ7J7-CZ2M5Y-3MW,1W46SJD95-15J5WXD-3PL,KnowBrow-enabled KMs
3,1W46SJ7J7-CZ2M5Y-3MW,1W46TH8RS-7C3YGF-4PW,ratiings
4,1W46SJ7J7-CZ2M5Y-3MW,1W46T5L1L-3PSW7L-4GG,????
5,1W46SJ7J7-CZ2M5Y-3MW,1W46V1C70-1X22SGM-544,????
6,1W46SJ7J7-CZ2M5Y-3MW,1W46SNVXT-H4WMNL-3WB,KnowBrow\nfeatures
7,1W46SJ7J7-CZ2M5Y-3MW,1W46SZN6Q-17KYF41-47T,community\nknowledge\ndevelopment
8,1W46SJ7J7-CZ2M5Y-3MW,1W46T9RM6-C40QRR-4L6,KnowledgeDomain\nCommunity\n(knowDomCom)
9,1W46SJ7J7-CZ2M5Y-3MW,1W46SSPX6-1RXLRVV-41H,KnowBrow-enabled KM componentss\n(e.g. proposi...


'concept-appearances'

Unnamed: 0,mapID,id,x,y,width,height,stylesheet-id
0,1W46SJ7J7-CZ2M5Y-3MW,1W46TM41Y-19FLRVD-4TG,444,647,38,31,
1,1W46SJ7J7-CZ2M5Y-3MW,1W46T9398-1T8V5D2-4JS,180,776,109,61,
2,1W46SJ7J7-CZ2M5Y-3MW,1W46SJD95-15J5WXD-3PL,482,80,153,31,
3,1W46SJ7J7-CZ2M5Y-3MW,1W46TH8RS-7C3YGF-4PW,465,836,64,33,
4,1W46SJ7J7-CZ2M5Y-3MW,1W46T5L1L-3PSW7L-4GG,439,495,49,31,
5,1W46SJ7J7-CZ2M5Y-3MW,1W46V1C70-1X22SGM-544,830,906,49,31,
6,1W46SJ7J7-CZ2M5Y-3MW,1W46SNVXT-H4WMNL-3WB,445,189,79,45,
7,1W46SJ7J7-CZ2M5Y-3MW,1W46SZN6Q-17KYF41-47T,456,445,91,61,
8,1W46SJ7J7-CZ2M5Y-3MW,1W46T9RM6-C40QRR-4L6,485,778,123,61,
9,1W46SJ7J7-CZ2M5Y-3MW,1W46SSPX6-1RXLRVV-41H,517,120,222,47,1W46SSPX6-QLPJ0W-41L


'linking-phrases'

Unnamed: 0,mapID,id,label
0,1W46SJ7J7-CZ2M5Y-3MW,1W46TM421-27G07F9-4TK,registers\nservices
1,1W46SJ7J7-CZ2M5Y-3MW,1W46T9RM9-1DSZ9JR-4L9,part of\none or more
2,1W46SJ7J7-CZ2M5Y-3MW,1W46TW65F-4XJML1-50Q,may\nhave\nchild
3,1W46SJ7J7-CZ2M5Y-3MW,1W46TH8RV-24NTRMW-4PZ,sets
4,1W46SJ7J7-CZ2M5Y-3MW,1W46SM0YF-1YJ6WN3-3T3,unlock
5,1W46SJ7J7-CZ2M5Y-3MW,1W46SZN6T-18WP7K1-47X,are\nearned\nby\nengaging\nin
6,1W46SJ7J7-CZ2M5Y-3MW,1W46V1C73-ML0GZ5-547,????


'linking-phrase-appearances'

Unnamed: 0,mapID,id,x,y,width,height
0,1W46SJ7J7-CZ2M5Y-3MW,1W46TM421-27G07F9-4TK,320,696,48,25
1,1W46SJ7J7-CZ2M5Y-3MW,1W46T9RM9-1DSZ9JR-4L9,318,776,67,25
2,1W46SJ7J7-CZ2M5Y-3MW,1W46TW65F-4XJML1-50Q,332,895,28,36
3,1W46SJ7J7-CZ2M5Y-3MW,1W46TH8RV-24NTRMW-4PZ,327,834,24,11
4,1W46SJ7J7-CZ2M5Y-3MW,1W46SM0YF-1YJ6WN3-3T3,306,232,37,11
5,1W46SJ7J7-CZ2M5Y-3MW,1W46SZN6T-18WP7K1-47X,308,474,51,64
6,1W46SJ7J7-CZ2M5Y-3MW,1W46V1C73-ML0GZ5-547,664,903,29,11


'connections'

Unnamed: 0,mapID,id,from-id,to-id
0,1W46SJ7J7-CZ2M5Y-3MW,1W46TM422-1MHMDCR-4TT,1W46TM421-27G07F9-4TK,1W46TM41Y-19FLRVD-4TG
1,1W46SJ7J7-CZ2M5Y-3MW,1W46SRMLQ-19B3W1F-40S,1W46SM0YF-1YJ6WN3-3T3,1W46SSPX6-1RXLRVV-41H
2,1W46SJ7J7-CZ2M5Y-3MW,1W46TW65G-19TF567-50Z,1W46TW65F-4XJML1-50Q,1W46TR9DT-LKLSKC-4X1
3,1W46SJ7J7-CZ2M5Y-3MW,1W46TH8RW-V4LN7R-4Q2,1W46T9398-1T8V5D2-4JS,1W46TH8RV-24NTRMW-4PZ
4,1W46SJ7J7-CZ2M5Y-3MW,1W46T9RMB-1V5BK3-4LL,1W46T9RM9-1DSZ9JR-4L9,1W46T9RM6-C40QRR-4L6
5,1W46SJ7J7-CZ2M5Y-3MW,1W46V1C74-1TQXY7D-54H,1W46V1C73-ML0GZ5-547,1W46V1C70-1X22SGM-544
6,1W46SJ7J7-CZ2M5Y-3MW,1W46SZN6V-1STCL5N-485,1W46SZN6T-18WP7K1-47X,1W46SZN6Q-17KYF41-47T
7,1W46SJ7J7-CZ2M5Y-3MW,1W46SNVXW-1800S1Z-3WF,1W46SM0YF-1YJ6WN3-3T3,1W46SNVXT-H4WMNL-3WB
8,1W46SJ7J7-CZ2M5Y-3MW,1W46V1C73-1XGWZHV-54B,1W46TR9DT-LKLSKC-4X1,1W46V1C73-ML0GZ5-547
9,1W46SJ7J7-CZ2M5Y-3MW,1W46T6SJY-18ZW3K2-4H7,1W46SM0YF-1YJ6WN3-3T3,1W46T6SJV-25PGHM3-4H4


'connection-appearances'

Unnamed: 0,mapID,id,from-pos,to-pos
0,1W46SJ7J7-CZ2M5Y-3MW,1W46TM422-1MHMDCR-4TT,right,left
1,1W46SJ7J7-CZ2M5Y-3MW,1W46SRMLQ-19B3W1F-40S,right,left
2,1W46SJ7J7-CZ2M5Y-3MW,1W46TW65G-19TF567-50Z,right,left
3,1W46SJ7J7-CZ2M5Y-3MW,1W46TH8RW-V4LN7R-4Q2,right,left
4,1W46SJ7J7-CZ2M5Y-3MW,1W46T9RMB-1V5BK3-4LL,right,left
5,1W46SJ7J7-CZ2M5Y-3MW,1W46V1C74-1TQXY7D-54H,right,left
6,1W46SJ7J7-CZ2M5Y-3MW,1W46SZN6V-1STCL5N-485,right,left
7,1W46SJ7J7-CZ2M5Y-3MW,1W46SNVXW-1800S1Z-3WF,right,left
8,1W46SJ7J7-CZ2M5Y-3MW,1W46V1C73-1XGWZHV-54B,right,left
9,1W46SJ7J7-CZ2M5Y-3MW,1W46T6SJY-18ZW3K2-4H7,right,left


TypeError: 'NoneType' object is not subscriptable

In [29]:
display(df)

NameError: name 'df' is not defined

In [None]:
b = 2
c=3
d=4
a =[b,c,d]
display(a)
d = 5
display (a)