In [315]:
import csv
import requests
import xml.etree.ElementTree as ET 
import pandas as pd 

In [316]:
tree = ET.parse('ioofservicT1-Metadata.xml')

In [317]:
root = tree.getroot()

### Get all table fields info

In [334]:
fields = []

In [335]:
for index, child in enumerate(root[0][1]):
    for item in child:
        if 'EntityType' in child.tag:
            field = item.attrib
            field['Table'] = child.attrib['Name']
            fields.append(field)
        else:
            continue

In [336]:
df_fields = pd.DataFrame.from_records(fields)

### Get all table level info

In [337]:
tables = []

In [338]:
for index, child in enumerate(root[0][0][0]):
    if 'EntitySet' in child.tag:
        table = child.attrib
        for grand_child in child:
            try:
                table['Description'] = grand_child[1].text
            except:
                table['Description'] = ''
        tables.append(table)

In [339]:
df_tables = pd.DataFrame.from_records(tables)

In [340]:
df_tables.shape

(620, 8)

### Extract relationship

In [341]:
associations = []

In [342]:
for index, child in enumerate(root[0][0][0]):
    # print(child.tag)
    for item in child:
        if 'AssociationSet' in child.tag:
            association = child.attrib
            association['ToTable'] = child[1].attrib['EntitySet']
        else:
            continue
    associations.append(association)

In [343]:
df_associations = pd.DataFrame.from_records(associations).drop_duplicates()

In [344]:
df_fields.drop_duplicates()

Unnamed: 0,Table,Name,Type,Nullable,{http://www.successfactors.com/edm/sap}required,{http://www.successfactors.com/edm/sap}creatable,{http://www.successfactors.com/edm/sap}updatable,{http://www.successfactors.com/edm/sap}upsertable,{http://www.successfactors.com/edm/sap}visible,{http://www.successfactors.com/edm/sap}sortable,...,FromRole,ToRole,{http://www.successfactors.com/edm/sap}field-control,{http://www.successfactors.com/edm/sap}picklist,{http://www.successfactors.com/edm/sap}inlineRequired,Precision,Scale,DefaultValue,{http://www.successfactors.com/edm/sap}sensitive-personal-data,{http://www.successfactors.com/edm/sap}elm-strength
0,Entity,,,,,,,,,,...,,,,,,,,,,
1,Entity,deletable,Edm.Boolean,true,false,false,false,false,true,false,...,,,,,,,,,,
2,Entity,deletablePath,Edm.String,true,false,false,false,false,true,false,...,,,,,,,,,,
3,Entity,effectiveDated,Edm.Boolean,true,false,false,false,false,true,false,...,,,,,,,,,,
4,Entity,insertable,Edm.Boolean,true,false,false,false,false,true,false,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13195,NominationTarget,nominationId,Edm.Int64,false,true,false,false,false,true,true,...,,,,,,,,,,
13196,NominationTarget,nominationType,Edm.Int32,false,true,false,false,false,true,true,...,,,,,,,,,,
13197,NominationTarget,positionNav,,,false,false,false,false,true,true,...,NominationTarget,Position,,,,,,,,
13198,NominationTarget,successorNav,,,false,false,false,false,true,true,...,NominationTarget,Successor,,,,,,,,


In [345]:
df_tables = df_tables.loc[df_tables['Name']!='Entity']

### Post extract processing

##### 1. df_fields table

In [346]:
df_fields['Type'] = df_fields['Type'].str.replace('Edm.', '')

In [347]:
cols = df_fields.columns.to_list()

In [348]:
for idx, col in enumerate(cols):
    if '{http://www.successfactors.com/edm/sap}' in col:
        col = col.replace('{http://www.successfactors.com/edm/sap}', '')
        cols[idx] = col
    else:
        continue

In [349]:
df_fields.columns = cols

In [350]:
df_fields.rename(columns={'Name': 'Column'}, inplace=True)

In [351]:
df_fields = df_fields.loc[df_fields['Table']!='Entity']
df_fields = df_fields.loc[df_fields['Column'].notnull()]
df_fields.shape

(12564, 26)

In [290]:
df_fields.head(2)

Unnamed: 0,Table,Column,Type,Nullable,required,creatable,updatable,upsertable,visible,sortable,...,FromRole,ToRole,field-control,picklist,inlineRequired,Precision,Scale,DefaultValue,sensitive-personal-data,elm-strength
17,PaymentInformationDetailV3MWI,,,,,,,,,,...,,,,,,,,,,
18,PaymentInformationDetailV3MWI,PaymentInformationDetailV3_externalCode,Int64,False,True,True,True,True,True,True,...,,,,,,,,,,


##### 2. df_tables

In [352]:
df_tables.columns

Index(['Name', 'EntityType', '{http://www.successfactors.com/edm/sap}label',
       '{http://www.successfactors.com/edm/sap}creatable',
       '{http://www.successfactors.com/edm/sap}updatable',
       '{http://www.successfactors.com/edm/sap}upsertable',
       '{http://www.successfactors.com/edm/sap}deletable', 'Description'],
      dtype='object')

In [353]:
cols = df_tables.columns.to_list()

In [354]:
for idx, col in enumerate(cols):
    if '{http://www.successfactors.com/edm/sap}' in col:
        col = col.replace('{http://www.successfactors.com/edm/sap}', '')
        cols[idx] = col
    else:
        continue

In [355]:
df_tables.columns = cols

In [356]:
cols

['Name',
 'EntityType',
 'label',
 'creatable',
 'updatable',
 'upsertable',
 'deletable',
 'Description']

In [357]:
df_tables['MetaType'] = 'Table'

In [359]:
df_tables = df_tables[['Name', 'EntityType', 'MetaType', 'Description']]

##### 3. df_associations

In [360]:
cols = df_associations.columns.to_list()

In [361]:
for idx, col in enumerate(cols):
    if '{http://www.successfactors.com/edm/sap}' in col:
        col = col.replace('{http://www.successfactors.com/edm/sap}', '')
        cols[idx] = col
    else:
        continue

In [362]:
df_associations.columns = cols

In [363]:
df_associations

Unnamed: 0,Name,Association,ToTable
0,mdfSystemRecordStatusNav_of_EmpCostDistribution,SFOData.mdfSystemRecordStatusNav_of_EmpCostDis...,MDFEnumValue
620,mdfSystemStatusNav_of_TimeAccountDetail,SFOData.mdfSystemStatusNav_of_TimeAccountDetail,MDFEnumValue
621,Background_Languages_variant_picklist,SFOData.Background_Languages_variant_picklist,PicklistOption
622,periodModelNav_of_WorkSchedule,SFOData.periodModelNav_of_WorkSchedule,WorkSchedule
623,legalEntityNav_of_PayrollDataMaintenanceTask,SFOData.legalEntityNav_of_PayrollDataMaintenan...,FOCompany
...,...,...,...
4110,PersonKey_UserAccount,SFOData.PersonKey_UserAccount,UserAccount
4111,legalEntityTypeNav_of_FOLegalEntityLocalUSA,SFOData.legalEntityTypeNav_of_FOLegalEntityLoc...,PickListValueV2
4112,mdfSystemStatusNav_of_EmployeeTimePOL,SFOData.mdfSystemStatusNav_of_EmployeeTimePOL,MDFEnumValue
4113,mdfSystemStatusNav_of_ShiftClassification,SFOData.mdfSystemStatusNav_of_ShiftClassification,MDFEnumValue


### Merge tables

In [364]:
df_fields.tail(2)

Unnamed: 0,Table,Column,Type,Nullable,required,creatable,updatable,upsertable,visible,sortable,...,FromRole,ToRole,field-control,picklist,inlineRequired,Precision,Scale,DefaultValue,sensitive-personal-data,elm-strength
13198,NominationTarget,successorNav,,,False,False,False,False,True,True,...,NominationTarget,Successor,,,,,,,,
13199,NominationTarget,talentPoolNav,,,False,False,False,False,True,True,...,NominationTarget,TalentPool,,,,,,,,


In [365]:
df_tables.head(2)

Unnamed: 0,Name,EntityType,MetaType,Description
1,PaymentInformationDetailV3MWI,SFOData.PaymentInformationDetailV3MWI,Table,Child object of PaymentInformationV3. Stores t...
2,TimeTypeCAN,SFOData.TimeTypeCAN,Table,Contains the time type canada definition.


In [366]:
df = pd.merge(df_fields, df_tables, left_on='Table', right_on='Name', how='left')

In [367]:
df_associations.head(2)

Unnamed: 0,Name,Association,ToTable
0,mdfSystemRecordStatusNav_of_EmpCostDistribution,SFOData.mdfSystemRecordStatusNav_of_EmpCostDis...,MDFEnumValue
620,mdfSystemStatusNav_of_TimeAccountDetail,SFOData.mdfSystemStatusNav_of_TimeAccountDetail,MDFEnumValue


In [368]:
df_associations = df_associations[['Association', 'ToTable']]

In [369]:
df = pd.merge(df, df_associations, left_on='Relationship', right_on='Association', how='left')

In [370]:
df.shape

(12564, 32)

In [371]:
df.drop(columns=['Name', 'EntityType'], inplace=True)

In [372]:
df.to_excel('sf_schema.xlsx', index=False)

In [373]:
df

Unnamed: 0,Table,Column,Type,Nullable,required,creatable,updatable,upsertable,visible,sortable,...,inlineRequired,Precision,Scale,DefaultValue,sensitive-personal-data,elm-strength,MetaType,Description,Association,ToTable
0,PaymentInformationDetailV3MWI,PaymentInformationDetailV3_externalCode,Int64,false,true,true,true,true,true,true,...,,,,,,,Table,Child object of PaymentInformationV3. Stores t...,,
1,PaymentInformationDetailV3MWI,PaymentInformationV3_effectiveStartDate,DateTime,false,true,true,true,true,true,true,...,,,,,,,Table,Child object of PaymentInformationV3. Stores t...,,
2,PaymentInformationDetailV3MWI,PaymentInformationV3_worker,String,false,true,true,true,true,true,true,...,,,,,,,Table,Child object of PaymentInformationV3. Stores t...,,
3,PaymentInformationDetailV3MWI,accountType,String,true,false,true,true,true,true,true,...,,,,,,,Table,Child object of PaymentInformationV3. Stores t...,,
4,PaymentInformationDetailV3MWI,createdBy,String,true,false,false,false,false,true,true,...,,,,,,,Table,Child object of PaymentInformationV3. Stores t...,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12559,NominationTarget,nominationId,Int64,false,true,false,false,false,true,true,...,,,,,,,Table,Contains the nomination information,,
12560,NominationTarget,nominationType,Int32,false,true,false,false,false,true,true,...,,,,,,,Table,Contains the nomination information,,
12561,NominationTarget,positionNav,,,false,false,false,false,true,true,...,,,,,,,Table,Contains the nomination information,SFOData.NominationTarget_ASSO_Position,Position
12562,NominationTarget,successorNav,,,false,false,false,false,true,true,...,,,,,,,Table,Contains the nomination information,SFOData.NominationTarget_ASSO_Successor,Successor
