<a href="https://colab.research.google.com/github/pradh/api-python/blob/svg/notebooks/Topic_Validator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [50]:
# @title Setup
# Import MCF parser from github
import os

import pandas as pd
import requests
from IPython.display import display, HTML


PARSER_URL = 'https://raw.githubusercontent.com/datacommonsorg/tools/master/kg_util/mcf_parser.py'
# API key for the variable-info endpoint. Set the DC_API_KEY environment
# variable to use your own key; the literal is kept only as a
# backward-compatible default.
# NOTE(review): a key committed in a notebook is public -- consider rotating it.
DC_API_KEY = os.environ.get('DC_API_KEY', 'AIzaSyCTI4Xz-UW_G2Q2RfknhcfdAnTHq5X5XuI')
# Ends with '&' so that 'nodes=...' query parameters can be appended directly.
VAR_INFO_API = f'https://api.datacommons.org/v1/bulk/info/variable?key={DC_API_KEY}&'
HEADERS = {'Accept': 'application/json'}

# The parser is saved as mcf_parser.py so that `import mcf_parser` below finds
# it; the file name must match the module name being imported.
parser_resp = requests.get(PARSER_URL, timeout=60)
# Fail loudly on a bad download instead of writing an error page to disk and
# hitting a confusing error on import.
parser_resp.raise_for_status()
with open('mcf_parser.py', 'w', encoding='utf-8') as fw:
  fw.write(parser_resp.text)
import mcf_parser as mcflib

# Raise pandas' row display limit.
pd.options.display.max_rows = 100000

In [51]:
# URLs of the topic MCF files to validate, one per line. Splitting on
# whitespace yields the list of URLs in the order written.
TOPIC_MCFS = '''
https://gist.githubusercontent.com/lucy-kind/64326ebc0c39466c90734b0c3389a27d/raw/0f9b2e3adb221b5b387989bd8daef0f481af93f0/RacialPopulationByAge.mcf
https://gist.githubusercontent.com/lucy-kind/a72214243a4c596e2233c9d26b1dd56e/raw/53e8596a22cd792944627a0f020ad3a68bad0936/RacialGenderPopulationByAge.mcf
https://gist.githubusercontent.com/lucy-kind/0ee0cbfcc1c9b5e84c70e52541d20d89/raw/28aa1ff28e9668ffd529fea7c39208a653c21f52/MedicalConditionByAge.mcf
https://gist.githubusercontent.com/lucy-kind/c1d1d91d8b6f2a6d7da05a998b3d2c96/raw/fb1ab69d91b1d3e0aae5d1683bfdd92a86e1e37b/kind_topics.mcf
https://gist.githubusercontent.com/lucy-kind/10f40872d6aaa092afbc2d6aeb570874/raw/02b1a3fdd5e8b0cd7e82ff50dcede5420e2470c6/topics101-200.mcf
'''.split()

In [52]:
# @title Implementation

def process_svpg(svpg, node, lstats):
  """Checks one svpg node and processes its members.

  Args:
    svpg: id of the svpg node; recorded in `lstats` when a property is missing.
    node: dict of the svpg's properties (property name -> list of values).
    lstats: per-topic stats dict, updated in place.

  The svpg id is appended to ERR_MissingName when `node` has no 'name' and to
  ERR_MissingMember when it has no 'member'. When 'member' is present, its
  values are passed to process_svs with is_svpg=True.
  """
  required = (('name', 'ERR_MissingName'), ('member', 'ERR_MissingMember'))
  for prop, err_key in required:
    if prop not in node:
      lstats[err_key].append(svpg)
  if 'member' in node:
    process_svs(node['member'], lstats, is_svpg=True)


def process_svs(svs, lstats, is_svpg=False):
  """Looks up place coverage for variables (SVs) and folds it into `lstats`.

  Issues one GET to VAR_INFO_API with a 'nodes=' parameter per entry of `svs`.
  For every entry in the response's 'data' list:
    * NUM_SVs is incremented (also for entries that have no 'info');
    * an entry without 'info' has its 'node' appended to ERR_MissingSV;
    * the 'placeCount' values of its 'placeTypeSummary' are summed and, when
      the sum is non-zero, folded into NUM_Places_Min / Max / Tot.

  Args:
    svs: list of variable ids to query; nothing happens when it is empty.
    lstats: per-topic stats dict, updated in place.
    is_svpg: True when `svs` are the members of an svpg. Currently unused.

  Raises:
    requests.HTTPError: if the API responds with an error status.
    requests.Timeout: if the API does not answer within the timeout.
  """
  if not svs:
    return
  url = VAR_INFO_API + '&'.join(['nodes=' + sv for sv in svs])
  resp = requests.get(url, headers=HEADERS, timeout=60)
  # Surface API failures: an error body has no 'data', which would otherwise
  # be indistinguishable from "no variables" and yield all-zero stats.
  resp.raise_for_status()
  for sv_info in resp.json().get('data', []):
    sv = sv_info['node']
    if 'info' not in sv_info:
      lstats['ERR_MissingSV'].append(sv)
    # Total number of places across all place types for this variable.
    c = 0
    for s in sv_info.get('info', {}).get('placeTypeSummary', {}).values():
      c += s['placeCount']
    if c:
      # NUM_Places_Min starts at 0, which doubles as the "not set yet" marker.
      if not lstats['NUM_Places_Min']:
        lstats['NUM_Places_Min'] = c
      else:
        lstats['NUM_Places_Min'] = min(lstats['NUM_Places_Min'], c)
      lstats['NUM_Places_Max'] = max(lstats['NUM_Places_Max'], c)
      lstats['NUM_Places_Tot'] += c
    lstats['NUM_SVs'] += 1

  # TODO: Handle is_svpg


def load_mcf(url):
  """Downloads an MCF file and indexes its triples by subject.

  The download is saved in the working directory under the last path segment
  of `url`, then parsed with mcflib.mcf_to_triples.

  Args:
    url: URL of the MCF file.

  Returns:
    Dict mapping each subject (with every 'dcid:' occurrence removed) to a
    dict of property -> list of object values, in file order.

  Raises:
    requests.HTTPError: if the download responds with an error status.
    requests.Timeout: if the server does not answer within the timeout.
  """
  fname = url.split('/')[-1]
  resp = requests.get(url, timeout=60)
  # Do not write (and then parse) an error page as if it were MCF.
  resp.raise_for_status()
  # Explicit encoding so writing and re-reading do not depend on the platform
  # default.
  with open(fname, 'w', encoding='utf-8') as f:
    f.write(resp.text)
  mcf = {}
  with open(fname, 'r', encoding='utf-8') as f:
    for (s, p, o, _) in mcflib.mcf_to_triples(f):
      s = s.replace('dcid:', '')
      mcf.setdefault(s, {}).setdefault(p, []).append(o)
  return mcf


def process(mcf):
  """Builds validation stats for every Topic node in `mcf`.

  Args:
    mcf: dict mapping node id -> {property: [values]}, as built by load_mcf.

  Returns:
    Column-oriented dict with one list per column and one entry per Topic
    node: the topic id, the three ERR_* lists, and the NUM_* counters filled
    in by process_svpg / process_svs.
  """
  stats = {
    'Topic': [],
    'ERR_MissingName': [],
    'ERR_MissingSV': [],
    'ERR_MissingMember': [],
    'NUM_SVs': [],
    'NUM_Places_Min': [],
    'NUM_Places_Max': [],
    'NUM_Places_Tot': [],
  }
  for dcid, node in mcf.items():
    # A node without typeOf cannot be a Topic; skip it instead of raising.
    types = node.get('typeOf') or [None]
    if types[0] != 'Topic':
      continue
    lstats = {
      'Topic': dcid,
      'ERR_MissingName': [],
      'ERR_MissingSV': [],
      'ERR_MissingMember': [],
      'NUM_SVs': 0,
      'NUM_Places_Min': 0,
      'NUM_Places_Max': 0,
      'NUM_Places_Tot': 0,
    }
    if 'name' not in node:
      # Record the offending node's id (as process_svpg does), not its type.
      lstats['ERR_MissingName'].append(dcid)
    svs = []
    # A topic without relevantVariable simply has nothing to look up.
    for v in node.get('relevantVariable', []):
      if v.startswith('dc/svpg'):
        # An svpg that is not defined in this file is passed as an empty node,
        # so it is reported as missing name/member rather than raising.
        process_svpg(v, mcf.get(v, {}), lstats)
      else:
        svs.append(v)
    process_svs(svs, lstats)
    for k, v in lstats.items():
      stats[k].append(v)
  return stats

# Validate each topic MCF file and render one stats table per file.
for tf in TOPIC_MCFS:
  mcf_name = tf.rsplit("/", 1)[-1]
  print(f'Processing {mcf_name}')
  mcf_nodes = load_mcf(tf)
  stats = process(mcf_nodes)
  df = pd.DataFrame(stats)
  table_html = df.to_html()
  # Every comma in the rendered HTML becomes a line break, so list-valued
  # cells show one entry per line.
  display(HTML(table_html.replace(",", "<br>")))

Processing RacialPopulationByAge.mcf


Unnamed: 0,Topic,ERR_MissingName,ERR_MissingSV,ERR_MissingMember,NUM_SVs,NUM_Places_Min,NUM_Places_Max,NUM_Places_Tot
0,dc/topic/AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRacesPopulationByAge,[],[Count_Person_0To4Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_10To14Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_15To19Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_20To24Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_25To29Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_30To34Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_35To39Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_40To44Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_45To49Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_50To54Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_55To59Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_5To9Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_60To64Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_65To69Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_70To74Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_75To79Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_80To84Years_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_85OrMoreYears_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces],[],18,0,0,0
1,dc/topic/AmericanIndianAndAlaskaNativeAlonePopulationByAge,[],[],[],18,3197,3197,57546
2,dc/topic/AsianAlonePopulationByAge,[],[],[],18,3193,3196,57525
3,dc/topic/AsianOrPacificIslanderPopulationByAge,[],[],[],18,3137,3189,56518
4,dc/topic/BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRacesPopulationByAge,[],[Count_Person_0To4Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_10To14Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_15To19Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_20To24Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_25To29Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_30To34Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_35To39Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_40To44Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_45To49Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_50To54Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_55To59Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_5To9Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_60To64Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_65To69Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_70To74Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_75To79Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_80To84Years_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_85OrMoreYears_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces],[],18,0,0,0
5,dc/topic/BlackOrAfricanAmericanAlonePopulationByAge,[],[],[],18,3197,3573,57922
6,dc/topic/HispanicOrLatinoPopulationByAge,[],[],[],18,3196,3573,57906
7,dc/topic/HispanicOrLatino_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRacesPopulationByAge,[],[],[],18,3141,3141,56538
8,dc/topic/HispanicOrLatino_AmericanIndianAndAlaskaNativeAlonePopulationByAge,[],[],[],18,3192,3196,57508
9,dc/topic/HispanicOrLatino_AsianAloneOrInCombinationWithOneOrMoreOtherRacesPopulationByAge,[],[],[],18,3141,3141,56538


Processing RacialGenderPopulationByAge.mcf


Unnamed: 0,Topic,ERR_MissingName,ERR_MissingSV,ERR_MissingMember,NUM_SVs,NUM_Places_Min,NUM_Places_Max,NUM_Places_Tot
0,dc/topic/AmericanIndianAndAlaskaNativeAloneFemalePopulationByAge,[],[],[],18,3197,3197,57546
1,dc/topic/AmericanIndianAndAlaskaNativeAloneMalePopulationByAge,[],[],[],18,3197,3197,57546
2,dc/topic/AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRacesFemalePopulationByAge,[],[Count_Person_0To4Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_10To14Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_15To19Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_20To24Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_25To29Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_30To34Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_35To39Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_40To44Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_45To49Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_50To54Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_55To59Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_5To9Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_60To64Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_65To69Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_70To74Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_75To79Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_80To84Years_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  
Count_Person_85OrMoreYears_Female_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces],[],18,0,0,0
3,dc/topic/AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRacesMalePopulationByAge,[],[Count_Person_0To4Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_10To14Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_15To19Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_20To24Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_25To29Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_30To34Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_35To39Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_40To44Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_45To49Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_50To54Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_55To59Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_5To9Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_60To64Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_65To69Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_70To74Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_75To79Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_80To84Years_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces  Count_Person_85OrMoreYears_Male_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces],[],18,0,0,0
4,dc/topic/AsianAloneFemalePopulationByAge,[],[],[],18,3193,192629,1194123
5,dc/topic/AsianAloneMalePopulationByAge,[],[],[],18,3193,192629,1194123
6,dc/topic/AsianOrPacificIslanderFemalePopulationByAge,[],[],[],18,3137,3189,56518
7,dc/topic/AsianOrPacificIslanderMalePopulationByAge,[],[],[],18,3137,3189,56518
8,dc/topic/BlackOrAfricanAmericanAloneFemalePopulationByAge,[],[],[],18,3197,192630,1194520
9,dc/topic/BlackOrAfricanAmericanAloneMalePopulationByAge,[],[],[],18,3197,192630,1194520


Processing MedicalConditionByAge.mcf


Unnamed: 0,Topic,ERR_MissingName,ERR_MissingSV,ERR_MissingMember,NUM_SVs,NUM_Places_Min,NUM_Places_Max,NUM_Places_Tot
0,dc/topic/ArthritisFemalePopulationByAge,[],[],[],18,1,1,18
1,dc/topic/ArthritisMalePopulationByAge,[],[],[],18,1,1,18
2,dc/topic/AsthmaFemalePopulationByAge,[],[],[],18,1,1,18
3,dc/topic/AsthmaMalePopulationByAge,[],[],[],18,1,1,18
4,dc/topic/CancerFemalePopulationByAge,[],[],[],18,1,1,18
5,dc/topic/CancerMalePopulationByAge,[],[],[],18,1,1,18
6,dc/topic/DementiaFemalePopulationByAge,[],[],[],18,1,1,18
7,dc/topic/DementiaMalePopulationByAge,[],[],[],18,1,1,18
8,dc/topic/DiabetesFemalePopulationByAge,[],[],[],18,1,1,18
9,dc/topic/DiabetesMalePopulationByAge,[],[],[],18,1,1,18


Processing kind_topics.mcf


Unnamed: 0,Topic,ERR_MissingName,ERR_MissingSV,ERR_MissingMember,NUM_SVs,NUM_Places_Min,NUM_Places_Max,NUM_Places_Tot
0,dc/topic/AdolescentBirthRate,[],[],[],2,215,225,440
1,dc/topic/AdultCorrectionalFacilitiesResidents,[],[],[],6,53,53,318
2,dc/topic/Age,[],[Median_Age_Person_BlackAlone],[],12,1477,410606,3002690
3,dc/topic/AlcoholIndustry,[],[],[],17,1,2610,8805
4,dc/topic/AmbientAirPollution,[],[],[],3,180,183,546
5,dc/topic/Avalanche,[],[],[],3,18,8000,15995
6,dc/topic/BankingAccess,[],[sdg/FB_BNK_ACCSS_15GEQ_FALE],[],10,56,160,1231
7,dc/topic/BingeDrinking,[],[],[],11,12,136367,409275
8,dc/topic/Births,[],[],[],6,631,2192,10747
9,dc/topic/CO2Emissions,[],[],[],29,1,222,250


Processing topics101-200.mcf


Unnamed: 0,Topic,ERR_MissingName,ERR_MissingSV,ERR_MissingMember,NUM_SVs,NUM_Places_Min,NUM_Places_Max,NUM_Places_Tot
0,dc/topic/BingeDrinking,[],[],[],11,12,136367,409275
1,dc/topic/HeightStunting,[],[],[],2,155,155,310
2,dc/topic/Malnutrition,[],[dc/topic/HeightStunting],[],5,32,183,279
3,dc/topic/PhysicalActivity,[],[],[],6,11,31,126
4,dc/topic/PhysicalInactivity,[],[],[],0,0,0,0
5,dc/topic/PopulationFormerSmoker,[],[],[],6,30,31,183
6,dc/topic/PopulationNonsmokingTobaccoUser,[],[],[],6,31,32,189
7,dc/topic/PopulationNormalWeight,[],[],[],6,31,32,189
8,dc/topic/PopulationObese,[],[Count_Person_18OrMoreYears_Female_Obesity_AsAFractionOf_Count_Person_18OrMoreYears_Male  Count_Person_18OrMoreYears_Obesity_Rural_AsAFractionOf_Count_Person_18OrMoreYears_Urban],[],7,1,136367,409101
9,dc/topic/PopulationOverweight,[],[Count_Person_Upto4Years_Female_Overweight_AsFractionOf_Count_Person_Upto4Years_Male],[],8,31,118,307
