<a href="https://colab.research.google.com/github/schemaorg/schemaorg/blob/main/scripts/Schema_org_Examples.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initialisation & Data Source Selection
Select a data source, then run the cell.

**Pre-V11.0: *Always select 'Github:main'***




In [1]:
import os,sys
## Source Selection
DataSource = "Github:main" #@param ["Github:main", "webschemas.org", "schema.org"] {allow-input: true}

gitpullrequired = False
if DataSource == "Github:main":
  gitpullrequired = True
  termSource = "default"
  examplesSource = "default"
elif DataSource == "webschemas.org":
  termSource = "https://webschemas.org/version/latest/schemaorg-current-https.ttl"
  examplesSource = "https://webschemas.org/version/latest/schemaorg-all-examples.txt"
elif DataSource == "schema.org":
  termSource = "https://schema.org/version/latest/schemaorg-current-https.ttl"
  examplesSource = "https://schema.org/version/latest/schemaorg-all-examples.txt"

os.chdir("/content")
if not os.path.isdir("lib"):
  !mkdir lib
for path in [os.getcwd(),"lib","/content/lib"]:
  sys.path.insert( 1, path ) #Pickup libs from shipped lib directory

print("Loading schemaorg support libraries ...")  
!(cd lib; curl -s -O https://raw.githubusercontent.com/schemaorg/schemaorg/main/SchemaTerms/sdotermsource.py)
!(cd lib; curl -s -O https://raw.githubusercontent.com/schemaorg/schemaorg/main/SchemaTerms/sdoterm.py)
!(cd lib; curl -s -O https://raw.githubusercontent.com/schemaorg/schemaorg/main/SchemaTerms/localmarkdown.py)
!(cd lib; curl -s -O https://raw.githubusercontent.com/schemaorg/schemaorg/main/SchemaExamples/schemaexamples.py)
!(cd lib; curl -s -O https://raw.githubusercontent.com/schemaorg/schemaorg/main/requirements.txt)

!pip --quiet install -r lib/requirements.txt

from schemaexamples import SchemaExamples, Example
from sdotermsource import SdoTermSource
from sdoterm import *
from localmarkdown import Markdown

Markdown.setWikilinkCssClass("localLink")
Markdown.setWikilinkPrePath("https://schema.org/")
Markdown.setWikilinkPostPath("")

if gitpullrequired:
  print("\nPulling data from schemaorg repository...")
  if os.path.isdir("schemaorg"):
    !rm -r schemaorg
  !git clone https://github.com/schemaorg/schemaorg.git
  os.chdir("schemaorg") #If sources are 'default' file paths are relative to schemaorg dir.

print("\nLoading triples file(s)")
SdoTermSource.loadSourceGraph(termSource,init=True)
print ("loaded %s triples - %s terms" % (len(SdoTermSource.sourceGraph()),len(SdoTermSource.getAllTerms())) )

print("\nLoading examples file(s)")
SchemaExamples.loadExamplesFiles(examplesSource,init=True)
print("Loaded %d examples " % (SchemaExamples.count()))

os.chdir("/content") #put us back to a consistant place

############## Utils ########
def percentage(part, whole):
  """Express ``part`` as a percentage of ``whole``.

  Both arguments are converted with ``float()``. A falsy ``part`` (0, None,
  empty string, ...) yields ``0.0`` without looking at ``whole`` at all, so
  ``percentage(0, 0)`` is ``0.0``. A truthy ``part`` with a zero ``whole``
  raises ``ZeroDivisionError``.

  Returns:
    float: the percentage, e.g. ``percentage(1, 4) == 25.0``.
  """
  return 100 * float(part) / float(whole) if part else float(0)

def drawProgressBar(percent, barLen = 20, pre=""):
  """Redraw a one-line text progress bar on standard output.

  Args:
    percent: completed fraction, where 1 represents 100% (it is multiplied
      by 100 for the numeric read-out).
    barLen: number of character cells between the brackets.
    pre: text written in front of the bar.

  The line starts with a carriage return so that successive calls overwrite
  one another; no newline is written.
  """
  out = sys.stdout
  out.write("\r")
  # One "=" for every cell below the filled threshold, a space for the rest.
  cells = "".join("=" if cell < int(barLen * percent) else " " for cell in range(barLen))
  out.write("%s[ %s ] %.2f%%" % (pre, cells, percent * 100))
  out.flush()

def clearProgressBar(pre=""):
  """Return the cursor to the start of the line and write ``pre`` there.

  Args:
    pre: text written after the carriage return (nothing by default).

  No newline is written; standard output is flushed afterwards.
  """
  for chunk in ("\r", pre):
    sys.stdout.write(chunk)
  sys.stdout.flush()




Loading schemaorg support libraries ...

Pulling data from schemaorg repository...
Cloning into 'schemaorg'...
remote: Enumerating objects: 23781, done.[K
remote: Total 23781 (delta 0), reused 0 (delta 0), pack-reused 23781[K
Receiving objects: 100% (23781/23781), 96.36 MiB | 14.66 MiB/s, done.
Resolving deltas: 100% (16956/16956), done.
Checking out files: 100% (1788/1788), done.

Loading triples file(s)
SdoTermSource.loadSourceGraph() loading from default files found in globs: ['data/*.ttl', 'data/ext/*/*.ttl']
loaded 15081 triples - 2636 terms

Loading examples file(s)
SchemaExamples.loadExamplesFiles() loading from default files found in globs: ['data/*examples.txt', 'data/ext/*/*examples.txt']
Loaded 462 examples 


# Examples Overview

In [4]:
#@title Analyse Examples coverage
ShowErrors = "False" #@param ["False", "True"] {allow-input: true}
import re

# The form value is a string; only the exact text "True" turns on the
# per-example error listing.
show = ShowErrors == "True"

# Any character outside digits, ASCII letters, "-" and "_" marks a term name
# as invalid. Compiled once instead of on every term.
invalidTermChars = re.compile('[^0-9a-zA-Z-_]')

print("Total Examples: %s" % SchemaExamples.count())
print("Terms with examples: %s" % len(SchemaExamples.EXAMPLESMAP))
maxterm = invalidTerms = 0
maxex = max((len(exs) for exs in SchemaExamples.EXAMPLESMAP.values()), default=0)
print("Max examples for a term: %s" % maxex)

# --- Pass 1: check every term name referenced by every example -------------
# Deliberately not called `all`: binding that name at cell level would shadow
# the builtin all() for every cell executed afterwards in this kernel.
exampleList = SchemaExamples.allExamples()
total = len(exampleList)
done = 0
for ex in exampleList:
  debugdata = ""
  if gitpullrequired:
    # File/position details are only reported when the examples came from the
    # cloned repository.
    debugdata = " (%s [%s]) " % (ex.exmeta.get('file',''),ex.exmeta.get('filepos',''))
  maxterm = max(maxterm,len(ex.terms))
  for term in ex.terms:
    if invalidTermChars.search(term):
      if show:
        print("    %s: invalid term name: '%s' %s" %(ex.keyvalue,term,debugdata))
      invalidTerms += 1
    elif not SdoTermSource.getTerm(term):
      # Well-formed name, but no such term in the loaded vocabulary.
      if show:
        print("    %s: term not found: %s  %s" %(ex.keyvalue,term,debugdata))
      invalidTerms += 1
  done += 1
  drawProgressBar(percentage(done,total)/100,50,pre="Processing Example Definitions.... ")

clearProgressBar("")
print()
print("Max terms for an example: %s" % maxterm)
print("Examples mapped to invalid term names: %s" % invalidTerms)

# --- Pass 2: per term type, count terms that have at least one example -----
# Each pair is [terms with examples, total terms].
typecounts = [0,0]
propcounts = [0,0]
enumcounts = [0,0]
enumvalcounts = [0,0]
datatypecounts = [0,0]


print()
termIds = SdoTermSource.getAllTerms()
total = len(termIds)
done = 0
for t in termIds:
  term = SdoTermSource.getTerm(t)
  if term.termType == SdoTerm.TYPE:
    counts = typecounts
  elif term.termType == SdoTerm.PROPERTY:
    counts = propcounts
  elif term.termType == SdoTerm.DATATYPE:
    counts = datatypecounts
  elif term.termType == SdoTerm.ENUMERATION:
    counts = enumcounts
  elif term.termType == SdoTerm.ENUMERATIONVALUE:
    counts = enumvalcounts
  else:
    # Any other term type is tallied into a throwaway pair and not reported.
    counts = [0,0]
  if len(SchemaExamples.examplesForTerm(term.id)):
    counts[0] += 1
  counts[1] += 1
  done += 1
  drawProgressBar(percentage(done,total)/100,50,pre="Processing Term Definitions.... ")

clearProgressBar("")

print("Terms/Example coverage:")
coverageReport = (
  ("Type", typecounts),
  ("Property", propcounts),
  ("Enumeration", enumcounts),
  ("EnumerationValue", enumvalcounts),
  ("DataType", datatypecounts),
)
for label, pair in coverageReport:
  print("   %s coverage %.2f%% (%d of %d)" % (label, percentage(pair[0],pair[1]), pair[0], pair[1]))




Total Examples: 462
Terms with examples: 557
Max examples for a term: 19

Max terms for an example: 13
Examples mapped to invalid term names: 11

Terms/Example coverage:
   Type coverage 44.71% (351 of 785)
   Property coverage 12.94% (179 of 1383)
   Enumeration coverage 5.48% (4 of 73)
   EnumerationValue coverage 3.54% (13 of 367)
   DataType coverage 0.00% (0 of 9)
