In [4]:
%pip install spacy
import spacy
import os
import subprocess

# Load the small English pipeline.
# `%pip install spacy` installs the library only; the trained pipeline
# "en_core_web_sm" is a separate package. When it is missing, spacy.load
# raises OSError, so fetch it once and retry instead of aborting the run.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download

    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Input text describing the course domain; `doc` is its parsed form and is
# what the later cells iterate over to collect concepts.
texte = "There are courses and laboratory courses. Homeworks are part of courses. Courses are organized by teachers. Teachers are either professors or assistants. Professors teach courses while assistants only teach laboratory courses."
doc = nlp(texte)

Note: you may need to restart the kernel to use updated packages.


In [5]:
# Concepts are the distinct surface forms of every token that the pipeline
# tagged as a common noun or a proper noun.
concepts = {
    token.text
    for token in doc
    if token.pos_ in ["NOUN", "PROPN"]
}

# Filled in by a later cell with (subject, predicate, object) triples.
relations = []

In [6]:
# Relations between concepts, written out by hand as
# (subject, predicate, object) triples; they are not derived from `doc`.
# The list is assigned in one statement rather than appended to, so running
# this cell more than once cannot accumulate duplicate relations.
relations = [
    ("Homework", "partOf", "Course"),
    ("LaboratoryCourse", "isTypeOf", "Course"),
    ("Course", "organizedBy", "Teacher"),
    ("LaboratoryCourse", "organizedBy", "Teacher"),
    ("Professor", "isTypeOf", "Teacher"),
    ("Assistant", "isTypeOf", "Teacher"),
    ("Professor", "teaches", "Course"),
    ("Assistant", "teaches", "LaboratoryCourse"),
]

# Display
# The bullet is U+1F539; the previous literal was the UTF-8 bytes of that
# character mis-decoded as cp1252.
print("🔹 Concepts extracted:")
for concept in sorted(concepts):
    print("-", concept)

print("\n🔹 Relations extracted:")
for subject, predicate, obj in relations:
    print(f"{subject} --{predicate}--> {obj}")

🔹 Concepts extracted:
- Courses
- Homeworks
- Professors
- Teachers
- assistants
- courses
- laboratory
- part
- professors
- teachers

🔹 Relations extracted:
Homework --partOf--> Course
LaboratoryCourse --isTypeOf--> Course
Course --organizedBy--> Teacher
LaboratoryCourse --organizedBy--> Teacher
Professor --isTypeOf--> Teacher
Assistant --isTypeOf--> Teacher
Professor --teaches--> Course
Assistant --teaches--> LaboratoryCourse


In [7]:
from rdflib import Graph, Namespace, RDF, RDFS

# Build the RDFS schema of the course ontology.
# All terms live in the EX namespace, bound to the "ex" prefix.
EX = Namespace("http://example.org/courses/ontology/")
g = Graph()
g.bind("ex", EX)

# ======================
# Classes
# ======================
class_declarations = [
    (cls, RDF.type, RDFS.Class)
    for cls in (
        EX.Course,
        EX.LaboratoryCourse,
        EX.Homework,
        EX.Teacher,
        EX.Professor,
        EX.Assistant,
    )
]

# Class hierarchy: (subclass, rdfs:subClassOf, superclass)
subclass_links = [
    (EX.LaboratoryCourse, RDFS.subClassOf, EX.Course),
    (EX.Professor, RDFS.subClassOf, EX.Teacher),
    (EX.Assistant, RDFS.subClassOf, EX.Teacher),
]

# ======================
# Properties
# ======================
property_declarations = [
    (prop, RDF.type, RDF.Property)
    for prop in (EX.partOf, EX.organizedBy, EX.teaches)
]

# Domains and ranges, one domain/range pair per property
domain_range_links = [
    (EX.partOf, RDFS.domain, EX.Homework),
    (EX.partOf, RDFS.range, EX.Course),
    (EX.organizedBy, RDFS.domain, EX.Course),
    (EX.organizedBy, RDFS.range, EX.Teacher),
    (EX.teaches, RDFS.domain, EX.Teacher),
    (EX.teaches, RDFS.range, EX.Course),
]

for triple in (
    class_declarations
    + subclass_links
    + property_declarations
    + domain_range_links
):
    g.add(triple)

# Show the graph object as the cell result.
g

<Graph identifier=Ne36b3fc3954046e4a2e4e7fe069f1e6c (<class 'rdflib.graph.Graph'>)>

In [8]:
# ======================
# Individuals
# ======================
# Each triple types one example individual with a class of the schema.
individual_types = [
    (EX.MathCourse, RDF.type, EX.Course),
    (EX.PhysicsLabCourse, RDF.type, EX.LaboratoryCourse),
    (EX.Homework1, RDF.type, EX.Homework),
    (EX.ProfessorJohn, RDF.type, EX.Professor),
    (EX.AssistantMary, RDF.type, EX.Assistant),
]

# ======================
# Relationships between individuals
# ======================
individual_links = [
    (EX.Homework1, EX.partOf, EX.MathCourse),
    (EX.MathCourse, EX.organizedBy, EX.ProfessorJohn),
    (EX.PhysicsLabCourse, EX.organizedBy, EX.AssistantMary),
    (EX.ProfessorJohn, EX.teaches, EX.MathCourse),
    (EX.AssistantMary, EX.teaches, EX.PhysicsLabCourse),
]

for triple in individual_types + individual_links:
    g.add(triple)

# Show the graph object as the cell result.
g

<Graph identifier=Ne36b3fc3954046e4a2e4e7fe069f1e6c (<class 'rdflib.graph.Graph'>)>

In [9]:
# Write the graph in Turtle syntax to a file in the current working directory,
# then confirm on stdout.
g.serialize(destination="ontologie_courses.ttl", format="turtle")
print("File ontologie_courses.ttl created successfully")

File ontologie_courses.ttl created successfully
