In [11]:
import datetime

from owlready2 import Thing, get_ontology, AllDisjoint, sync_reasoner_pellet, Imp, FunctionalProperty
from owlready2 import *

# Create a new ontology
onto = get_ontology("http://example.org/onto#")

# Define classes, data properties and relationships for the ontology based on
# the CSV file contents.
with onto:
    class TVShow(Thing): pass
    class Episode(Thing): pass
    class User(Thing): pass
    class Comment(Thing): pass
    class Rating(Thing): pass
    class Emotion(Thing): pass

    # Define properties
    #
    # The CSV loaders assign these attributes on individuals. They have to be
    # declared as data properties: an attribute that matches no declared
    # property is kept as a plain Python attribute on the object and is not
    # written to the ontology, so the reasoner never sees it.
    # Each one is functional because the loaders assign a single scalar value
    # (e.g. ``rating.note = 5``) rather than a list.

    # Title of a TV show (filled from the 'tv_show_name' column).
    class titre(DataProperty, FunctionalProperty):
        domain = [TVShow]
        range = [str]

    # Season number of an episode (from 'episode_season_number').
    class aPourSaison(DataProperty, FunctionalProperty):
        domain = [Episode]
        range = [int]

    # Number of an episode within its season (from 'episode_number').
    class aPourNumero(DataProperty, FunctionalProperty):
        domain = [Episode]
        range = [int]

    # Rewatch counter of an episode (from 'rewatched_count').
    class rewatchedCount(DataProperty, FunctionalProperty):
        domain = [Episode]
        range = [int]

    # Date taken from the 'created_at' column. It is set on episodes, ratings,
    # comments and emotions alike, so no domain is declared: a single-class
    # domain would let the reasoner classify every dated individual as that class.
    class aPourDate(DataProperty, FunctionalProperty):
        range = [datetime.date]

    # Numeric value of a rating (from 'note').
    class note(DataProperty, FunctionalProperty):
        domain = [Rating]
        range = [int]

    # Text of a comment (from 'comment').
    class contenu(DataProperty, FunctionalProperty):
        domain = [Comment]
        range = [str]

    # Identifier of the emotion attached to an emotion vote (from 'emotion_id',
    # which the loader stores as the raw string).
    class emotion(DataProperty, FunctionalProperty):
        domain = [Emotion]
        range = [str]

    # Define relationships
    class posts_by(Comment >> User): pass

    class comments_on(Comment >> Episode): pass
    class follows(User >> TVShow): pass
    class has_started(User >> TVShow): pass
    class had_stopped(User >> TVShow): pass
    class has_watched(User >> Episode): pass

    class rated_by(Rating >> User): pass
    class rates(Rating >> Episode): pass

    class expressed_by(Emotion >> User): pass
    class evokes(Emotion >> Episode):  pass



In [12]:
import csv

def load_data(filename, process_row):
    """Stream a comma-separated file and pass every record to a callback.

    Args:
        filename: Path of a UTF-8 encoded CSV file whose first line holds the
            column names.
        process_row: Callable invoked once per data row with the dict built by
            ``csv.DictReader`` (column name -> string value).

    A progress line is printed every 100000 rows.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; otherwise '\r\n' sequences embedded in quoted
    # fields are silently rewritten to '\n' while reading.
    with open(filename, 'rt', encoding='utf-8', newline='') as file:
        reader = csv.DictReader(file, delimiter=',')
        for count, row in enumerate(reader):
            # `count` rows have been fully processed at this point.
            if count > 0 and count % 100000 == 0:
                print(f'Processed {count} rows')
            process_row(row)

In [13]:
import datetime

def process_tv_shows_episodes(row):
    """Register the TV show, episode and user described by one "seen episode" row.

    Args:
        row: Mapping with the keys 'tv_show_name', 'episode_id',
            'episode_season_number', 'episode_number', 'rewatched_count',
            'created_at' (formatted as '%Y-%m-%d %H:%M:%S') and 'user_id'.
    """
    show_name = row['tv_show_name']
    show = onto.TVShow(name=show_name)
    show.titre = show_name

    seen_episode = onto.Episode(name=row['episode_id'])
    seen_episode.aPourSaison = int(row['episode_season_number'])
    seen_episode.aPourNumero = int(row['episode_number'])
    seen_episode.rewatchedCount = int(row['rewatched_count'])

    # Only the calendar date of the timestamp is kept.
    seen_at = datetime.datetime.strptime(row['created_at'], '%Y-%m-%d %H:%M:%S')
    seen_episode.aPourDate = seen_at.date()

    viewer = onto.User(name=row['user_id'])
    # Having seen at least one episode means the user has started the show.
    viewer.has_started.append(show)
    # The user has watched this particular episode.
    viewer.has_watched.append(seen_episode)

# Load the "seen episode" records, then report how many individuals of each
# class the ontology now holds.
path = './data/seen_episode_modified.csv'
load_data(filename=path, process_row=process_tv_shows_episodes)
print(
    f'Loaded {len(onto.TVShow.instances())} TV shows and {len(onto.Episode.instances())} episodes seen and {len(onto.User.instances())} users'
)

Loaded 121 TV shows and 5236 episodes seen and 5358 users


In [14]:
def process_episodes_ratings(row):
    """Register one rating and link it to its author and to the rated episode.

    Args:
        row: Mapping with the keys 'episode_id', 'user_id', 'vote_key', 'note'
            and 'created_at' (formatted as '%Y-%m-%d %H:%M:%S').
    """
    rated_episode = onto.Episode(name=row['episode_id'])
    voter = onto.User(name=row['user_id'])

    vote = onto.Rating(name=row['vote_key'])
    vote.note = int(row['note'])
    vote.rated_by.append(voter)
    vote.rates.append(rated_episode)

    # Only the calendar date of the timestamp is kept.
    voted_at = datetime.datetime.strptime(row['created_at'], '%Y-%m-%d %H:%M:%S')
    vote.aPourDate = voted_at.date()

# Load the episode ratings, then report how many Rating individuals exist.
path = './data/ratings_episode_votes_modified.csv'
load_data(filename=path, process_row=process_episodes_ratings)
print(
    f'Loaded {len(onto.Rating.instances())} ratings'
)

Loaded 5575 ratings


In [15]:
def process_tv_shows_followings(row):
    """Record that a user follows a TV show, and whether they have since stopped.

    Args:
        row: Mapping with the keys 'tv_show_name', 'user_id', 'active' and
            'archived'; the last two are compared against the strings '0' and '1'.
    """
    show = onto.TVShow(name=row['tv_show_name'])
    follower = onto.User(name=row['user_id'])

    # The follow link is recorded for every row, whatever its flags say.
    follower.follows.append(show)

    # An inactive or archived following means the user stopped following the show.
    no_longer_following = row['active'] == '0' or row['archived'] == '1'
    if no_longer_following:
        follower.had_stopped.append(show)

# Load the followed-show records, then report the User and TVShow totals.
path = './data/followed_tv_show_modified.csv'
load_data(filename=path, process_row=process_tv_shows_followings)
print(
    f'Loaded {len(onto.User.instances())} users and {len(onto.TVShow.instances())} TV shows'
)

Loaded 5358 users and 121 TV shows


In [16]:
def process_episodes_comments(row):
    """Register one comment and link it to its author and to the episode it targets.

    Args:
        row: Mapping with the keys 'episode_id', 'user_id', 'comment_id',
            'comment' and 'created_at' (formatted as '%Y-%m-%d %H:%M:%S').
    """
    target_episode = onto.Episode(name=row['episode_id'])
    author = onto.User(name=row['user_id'])

    remark = onto.Comment(name=row['comment_id'])
    remark.contenu = row['comment']
    remark.posts_by.append(author)
    remark.comments_on.append(target_episode)

    # Only the calendar date of the timestamp is kept.
    posted_at = datetime.datetime.strptime(row['created_at'], '%Y-%m-%d %H:%M:%S')
    remark.aPourDate = posted_at.date()

# Load the episode comments, then report how many Comment individuals exist.
path = './data/episode_comment_modified.csv'
load_data(filename=path, process_row=process_episodes_comments)
print(
    f'Loaded {len(onto.Comment.instances())} comments'
)

Loaded 5271 comments


In [17]:
def process_episodes_emotions(row):
    """Register one emotion vote and link it to its user and to the episode.

    Args:
        row: Mapping with the keys 'episode_id', 'user_id', 'emotion_vote_id',
            'emotion_id' and 'created_at' (formatted as '%Y-%m-%d %H:%M:%S').
    """
    target_episode = onto.Episode(name=row['episode_id'])
    voter = onto.User(name=row['user_id'])

    emotion_vote = onto.Emotion(name=row['emotion_vote_id'])
    # The emotion identifier is stored as the raw string read from the CSV.
    emotion_vote.emotion = row['emotion_id']
    emotion_vote.expressed_by.append(voter)
    emotion_vote.evokes.append(target_episode)

    # Only the calendar date of the timestamp is kept.
    voted_at = datetime.datetime.strptime(row['created_at'], '%Y-%m-%d %H:%M:%S')
    emotion_vote.aPourDate = voted_at.date()

# Load the emotion votes, then report how many Emotion individuals exist.
path = './data/emotions_episode_votes_modified.csv'
load_data(filename=path, process_row=process_episodes_emotions)
print(
    f'Loaded {len(onto.Emotion.instances())} emotions'
)

Loaded 5577 emotions


In [18]:
# Run the Pellet reasoner on the ontology, asking it to infer both object and
# data property values, with debug output enabled.
sync_reasoner_pellet(
    [onto],
    infer_property_values=True,
    infer_data_property_values=True,
    debug=True,
)

* Owlready2 * Running Pellet...
    java -Xmx2000M -cp C:\Users\romai\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\owlready2\pellet\antlr-3.2.jar;C:\Users\romai\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\owlready2\pellet\antlr-runtime-3.2.jar;C:\Users\romai\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\owlready2\pellet\aterm-java-1.6.jar;C:\Users\romai\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\owlready2\pellet\commons-codec-1.6.jar;C:\Users\romai\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\owlready2\pellet\httpclient-4.2.3.jar;C:\Users\romai\AppData\Local\Packages\PythonSoftwareFoundation.Pyth