In [2]:
import csv
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS

# Load CSV file
csv_file = 'movies.csv'
output_file = 'movies.rdf'

# Define namespaces
SCHEMA = Namespace("http://schema.org/")
EX = Namespace("http://example.org/")

# Create an RDF graph
g = Graph()
g.bind("schema", SCHEMA)
g.bind("ex", EX)

# Read CSV and convert to RDF
with open(csv_file, 'r', encoding='utf-8') as f:  # Specify encoding here
    reader = csv.DictReader(f)
    for row in reader:
        # Create a URI for the movie
        movie_uri = URIRef(EX + "movie/" + row['title'].replace(" ", "_"))
        g.add((movie_uri, RDF.type, SCHEMA.Movie))

        # Add title
        g.add((movie_uri, SCHEMA.name, Literal(row['title'])))

        # Add overview
        if row['overview']:
            g.add((movie_uri, SCHEMA.description, Literal(row['overview'])))

        # Add genres (split by commas if multiple)
        genres = row['genres'].split(',')
        for genre in genres:
            genre_uri = URIRef(EX + "genre/" + genre.strip().replace(" ", "_"))
            g.add((movie_uri, SCHEMA.genre, genre_uri))
            g.add((genre_uri, RDF.type, SCHEMA.Genre))
            g.add((genre_uri, SCHEMA.name, Literal(genre.strip())))

        # Add producer
        if row['producer']:
            producer_uri = URIRef(EX + "producer/" + row['producer'].replace(" ", "_"))
            g.add((movie_uri, SCHEMA.producer, producer_uri))
            g.add((producer_uri, RDF.type, SCHEMA.Person))
            g.add((producer_uri, SCHEMA.name, Literal(row['producer'])))

        # Add cast (split by commas if multiple)
        cast_members = row['cast'].split(',')
        for member in cast_members:
            cast_uri = URIRef(EX + "person/" + member.strip().replace(" ", "_").replace('"', ""))
            g.add((movie_uri, SCHEMA.actor, cast_uri))
            g.add((cast_uri, RDF.type, SCHEMA.Person))
            g.add((cast_uri, SCHEMA.name, Literal(member.strip())))

# Save RDF to file
g.serialize(destination=output_file, format='turtle')

print(f"RDF data has been saved to {output_file}")


http://example.org/person/Donald_Sutherland_as_Jerry_O`Neill does not look like a valid URI, trying to serialize this will break.
http://example.org/person/Eddie_Murphy_as_Sherman_Klump_|_Buddy_Love does not look like a valid URI, trying to serialize this will break.
http://example.org/person/Elliot_Levey_as_TV_Director< does not look like a valid URI, trying to serialize this will break.
http://example.org/person/Willeke_van_Ammelrooy_as_Kate`s_Mother does not look like a valid URI, trying to serialize this will break.
http://example.org/person/Courtney_J._Clark_as_Logans`s_Sister does not look like a valid URI, trying to serialize this will break.
http://example.org/person/Aaliyah_as_Trish_O`Day does not look like a valid URI, trying to serialize this will break.
http://example.org/person/Delroy_Lindo_as_Isaak_O`Day does not look like a valid URI, trying to serialize this will break.
http://example.org/person/D.B._Woodside_as_Colin_O`Day does not look like a valid URI, trying to seri

Exception: "http://example.org/person/Brendan_Fletcher_as_Johnny_D`Franco" does not look like a valid URI, I cannot serialize this as N3/Turtle. Perhaps you wanted to urlencode it?