# Tourism Structures and Organizations

Generate a graph for accommodations, restaurants, etc.

### Imports

The `utils` module stores the constants for namespaces and vocabularies, together with common helper functions.

In [None]:
# Set autoreload for imported packages

%load_ext autoreload
%autoreload 2


In [None]:
# Import utils
import os
import sys

# Resolve the parent of the current working directory and make it importable.
# "__file__" is a plain string here (not the module attribute), so the path is
# resolved against the working directory; going up two levels from that
# pseudo-file lands on the working directory's parent.
rootFolder = os.path.abspath(
    os.path.join("__file__", os.pardir, os.pardir))

sys.path.append(rootFolder)

from utils import *

In [None]:
# Import the other required libraries
import pandas as pd

from rdflib import Literal, RDF, URIRef

from rdflib.namespace import XSD, DC, RDF, RDFS, SKOS


In [None]:
# Load the project configuration from the conf.ini file in the parent folder
config = getConfig("../conf.ini")

# Base URL handed to getOpenData below, read from the "API" section
BASE_URL = config.get("API", "base_url")

## Graph creation

In [None]:
# Create the graph that will collect all the triples of this notebook
g = createGraph()

# Prefixes of the namespaces used for classes and properties
g.bind("sm", SM)
g.bind("clv", CLV)
g.bind("poi", POI)
g.bind("cov", COV)
g.bind("acco", ACCO)

# Prefixes of the namespaces used for classification codes.
# The erogation channels need a prefix of their own: "acco" is already bound
# to ACCO above and a prefix can only be mapped to a single namespace.
g.bind("channel", EROGATION_CHANNELS)
g.bind("orgateco", ORG_ATECO)
g.bind("accotype", ACCO_TYPES)
g.bind("poiclass", POI_CLASSIFICATION)

# Prefixes of the namespaces the generated nodes live in
g.bind("anncsu", ANNCSU)
g.bind("trstrc", TOURISM_STRUCT)

In [None]:
# Create the concept scheme node that carries the information about the
# tourism structures dataset; every node created below is attached to it

TourismStructData = URIRef(TOURISM_STRUCT)

for predicate, obj in (
    (RDF.type, SKOS.ConceptScheme),
    (RDFS.label, Literal(
        "Strutture ricettive, aziende agricole e ristoranti per turisti", lang="it")),
    (RDFS.label, Literal(
        "Accomodation facilities and organizations for tourists", lang="en")),
    (DC.creator, URIRef("https://w3id.org/people/lucamartinelli")),
):
    g.add((TourismStructData, predicate, obj))

In [None]:
# Get the data: dataset and resource identifiers come from the
# "TOURISM_STRUCT" section of the configuration

datasetID = config.get("TOURISM_STRUCT", "dataset")
structuresResource = config.get("TOURISM_STRUCT", "structures")

# Accommodation facilities; the "IVA" and "TELEFONO" columns are forced to
# str (presumably so that they are not parsed as numbers)
tourismStructures = getOpenData(
    BASE_URL,
    datasetID,
    structuresResource,
    dtype={"IVA": str, "TELEFONO": str},
)


In [None]:
# Create emails, phones and websites up front (to avoid repetitions):
# structures sharing the same contact detail will point to the same node

# One Email node per distinct, non-missing e-mail address
allEmails = (
    tourismStructures[["EMAIL"]]
    .dropna()
    .drop_duplicates()
    .set_index("EMAIL")
)
for email in allEmails.index:
    Email = URIRef(TOURISM_STRUCT["mail-" + genNameForID(email)])

    for predicate, obj in (
        (RDF.type, SM["Email"]),
        (DC.title, Literal(email, datatype=XSD.string)),
        (SKOS.inScheme, TourismStructData),
        (SM["hasEmailType"], EROGATION_CHANNELS["042"]),
        (SM["emailAddress"], Literal("mailto:" + email, datatype=XSD.anyURI)),
    ):
        g.add((Email, predicate, obj))

# One Telephone node per distinct, non-missing phone number
allPhones = (
    tourismStructures[["TELEFONO"]]
    .dropna()
    .drop_duplicates()
    .set_index("TELEFONO")
)
for phone in allPhones.index:
    Telephone = URIRef(TOURISM_STRUCT["tel-" + genNameForID(phone)])

    for predicate, obj in (
        (RDF.type, SM["Telephone"]),
        (DC.title, Literal(phone, datatype=XSD.string)),
        (SKOS.inScheme, TourismStructData),
        (SM["hasTelephoneType"], EROGATION_CHANNELS["03"]),
        (SM["telephoneNumber"], Literal(phone, datatype=XSD.string)),
    ):
        g.add((Telephone, predicate, obj))

# One WebSite node per distinct, non-missing website address
allWebsites = (
    tourismStructures[["SITO"]]
    .dropna()
    .drop_duplicates()
    .set_index("SITO")
)
for website in allWebsites.index:
    WebSite = URIRef(TOURISM_STRUCT["web-" + genNameForID(website)])

    for predicate, obj in (
        (RDF.type, SM["WebSite"]),
        (DC.title, Literal(website, datatype=XSD.string)),
        (SKOS.inScheme, TourismStructData),
        (SM["URL"], Literal(website, datatype=XSD.anyURI)),
    ):
        g.add((WebSite, predicate, obj))


In [None]:
# For every row of the dataset create the organization node, the optional
# accommodation node and the optional online contact point, then link them
for i, struct in tourismStructures.iterrows():
    name = standardizeName(struct["NOME"])

    # Classification codes; each one is checked with pd.isna before use so
    # that a row without a value does not break the export
    codAteco = struct["COD_ATECO"]
    codPOI = struct["COD_POI"]
    codAcco = struct["COD_ACCO"]
    # NOTE(review): the "STELLE" column is not exported to the graph —
    # confirm whether a star-rating triple is expected here.

    vat = struct["IVA"]
    phone = struct["TELEFONO"]
    email = struct["EMAIL"]
    website = struct["SITO"]

    # Get address reference
    Address = URIRef(
        ANNCSU["ad-{}-{}".format(struct["PROGR_NAZIONALE"], struct["PROGR_CIVICO"])])

    # Create organization reference (identified by the row index)
    Organization = URIRef(TOURISM_STRUCT["org-" + str(i)])

    # Add organization node
    g.add((Organization, RDF.type, COV["PrivateOrganization"]))
    g.add((Organization, DC.title, Literal(name, datatype=XSD.string)))

    g.add((Organization, SKOS.inScheme, TourismStructData))
    g.add((TourismStructData, SKOS.hasTopConcept, Organization))

    # Add attributes to organization
    g.add((Organization, COV["legalName"], Literal(name, datatype=XSD.string)))
    g.add((Organization, CLV["hasPrimaryAddress"], Address))

    if not pd.isna(codAteco):
        # The dots are stripped from the code before it is used as the
        # local name in the ORG_ATECO namespace
        g.add(
            (Organization, COV["hasPrivateOrgActivityType"],
             ORG_ATECO[codAteco.replace(".", "")]))

    if not pd.isna(vat):
        g.add((Organization, COV["VATnumber"],
              Literal(vat, datatype=XSD.string)))

    # Create accomodation reference (only for rows with an accommodation code)
    Accomodation = None

    if not pd.isna(codAcco):
        Accomodation = URIRef(TOURISM_STRUCT["acco-" + str(i)])

        # Add accomodation node
        g.add((Accomodation, RDF.type, ACCO["Accommodation"]))
        g.add((Accomodation, DC.title, Literal(name, datatype=XSD.string)))

        g.add((Accomodation, SKOS.inScheme, TourismStructData))

        # Add attributes to accomodation
        g.add((Accomodation, POI["POIofficialName"],
              Literal(name, datatype=XSD.string)))
        g.add((Accomodation, CLV["hasAddress"], Address))
        g.add(
            (Accomodation, ACCO["hasAccommodationTypology"], ACCO_TYPES[codAcco]))

        if not pd.isna(codPOI):
            g.add(
                (Accomodation, POI["hasPOICategory"], POI_CLASSIFICATION[codPOI]))

        g.add((Accomodation, ACCO["hasAccommodationOwner"], Organization))

    # Create online contact points (only when at least one contact is known)
    if not (pd.isna(phone) and pd.isna(email) and pd.isna(website)):
        OnlineContactPoint = URIRef(TOURISM_STRUCT["ocp-" + str(i)])

        g.add((OnlineContactPoint, RDF.type, SM["OnlineContactPoint"]))
        g.add((OnlineContactPoint, DC.title, Literal(
            "Informazioni di contatto per " + name, lang="it")))
        g.add((OnlineContactPoint, DC.title, Literal(
            "Contact information for " + name, lang="en")))

        g.add((OnlineContactPoint, SKOS.inScheme, TourismStructData))

        # The contact nodes are rebuilt from the same "tel-"/"mail-"/"web-"
        # naming scheme, so equal contact values resolve to the same node
        if not pd.isna(phone):
            Telephone = URIRef(TOURISM_STRUCT["tel-" + genNameForID(phone)])
            g.add((OnlineContactPoint, SM["hasTelephone"], Telephone))

        if not pd.isna(email):
            Email = URIRef(TOURISM_STRUCT["mail-" + genNameForID(email)])
            g.add((OnlineContactPoint, SM["hasEmail"], Email))

        if not pd.isna(website):
            WebSite = URIRef(TOURISM_STRUCT["web-" + genNameForID(website)])
            g.add((OnlineContactPoint, SM["hasWebSite"], WebSite))

        g.add((Organization, SM["hasOnlineContactPoint"], OnlineContactPoint))

        # Explicit None check: do not rely on the truthiness of a URIRef
        if Accomodation is not None:
            g.add(
                (Accomodation, SM["hasOnlineContactPoint"], OnlineContactPoint))


In [None]:
# Save graph
# "tourismStructures" is the output name handed to saveGraph; where and in
# which format the graph is written is decided by that helper
# (presumably defined in utils — TODO confirm)
saveGraph(g, "tourismStructures")