# Tourism Structures and Organizations

Generate a graph for accommodations, restaurants, etc.

### Imports

The `utils` module stores the constants for namespaces and vocabularies, together with common helper functions.

In [223]:
# Set autoreload for imported packages

%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [224]:
# Import utils
import os
import sys

# Note that "__file__" is a quoted string, not the __file__ variable:
# abspath() resolves that literal name against the current working directory,
# and the two dirname() calls then walk up to the parent of that directory.
rootFolder = os.path.abspath(
    os.path.dirname(
        os.path.dirname(
            os.path.abspath("__file__"))))

# Add that folder to the import search path before importing utils.
sys.path.append(rootFolder)

# NOTE(review): the helpers and namespace constants used in the cells below
# (getConfig, createGraph, SM, ACCO_DATA, ...) presumably come from this star
# import - confirm against utils.
from utils import *

In [225]:
# Imports other required libraries
import pandas as pd

from rdflib import Literal, RDF, URIRef

from rdflib.namespace import XSD, DC, RDF, RDFS, SKOS


In [226]:
# Load the settings from the conf.ini file one directory above the working directory
config = getConfig("../conf.ini")

# Value of base_url under "API"; passed to getOpenData when the datasets are fetched
BASE_URL = config.get("API", "base_url")

## Graph creation

In [227]:
# Create the graph and register one prefix per namespace used in this notebook
g = createGraph()

namespaceBindings = [
    ("sm", SM),
    ("clv", CLV),
    ("cpv", CPV),
    ("poi", POI),
    ("cov", COV),
    ("acco", ACCO),

    ("smchannels", EROGATION_CHANNELS),
    ("accotype", ACCO_TYPES),
    ("poiclass", POI_CLASSIFICATION),
    ("starrating", ACCO_STAR_RATINGS),

    ("anncsu", ANNCSU),
    ("accommodation", ACCO_DATA),
    ("organization", COV_DATA),
]

# Bind in the listed order, exactly one g.bind call per entry
for prefix, namespace in namespaceBindings:
    g.bind(prefix, namespace)

In [228]:
# Create the node that describes the accommodation dataset itself: it is typed
# as a SKOS concept scheme, labelled in Italian and English and attributed to
# ONTO_AUTHOR. The resources created in the following cells are attached to it
# through skos:inScheme.

AccommodationData = URIRef(ACCO_DATA)

g.add((AccommodationData, RDF.type, SKOS.ConceptScheme))
g.add((AccommodationData, RDFS.label, Literal("Strutture ricettive e locazioni turistiche", lang="it")))
g.add((AccommodationData, RDFS.label, Literal("Accommodation facilities and resorts", lang="en")))
g.add((AccommodationData, DC.creator, ONTO_AUTHOR))


<Graph identifier=Nfa2c898caf654ab5aa46231061f061b8 (<class 'rdflib.graph.Graph'>)>

In [229]:
# Get the data: both tables belong to the dataset configured under ACCOMMODATIONS
datasetID = config.get("ACCOMMODATIONS", "dataset")

# Accommodation facilities, indexed by CODICE_IDENTIFICATIVO.
# The listed columns are read as strings; every string cell is then stripped
# of leading/trailing whitespace, non-string cells are left as they are.
accommodationFacilities = (
    getOpenData(
        BASE_URL,
        datasetID,
        config.get("ACCOMMODATIONS", "accommodation_facilities"),
        dtype={'IVA': str, 'TELEFONO': str, 'STELLE': str, 'FAX': str})
    .set_index(["CODICE_IDENTIFICATIVO"])
    .applymap(lambda x: x.strip() if type(x) == str else x)
)

# Resorts, indexed by CODICE_ALLOGGIO, cleaned the same way
resorts = (
    getOpenData(
        BASE_URL,
        datasetID,
        config.get("ACCOMMODATIONS", "resorts"),
        dtype={'IVA': str})
    .set_index(["CODICE_ALLOGGIO"])
    .applymap(lambda x: x.strip() if type(x) == str else x)
)


In [230]:
# Lookup tables read from CSV files in the working directory, both indexed by
# their CODICE column. The feature cell below reads the ITA and ENG columns of
# `features`.
# NOTE(review): statuses.csv presumably has the same ITA/ENG layout - confirm.
features = pd.read_csv("features.csv", index_col=["CODICE"])
statuses = pd.read_csv("statuses.csv", index_col=["CODICE"])

In [231]:
# For every row of the features table create two linked nodes:
#   - an ACCO OSDFeature            ("feature-<code>")
#   - an ACCO OfferedServiceDescription ("service-<code>") pointing to it
# Both carry the Italian and English titles and belong to the dataset scheme.
for code, feature in features.iterrows():
    OSDFeature = URIRef(ACCO_DATA["feature-"] + code)
    OfferedServiceDescription = URIRef(ACCO_DATA["service-"] + code)

    g.add((OSDFeature, RDF.type, ACCO["OSDFeature"]))
    g.add((OfferedServiceDescription, RDF.type,
           ACCO["OfferedServiceDescription"]))

    # Same literal is used as title of both nodes and as name of the feature
    for column, language in (("ITA", "it"), ("ENG", "en")):
        label = Literal(feature[column], lang=language)
        g.add((OSDFeature, DC.title, label))
        g.add((OSDFeature, ACCO["featureName"], label))
        g.add((OfferedServiceDescription, DC.title, label))

    for node in (OSDFeature, OfferedServiceDescription):
        g.add((node, SKOS.inScheme, AccommodationData))

    g.add((OfferedServiceDescription, ACCO["hasOSDFeature"], OSDFeature))


In [232]:
# Create one POI POIState node ("status-<code>") per row of the statuses table,
# with Italian and English titles/state names, inside the dataset scheme.
#
# The labels are read from the current `status` row. The previous version read
# them from `feature`, a variable left over from the feature loop, so every
# status ended up with the labels of the last feature.
# NOTE(review): assumes statuses.csv exposes ITA and ENG columns like
# features.csv - confirm against the file.
for code, status in statuses.iterrows():
    POIState = URIRef(ACCO_DATA["status-"] + code)
    g.add((POIState, RDF.type, POI["POIState"]))
    g.add((POIState, DC.title,
          Literal(status["ITA"], lang="it")))
    g.add((POIState, DC.title,
          Literal(status["ENG"], lang="en")))

    g.add((POIState, POI["POIstate"],
          Literal(status["ITA"], lang="it")))
    g.add((POIState, POI["POIstate"],
          Literal(status["ENG"], lang="en")))

    g.add((POIState, SKOS.inScheme, AccommodationData))


In [233]:
# Create emails, phones and websites (to avoid repetitions)

# Every distinct, non-missing EMAIL value of the two tables becomes one
# SM Email node ("mail-<id>") with e-mail type 042.
emailColumns = [pd.DataFrame(accommodationFacilities["EMAIL"]),
                pd.DataFrame(resorts["EMAIL"])]
allEmails = pd.concat(emailColumns).dropna().drop_duplicates().set_index(["EMAIL"])
for email in allEmails.index:
    Email = URIRef(ACCO_DATA["mail-" + genNameForID(email)])
    emailAddress = Literal("mailto:" + email, datatype=XSD.anyURI)

    g.add((Email, RDF.type, SM["Email"]))
    g.add((Email, DC.title, Literal(email, datatype=XSD.string)))
    g.add((Email, SKOS.inScheme, AccommodationData))
    g.add((Email, SM["hasEmailType"], EROGATION_CHANNELS["042"]))
    g.add((Email, SM["emailAddress"], emailAddress))

# Every distinct, non-missing PEC value of the resorts table becomes one
# SM Email node ("pec-<id>") with e-mail type 041.
pecColumn = pd.DataFrame(resorts["PEC"])
allPecs = pecColumn.dropna().drop_duplicates().set_index(["PEC"])
for pec in allPecs.index:
    Pec = URIRef(ACCO_DATA["pec-" + genNameForID(pec)])
    pecAddress = Literal("mailto:" + pec, datatype=XSD.anyURI)

    g.add((Pec, RDF.type, SM["Email"]))
    g.add((Pec, DC.title, Literal(pec, datatype=XSD.string)))
    g.add((Pec, SKOS.inScheme, AccommodationData))
    g.add((Pec, SM["hasEmailType"], EROGATION_CHANNELS["041"]))
    g.add((Pec, SM["emailAddress"], pecAddress))

# TELEFONO may hold several comma-separated entries: split them, put each
# entry on its own row, then drop missing values and duplicates.
allPhones = pd.DataFrame(accommodationFacilities["TELEFONO"].str.split(","))
allPhones = (
    allPhones
    .explode('TELEFONO')
    .dropna()
    .drop_duplicates()
    .set_index(["TELEFONO"])
)
for phones in allPhones.index:
    # An entry can itself contain several numbers separated by " - "
    for phone in phones.split(" - "):
        Telephone = URIRef(ACCO_DATA["tel-" + genNameForID(phone)])
        phoneNumber = Literal(phone, datatype=XSD.string)

        g.add((Telephone, RDF.type, SM["Telephone"]))
        g.add((Telephone, DC.title, Literal(phone, datatype=XSD.string)))
        g.add((Telephone, SKOS.inScheme, AccommodationData))
        g.add((Telephone, SM["hasTelephoneType"], EROGATION_CHANNELS["03"]))
        g.add((Telephone, SM["telephoneNumber"], phoneNumber))

# Every distinct, non-missing FAX value of the accommodation facilities table
# becomes one SM Telephone node ("fax-<id>") with telephone type 033.
#
# The loop iterates over allFaxes and writes the fax number itself. The
# previous version iterated over allPhones and stored the stale `phone`
# variable as title and number, so no real fax number reached the graph.
allFaxes = pd.DataFrame(accommodationFacilities["FAX"]).dropna(
).drop_duplicates().set_index(["FAX"])
for fax, _ in allFaxes.iterrows():
    Fax = URIRef(ACCO_DATA["fax-" + genNameForID(fax)])

    g.add((Fax, RDF.type, SM["Telephone"]))
    g.add((Fax, DC.title, Literal(fax, datatype=XSD.string)))

    g.add((Fax, SKOS.inScheme, AccommodationData))

    g.add((Fax, SM["hasTelephoneType"],
           EROGATION_CHANNELS["033"]))
    g.add((Fax, SM["telephoneNumber"],
           Literal(fax, datatype=XSD.string)))

# Every distinct, non-missing SITO value of the two tables becomes one
# SM WebSite node ("web-<id>").
websiteColumns = [pd.DataFrame(accommodationFacilities["SITO"]),
                  pd.DataFrame(resorts["SITO"])]
allWebsites = pd.concat(websiteColumns).dropna().drop_duplicates().set_index(["SITO"])
for website in allWebsites.index:
    WebSite = URIRef(ACCO_DATA["web-" + genNameForID(website)])
    websiteUrl = Literal(website, datatype=XSD.anyURI)

    g.add((WebSite, RDF.type, SM["WebSite"]))
    g.add((WebSite, DC.title, Literal(website, datatype=XSD.string)))
    g.add((WebSite, SKOS.inScheme, AccommodationData))
    g.add((WebSite, SM["URL"], websiteUrl))


In [234]:
# Create one ACCO Accommodation node ("acco-<code>") per accommodation
# facility and link it to its typology, POI category, state, owner, star
# rating, offered services, address and online contact point.
for codAccommodation, acco in accommodationFacilities.iterrows():
    name = standardizeName(acco["DENOMINAZIONE"])

    codPOI = acco["COD_POI"]

    codAcco = acco["COD_ACCO"]
    stars = acco["STELLE"]

    vatCode = acco["IVA"]
    phones = acco["TELEFONO"]
    fax = acco["FAX"]
    email = acco["EMAIL"]
    website = acco["SITO"]

    status = acco["CODICE_STATO"]

    # Deliberately not called `features`: that name holds the lookup table
    # loaded from features.csv, and overwriting it would break a re-run of
    # the feature cell.
    services = acco["SERVIZI"]

    # Create accommodation reference
    Accommodation = URIRef(ACCO_DATA["acco-" + str(codAccommodation)])

    # Add accommodation node
    g.add((Accommodation, RDF.type, ACCO["Accommodation"]))
    g.add((Accommodation, DC.title, Literal(name, datatype=XSD.string)))

    g.add((Accommodation, SKOS.inScheme, AccommodationData))

    # Add attributes to accommodation
    g.add((Accommodation, POI["POIofficialName"],
           Literal(name, datatype=XSD.string)))

    g.add(
        (Accommodation, ACCO["hasAccommodationTypology"], ACCO_TYPES[codAcco]))
    g.add(
        (Accommodation, POI["hasPOICategory"], POI_CLASSIFICATION[codPOI]))

    g.add((Accommodation, ACCO["accommodationCode"],
          Literal(codAccommodation, datatype=XSD.string)))

    g.add((Accommodation, POI["hasPOIState"],
          URIRef(ACCO_DATA["status-" + status])))

    # Organization that own accommodation
    if not pd.isna(vatCode):
        Organization = URIRef(COV_DATA["org-" + vatCode])
        g.add((Accommodation, ACCO["hasAccommodationOwner"], Organization))

    if not pd.isna(stars):
        g.add(
            (Accommodation, ACCO["hasAccommodationClassification"], ACCO_STAR_RATINGS[str(stars)]))

    # Add features offered. A missing value is skipped: str(NaN) would
    # otherwise yield a link to a non-existent "service-nan" node.
    if not pd.isna(services):
        for serviceCode in str(services).split(","):
            # Tolerate blanks around the separator and empty entries
            serviceCode = serviceCode.strip()
            if not serviceCode:
                continue
            OfferedServiceDescription = URIRef(
                ACCO_DATA["service-" + serviceCode])
            g.add(
                (Accommodation, ACCO["hasOfferedServiceDescription"], OfferedServiceDescription))

    # Get address reference ("snc" is used when the civic number is missing)
    if not pd.isna(acco["PROGR_NAZIONALE"]):
        progrCivico = int(acco["PROGR_CIVICO"]) if not pd.isna(
            acco["PROGR_CIVICO"]) else "snc"
        Address = URIRef(
            ANNCSU["ad-{}-{}".format(int(acco["PROGR_NAZIONALE"]), progrCivico)])
        g.add((Accommodation, CLV["hasAddress"], Address))

    # Create online contact points when at least one contact of THIS row is
    # present. The previous test looked at `phone`, a variable left over from
    # another loop, and ignored the fax even though it is linked below.
    hasContacts = not all(
        pd.isna(contact) for contact in (phones, fax, email, website))
    if hasContacts:
        OnlineContactPoint = URIRef(ACCO_DATA["ocp-" + str(codAccommodation)])

        g.add((OnlineContactPoint, RDF.type, SM["OnlineContactPoint"]))
        g.add((OnlineContactPoint, DC.title, Literal(
            "Informazioni di contatto per " + name, lang="it")))
        g.add((OnlineContactPoint, DC.title, Literal(
            "Contact information for " + name, lang="en")))

        g.add((OnlineContactPoint, SKOS.inScheme, AccommodationData))

        if not pd.isna(phones):
            # Same two-level split (comma, then " - ") that is applied when
            # the Telephone nodes are created, so every link hits a real node.
            for phoneGroup in str(phones).split(","):
                for phoneNumber in phoneGroup.split(" - "):
                    Telephone = URIRef(
                        ACCO_DATA["tel-" + genNameForID(phoneNumber)])
                    g.add((OnlineContactPoint, SM["hasTelephone"], Telephone))

        if not pd.isna(email):
            Email = URIRef(ACCO_DATA["mail-" + genNameForID(email)])
            g.add((OnlineContactPoint, SM["hasEmail"], Email))

        if not pd.isna(fax):
            Fax = URIRef(ACCO_DATA["fax-" + genNameForID(fax)])
            g.add((OnlineContactPoint, SM["hasTelephone"], Fax))

        if not pd.isna(website):
            WebSite = URIRef(ACCO_DATA["web-" + genNameForID(website)])
            g.add((OnlineContactPoint, SM["hasWebSite"], WebSite))

        g.add(
            (Accommodation, SM["hasOnlineContactPoint"], OnlineContactPoint))

In [235]:
# Every distinct (COGNOME, NOME) pair of the resorts table with both values
# present becomes one CPV Person node ("person-<surname>-<name>").
peopleColumns = pd.DataFrame(
    {"COGNOME": resorts["COGNOME"], "NOME": resorts["NOME"]})
allPeople = peopleColumns.dropna().drop_duplicates().set_index(["COGNOME", "NOME"])
for surname, name in allPeople.index:
    personID = "person-" + genNameForID(surname) + "-" + genNameForID(name)
    Person = URIRef(ACCO_DATA[personID])

    g.add((Person, RDF.type, CPV["Person"]))
    g.add((Person, DC.title, Literal(name + " " + surname, datatype=XSD.string)))
    g.add((Person, SKOS.inScheme, AccommodationData))
    g.add((Person, CPV["givenName"], Literal(name, datatype=XSD.string)))
    g.add((Person, CPV["familyName"], Literal(surname, datatype=XSD.string)))


In [236]:
# Create one ACCO Accommodation node ("acco-<code>") per resort and link it to
# its typology, POI category, state, size, owner, address and online contact
# point.
for codResort, resort in resorts.iterrows():
    name = standardizeName(resort["DENOMINAZIONE"])

    codPOI = resort["COD_POI"]
    codAcco = resort["COD_ACCO"]

    vatCode = resort["IVA"]
    surnameLocator = resort["COGNOME"]
    nameLocator = resort["NOME"]
    email = resort["EMAIL"]
    pec = resort["PEC"]
    website = resort["SITO"]

    # The resorts table is given a fixed state here
    status = "open"

    totalRooms = resort["NUM_CAMERE"]
    totalBeds = resort["NUM_LETTI"]

    # Create accommodation reference
    Accommodation = URIRef(ACCO_DATA["acco-" + str(codResort)])

    # Add accommodation node
    g.add((Accommodation, RDF.type, ACCO["Accommodation"]))
    g.add((Accommodation, DC.title, Literal(name, datatype=XSD.string)))

    g.add((Accommodation, SKOS.inScheme, AccommodationData))

    # Add attributes to accommodation
    g.add((Accommodation, POI["POIofficialName"],
           Literal(name, datatype=XSD.string)))

    g.add(
        (Accommodation, ACCO["hasAccommodationTypology"], ACCO_TYPES[codAcco]))
    g.add(
        (Accommodation, POI["hasPOICategory"], POI_CLASSIFICATION[codPOI]))

    g.add((Accommodation, ACCO["accommodationCode"],
          Literal(codResort, datatype=XSD.string)))

    g.add((Accommodation, POI["hasPOIState"],
          URIRef(ACCO_DATA["status-" + status])))

    # Room/bed counts: skipped when missing, and converted to int so that a
    # column read as float (because of missing values) is not written out as
    # "3.0" or "nan" with an xsd:int datatype.
    if not pd.isna(totalRooms):
        g.add((Accommodation, ACCO["totalRoom"],
              Literal(int(totalRooms), datatype=XSD.int)))
    if not pd.isna(totalBeds):
        g.add((Accommodation, ACCO["totalBed"],
              Literal(int(totalBeds), datatype=XSD.int)))

    # Organization that own accommodation; without a VAT code fall back to the
    # person named in the row. The Person nodes of this notebook are created
    # in the ACCO_DATA namespace, so the reference uses that namespace too,
    # and it is only built when both name parts are present.
    # NOTE(review): confirm no other notebook publishes these people under
    # COV_DATA, the namespace the previous version pointed to.
    if not pd.isna(vatCode):
        Organization = URIRef(COV_DATA["org-" + vatCode])
        g.add((Accommodation, ACCO["hasAccommodationOwner"], Organization))
    elif not (pd.isna(surnameLocator) or pd.isna(nameLocator)):
        Person = URIRef(
            ACCO_DATA["person-" + genNameForID(surnameLocator) + "-" + genNameForID(nameLocator)])
        g.add((Accommodation, ACCO["hasAccommodationOwner"], Person))

    # Get address reference from THIS resort ("snc" when the civic number is
    # missing). The previous version read `acco`, the last row of the
    # accommodation facilities loop, so every resort got that row's address.
    if not pd.isna(resort["PROGR_NAZIONALE"]):
        progrCivico = int(resort["PROGR_CIVICO"]) if not pd.isna(
            resort["PROGR_CIVICO"]) else "snc"
        Address = URIRef(
            ANNCSU["ad-{}-{}".format(int(resort["PROGR_NAZIONALE"]), progrCivico)])
        g.add((Accommodation, CLV["hasAddress"], Address))

    # Create online contact points
    if not (pd.isna(email) and pd.isna(pec) and pd.isna(website)):
        OnlineContactPoint = URIRef(ACCO_DATA["ocp-" + str(codResort)])

        g.add((OnlineContactPoint, RDF.type, SM["OnlineContactPoint"]))
        g.add((OnlineContactPoint, DC.title, Literal(
            "Informazioni di contatto per " + name, lang="it")))
        g.add((OnlineContactPoint, DC.title, Literal(
            "Contact information for " + name, lang="en")))

        g.add((OnlineContactPoint, SKOS.inScheme, AccommodationData))

        if not pd.isna(email):
            Email = URIRef(ACCO_DATA["mail-" + genNameForID(email)])
            g.add((OnlineContactPoint, SM["hasEmail"], Email))

        if not pd.isna(pec):
            # PEC nodes are created with the "pec-" prefix, not "mail-"
            Pec = URIRef(ACCO_DATA["pec-" + genNameForID(pec)])
            g.add((OnlineContactPoint, SM["hasEmail"], Pec))

        if not pd.isna(website):
            WebSite = URIRef(ACCO_DATA["web-" + genNameForID(website)])
            g.add((OnlineContactPoint, SM["hasWebSite"], WebSite))

        g.add(
            (Accommodation, SM["hasOnlineContactPoint"], OnlineContactPoint))


In [237]:
# Save graph: hand the populated graph to saveGraph under the name
# "accommodationFacilities".
# NOTE(review): output location and serialization format are decided inside
# saveGraph (presumably from utils) - confirm there.
saveGraph(g, "accommodationFacilities")