# ANNCSU

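Generate an RDF graph for ANNCSU (Anagrafe nazionale numeri civici e strade urbane), covering localities, census sections, street toponyms and civic addresses.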

### Imports

The `utils` module stores the constants for namespaces and vocabularies, together with common helper functions.

In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell is executed, so edits to local packages are picked up
# without restarting the kernel.

%load_ext autoreload
%autoreload 2


In [None]:
# Import utils
import os
import sys

# "__file__" is a quoted string on purpose (presumably because the real
# __file__ is not available in a notebook kernel): abspath() resolves the
# literal against the current working directory, and stepping up two levels
# from that path yields the parent of the working directory.
rootFolder = os.path.abspath(
    os.path.join(os.path.abspath("__file__"), os.pardir, os.pardir))

# Make that folder importable so that the utils import below can be resolved
sys.path.append(rootFolder)

from utils import *

In [None]:
# Import the other required libraries

import pandas as pd
from pykml import parser

from rdflib import Literal, RDF, URIRef

from rdflib.namespace import XSD, DC, RDF, RDFS, SKOS


In [None]:
# Load the project configuration from the relative path ../conf.ini
config = getConfig("../conf.ini")

# Base URL read from the [API] section; it is passed to getOpenData when the
# datasets are fetched
BASE_URL = config.get("API", "base_url")

## Graph creation

In [None]:
# Create the graph and register the prefix -> namespace bindings
g = createGraph()

for prefix, namespace in (("clv", CLV), ("anncsu", ANNCSU), ("cities", CITIES)):
    g.bind(prefix, namespace)

In [None]:
# Describe the ANNCSU dataset itself: the namespace root is typed as a SKOS
# concept scheme and receives its labels and creator.

ANNCSUData = URIRef(ANNCSU)

datasetStatements = (
    (RDF.type, SKOS.ConceptScheme),
    (RDFS.label, Literal("Anagrafe nazionale numeri civici e strade urbane", lang="it")),
    (RDFS.label, Literal("Civic Addressing and Street Naming", lang="en")),
    (DC.creator, URIRef("https://w3id.org/people/lucamartinelli")),
)

for predicate, value in datasetStatements:
    g.add((ANNCSUData, predicate, value))

In [None]:
# Fetch the source datasets

datasetID = config.get("ANNCSU", "dataset")

# ANNCSU streets, indexed by PROGR_NAZIONALE
streetsResource = config.get("ANNCSU", "streets")
anncsuAddresses = getOpenData(
    BASE_URL, datasetID, streetsResource).set_index("PROGR_NAZIONALE")

# ANNCSU civic numbers, indexed by PROGR_CIVICO
civicsResource = config.get("ANNCSU", "civics")
anncsuCivics = getOpenData(
    BASE_URL, datasetID, civicsResource).set_index("PROGR_CIVICO")

# ISTAT census sections, requested as raw data
censusResource = config.get("ANNCSU", "census_sections")
censusSectionsObj = getOpenData(
    BASE_URL, datasetID, censusResource, rawData=True)

# The census sections are a KML file, so they need to be parsed by pykml
censusSections = parser.parse(censusSectionsObj)


In [None]:
# Build a single-column dataframe with the distinct localities (Address Areas)
localityColumn = anncsuAddresses["LOCALITA'"].to_frame()
localitiesDF = localityColumn.drop_duplicates().reset_index(drop=True)


In [None]:
# Register every distinct locality as a CLV AddressArea
for _, localityRow in localitiesDF.iterrows():
    localityName = standardizeName(localityRow["LOCALITA'"])

    # The AddressArea URI has the form loc-<locality-name>
    AddressArea = URIRef(ANNCSU["loc-" + genNameForID(localityName)])

    # The same string literal serves as both title and name
    nameLiteral = Literal(localityName, datatype=XSD.string)

    # Node type, scheme membership and title
    g.add((AddressArea, RDF.type, CLV.AddressArea))
    g.add((AddressArea, SKOS.inScheme, URIRef(ANNCSU)))
    g.add((AddressArea, DC.title, nameLiteral))

    # Attributes
    g.add((AddressArea, L0["name"], nameLiteral))


In [None]:
# Add census sections to graph
for placemark in censusSections.getroot().Document.Folder.Placemark:
    # Placemark names have the form {city_code}{census_number}: everything
    # from the seventh character onwards is kept as the census number
    censID = int(str(placemark.name)[6:])
    censLabel = str(censID)

    # Coordinates of the outer boundary ring of the section's polygon
    polygon = placemark.Polygon.outerBoundaryIs.LinearRing.coordinates

    # URIs: cs-<id> for the census section, gsc-<id> for its geometry
    CensusSection = URIRef(ANNCSU["cs-" + censLabel])
    Geometry = URIRef(ANNCSU["gsc-" + censLabel])

    # Geometry node
    g.add((Geometry, RDF.type, CLV.Geometry))
    g.add((Geometry, SKOS.inScheme, URIRef(ANNCSU)))

    # Polygon geometry type; the coordinates are serialized with every space
    # replaced by a newline
    serializedPolygon = str(polygon).replace(" ", "\n")
    g.add((Geometry, CLV["hasGeometryType"], URIRef(CLV["Polygon"])))
    g.add((Geometry, CLV["serialization"],
           Literal(serializedPolygon, datatype=XSD.string)))

    # Both nodes carry the same Italian and English titles
    titles = (
        Literal("Sezione di censimento " + censLabel, lang="it"),
        Literal("Census section " + censLabel, lang="en"),
    )
    for node in (Geometry, CensusSection):
        for title in titles:
            g.add((node, DC.title, title))

    # Census section node
    g.add((CensusSection, RDF.type, CLV.CensusSection))
    g.add((CensusSection, SKOS.inScheme, URIRef(ANNCSU)))

    # Link the census section to its geometry
    g.add((CensusSection, CLV["hasGeometry"], Geometry))

In [None]:
# Add street toponyms to graph
for streetID, address in anncsuAddresses.iterrows():
    # DUG is Denominazione Urbanistica Generica (Via, Piazza, etc...)
    dugName = standardizeName(address["DUG"])
    # The street name
    streetName = standardizeName(address["DENOM_COMPLETA"])

    # Create street ref (st-streetID), where streetID is PROGR_NAZIONALE
    StreetToponym = URIRef(ANNCSU["st-" + str(streetID)])

    # Full name of the street (DUG + DENOM); the already standardized parts
    # are reused instead of standardizing both fields a second time
    fullName = "{} {}".format(dugName, streetName)

    # Create StreetToponym node
    g.add((StreetToponym, RDF.type, CLV.StreetToponym))
    g.add((StreetToponym, SKOS.inScheme, URIRef(ANNCSU)))
    g.add((StreetToponym, DC.title, Literal(fullName, datatype=XSD.string)))

    # Add attributes
    g.add((StreetToponym, CLV["toponymQualifier"],
           Literal(dugName, datatype=XSD.string)))
    g.add((StreetToponym, CLV["officialStreetName"],
           Literal(streetName, datatype=XSD.string)))

In [None]:
# Add addresses to graph

# These configuration values are identical for every civic number, so they
# are read once before the loop instead of at every iteration.
postCode = config.get("ANNCSU", "postCode")
City = URIRef(CITIES[config.get("ANNCSU", "ontopia_ref")])

for civicID, civic in anncsuCivics.iterrows():
    # Civic attributes
    civicNumber = civic["CIVICO"]
    civicExponent = civic["ESPONENTE"]
    civicPeculiarity = civic["SPECIFICITA"]
    civicMeter = civic["SISTEMA_METRICO"]
    streetID = civic["PROGR_NAZIONALE"]

    # Civic full name (eg: 10/A). The optional parts are formatted rather
    # than concatenated, so a non-string value cannot raise a TypeError.
    civicFullName = "{}{}{}{}".format(
        civicNumber if not pd.isna(civicNumber) else "",
        "/{}".format(civicExponent) if not pd.isna(civicExponent) else "",
        civicMeter if not pd.isna(civicMeter) else "",
        " {}".format(civicPeculiarity) if not pd.isna(civicPeculiarity) else "",
    )

    # Get address information from streetID
    address = anncsuAddresses.loc[streetID]

    # Get census ref (cs-id); a civic number without a census section gets no
    # census link instead of failing on int(NaN)
    rawCensusSection = civic["SEZIONE_DI_CENSIMENTO"]
    CensusSection = None
    if not pd.isna(rawCensusSection):
        censID = int(rawCensusSection)
        CensusSection = URIRef(ANNCSU["cs-" + str(censID)])

    # Get Street Toponym ref
    StreetToponym = URIRef(ANNCSU["st-" + str(streetID)])

    # Create civic number ref (cn-civicID)
    CivicNumbering = URIRef(ANNCSU["cn-" + str(civicID)])

    # Get address coordinates
    longitude = civic["COORDINATA_X"]
    latitude = civic["COORDINATA_Y"]
    altitude = civic["COORDINATA_Z"]

    # Get locality ref from address
    localityName = standardizeName(address["LOCALITA'"])
    AddressArea = URIRef(ANNCSU["loc-" + genNameForID(localityName)])

    # Create full name (dug street name, civic - postCode, locality)
    fullName = "{} {}, {} - {}, {}".format(
        standardizeName(address["DUG"]),
        standardizeName(address["DENOM_COMPLETA"]),
        civicFullName, postCode, localityName
    )

    # Create CivicNumbering node
    g.add((CivicNumbering, RDF.type, CLV.CivicNumbering))
    g.add((CivicNumbering, SKOS.inScheme, URIRef(ANNCSU)))
    g.add((CivicNumbering, DC.title, Literal(civicFullName, datatype=XSD.string)))

    # Add attributes, each one only when the source value is present
    if not pd.isna(civicNumber):
        g.add((CivicNumbering, CLV["streetNumber"],
               Literal(civicNumber, datatype=XSD.int)))

    if not pd.isna(civicExponent):
        g.add((CivicNumbering, CLV["exponent"],
               Literal(civicExponent, datatype=XSD.string)))

    if not pd.isna(civicPeculiarity):
        # The peculiarity value itself is stored (previously the exponent
        # was written here by mistake)
        g.add((CivicNumbering, CLV["peculiarity"],
               Literal(civicPeculiarity, datatype=XSD.string)))

    if not pd.isna(civicMeter):
        # The metric value has its own CLV property; writing it as
        # "exponent" would collide with the real exponent above
        g.add((CivicNumbering, CLV["metric"],
               Literal(civicMeter, datatype=XSD.int)))

    # Create final Address ref (ad-streetID-civicID)
    Address = URIRef(ANNCSU["ad-" + str(streetID) + "-" + str(civicID)])

    # Create Address node
    g.add((Address, RDF.type, CLV.Address))
    g.add((Address, SKOS.inScheme, URIRef(ANNCSU)))
    g.add((Address, DC.title, Literal(fullName, datatype=XSD.string)))

    # Set Address attributes
    g.add((Address, CLV["hasStreetToponym"], StreetToponym))
    g.add((Address, CLV["hasNumber"], CivicNumbering))
    if CensusSection is not None:
        g.add((Address, CLV["hasCensusSection"], CensusSection))
    g.add((Address, CLV["hasAddressArea"], AddressArea))

    # Create geometry for Address with geographic positioning; it is only
    # created when both longitude and latitude are available
    if not pd.isna(longitude) and not pd.isna(latitude):
        Geometry = URIRef(ANNCSU["gcn-" + str(civicID)])

        g.add((Geometry, RDF.type, CLV.Geometry))
        g.add((Geometry, SKOS.inScheme, URIRef(ANNCSU)))
        g.add((Geometry, DC.title, Literal(fullName, datatype=XSD.string)))

        # Set the point
        g.add((Geometry, CLV["hasGeometryType"], URIRef(CLV["Point"])))
        g.add((Geometry, CLV["lat"], Literal(latitude, datatype=XSD.double)))
        g.add((Geometry, CLV["long"], Literal(longitude, datatype=XSD.double)))

        # Altitude is optional
        if not pd.isna(altitude):
            g.add((Geometry, CLV["alt"], Literal(
                altitude, datatype=XSD.double)))

        g.add((Address, CLV["hasGeometry"], Geometry))

    # Set Address final attributes
    g.add((Address, CLV["postCode"], Literal(postCode, datatype=XSD.int)))
    g.add((Address, CLV["hasCity"], City))


In [None]:
# Save the populated graph through the saveGraph helper; "anncsu" is
# presumably used as the output name (confirm against the helper in utils)
saveGraph(g, "anncsu")