Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/path improvements #183

Merged
merged 9 commits into from Jun 11, 2018
Expand Up @@ -18,7 +18,7 @@ case class EntityDatasource(entities: Traversable[Entity], entitySchema: EntityS
val matchingPathMap = matchingPaths.toMap
if(matchingPaths.size != requestSchema.typedPaths.size) {
val missingPath = requestSchema.typedPaths.find(tp => !matchingPathMap.contains(tp))
throw new ValidationException("Some requested paths do not exist in data source, e.g. " + missingPath.get.serializeSimplified + "!")
throw new ValidationException("Some requested paths do not exist in data source, e.g. " + missingPath.get.serialize() + "!")
} else {
val matchingPathMap = matchingPaths.toMap
val valuesIndexes = requestSchema.typedPaths.map ( tp => matchingPathMap(tp) )
Expand Down
Expand Up @@ -26,7 +26,7 @@ trait PathCoverageDataSource {
}
}
}
PathCoverage(sourcePath.serializeSimplified, covered, fullyCovered)
PathCoverage(sourcePath.serialize(), covered, fullyCovered)
}
PathCoverageResult(pathCoverages)
}
Expand Down
Expand Up @@ -47,7 +47,7 @@ case class EntitySchema(
}
}

lazy val propertyNames: IndexedSeq[String] = this.typedPaths.map(p => p.serializeSimplified(Prefixes.default))
lazy val propertyNames: IndexedSeq[String] = this.typedPaths.map(p => p.serialize()(Prefixes.default))

def child(path: Path): EntitySchema = copy(subPath = Path(subPath.operators ::: path.operators))

Expand Down
47 changes: 26 additions & 21 deletions silk-core/src/main/scala/org/silkframework/entity/Path.scala
Expand Up @@ -24,18 +24,33 @@ import scala.ref.WeakReference
*/
class Path private[entity](val operators: List[PathOperator]) extends Serializable {

private lazy val serializedFull = serialize()
/**
* The normalized serialization using the Silk RDF path language.
* Guarantees that the following equivalence holds: path1 == path2 <=> path1.normalizedSerialization == path2.normalizedSerialization
*/
lazy val normalizedSerialization: String = serializePath(Prefixes.empty, stripForwardSlash = true)

/**
* Serializes this path using the Silk RDF path language.
*
* @param stripForwardSlash If true and if the path begins with a forward operator, the first forward slash is stripped.
* @param prefixes The prefixes used to shorten the path. If no prefixes are provided and stripForwardSlash is true, the normalized serialization is returned.
*/
def serialize(implicit prefixes: Prefixes = Prefixes.empty): String = operators.map(_.serialize).mkString
def serialize(stripForwardSlash: Boolean = true)(implicit prefixes: Prefixes = Prefixes.empty): String = prefixes match {
// Empty prefixes with the leading slash stripped are exactly the arguments normalizedSerialization is built from, so reuse that lazy val.
case Prefixes.empty if stripForwardSlash => normalizedSerialization
// Any other combination is serialized on demand with the given prefixes and flag.
case _ => serializePath(prefixes, stripForwardSlash)
}

/**
* Serializes this path using the simplified notation.
* Internal path serialization function.
*/
def serializeSimplified(implicit prefixes: Prefixes = Prefixes.empty): String = {
operators.map(_.serialize).mkString.stripPrefix("/")
private def serializePath(prefixes: Prefixes, stripForwardSlash: Boolean): String = {
// Serialize every operator with the given prefixes and concatenate the results without a separator.
val pathStr = operators.map(_.serialize(prefixes)).mkString
if(stripForwardSlash) {
// Removes one leading "/" if present; the string is returned unchanged otherwise.
pathStr.stripPrefix("/")
} else {
pathStr
}
}

/**
Expand All @@ -48,12 +63,8 @@ class Path private[entity](val operators: List[PathOperator]) extends Serializab
}

/**
* extracts either the fragment if available or the last path segment
* if neither is available => None
* @return
* Returns the number of operators in this path.
*/
def getLocalName: Option[String] = propertyUri.flatMap(_.localName )

def size: Int = operators.size

/**
Expand All @@ -66,32 +77,26 @@ class Path private[entity](val operators: List[PathOperator]) extends Serializab
*/
def ++(path: Path): Path = Path(operators ::: path.operators)

override def toString: String = serializedFull
override def toString: String = normalizedSerialization

/**
* Tests if this path equals another path
*/
override def equals(other: Any): Boolean = {
//Because of the path cache it is sufficient to compare by reference
// other match {
// case otherPath: Path => this eq otherPath
// case _ => false
// }
// As paths are serializable now, comparing by reference no longer suffices
other match {
case p: Path => serializedFull == p.serializedFull
case p: Path => normalizedSerialization == p.normalizedSerialization
case _ => false
}

}

override def hashCode: Int = toString.hashCode
override def hashCode: Int = normalizedSerialization.hashCode

/** Returns a [[org.silkframework.entity.TypedPath]] from this path with string type values. */
def asStringTypedPath: TypedPath = TypedPath(this.operators, StringValueType, isAttribute = false)
}

object Path {

private var pathCache = Map[String, WeakReference[Path]]()

def empty = new Path(List.empty)
Expand All @@ -103,7 +108,7 @@ object Path {
def apply(operators: List[PathOperator]): Path = {
val path = new Path(operators)

val pathStr = path.serialize
val pathStr = path.serialize()

//Remove all garbage collected paths from the map and try to return a cached path
synchronized {
Expand Down
Expand Up @@ -11,7 +11,7 @@ import scala.xml.Node
* @param ops the path operators
* @param valueType the type that has to be considered during processing.
*/
case class TypedPath(private val ops: List[PathOperator], valueType: ValueType, isAttribute: Boolean) extends Path(ops) {
case class TypedPath(ops: List[PathOperator], valueType: ValueType, isAttribute: Boolean) extends Path(ops) {

def this(path: Path, valueType: ValueType, isAttribute: Boolean) = this(path.operators, valueType, isAttribute)

Expand Down Expand Up @@ -58,7 +58,7 @@ object TypedPath {
implicit val p = writeContext.prefixes
<TypedPath isAttribute={typedPath.isAttribute.toString} >
<Path>
{typedPath.serialize}
{typedPath.normalizedSerialization}
</Path>{XmlSerialization.toXml(typedPath.valueType)}
</TypedPath>
}
Expand Down
Expand Up @@ -143,12 +143,6 @@ object ValueType {
}.toMap
}

object Test {
def main(args: Array[String]): Unit = {
println(ValueType.valueTypeMapByStringId.keys.toSeq.sorted.mkString("\n"))
}
}

/**
* If this value type is set, then the values can be transformed to any valid value that can be inferred from the
* lexical form, e.g. "1" can be an Int, but also a String.
Expand Down
Expand Up @@ -15,7 +15,7 @@
package org.silkframework.entity.rdf

import org.silkframework.config.Prefixes
import org.silkframework.entity.{BackwardOperator, EntitySchema, ForwardOperator, Path}
import org.silkframework.entity.{EntitySchema, Path}
import org.silkframework.runtime.serialization.{ReadContext, WriteContext, XmlFormat}
import org.silkframework.util.Uri

Expand Down Expand Up @@ -103,7 +103,7 @@ object SparqlEntitySchema {
<Restrictions>{desc.restrictions.toSparql}</Restrictions>
<Paths> {
for (path <- desc.paths) yield {
<Path>{path.serialize(Prefixes.empty)}</Path>
<Path>{path.serialize()(Prefixes.empty)}</Path>
}
}
</Paths>
Expand Down
30 changes: 14 additions & 16 deletions silk-core/src/main/scala/org/silkframework/util/Uri.scala
Expand Up @@ -22,18 +22,14 @@ import scala.language.implicitConversions
import scala.util.{Success, Try}

/**
* Represents a URI.
*
* Three notations are supported for representing URIs
* 1. Prefixed notation: prefix:name
* 2. Full URI: <http://dbpedia.org/resource/Berlin>
* 3. Plain Identifiers: Name
* Represents a URI-like identifier.
*
* Note that this class does not enforce that a given URI is valid according to
* <a href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>.
* Call [[isValidUri]] to determine whether an instance represents a valid URI.
*
* @param uri The full (and normalized) representation of the URI-like identifier.
*/
//noinspection ScalaStyle
case class Uri(uri: String) {

/**
Expand Down Expand Up @@ -75,8 +71,14 @@ case class Uri(uri: String) {
case _ => false
}

/**
* Returns the full representation.
*/
override def toString: String = uri

/**
* Generates a Java URI instance if this is a valid URI and fails otherwise.
*/
def toURI: Try[URI] = Try{new URI(uri)}

/**
Expand All @@ -89,10 +91,6 @@ case class Uri(uri: String) {
case Success(u) if u.getPath != null && u.getPath.nonEmpty => Some(u.getPath.substring(u.getPath.lastIndexOf("/") + 1))
case _ => None
}

override def hashCode(): Int = uri.hashCode

override def equals(obj: scala.Any): Boolean = obj.isInstanceOf[Uri] && uri == obj.toString
}

object Uri {
Expand All @@ -114,12 +112,12 @@ object Uri {
}

/**
* Parses an URI in turtle-like notation.
* Parses a URI from a turtle-like notation.
*
* Examples:
* - dbpedia:Berlin
* - <http://dbpedia.org/resource/Berlin>
* - someName
* Three notations are supported for representing URIs
* 1. Prefixed notation: prefix:name
* 2. Full URI: <http://dbpedia.org/resource/Berlin>
* 3. Plain Identifiers: Name
*/
def parse(str: String, prefixes: Prefixes = Prefixes.empty): Uri = {
if (str.startsWith("<")) {
Expand Down

This file was deleted.