+ *
+ * @param file The file to create the mapping from
+ * @return a map containing the key-value pairs from the file
+ */
+    private Map<String, String> createMappingFromFile(String file) {
+        File mappingFile = new File(file);
+        Map<String, String> newMapping = new HashMap<>();
+        // try-with-resources closes the reader on every exit path: normal return,
+        // the invalid-line exception, and an I/O failure in the middle of the file.
+        try (BufferedReader reader = new BufferedReader(new FileReader(mappingFile))) {
+            String line;
+            // readLine() returns null at end of input. ready() only reports whether a read
+            // would block, so it is not a reliable end-of-file test.
+            while ((line = reader.readLine()) != null) {
+                line = line.trim();
+                // A valid line holds exactly one key and one value separated by a single space.
+                String[] keyValue = line.split(" ");
+                if (keyValue.length != 2) {
+                    throw new TurRuntimeException("Invalid line: " + line);
+                }
+
+                // The first occurrence of a key wins; later duplicates are reported and ignored.
+                if (newMapping.putIfAbsent(keyValue[0], keyValue[1]) != null) {
+                    log.warn("Duplicated key: {}", keyValue[0]);
+                }
+            }
+            return newMapping;
+        } catch (FileNotFoundException e) {
+            log.error("Mapping file not found: {}", file, e);
+            throw new IllegalArgumentException("The specified file does not exist: " + file, e);
+        } catch (IOException e) {
+            log.error("Error reading file: {}", file, e);
+            throw new UncheckedIOException("Error occurred while reading the file: " + file, e);
+        }
+    }
+
+ /**
+ * Looks up the mapping stored under the given file name.
+ *
+ * @param file the file name used as the lookup key in {@code mappings}
+ * @return the result of {@code mappings.get(file)}; presumably {@code null} when nothing
+ *         has been stored for that file - TODO confirm against the declaration of {@code mappings}
+ */
+ public Map getMapping(String file) {
+ return mappings.get(file);
+ }
+
+    /**
+     * Transforms the given key using the mapping obtained from the specified file.
+     *
+     * When the mapping contains the key, the mapped value is returned directly. In every other
+     * case, including when no mapping is available for the file, the result is whatever
+     * {@code getKey} produces for a missing key.
+     *
+     * @param key the key to be transformed
+     * @param file the file from which the mapping is obtained
+     * @return the mapped value, or the not-found result when the key has no mapping
+     */
+    public String transform(String key, String file) {
+        Map<String, String> mapping = getMapping(file);
+        if (mapping == null) {
+            // Without this guard a file that was never loaded fails with a bare NullPointerException.
+            log.warn("No mapping loaded for file: {}", file);
+        } else if (mapping.containsKey(key)) {
+            return mapping.get(key);
+        }
+        // Resolve the fallback lazily so that it is neither computed nor logged for keys that exist.
+        return getKey(key, file);
+    }
+
+    /**
+     * Resolves the value to use for a key that has no entry in the mapping, according to the
+     * configured not-found action.
+     *
+     * @param key  the key that was looked up
+     * @param file the mapping file the key was looked up in; used for logging only
+     * @return the key itself for {@code KEEP_KEY}, the configured default text for
+     *         {@code DEFAULT_VALUE}, and {@code null} otherwise
+     * @throws NullPointerException if no not-found action has been configured
+     */
+    private String getKey(String key, String file) {
+        TurSprinklrNotFound action = Objects.requireNonNull(notFoundAction);
+        if (action == TurSprinklrNotFound.KEEP_KEY) {
+            log.info("Key not found: {} in file: {}, keeping the key", key, file);
+            return key;
+        }
+        if (action == TurSprinklrNotFound.DEFAULT_VALUE) {
+            // The default text is whatever configure(action, defaultText) stored; it may be null.
+            log.info("Key not found: {} in file: {}, using the default text", key, file);
+            return defaultNotFoundText;
+        }
+        log.info("Key not found: {} in file: {}", key, file);
+        return null;
+    }
+
+    /**
+     * Stores the action to apply when a key has no entry in the mapping.
+     * The default text is left as it is.
+     *
+     * @param action the not-found action to store
+     */
+    public void configure(TurSprinklrNotFound action) {
+        this.notFoundAction = action;
+    }
+
+    /**
+     * Stores both the not-found action and the text associated with missing keys.
+     *
+     * @param action the action to be taken when a key is not found
+     * @param defaultText the default text to be used when a key is not found
+     */
+    public void configure(TurSprinklrNotFound action, String defaultText) {
+        this.defaultNotFoundText = defaultText;
+        this.notFoundAction = action;
+    }
+
+ /**
+ * Returns the fixed display name of this plugin.
+ *
+ * @return the constant name string
+ */
+ @Override
+ public String getName() {
+ return "Key Value Transformer Plugin";
+ }
+
+ /**
+ * Returns the fixed one-line description of this plugin.
+ *
+ * @return the constant description string
+ */
+ @Override
+ public String getDescription() {
+ return "Transforms a value into another value based on a key-value mapping file";
+ }
+}
\ No newline at end of file
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/plugins/TurSprinklrNotFound.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/plugins/TurSprinklrNotFound.java
new file mode 100644
index 00000000000..d7ed3b4dba3
--- /dev/null
+++ b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/plugins/TurSprinklrNotFound.java
@@ -0,0 +1,32 @@
+/*
+ *
+ * Copyright (C) 2016-2024 the original author or authors.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+package com.viglet.turing.connector.sprinklr.commons.plugins;
+/**
+ * Enum representing the actions to take when a key is not found.
+ * <ul>
+ * <li>DEFAULT_VALUE: Returns a default value</li>
+ * <li>KEEP_KEY: Returns the key itself</li>
+ * <li>NULL: Returns null</li>
+ * </ul>
+ */
+public enum TurSprinklrNotFound {
+ DEFAULT_VALUE,
+ KEEP_KEY,
+ NULL
+}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/plugins/TurSprinklrPlugin.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/plugins/TurSprinklrPlugin.java
new file mode 100644
index 00000000000..a66b02d69f0
--- /dev/null
+++ b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/plugins/TurSprinklrPlugin.java
@@ -0,0 +1,36 @@
+/*
+ *
+ * Copyright (C) 2016-2024 the original author or authors.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+package com.viglet.turing.connector.sprinklr.commons.plugins;
+
+
+/**
+ * Contract for Sprinklr connector plugins: every plugin exposes a name and a description.
+ */
+public interface TurSprinklrPlugin {
+ /**
+ * Retrieves the name associated with this plugin.
+ *
+ * @return the name of the plugin
+ */
+ String getName();
+ /**
+ * Retrieves the description of the plugin.
+ *
+ * @return a String representing the description of the plugin.
+ */
+ String getDescription();
+
+}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/plugins/TurSprinklrPluginContext.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/plugins/TurSprinklrPluginContext.java
new file mode 100644
index 00000000000..7ceaa9814ca
--- /dev/null
+++ b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/plugins/TurSprinklrPluginContext.java
@@ -0,0 +1,50 @@
+/*
+ *
+ * Copyright (C) 2016-2024 the original author or authors.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+package com.viglet.turing.connector.sprinklr.commons.plugins;
+
+import com.viglet.turing.commons.exception.TurRuntimeException;
+import lombok.ToString;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Holds the initialized {@link TurSprinklrPlugin} instances and lets callers look one up by type.
+ */
+@ToString
+public class TurSprinklrPluginContext {
+    List<TurSprinklrPlugin> initializedPlugins = new ArrayList<>();
+
+    /**
+     * Creates a context backed by the given list. The list is stored as-is, not copied.
+     *
+     * @param initializedPlugins the plugins this context starts with
+     */
+    public TurSprinklrPluginContext(List<TurSprinklrPlugin> initializedPlugins){
+        this.initializedPlugins = initializedPlugins;
+    }
+
+    /**
+     * Creates a context with no plugins registered.
+     */
+    public TurSprinklrPluginContext(){
+    }
+
+    /**
+     * Appends a plugin to this context.
+     *
+     * @param plugin the plugin to register
+     */
+    public void addPlugin(TurSprinklrPlugin plugin){
+        initializedPlugins.add(plugin);
+    }
+
+    /**
+     * Returns the first registered plugin that is an instance of the requested class.
+     *
+     * @param pluginClass the type of plugin to look for
+     * @param <T> the requested plugin type
+     * @return the first matching plugin, cast to {@code T}
+     * @throws TurRuntimeException if no registered plugin is an instance of {@code pluginClass}
+     */
+    public <T> T getPlugin(Class<T> pluginClass){
+        for (TurSprinklrPlugin plugin : initializedPlugins){
+            if (pluginClass.isInstance(plugin)){
+                return pluginClass.cast(plugin);
+            }
+        }
+        // The separator keeps the class name readable instead of fused onto the message.
+        throw new TurRuntimeException("Plugin not found: " + pluginClass.getName());
+    }
+}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/tools/KeyValueTransformerTool.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/tools/KeyValueTransformerTool.java
new file mode 100644
index 00000000000..2c8adf91bc7
--- /dev/null
+++ b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/commons/tools/KeyValueTransformerTool.java
@@ -0,0 +1,33 @@
+package com.viglet.turing.connector.sprinklr.commons.tools;
+
+import com.viglet.turing.connector.sprinklr.commons.plugins.TurSprinklrKeyValueTransformer;
+import com.viglet.turing.connector.sprinklr.commons.plugins.TurSprinklrPluginContext;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Convenience wrapper that exposes the key-value transformer plugin as a single lookup call.
+ */
+@Slf4j
+public class KeyValueTransformerTool {
+    private final TurSprinklrPluginContext pluginContext;
+
+    /**
+     * @param pluginContext the context the transformer plugin is fetched from
+     */
+    public KeyValueTransformerTool(TurSprinklrPluginContext pluginContext) {
+        this.pluginContext = pluginContext;
+    }
+
+    /**
+     * Looks up the value for a key through the {@code TurSprinklrKeyValueTransformer} plugin,
+     * asking the plugin to load the mapping for the given file first.
+     *
+     * @param key The key that will be transformed.
+     * @param file The name of the file containing the mapping.
+     * @return The value produced by the transformer, or null if the transformer is unavailable.
+     */
+    public String transform(String key, String file) {
+        TurSprinklrKeyValueTransformer keyValueTransformer =
+                pluginContext.getPlugin(TurSprinklrKeyValueTransformer.class);
+        if (keyValueTransformer != null) {
+            // loadMapping runs on every call; presumably it is cheap or cached - TODO confirm.
+            keyValueTransformer.loadMapping(file);
+            return keyValueTransformer.transform(key, file);
+        }
+        // NOTE(review): getPlugin appears to throw rather than return null, so this path may be unreachable.
+        log.error("KeyValueTransformer not found in PluginContext.");
+        return null;
+    }
+
+}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtCategory.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtCategory.java
deleted file mode 100644
index f1f916fabdb..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtCategory.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.client.sn.TurMultiValue;
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-import com.viglet.turing.sprinklr.client.service.folder.TurSprinklrFolderService;
-import com.viglet.turing.sprinklr.client.service.token.TurSprinklrAccessToken;
-import lombok.extern.slf4j.Slf4j;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Optional;
-
-@Slf4j
-public class TurSprinklrExtCategory implements TurSprinklrExtInterface {
-
- @Override
- public Optional consume(TurSprinklrContext context) {
- List categories = context.getSearchResult().getMappingDetails().getFirst().getMappedCategoryIds();
- TurSprinklrAccessToken turSprinklrAccessToken = context.getAccessToken();
- TurSprinklrFolderService turSprinklrFolderService = new TurSprinklrFolderService(turSprinklrAccessToken);
- List categoryNames = new ArrayList<>();
- if (!categories.isEmpty() && turSprinklrAccessToken != null) {
- categories.forEach(categoryId -> turSprinklrFolderService.getByCategoryId(categoryId)
- .ifPresentOrElse(c -> categoryNames.add(c.getName()),
- () -> categoryNames.add(categoryId)));
- return Optional.of(new TurMultiValue(categoryNames));
- } else {
- return Optional.empty();
- }
- }
-}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtDescription.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtDescription.java
deleted file mode 100644
index 372808069c0..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtDescription.java
+++ /dev/null
@@ -1,19 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.commons.utils.TurCommonsUtils;
-import com.viglet.turing.client.sn.TurMultiValue;
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-
-import java.util.Optional;
-
-public class TurSprinklrExtDescription implements TurSprinklrExtInterface {
-
- @Override
- public Optional consume(TurSprinklrContext context) {
- return Optional.of(TurMultiValue.singleItem(
- TurCommonsUtils.html2Description(
- context.getSearchResult().getContent().getMarkUpText(),
- 300)));
-
- }
-}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtId.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtId.java
deleted file mode 100644
index 049829f1972..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtId.java
+++ /dev/null
@@ -1,14 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.client.sn.TurMultiValue;
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-
-import java.util.Optional;
-
-public class TurSprinklrExtId implements TurSprinklrExtInterface {
-
- @Override
- public Optional consume(TurSprinklrContext context) {
- return Optional.of(TurMultiValue.singleItem("sprinklr_" + context.getSearchResult().getId()));
- }
-}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtInterface.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtInterface.java
deleted file mode 100644
index 28fe0d21105..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtInterface.java
+++ /dev/null
@@ -1,10 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.client.sn.TurMultiValue;
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-
-import java.util.Optional;
-
-public interface TurSprinklrExtInterface {
- Optional consume(TurSprinklrContext context);
-}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtLocale.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtLocale.java
deleted file mode 100644
index 4de7c66da89..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtLocale.java
+++ /dev/null
@@ -1,13 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-
-import java.util.Locale;
-
-public class TurSprinklrExtLocale implements TurSprinklrExtLocaleInterface {
-
- @Override
- public Locale consume(TurSprinklrContext context) {
- return context.getSearchResult().getLocale();
- }
-}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtLocaleInterface.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtLocaleInterface.java
deleted file mode 100644
index 4940b926345..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtLocaleInterface.java
+++ /dev/null
@@ -1,9 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-
-import java.util.Locale;
-
-public interface TurSprinklrExtLocaleInterface {
- Locale consume(TurSprinklrContext context);
-}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtModificationDate.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtModificationDate.java
deleted file mode 100644
index 2a95150d8a4..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtModificationDate.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.client.sn.TurMultiValue;
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-import lombok.extern.slf4j.Slf4j;
-
-import java.util.Optional;
-
-@Slf4j
-public class TurSprinklrExtModificationDate implements TurSprinklrExtInterface {
- @Override
- public Optional consume(TurSprinklrContext context) {
- return Optional.of(TurMultiValue.singleItem(context.getSearchResult().getModifiedTime()));
- }
-}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtPublicationDate.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtPublicationDate.java
deleted file mode 100644
index b49210e83cc..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtPublicationDate.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.client.sn.TurMultiValue;
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-import lombok.extern.slf4j.Slf4j;
-
-import java.util.Optional;
-
-@Slf4j
-public class TurSprinklrExtPublicationDate implements TurSprinklrExtInterface {
- @Override
- public Optional consume(TurSprinklrContext context) {
- return Optional.of(TurMultiValue.singleItem(context.getSearchResult().getPublishingDate()));
- }
-}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtTag.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtTag.java
deleted file mode 100644
index a0600392821..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtTag.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.client.sn.TurMultiValue;
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-
-import java.util.List;
-import java.util.Optional;
-
-public class TurSprinklrExtTag implements TurSprinklrExtInterface {
-
- @Override
- public Optional consume(TurSprinklrContext context) {
- List tags = context.getSearchResult().getTags();
- if (!tags.isEmpty()) {
- return Optional.of(new TurMultiValue(tags));
- }
- else {
- return Optional.empty();
- }
- }
-}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtText.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtText.java
deleted file mode 100644
index af9fc974c0e..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtText.java
+++ /dev/null
@@ -1,16 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.commons.utils.TurCommonsUtils;
-import com.viglet.turing.client.sn.TurMultiValue;
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-
-import java.util.Optional;
-
-public class TurSprinklrExtText implements TurSprinklrExtInterface {
-
- @Override
- public Optional consume(TurSprinklrContext context) {
- return Optional.of( TurMultiValue.singleItem(
- TurCommonsUtils.html2Text(context.getSearchResult().getContent().getMarkUpText())));
- }
-}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtTitle.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtTitle.java
deleted file mode 100644
index 538d440b921..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtTitle.java
+++ /dev/null
@@ -1,14 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.client.sn.TurMultiValue;
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-
-import java.util.Optional;
-
-public class TurSprinklrExtTitle implements TurSprinklrExtInterface {
-
- @Override
- public Optional consume(TurSprinklrContext context) {
- return Optional.of(TurMultiValue.singleItem(context.getSearchResult().getContent().getTitle()));
- }
-}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtType.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtType.java
deleted file mode 100644
index b5e1cf3ab48..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtType.java
+++ /dev/null
@@ -1,16 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.client.sn.TurMultiValue;
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-
-import java.util.Optional;
-
-public class TurSprinklrExtType implements TurSprinklrExtInterface {
-
- @Override
- public Optional consume(TurSprinklrContext context) {
- return Optional.of(TurMultiValue.singleItem("%s:%s"
- .formatted(context.getSearchResult().getContent().getContentType(),
- context.getSearchResult().getContent().getContentSubType())));
- }
-}
diff --git a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtUrl.java b/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtUrl.java
deleted file mode 100644
index 362e69a1ead..00000000000
--- a/turing-sprinklr/sprinklr-commons/src/main/java/com/viglet/turing/connector/sprinklr/ext/TurSprinklrExtUrl.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package com.viglet.turing.connector.sprinklr.ext;
-
-import com.viglet.turing.client.sn.TurMultiValue;
-import com.viglet.turing.connector.sprinklr.TurSprinklrContext;
-import com.viglet.turing.sprinklr.client.service.kb.response.TurSprinklrSearchResult;
-
-import java.util.Optional;
-
-public class TurSprinklrExtUrl implements TurSprinklrExtInterface {
-
- @Override
- public Optional consume(TurSprinklrContext context) {
- return Optional.of(Optional.ofNullable(context.getSearchResult())
- .map(TurSprinklrSearchResult::getMappingDetails)
- .filter(turSprinklrMappings -> !turSprinklrMappings.isEmpty())
- .map(turSprinklrMappings ->
- TurMultiValue.singleItem(turSprinklrMappings.getFirst().getCommunityPermalink()))
- .orElse(TurMultiValue.empty()));
-
- }
-}
diff --git a/turing-sprinklr/sprinklr-java-sdk/pom.xml b/turing-sprinklr/sprinklr-java-sdk/pom.xml
index 967bf8e914e..fb9767a7774 100644
--- a/turing-sprinklr/sprinklr-java-sdk/pom.xml
+++ b/turing-sprinklr/sprinklr-java-sdk/pom.xml
@@ -45,6 +45,10 @@
guava33.3.1-jre
+
+ com.viglet.turing
+ turing-commons
+
diff --git a/turing-sprinklr/sprinklr-java-sdk/src/main/java/com/viglet/turing/sprinklr/client/service/TurSprinklrService.java b/turing-sprinklr/sprinklr-java-sdk/src/main/java/com/viglet/turing/sprinklr/client/service/TurSprinklrService.java
index 971fe696ba8..4bfecbfa50f 100644
--- a/turing-sprinklr/sprinklr-java-sdk/src/main/java/com/viglet/turing/sprinklr/client/service/TurSprinklrService.java
+++ b/turing-sprinklr/sprinklr-java-sdk/src/main/java/com/viglet/turing/sprinklr/client/service/TurSprinklrService.java
@@ -18,9 +18,12 @@
package com.viglet.turing.sprinklr.client.service;
+import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.viglet.turing.sprinklr.client.service.token.TurSprinklrAccessToken;
+import com.viglet.turing.sprinklr.client.service.token.TurSprinklrSecretKey;
+import com.viglet.turing.sprinklr.client.service.token.TurSprinklrTokenService;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
@@ -50,7 +53,8 @@ public static R executeService(Class clazz, TurSprinklrAccessToken turSpr
RequestBody requestBody) {
log.info("Post Request: {}", endpoint);
// Creates a client to send a request
- return getResponse(clazz, getRequest(turSprinklrAccessToken, endpoint, requestBody));
+ return getResponse(clazz, getRequest(turSprinklrAccessToken, endpoint, requestBody), turSprinklrAccessToken,
+ true);
}
private static Request getRequest(TurSprinklrAccessToken turSprinklrAccessToken, String endpoint,
@@ -67,13 +71,33 @@ private static Request getRequest(TurSprinklrAccessToken turSprinklrAccessToken,
return request;
}
- private static R getResponse(Class clazz, Request request) {
+ private static R getResponse(Class clazz, Request request, TurSprinklrAccessToken turSprinklrAccessToken,
+ boolean firstTime) {
+ String responseBody = null;
try (Response response = new OkHttpClient().newBuilder().build().newCall(request).execute()) {
- if (response.body() != null) {
- return new ObjectMapper()
- .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
- .readValue(response.body().string(), clazz);
+ log.info("HTTP Status Code: {}", response.code());
+ if (response.code() == 200) {
+ responseBody = response.peekBody(500L).string();
+ if (response.body() != null) {
+ return new ObjectMapper()
+ .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
+ .readValue(response.body().string(), clazz);
+ }
}
+ else if (firstTime) {
+ TurSprinklrTokenService turSprinklrTokenService = new TurSprinklrTokenService(
+ TurSprinklrSecretKey.builder()
+ .apiKey(turSprinklrAccessToken.getApiKey())
+ .secretKey(turSprinklrAccessToken.getSecretKey())
+ .environment(turSprinklrAccessToken.getEnvironment())
+ .build()
+ );
+ getResponse(clazz, request, turSprinklrTokenService.renewAccessToken(), false);
+ }
+
+ } catch (JsonParseException e) {
+ log.error("Error parsing the response", e);
+ log.error("The body of the response is: {}", responseBody);
} catch (IOException e) {
log.error(e.getMessage(), e);
}
diff --git a/turing-sprinklr/sprinklr-java-sdk/src/main/java/com/viglet/turing/sprinklr/client/service/token/TurSprinklrAccessToken.java b/turing-sprinklr/sprinklr-java-sdk/src/main/java/com/viglet/turing/sprinklr/client/service/token/TurSprinklrAccessToken.java
index 25392b95194..fbd5ec16d28 100644
--- a/turing-sprinklr/sprinklr-java-sdk/src/main/java/com/viglet/turing/sprinklr/client/service/token/TurSprinklrAccessToken.java
+++ b/turing-sprinklr/sprinklr-java-sdk/src/main/java/com/viglet/turing/sprinklr/client/service/token/TurSprinklrAccessToken.java
@@ -42,4 +42,5 @@ public class TurSprinklrAccessToken implements Serializable {
private int expiresIn;
private Date expirationDate;
private String environment;
+ private String secretKey;
}
diff --git a/turing-sprinklr/sprinklr-java-sdk/src/main/java/com/viglet/turing/sprinklr/client/service/token/TurSprinklrTokenService.java b/turing-sprinklr/sprinklr-java-sdk/src/main/java/com/viglet/turing/sprinklr/client/service/token/TurSprinklrTokenService.java
index 66716fefeb8..793de180339 100644
--- a/turing-sprinklr/sprinklr-java-sdk/src/main/java/com/viglet/turing/sprinklr/client/service/token/TurSprinklrTokenService.java
+++ b/turing-sprinklr/sprinklr-java-sdk/src/main/java/com/viglet/turing/sprinklr/client/service/token/TurSprinklrTokenService.java
@@ -57,6 +57,10 @@ public TurSprinklrAccessToken getAccessToken() {
}
}
+ /**
+ * Requests an access token by delegating to {@code serializeAccessToken} with this service's
+ * secret key, which in turn calls {@code generateAccessToken}.
+ * NOTE(review): presumably this bypasses any token reuse done by {@code getAccessToken} - confirm.
+ *
+ * @return the token returned by {@code serializeAccessToken}
+ */
+ public TurSprinklrAccessToken renewAccessToken() {
+ return serializeAccessToken(turSprinklrSecretKey);
+ }
+
private TurSprinklrAccessToken serializeAccessToken(TurSprinklrSecretKey turSprinklrSecretKey) {
TurSprinklrAccessToken turSprinklrAccessToken = generateAccessToken(turSprinklrSecretKey);
if (turSprinklrAccessToken != null) {
@@ -100,6 +104,7 @@ private TurSprinklrAccessToken generateAccessToken(TurSprinklrSecretKey turSprin
turSprinklrAccessToken.setApiKey(turSprinklrSecretKey.getApiKey());
turSprinklrAccessToken.setExpirationDate(getExprirationDate(turSprinklrAccessToken));
turSprinklrAccessToken.setEnvironment(turSprinklrSecretKey.getEnvironment());
+ turSprinklrAccessToken.setSecretKey(turSprinklrSecretKey.getSecretKey());
return turSprinklrAccessToken;
} catch (IOException e) {
log.error(e.getMessage(), e);
diff --git a/turing-ui/projects/console/src/sn/component/site/sn-site-ui-page.component.html b/turing-ui/projects/console/src/sn/component/site/sn-site-ui-page.component.html
index c7994750c95..3877352c9c4 100644
--- a/turing-ui/projects/console/src/sn/component/site/sn-site-ui-page.component.html
+++ b/turing-ui/projects/console/src/sn/component/site/sn-site-ui-page.component.html
@@ -20,6 +20,17 @@
+
+
+
+ Exact match when using double quotes. Will use default field to execute the query.
+
+
Wildcard
@@ -226,6 +237,30 @@
What is the fields that will be used on search results display.
+
+
+
+
+
+
+
+ Exact match field when no field was specified in the query and it uses double quote.
+
+
+
+
+
+
+
+
+
+
+ Default field when no field was specified in the query.
+
+
+
diff --git a/turing-ui/projects/console/src/sn/model/sn-site.model.ts b/turing-ui/projects/console/src/sn/model/sn-site.model.ts
index f6c88a7f455..0a22dea9feb 100644
--- a/turing-ui/projects/console/src/sn/model/sn-site.model.ts
+++ b/turing-ui/projects/console/src/sn/model/sn-site.model.ts
@@ -7,6 +7,8 @@ export interface TurSNSite {
id: string;
name: string;
description: string;
+ exactMatchField: string;
+ defaultField: string;
defaultTitleField: string;
defaultDescriptionField: string;
defaultTextField: string;
@@ -32,4 +34,5 @@ export interface TurSNSite {
facetSort: TurSNSiteFacetSortEnum;
wildcardNoResults: number;
wildcardAlways: number;
+ exactMatch: number;
}
diff --git a/turing-web-crawler/.gitignore b/turing-web-crawler/.gitignore
index b83d22266ac..047f8eeee6d 100644
--- a/turing-web-crawler/.gitignore
+++ b/turing-web-crawler/.gitignore
@@ -1 +1,2 @@
/target/
+/wc-plugin/target/
diff --git a/turing-web-crawler/pom.xml b/turing-web-crawler/pom.xml
index 83013f608f6..7795c95a672 100644
--- a/turing-web-crawler/pom.xml
+++ b/turing-web-crawler/pom.xml
@@ -40,6 +40,7 @@
wc-commonswc-appwc-sample
+ wc-plugin
diff --git a/turing-web-crawler/wc-app/pom.xml b/turing-web-crawler/wc-app/pom.xml
index 63e28338a0c..0cadb444d30 100644
--- a/turing-web-crawler/wc-app/pom.xml
+++ b/turing-web-crawler/wc-app/pom.xml
@@ -65,7 +65,7 @@
io.swagger.core.v3swagger-annotations-jakarta
- 2.2.26
+ 2.2.27compile
diff --git a/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/TurWCProcess.java b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/TurWCProcess.java
index 7b259746227..94d8888ba81 100644
--- a/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/TurWCProcess.java
+++ b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/TurWCProcess.java
@@ -15,10 +15,7 @@
import com.viglet.turing.connector.webcrawler.commons.ext.TurWCExtLocaleInterface;
import com.viglet.turing.connector.webcrawler.persistence.model.TurWCAttributeMapping;
import com.viglet.turing.connector.webcrawler.persistence.model.TurWCSource;
-import com.viglet.turing.connector.webcrawler.persistence.repository.TurWCAllowUrlRepository;
-import com.viglet.turing.connector.webcrawler.persistence.repository.TurWCAttributeMappingRepository;
-import com.viglet.turing.connector.webcrawler.persistence.repository.TurWCFileExtensionRepository;
-import com.viglet.turing.connector.webcrawler.persistence.repository.TurWCNotAllowUrlRepository;
+import com.viglet.turing.connector.webcrawler.persistence.repository.*;
import generator.RandomUserAgentGenerator;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
@@ -45,14 +42,21 @@ public class TurWCProcess {
public static final String JAVASCRIPT = "javascript:";
public static final String A_HREF = "a[href]";
public static final String ABS_HREF = "abs:href";
+ public static final String WILD_CARD = "*";
private final String turingUrl;
private final String turingApiKey;
+ private final List startingPoints = new ArrayList<>();
+ private final List allowUrls = new ArrayList<>();
+ private final List allowStartsWithUrls = new ArrayList<>();
private final List notAllowUrls = new ArrayList<>();
+ private final List notAllowStartsWithUrls = new ArrayList<>();
private final List notAllowExtensions = new ArrayList<>();
+ private final TurWCStartingPointRepository turWCStartingPointsRepository;
private TurSNJobItems turSNJobItems = new TurSNJobItems();
private final String userAgent = RandomUserAgentGenerator.getNextNonMobile();
private final Set visitedLinks = new HashSet<>();
- private final Queue remainingLinks = new LinkedList<>();
+ private final Set indexedLinks = new HashSet<>();
+ private final Queue queueLinks = new LinkedList<>();
private String website;
private Collection snSites;
private final int timeout;
@@ -74,7 +78,7 @@ public TurWCProcess(@Value("${turing.url}") String turingUrl,
TurWCAllowUrlRepository turWCAllowUrlRepository,
TurWCNotAllowUrlRepository turWCNotAllowUrlRepository,
TurWCFileExtensionRepository turWCFileExtensionRepository,
- TurWCAttributeMappingRepository turWCAttributeMappingRepository) {
+ TurWCAttributeMappingRepository turWCAttributeMappingRepository, TurWCStartingPointRepository turWCStartingPointsRepository) {
this.turingUrl = turingUrl;
this.turingApiKey = turingApiKey;
this.timeout = timeout;
@@ -84,26 +88,45 @@ public TurWCProcess(@Value("${turing.url}") String turingUrl,
this.turWCNotAllowUrlRepository = turWCNotAllowUrlRepository;
this.turWCFileExtensionRepository = turWCFileExtensionRepository;
this.turWCAttributeMappingRepository = turWCAttributeMappingRepository;
+ this.turWCStartingPointsRepository = turWCStartingPointsRepository;
}
public void start(TurWCSource turWCSource) {
reset();
- turWCFileExtensionRepository.findByTurWCSource(turWCSource).ifPresent(source -> source.forEach(turWCFileExtension ->
- this.notAllowExtensions.add(turWCFileExtension.getExtension())));
- turWCNotAllowUrlRepository.findByTurWCSource(turWCSource).ifPresent(source -> source.forEach(turWCNotAllowUrl ->
- this.notAllowUrls.add(turWCNotAllowUrl.getUrl())));
-
+ turWCFileExtensionRepository.findByTurWCSource(turWCSource).ifPresent(source ->
+ source.forEach(turWCFileExtension ->
+ this.notAllowExtensions.add(turWCFileExtension.getExtension())));
+ turWCNotAllowUrlRepository.findByTurWCSource(turWCSource).ifPresent(source ->
+ source.forEach(turWCNotAllowUrl -> {
+ if (turWCNotAllowUrl.getUrl().trim().endsWith(WILD_CARD)) {
+ this.notAllowStartsWithUrls.add(StringUtils.chop(turWCNotAllowUrl.getUrl()));
+ } else {
+ this.notAllowUrls.add(turWCNotAllowUrl.getUrl());
+ }
+ }
+ ));
+ turWCAllowUrlRepository.findByTurWCSource(turWCSource).ifPresent(source ->
+ source.forEach(turWCAllowUrl -> {
+ if (turWCAllowUrl.getUrl().trim().endsWith(WILD_CARD)) {
+ this.allowStartsWithUrls.add(StringUtils.chop(turWCAllowUrl.getUrl().trim()));
+ } else {
+ this.allowUrls.add(turWCAllowUrl.getUrl());
+ }
+ }
+ ));
+ turWCStartingPointsRepository.findByTurWCSource(turWCSource).ifPresent(source ->
+ source.forEach(turWCStartingPoint ->
+ this.startingPoints.add(turWCStartingPoint.getUrl())
+ ));
this.website = turWCSource.getUrl();
this.snSites = turWCSource.getTurSNSites();
this.username = turWCSource.getUsername();
this.password = turWCSource.getPassword();
log.info("User Agent: {}", userAgent);
- turWCAllowUrlRepository
- .findByTurWCSource(turWCSource).ifPresent(source ->
- source.forEach(turWCAllowUrl -> {
- remainingLinks.add(this.website + turWCAllowUrl.getUrl());
- getPagesFromQueue(turWCSource);
- }));
+ startingPoints.forEach(url -> {
+ queueLinks.offer(this.website + url);
+ getPagesFromQueue(turWCSource);
+ });
if (turSNJobItems.size() > 0) {
sendToTuring();
getInfoQueue();
@@ -118,12 +141,13 @@ private void reset() {
private void getInfoQueue() {
log.info("Total Job Item: {}", Iterators.size(turSNJobItems.iterator()));
log.info("Total Visited Links: {}", (long) visitedLinks.size());
- log.info("Queue Size: {}", (long) remainingLinks.size());
+ log.info("Total Indexed Links: {}", (long) indexedLinks.size());
+ log.info("Queue Size: {}", (long) queueLinks.size());
}
public void getPagesFromQueue(TurWCSource turWCSource) {
- while (!remainingLinks.isEmpty()) {
- String url = remainingLinks.poll();
+ while (!queueLinks.isEmpty()) {
+ String url = queueLinks.poll();
getPage(turWCSource, url);
sendToTuringWhenMaxSize();
getInfoQueue();
@@ -135,36 +159,52 @@ public TurSNJobItem getPage(TurWCSource turWCSource, String url) {
log.info("{}: {}", url, turWCSource.getTurSNSites());
Document document = getHTML(url);
getPageLinks(document);
- return addTurSNJobItems(turWCSource, document, url);
+ String pageUrl = getPageUrl(url);
+ if (canBeIndexed(pageUrl)) {
+ indexedLinks.add(pageUrl);
+ return addTurSNJobItems(turWCSource, document, url);
+ } else {
+ log.debug("Ignored: {}", url);
+ }
} catch (IOException e) {
- log.error(e.getMessage());
+ log.error(e.getMessage(), e);
}
-
return new TurSNJobItem();
}
- private TurSNJobItem addTurSNJobItems(TurWCSource turWCSource, Document document, String url) {
- TurSNJobItem turSNJobItem = new TurSNJobItem(TurSNJobAction.CREATE, new ArrayList<>(snSites),
- getLocale(turWCSource, document, url),
- getJobItemAttributes(turWCSource, document, url));
- turSNJobItems.add(turSNJobItem);
- return turSNJobItem;
- }
-
private void getPageLinks(Document document) {
document.select(A_HREF).forEach(page -> addPageToQueue(getPageUrl(page.attr(ABS_HREF))));
}
private void addPageToQueue(String pageUrl) {
- if (canBeIndexed(pageUrl)) {
- if (visitedLinks.add(pageUrl) && !remainingLinks.offer(pageUrl)) {
- log.error("Item didn't add to queue: {}", pageUrl);
- }
- } else {
- log.debug("Ignored: {}", pageUrl);
+ if (canBeAddToQueue(pageUrl) && visitedLinks.add(pageUrl) && !queueLinks.offer(pageUrl)) {
+ log.error("Item didn't add to queue: {}", pageUrl);
}
}
+ /**
+  * Applies every crawl filter of the current source to a discovered link.
+  *
+  * <p>The link is rejected when any of the mail, telephone, sharp, pagination or JavaScript
+  * checks fails, when it is already waiting in the queue, or when it does not start with the
+  * source website. Otherwise its relative URL must match at least one allow rule (prefix or
+  * exact), no not-allow rule (prefix or exact), and the link must not end with a blocked
+  * extension.</p>
+  *
+  * @param pageUrl URL of the candidate page
+  * @return {@code true} when the link passes all filters
+  */
+ private boolean isValidToAddQueue(String pageUrl) {
+     if (!isNotMailUrl(pageUrl)
+             || !isNotTelUrl(pageUrl)
+             || queueLinks.contains(pageUrl)
+             || isSharpUrl(pageUrl) || isPagination(pageUrl) || isJavascriptUrl(pageUrl)
+             || !pageUrl.startsWith(this.website)) {
+         return false;
+     }
+     // Resolved once, after the cheap checks, instead of once per rule list.
+     String relativeUrl = getRelativePageUrl(pageUrl);
+     boolean allowed = StringUtils.startsWithAny(relativeUrl, allowStartsWithUrls.toArray(new String[0]))
+             || allowUrls.contains(relativeUrl);
+     return allowed
+             && !StringUtils.startsWithAny(relativeUrl, notAllowStartsWithUrls.toArray(new String[0]))
+             && !notAllowUrls.contains(relativeUrl)
+             && !StringUtils.endsWithAny(pageUrl, notAllowExtensions.toArray(new String[0]));
+ }
+
+ /**
+  * Builds a CREATE job item for the page and appends it to the pending job items.
+  *
+  * @param turWCSource source being crawled
+  * @param document    parsed page
+  * @param url         URL the page was fetched from
+  * @return the job item that was appended
+  */
+ private TurSNJobItem addTurSNJobItems(TurWCSource turWCSource, Document document, String url) {
+     TurSNJobItem createdItem = new TurSNJobItem(
+             TurSNJobAction.CREATE,
+             new ArrayList<>(snSites),
+             getLocale(turWCSource, document, url),
+             getJobItemAttributes(turWCSource, document, url));
+     turSNJobItems.add(createdItem);
+     return createdItem;
+ }
+
private void sendToTuringWhenMaxSize() {
if (turSNJobItems.size() >= jobSize) {
sendToTuring();
@@ -235,7 +275,6 @@ private static void addItemToArray(Map attributes, String attrib
.stream().map(String.class::cast).toList());
attributeValues.add(attributeValue);
attributes.put(attributeName, attributeValues);
-
}
private void addFirstItemToAttribute(String attributeName,
@@ -244,7 +283,6 @@ private void addFirstItemToAttribute(String attributeName,
attributes.put(attributeName, attributeValue);
}
-
private void sendToTuring() {
if (log.isDebugEnabled()) {
for (TurSNJobItem turSNJobItem : turSNJobItems) {
@@ -288,16 +326,16 @@ public TurWCContext getTurWCContext(Document document, String url) {
}
private boolean canBeIndexed(String pageUrl) {
- return !isSharpUrl(pageUrl) && !isPagination(pageUrl) && !isJavascriptUrl(pageUrl)
- && pageUrl.startsWith(this.website)
- && !StringUtils.startsWithAny(getRelativePageUrl(pageUrl),
- notAllowUrls.toArray(new String[0]))
- && !StringUtils.endsWithAny(pageUrl,
- notAllowExtensions.toArray(new String[0]))
- && !StringUtils.equalsAny(pageUrl,
- visitedLinks.toArray(new String[0]));
+ return isValidToAddQueue(pageUrl)
+ && !StringUtils.equalsAny(pageUrl, indexedLinks.toArray(new String[0]));
+ }
+
+ /**
+  * Tells whether a discovered link may be queued: it must pass {@code isValidToAddQueue}
+  * and must not be present in {@code visitedLinks}.
+  *
+  * @param pageUrl URL of the candidate page
+  * @return {@code true} when the link is valid and has not been visited
+  */
+ private boolean canBeAddToQueue(String pageUrl) {
+     // visitedLinks is a HashSet: use its own lookup instead of copying it to an array per link.
+     return isValidToAddQueue(pageUrl) && !visitedLinks.contains(pageUrl);
}
+
private static boolean isJavascriptUrl(String pageUrl) {
return pageUrl.contains(JAVASCRIPT);
}
@@ -382,4 +420,4 @@ private String getUrlWithoutParameters(String url) {
return url;
}
}
-}
+}
\ No newline at end of file
diff --git a/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/TurWCScheduledTasks.java b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/TurWCScheduledTasks.java
index 37f2ed2a249..4fac8759285 100644
--- a/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/TurWCScheduledTasks.java
+++ b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/TurWCScheduledTasks.java
@@ -1,6 +1,7 @@
package com.viglet.turing.connector.webcrawler;
import com.google.inject.Inject;
+import com.viglet.turing.connector.webcrawler.persistence.repository.TurWCConfigVarRepository;
import com.viglet.turing.connector.webcrawler.persistence.repository.TurWCSourceRepository;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
@@ -14,18 +15,28 @@
@Component
public class TurWCScheduledTasks {
private final TurWCSourceRepository turWCSourceRepository;
+ private final TurWCConfigVarRepository turWCConfigVarRepository;
private final TurWCProcess turWCProcess;
private final SimpleDateFormat dateFormat = new SimpleDateFormat("HH:mm:ss");
+ public static final String FIRST_TIME = "FIRST_TIME";
@Inject
- public TurWCScheduledTasks(TurWCSourceRepository turWCSourceRepository, TurWCProcess turWCProcess) {
+ public TurWCScheduledTasks(TurWCSourceRepository turWCSourceRepository,
+ TurWCConfigVarRepository turWCConfigVarRepository,
+ TurWCProcess turWCProcess) {
this.turWCSourceRepository = turWCSourceRepository;
+ this.turWCConfigVarRepository = turWCConfigVarRepository;
this.turWCProcess = turWCProcess;
}
@Scheduled(fixedDelay = 60, timeUnit = TimeUnit.MINUTES)
public void executeWebCrawler() {
log.info("The time is now {}", dateFormat.format(new Date()));
- turWCSourceRepository.findAll().forEach(turWCProcess::start);
+ if (turWCConfigVarRepository.findById(FIRST_TIME).isEmpty()) {
+ log.info("This is the first time, waiting next schedule.");
+ } else {
+ log.info("Starting indexing");
+ turWCSourceRepository.findAll().forEach(turWCProcess::start);
+ }
}
}
diff --git a/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/export/TurWCExchangeProcess.java b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/export/TurWCExchangeProcess.java
index b60b8118d03..0bcc3e76941 100644
--- a/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/export/TurWCExchangeProcess.java
+++ b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/export/TurWCExchangeProcess.java
@@ -31,7 +31,9 @@
public class TurWCExchangeProcess {
private static final String EXPORT_FILE = "export.json";
private final TurWCSourceRepository turWCSourceRepository;
+
private final TurWCAllowUrlRepository turWCAllowUrlRepository;
+ private final TurWCStartingPointRepository turWCStartingPointRepository;
private final TurWCNotAllowUrlRepository turWCNotAllowUrlRepository;
private final TurWCFileExtensionRepository turWCFileExtensionRepository;
private final TurWCAttributeMappingRepository turWCAttributeMappingRepository;
@@ -39,11 +41,13 @@ public class TurWCExchangeProcess {
@Inject
public TurWCExchangeProcess(TurWCSourceRepository turWCSourceRepository,
TurWCAllowUrlRepository turWCAllowUrlRepository,
+ TurWCStartingPointRepository turWCStartingPointRepository,
TurWCNotAllowUrlRepository turWCNotAllowUrlRepository,
TurWCFileExtensionRepository turWCFileExtensionRepository,
TurWCAttributeMappingRepository turWCAttributeMappingRepository) {
this.turWCSourceRepository = turWCSourceRepository;
this.turWCAllowUrlRepository = turWCAllowUrlRepository;
+ this.turWCStartingPointRepository = turWCStartingPointRepository;
this.turWCNotAllowUrlRepository = turWCNotAllowUrlRepository;
this.turWCFileExtensionRepository = turWCFileExtensionRepository;
this.turWCAttributeMappingRepository = turWCAttributeMappingRepository;
@@ -72,7 +76,6 @@ public StreamingResponseBody exportObject(HttpServletResponse response) {
List turWCSources = turWCSourceRepository.findAll();
-
File exportDir = new File(tmpDir.getAbsolutePath().concat(File.separator + folderName));
File exportFile = new File(exportDir.getAbsolutePath().concat(File.separator + EXPORT_FILE));
try {
@@ -176,7 +179,7 @@ public void importWCSource(TurWCExchange turWCExchange) {
for (TurWCSourceExchange turWCSourceExchange : turWCExchange.getSources()) {
if (turWCSourceRepository.findById(turWCSourceExchange.getId()).isEmpty()) {
TurWCSource turWCSource = TurWCSource.builder()
- .id(turWCSourceExchange.getId())
+ // .id(turWCSourceExchange.getId())
.url(turWCSourceExchange.getUrl())
.username(turWCSourceExchange.getUsername())
.password(turWCSourceExchange.getPassword())
@@ -187,6 +190,11 @@ public void importWCSource(TurWCExchange turWCExchange) {
turWCSourceRepository.save(turWCSource);
+ turWCSourceExchange.getStartingPoints().forEach(url ->
+ turWCStartingPointRepository.save(TurWCStartingPoint.builder()
+ .url(url)
+ .turWCSource(turWCSource)
+ .build()));
turWCSourceExchange.getAllowUrls().forEach(url ->
turWCAllowUrlRepository.save(TurWCAllowUrl.builder()
.url(url)
diff --git a/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/export/bean/TurWCSourceExchange.java b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/export/bean/TurWCSourceExchange.java
index 993f6a670d5..fa3f6e3debe 100644
--- a/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/export/bean/TurWCSourceExchange.java
+++ b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/export/bean/TurWCSourceExchange.java
@@ -21,6 +21,8 @@ public class TurWCSourceExchange {
private String username;
private String password;
@Builder.Default
+ private Collection startingPoints = new HashSet<>();
+ @Builder.Default
private Collection allowUrls = new HashSet<>();
@Builder.Default
private Collection notAllowUrls = new HashSet<>();
diff --git a/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/persistence/model/TurWCSource.java b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/persistence/model/TurWCSource.java
index 7abecc9fd3a..f6f51c19a75 100644
--- a/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/persistence/model/TurWCSource.java
+++ b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/persistence/model/TurWCSource.java
@@ -54,6 +54,12 @@ public class TurWCSource implements Serializable {
@Column(name = SN_SITE, nullable = false)
private Collection turSNSites = new HashSet<>();
+ @Builder.Default
+ @OneToMany(mappedBy = TUR_WC_SOURCE, orphanRemoval = true, fetch = FetchType.LAZY)
+ @Cascade({org.hibernate.annotations.CascadeType.ALL, org.hibernate.annotations.CascadeType.DELETE_ORPHAN})
+ @OnDelete(action = OnDeleteAction.CASCADE)
+ private Collection startingPoints = new HashSet<>();
+
@Builder.Default
@OneToMany(mappedBy = TUR_WC_SOURCE, orphanRemoval = true, fetch = FetchType.LAZY)
@Cascade({org.hibernate.annotations.CascadeType.ALL, org.hibernate.annotations.CascadeType.DELETE_ORPHAN})
@@ -78,6 +84,14 @@ public class TurWCSource implements Serializable {
@OnDelete(action = OnDeleteAction.CASCADE)
private Collection attributeMappings = new HashSet<>();
+ /**
+  * Replaces the content of the starting-point collection in place: the existing collection
+  * instance is emptied and then refilled, it is never swapped for the argument.
+  *
+  * @param startingPoints new starting points; {@code null} leaves the collection empty
+  */
+ public void setStartingPoints(Collection startingPoints) {
+     this.startingPoints.clear();
+     if (startingPoints == null) {
+         return;
+     }
+     this.startingPoints.addAll(startingPoints);
+ }
+
+
public void setAllowUrls(Collection allowUrls) {
this.allowUrls.clear();
if (allowUrls != null) {
diff --git a/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/persistence/model/TurWCStartingPoint.java b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/persistence/model/TurWCStartingPoint.java
new file mode 100644
index 00000000000..441e39ccacd
--- /dev/null
+++ b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/persistence/model/TurWCStartingPoint.java
@@ -0,0 +1,37 @@
+package com.viglet.turing.connector.webcrawler.persistence.model;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import jakarta.persistence.Entity;
+import jakarta.persistence.JoinColumn;
+import jakarta.persistence.ManyToOne;
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+import lombok.NoArgsConstructor;
+import lombok.Setter;
+import lombok.experimental.SuperBuilder;
+
+import java.io.Serial;
+import java.io.Serializable;
+
+/**
+ * JPA entity for a URL from which the crawl of a {@link TurWCSource} starts.
+ *
+ * <p>The entity extends {@code TurWCUrl} and adds the owning source. The {@code turWCSource}
+ * property is excluded from JSON handling through {@code @JsonIgnoreProperties}.</p>
+ */
+@SuperBuilder
+@AllArgsConstructor
+@NoArgsConstructor
+@Getter
+@Setter
+@Entity
+@JsonIgnoreProperties({ "turWCSource" })
+public class TurWCStartingPoint extends TurWCUrl implements Serializable {
+
+ @Serial
+ private static final long serialVersionUID = 1L;
+
+ // bi-directional many-to-one association to TurWCSource
+ // (mandatory: the wc_source_id join column is declared non-nullable)
+ @ManyToOne
+ @JoinColumn(name = "wc_source_id", nullable = false)
+ private TurWCSource turWCSource;
+
+ /**
+  * Creates a starting point for the given source.
+  *
+  * @param url         starting URL, stored in the inherited {@code url} field
+  * @param turWCSource source that owns this starting point
+  */
+ public TurWCStartingPoint(String url, TurWCSource turWCSource) {
+ this.url = url;
+ this.turWCSource = turWCSource;
+ }
+}
diff --git a/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/persistence/repository/TurWCStartingPointRepository.java b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/persistence/repository/TurWCStartingPointRepository.java
new file mode 100644
index 00000000000..56339c0daee
--- /dev/null
+++ b/turing-web-crawler/wc-app/src/main/java/com/viglet/turing/connector/webcrawler/persistence/repository/TurWCStartingPointRepository.java
@@ -0,0 +1,12 @@
+package com.viglet.turing.connector.webcrawler.persistence.repository;
+
+import com.viglet.turing.connector.webcrawler.persistence.model.TurWCSource;
+import com.viglet.turing.connector.webcrawler.persistence.model.TurWCStartingPoint;
+import org.springframework.data.jpa.repository.JpaRepository;
+
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Repository for {@link TurWCStartingPoint} entities.
+ */
+public interface TurWCStartingPointRepository extends JpaRepository {
+ /**
+  * Looks up the starting points that belong to the given source.
+  *
+  * @param turWCSource source whose starting points are requested
+  * @return the matching starting points, wrapped in an {@link Optional}
+  */
+ Optional> findByTurWCSource(TurWCSource turWCSource);
+}
diff --git a/turing-web-crawler/wc-plugin/pom.xml b/turing-web-crawler/wc-plugin/pom.xml
new file mode 100644
index 00000000000..72481782751
--- /dev/null
+++ b/turing-web-crawler/wc-plugin/pom.xml
@@ -0,0 +1,140 @@
+
+
+ 4.0.0
+
+ turing-web-crawler
+ com.viglet.turing
+ ${revision}
+
+
+ wc-plugin
+ Turing Web Crawler - Connector Plugin
+
+
+ UTF-8
+ UTF-8
+ 21
+ 21
+ 21
+
+
+
+
+ ch.qos.logback
+ logback-classic
+
+
+ com.h2database
+ h2
+
+
+ org.projectlombok
+ lombok
+ true
+
+
+ com.viglet.turing
+ turing-spring
+ ${revision}
+
+
+ com.google.inject
+ guice
+ 7.0.0
+ compile
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-hibernate5-jakarta
+
+
+ com.viglet.turing
+ turing-java-sdk
+
+
+ com.viglet.turing
+ wc-commons
+ ${revision}
+
+
+ com.sezinkarli
+ random-user-agent-generator
+ 1.3
+
+
+ org.springframework.boot
+ spring-boot-starter-data-jpa
+
+
+ org.springframework.boot
+ spring-boot-starter-test
+ test
+
+
+ com.viglet.turing
+ connector-commons
+ 0.3.9
+ compile
+
+
+
+
+
+ org.springframework.boot
+ spring-boot-dependencies
+ ${spring-boot.version}
+ pom
+ import
+
+
+
+
+ turing-web-crawler-plugin
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.13.0
+
+ true
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 3.6.0
+
+
+
+ shade
+
+
+ turing-web-crawler-plugin
+ true
+
+
+
+ *:*
+
+
+ com.viglet.turing:wc-commons:*
+
+
+
+
+ com.viglet.turing.connector.plugin.webcrawler.TurWCPlugin
+
+
+
+
+
+
+
+
+
diff --git a/turing-web-crawler/wc-plugin/src/main/java/com/viglet/turing/connector/plugin/webcrawler/TurWCPlugin.java b/turing-web-crawler/wc-plugin/src/main/java/com/viglet/turing/connector/plugin/webcrawler/TurWCPlugin.java
new file mode 100644
index 00000000000..78e85bb0753
--- /dev/null
+++ b/turing-web-crawler/wc-plugin/src/main/java/com/viglet/turing/connector/plugin/webcrawler/TurWCPlugin.java
@@ -0,0 +1,43 @@
+/*
+ *
+ * Copyright (C) 2016-2024 the original author or authors.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+package com.viglet.turing.connector.plugin.webcrawler;
+
+import com.google.inject.Inject;
+import com.viglet.turing.client.sn.job.TurSNJobItem;
+import com.viglet.turing.connector.plugin.TurConnectorPlugin;
+import com.viglet.turing.connector.plugin.webcrawler.persistence.repository.TurWCSourceRepository;
+
+/**
+ * Web crawler implementation of {@link TurConnectorPlugin}.
+ *
+ * <p>All work is delegated to {@link TurWCPluginProcess}: {@link #init()} starts the process for
+ * every stored source and {@link #getNext()} returns what the process hands out.</p>
+ */
+public class TurWCPlugin implements TurConnectorPlugin {
+ private final TurWCSourceRepository turWCSourceRepository;
+ private final TurWCPluginProcess turWCPluginProcess;
+
+ /**
+  * @param turWCSourceRepository repository used to list the sources to crawl
+  * @param turWCPluginProcess    crawler process that visits the pages and builds the job items
+  */
+ @Inject
+ public TurWCPlugin(TurWCSourceRepository turWCSourceRepository, TurWCPluginProcess turWCPluginProcess) {
+ this.turWCSourceRepository = turWCSourceRepository;
+ this.turWCPluginProcess = turWCPluginProcess;
+ }
+
+ /**
+  * Starts the crawler process once for each source returned by the repository, in sequence.
+  */
+ public void init() {
+ turWCSourceRepository.findAll().forEach(turWCPluginProcess::start);
+ }
+ /**
+  * Returns the next job item as provided by the crawler process.
+  *
+  * @return the job item returned by {@link TurWCPluginProcess#getNext()}
+  */
+ @Override
+ public TurSNJobItem getNext() {
+ return turWCPluginProcess.getNext();
+ }
+}
\ No newline at end of file
diff --git a/turing-web-crawler/wc-plugin/src/main/java/com/viglet/turing/connector/plugin/webcrawler/TurWCPluginProcess.java b/turing-web-crawler/wc-plugin/src/main/java/com/viglet/turing/connector/plugin/webcrawler/TurWCPluginProcess.java
new file mode 100644
index 00000000000..4d5ba8b5936
--- /dev/null
+++ b/turing-web-crawler/wc-plugin/src/main/java/com/viglet/turing/connector/plugin/webcrawler/TurWCPluginProcess.java
@@ -0,0 +1,376 @@
+package com.viglet.turing.connector.plugin.webcrawler;
+
+import com.google.inject.Inject;
+import com.viglet.turing.client.sn.TurMultiValue;
+import com.viglet.turing.client.sn.job.TurSNJobAction;
+import com.viglet.turing.client.sn.job.TurSNJobItem;
+import com.viglet.turing.client.sn.job.TurSNJobItems;
+import com.viglet.turing.commons.cache.TurCustomClassCache;
+import com.viglet.turing.connector.plugin.webcrawler.persistence.repository.*;
+import com.viglet.turing.connector.webcrawler.commons.TurWCContext;
+import com.viglet.turing.connector.webcrawler.commons.ext.TurWCExtInterface;
+import com.viglet.turing.connector.webcrawler.commons.ext.TurWCExtLocaleInterface;
+import com.viglet.turing.connector.plugin.webcrawler.persistence.model.TurWCAttributeMapping;
+import com.viglet.turing.connector.plugin.webcrawler.persistence.model.TurWCSource;
+import generator.RandomUserAgentGenerator;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Entities;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+@Slf4j
+@Component
+public class TurWCPluginProcess {
+ public static final String MAILTO = "mailto";
+ public static final String TEL = "tel:";
+ public static final String JAVASCRIPT = "javascript:";
+ public static final String A_HREF = "a[href]";
+ public static final String ABS_HREF = "abs:href";
+ public static final String WILD_CARD = "*";
+ private final List startingPoints = new ArrayList<>();
+ private final List allowUrls = new ArrayList<>();
+ private final List allowStartsWithUrls = new ArrayList<>();
+ private final List notAllowUrls = new ArrayList<>();
+ private final List notAllowStartsWithUrls = new ArrayList<>();
+ private final List notAllowExtensions = new ArrayList<>();
+ private final TurWCStartingPointRepository turWCStartingPointsRepository;
+ private TurSNJobItems turSNJobItems = new TurSNJobItems();
+ private final String userAgent = RandomUserAgentGenerator.getNextNonMobile();
+ private final Set visitedLinks = new HashSet<>();
+ private final Set indexedLinks = new HashSet<>();
+ private final Queue queueLinks = new LinkedList<>();
+ private String website;
+ private Collection snSites;
+ private final int timeout;
+ private final String referrer;
+ private String username;
+ private String password;
+ private final TurWCAllowUrlRepository turWCAllowUrlRepository;
+ private final TurWCNotAllowUrlRepository turWCNotAllowUrlRepository;
+ private final TurWCFileExtensionRepository turWCFileExtensionRepository;
+ private final TurWCAttributeMappingRepository turWCAttributeMappingRepository;
+
+ /**
+  * Creates the crawler process with its configuration values and repositories.
+  *
+  * @param timeout                         value of {@code turing.wc.timeout}, 5000 when the property is not set
+  * @param referrer                        value of {@code turing.wc.referrer}, {@code https://www.google.com}
+  *                                        when the property is not set
+  * @param turWCAllowUrlRepository         repository of the allow-URL rules
+  * @param turWCNotAllowUrlRepository      repository of the not-allow-URL rules
+  * @param turWCFileExtensionRepository    repository of the blocked file extensions
+  * @param turWCAttributeMappingRepository repository of the attribute mappings
+  * @param turWCStartingPointsRepository   repository of the crawl starting points
+  */
+ @Inject
+ public TurWCPluginProcess(@Value("${turing.wc.timeout:5000}") int timeout,
+ @Value("${turing.wc.referrer:https://www.google.com}") String referrer,
+ TurWCAllowUrlRepository turWCAllowUrlRepository,
+ TurWCNotAllowUrlRepository turWCNotAllowUrlRepository,
+ TurWCFileExtensionRepository turWCFileExtensionRepository,
+ TurWCAttributeMappingRepository turWCAttributeMappingRepository,
+ TurWCStartingPointRepository turWCStartingPointsRepository) {
+ this.timeout = timeout;
+ this.referrer = referrer;
+ this.turWCAllowUrlRepository = turWCAllowUrlRepository;
+ this.turWCNotAllowUrlRepository = turWCNotAllowUrlRepository;
+ this.turWCFileExtensionRepository = turWCFileExtensionRepository;
+ this.turWCAttributeMappingRepository = turWCAttributeMappingRepository;
+ this.turWCStartingPointsRepository = turWCStartingPointsRepository;
+ }
+
+ /**
+  * Loads the crawl rules of the given source and crawls it from each of its starting points.
+  *
+  * <p>Blocked extensions, not-allow rules, allow rules and starting points are read from their
+  * repositories. A rule whose trimmed URL ends with {@link #WILD_CARD} is stored, without the
+  * wildcard, as a prefix rule; any other rule is stored as an exact rule. Each starting point is
+  * appended to the source URL, queued, and the queue is then processed until it is empty.</p>
+  *
+  * @param turWCSource source to crawl
+  */
+ public void start(TurWCSource turWCSource) {
+     reset();
+     turWCFileExtensionRepository.findByTurWCSource(turWCSource).ifPresent(source ->
+             source.forEach(turWCFileExtension ->
+                     this.notAllowExtensions.add(turWCFileExtension.getExtension())));
+     turWCNotAllowUrlRepository.findByTurWCSource(turWCSource).ifPresent(source ->
+             source.forEach(turWCNotAllowUrl -> {
+                         if (turWCNotAllowUrl.getUrl().trim().endsWith(WILD_CARD)) {
+                             // Trim before chopping so that the wildcard, not trailing whitespace,
+                             // is the character removed (same handling as the allow rules below).
+                             this.notAllowStartsWithUrls.add(StringUtils.chop(turWCNotAllowUrl.getUrl().trim()));
+                         } else {
+                             this.notAllowUrls.add(turWCNotAllowUrl.getUrl());
+                         }
+                     }
+             ));
+     turWCAllowUrlRepository.findByTurWCSource(turWCSource).ifPresent(source ->
+             source.forEach(turWCAllowUrl -> {
+                         if (turWCAllowUrl.getUrl().trim().endsWith(WILD_CARD)) {
+                             this.allowStartsWithUrls.add(StringUtils.chop(turWCAllowUrl.getUrl().trim()));
+                         } else {
+                             this.allowUrls.add(turWCAllowUrl.getUrl());
+                         }
+                     }
+             ));
+     turWCStartingPointsRepository.findByTurWCSource(turWCSource).ifPresent(source ->
+             source.forEach(turWCStartingPoint ->
+                     this.startingPoints.add(turWCStartingPoint.getUrl())
+             ));
+     this.website = turWCSource.getUrl();
+     this.snSites = turWCSource.getTurSNSites();
+     this.username = turWCSource.getUsername();
+     this.password = turWCSource.getPassword();
+     log.info("User Agent: {}", userAgent);
+     startingPoints.forEach(url -> {
+         queueLinks.offer(this.website + url);
+         getPagesFromQueue(turWCSource);
+     });
+ }
+
+ /**
+  * Clears the state kept from a previous run so that a new source starts clean.
+  *
+  * <p>Besides the job items and the visited links, the indexed links, the queue, the starting
+  * points and every rule list are emptied; otherwise the rules and starting points loaded for
+  * one source would still be applied to the next source passed to {@code start}.</p>
+  */
+ private void reset() {
+     turSNJobItems = new TurSNJobItems();
+     visitedLinks.clear();
+     indexedLinks.clear();
+     queueLinks.clear();
+     startingPoints.clear();
+     allowUrls.clear();
+     allowStartsWithUrls.clear();
+     notAllowUrls.clear();
+     notAllowStartsWithUrls.clear();
+     notAllowExtensions.clear();
+ }
+
+
+ /**
+  * Drains the link queue, handing each polled URL to {@code getPage}.
+  *
+  * @param turWCSource source being crawled
+  */
+ public void getPagesFromQueue(TurWCSource turWCSource) {
+     while (!queueLinks.isEmpty()) {
+         getPage(turWCSource, queueLinks.poll());
+     }
+ }
+
+ /**
+  * Fetches a page, queues the links found on it and, when the page itself may be indexed,
+  * records it as indexed and adds a job item for it.
+  *
+  * <p>An {@link IOException} raised while processing the page is logged and not rethrown.</p>
+  *
+  * @param turWCSource source being crawled
+  * @param url         URL of the page to fetch
+  */
+ public void getPage(TurWCSource turWCSource, String url) {
+     try {
+         log.info("{}: {}", url, turWCSource.getTurSNSites());
+         Document document = getHTML(url);
+         getPageLinks(document);
+         String pageUrl = getPageUrl(url);
+         if (canBeIndexed(pageUrl)) {
+             indexedLinks.add(pageUrl);
+             addTurSNJobItems(turWCSource, document, url);
+         } else {
+             log.debug("Ignored: {}", url);
+         }
+     } catch (IOException e) {
+         log.error(e.getMessage(), e);
+     }
+ }
+
+ /**
+  * Returns an item of the collected job items, taken from a newly created iterator.
+  *
+  * <p>NOTE(review): a fresh iterator is requested on every call and nothing is removed here, so
+  * this presumably returns the same first item each time and fails with
+  * {@code NoSuchElementException} when no item was collected - confirm against
+  * {@code TurSNJobItems} and the {@code TurConnectorPlugin#getNext} contract.</p>
+  *
+  * @return the item produced by {@code turSNJobItems.iterator().next()}
+  */
+ public TurSNJobItem getNext() {
+ return turSNJobItems.iterator().next();
+ }
+ /**
+  * Offers every {@code a[href]} link of the document to the queue, using the link's absolute
+  * {@code href} passed through {@code getPageUrl}.
+  *
+  * @param document parsed page whose links are collected
+  */
+ private void getPageLinks(Document document) {
+     for (var anchor : document.select(A_HREF)) {
+         addPageToQueue(getPageUrl(anchor.attr(ABS_HREF)));
+     }
+ }
+
+ /**
+  * Queues a link when it may be queued and was not already recorded as visited; logs an error
+  * when the queue refuses the link.
+  *
+  * @param pageUrl URL of the discovered page
+  */
+ private void addPageToQueue(String pageUrl) {
+     if (!canBeAddToQueue(pageUrl)) {
+         return;
+     }
+     // Set.add returns false when the link was already recorded as visited.
+     if (!visitedLinks.add(pageUrl)) {
+         return;
+     }
+     if (!queueLinks.offer(pageUrl)) {
+         log.error("Item didn't add to queue: {}", pageUrl);
+     }
+ }
+
+ /**
+  * Applies every crawl filter of the current source to a discovered link.
+  *
+  * <p>The link is rejected when any of the mail, telephone, sharp, pagination or JavaScript
+  * checks fails, when it is already waiting in the queue, or when it does not start with the
+  * source website. Otherwise its relative URL must match at least one allow rule (prefix or
+  * exact), no not-allow rule (prefix or exact), and the link must not end with a blocked
+  * extension.</p>
+  *
+  * @param pageUrl URL of the candidate page
+  * @return {@code true} when the link passes all filters
+  */
+ private boolean isValidToAddQueue(String pageUrl) {
+     if (!isNotMailUrl(pageUrl)
+             || !isNotTelUrl(pageUrl)
+             || queueLinks.contains(pageUrl)
+             || isSharpUrl(pageUrl) || isPagination(pageUrl) || isJavascriptUrl(pageUrl)
+             || !pageUrl.startsWith(this.website)) {
+         return false;
+     }
+     // Resolved once, after the cheap checks, instead of once per rule list.
+     String relativeUrl = getRelativePageUrl(pageUrl);
+     boolean allowed = StringUtils.startsWithAny(relativeUrl, allowStartsWithUrls.toArray(new String[0]))
+             || allowUrls.contains(relativeUrl);
+     return allowed
+             && !StringUtils.startsWithAny(relativeUrl, notAllowStartsWithUrls.toArray(new String[0]))
+             && !notAllowUrls.contains(relativeUrl)
+             && !StringUtils.endsWithAny(pageUrl, notAllowExtensions.toArray(new String[0]));
+ }
+
+ /**
+  * Builds a CREATE job item for the page and appends it to the pending job items.
+  *
+  * @param turWCSource source being crawled
+  * @param document    parsed page
+  * @param url         URL the page was fetched from
+  */
+ private void addTurSNJobItems(TurWCSource turWCSource, Document document, String url) {
+     TurSNJobItem createdItem = new TurSNJobItem(
+             TurSNJobAction.CREATE,
+             new ArrayList<>(snSites),
+             getLocale(turWCSource, document, url),
+             getJobItemAttributes(turWCSource, document, url));
+     turSNJobItems.add(createdItem);
+ }
+
+ /**
+  * Builds the attribute map of a page from the attribute mappings of the source.
+  *
+  * <p>A mapping with a non-null text contributes that text directly. Otherwise, when the mapping
+  * names a class, the values are obtained through that custom class.</p>
+  *
+  * @param turWCSource source whose attribute mappings are applied
+  * @param document    parsed page
+  * @param url         URL the page was fetched from
+  * @return the attributes collected for the page; empty when no mapping contributes a value
+  */
+ public Map getJobItemAttributes(TurWCSource turWCSource, Document document, String url) {
+     Map turSNJobItemAttributes = new HashMap<>();
+     turWCAttributeMappingRepository.findByTurWCSource(turWCSource).ifPresent(mappings ->
+             mappings.forEach(mapping -> {
+                 String fixedText = mapping.getText();
+                 if (fixedText != null) {
+                     usesText(mapping, fixedText, turSNJobItemAttributes);
+                 } else if (!StringUtils.isEmpty(mapping.getClassName())) {
+                     usesCustomClass(document, url, mapping, turSNJobItemAttributes);
+                 }
+             }));
+     return turSNJobItemAttributes;
+ }
+
+ /**
+  * Adds to the attribute map every non-blank value that the mapping's custom class produces for
+  * the page. The first value of an attribute is stored through {@code addFirstItemToAttribute};
+  * later values go through {@code addItemInExistingAttribute}.
+  *
+  * @param document               parsed page
+  * @param url                    URL the page was fetched from
+  * @param turWCCustomClass       mapping that names the attribute and the custom class
+  * @param turSNJobItemAttributes attribute map to fill
+  */
+ private void usesCustomClass(Document document, String url, TurWCAttributeMapping turWCCustomClass,
+         Map turSNJobItemAttributes) {
+     getCustomClass(document, url, turWCCustomClass).ifPresent(values ->
+             values.forEach(value -> {
+                 if (StringUtils.isBlank(value)) {
+                     return;
+                 }
+                 String attributeName = turWCCustomClass.getName();
+                 if (turSNJobItemAttributes.containsKey(attributeName)) {
+                     addItemInExistingAttribute(value, turSNJobItemAttributes, attributeName);
+                 } else {
+                     addFirstItemToAttribute(attributeName, value, turSNJobItemAttributes);
+                 }
+             }));
+ }
+
+ /**
+  * Stores the given text in the attribute map under the mapping's name, replacing any value
+  * already stored under that name.
+  *
+  * @param turWCCustomClass       mapping that names the attribute
+  * @param text                   text to store
+  * @param turSNJobItemAttributes attribute map to update
+  */
+ private static void usesText(TurWCAttributeMapping turWCCustomClass, String text,
+ Map turSNJobItemAttributes) {
+ turSNJobItemAttributes.put(turWCCustomClass.getName(), text);
+ }
+
+ /**
+  * Runs the custom class configured in the mapping against the page.
+  *
+  * <p>The class instance is obtained from {@code TurCustomClassCache} by class name, cast to
+  * {@code TurWCExtInterface} and given the page context built by {@code getTurWCContext}.</p>
+  *
+  * @param document              parsed page
+  * @param url                   URL the page was fetched from
+  * @param turWCAttributeMapping mapping that names the custom class
+  * @return the result of {@code consume}, or an empty {@link Optional} when the cache returns
+  *         no instance for the class name
+  */
+ private Optional getCustomClass(Document document, String url,
+ TurWCAttributeMapping turWCAttributeMapping) {
+ return TurCustomClassCache.getCustomClassMap(turWCAttributeMapping.getClassName())
+ .flatMap(classInstance -> ((TurWCExtInterface) classInstance)
+ .consume(getTurWCContext(document, url)));
+ }
+
+ /**
+  * Adds a further value to an attribute that already has one: the value is appended when the
+  * stored value is an {@link ArrayList}; otherwise it is passed to
+  * {@code convertAttributeSingleValueToArray}.
+  *
+  * @param attributeValue value to add
+  * @param attributes     attribute map to update
+  * @param attributeName  name of the attribute
+  */
+ private static void addItemInExistingAttribute(String attributeValue,
+         Map attributes,
+         String attributeName) {
+     boolean alreadyMultiValued = attributes.get(attributeName) instanceof ArrayList;
+     if (alreadyMultiValued) {
+         addItemToArray(attributes, attributeName, attributeValue);
+     } else {
+         convertAttributeSingleValueToArray(attributes, attributeName, attributeValue);
+     }
+ }
+
+ private static void convertAttributeSingleValueToArray(Map attributes,
+ String attributeName, String attributeValue) {
+ List