forked from drupal-media/url_embed
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ConvertUrlToEmbedFilter.php
191 lines (169 loc) · 7.57 KB
/
ConvertUrlToEmbedFilter.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
<?php
namespace Drupal\url_embed\Plugin\Filter;
use Drupal\Component\Utility\Html;
use Drupal\Core\Form\FormStateInterface;
use Drupal\filter\FilterProcessResult;
use Drupal\filter\Plugin\FilterBase;
/**
* Provides a filter to display embedded entities based on data attributes.
*
* @Filter(
* id = "url_embed_convert_links",
* title = @Translation("Convert URLs to URL embeds"),
* description = @Translation("Convert plain URLs to embed elements that can be rendered with the <em>Display embedded URLs</em> filter."),
* type = Drupal\filter\Plugin\FilterInterface::TYPE_TRANSFORM_REVERSIBLE,
* settings = {
* "url_prefix" = "",
* },
* )
*/
class ConvertUrlToEmbedFilter extends FilterBase {
/**
* {@inheritdoc}
*/
public function settingsForm(array $form, FormStateInterface $form_state) {
$form['url_prefix'] = [
'#type' => 'textfield',
'#title' => $this->t('URL prefix'),
'#default_value' => $this->settings['url_prefix'],
'#description' => $this->t('Optional prefix that will be used to indicate which URLs that apply. All URLs that are supported will be converted if empty. Example: EMBED-https://twitter.com/drupal/status/735873777683320832'),
];
return $form;
}
/**
* {@inheritdoc}
*/
public function process($text, $langcode) {
return new FilterProcessResult(static::convertUrls($text, $this->settings['url_prefix']));
}
/**
* Replaces appearances of supported URLs with <drupal-url> embed elements.
*
* Logic of this function is copied from _filter_url() and slightly adopted
* for our use case. _filter_url() is unfortunately not general enough to
* re-use it.
*
* @param string $text
* Text to be processed.
* @param string $url_prefix
* (Optional) Prefix that should be used to manually choose which URLs
* should be converted.
*
* @return string
* Processed text.
*/
public static function convertUrls($text, $url_prefix = '') {
// Tags to skip and not recurse into.
$ignore_tags = 'a|script|style|code|pre';
// Create an array which contains the regexps for each type of link.
// The key to the regexp is the name of a function that is used as
// callback function to process matches of the regexp. The callback function
// is to return the replacement for the match. The array is used and
// matching/replacement done below inside some loops.
$tasks = [];
// Prepare protocols pattern for absolute URLs.
// \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols() will replace
// any bad protocols with HTTP, so we need to support the identical list.
// While '//' is technically optional for MAILTO only, we cannot cleanly
// differ between protocols here without hard-coding MAILTO, so '//' is
// optional for all protocols.
// @see \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols()
$protocols = \Drupal::getContainer()->getParameter('filter_protocols');
$protocols = implode(':(?://)?|', $protocols) . ':(?://)?';
$valid_url_path_characters = "[\p{L}\p{M}\p{N}!\*\';:=\+,\.\$\/%#\[\]\-_~@&]";
// Allow URL paths to contain balanced parens
// 1. Used in Wikipedia URLs like /Primer_(film)
// 2. Used in IIS sessions like /S(dfd346)/
$valid_url_balanced_parens = '\('. $valid_url_path_characters . '+\)';
// Valid end-of-path characters (so /foo. does not gobble the period).
// 1. Allow =&# for empty URL parameters and other URL-join artifacts
$valid_url_ending_characters = '[\p{L}\p{M}\p{N}:_+~#=/]|(?:' . $valid_url_balanced_parens . ')';
$valid_url_query_chars = '[a-zA-Z0-9!?\*\'@\(\);:&=\+\$\/%#\[\]\-_\.,~|]';
$valid_url_query_ending_chars = '[a-zA-Z0-9_&=#\/]';
//full path
//and allow @ in a url, but only in the middle. Catch things like http://example.com/@user/
$valid_url_path = '(?:(?:'.$valid_url_path_characters . '*(?:'.$valid_url_balanced_parens .$valid_url_path_characters . '*)*'. $valid_url_ending_characters . ')|(?:@' . $valid_url_path_characters . '+\/))';
// Prepare domain name pattern.
// The ICANN seems to be on track towards accepting more diverse top level
// domains, so this pattern has been "future-proofed" to allow for TLDs
// of length 2-64.
$domain = '(?:[\p{L}\p{M}\p{N}._+-]+\.)?[\p{L}\p{M}]{2,64}\b';
$ip = '(?:[0-9]{1,3}\.){3}[0-9]{1,3}';
$auth = '[\p{L}\p{M}\p{N}:%_+*~#?&=.,/;-]+@';
$trail = '('.$valid_url_path.'*)?(\\?'.$valid_url_query_chars .'*'.$valid_url_query_ending_chars.')?';
// Match absolute URLs.
$url_pattern = "(?:$auth)?(?:$domain|$ip)/?(?:$trail)?";
$pattern = "`$url_prefix((?:$protocols)(?:$url_pattern))`u";
$tasks['replaceFullLinks'] = $pattern;
// HTML comments need to be handled separately, as they may contain HTML
// markup, especially a '>'. Therefore, remove all comment contents and add
// them back later.
_filter_url_escape_comments('', TRUE);
$text = preg_replace_callback('`<!--(.*?)-->`s', '_filter_url_escape_comments', $text);
// Split at all tags; ensures that no tags or attributes are processed.
$chunks = preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
// PHP ensures that the array consists of alternating delimiters and
// literals, and begins and ends with a literal (inserting NULL as
// required). Therefore, the first chunk is always text:
$chunk_type = 'text';
// If a tag of $ignore_tags is found, it is stored in $open_tag and only
// removed when the closing tag is found. Until the closing tag is found,
// no replacements are made.
$open_tag = '';
for ($i = 0; $i < count($chunks); $i++) {
if ($chunk_type == 'text') {
// Only process this text if there are no unclosed $ignore_tags.
if ($open_tag == '') {
// If there is a match, inject a link into this chunk via the callback
// function contained in $task.
$chunks[$i] = preg_replace_callback(
$pattern,
function ($match) {
if (\Drupal::service('url_embed')->getEmbed(Html::decodeEntities($match[1]))) {
return '<drupal-url data-embed-url="' . $match[1] . '"></drupal-url>';
}
else {
return $match[1];
}
},
$chunks[$i]
);
}
// Text chunk is done, so next chunk must be a tag.
$chunk_type = 'tag';
}
else {
// Only process this tag if there are no unclosed $ignore_tags.
if ($open_tag == '') {
// Check whether this tag is contained in $ignore_tags.
if (preg_match("`<($ignore_tags)(?:\s|>)`i", $chunks[$i], $matches)) {
$open_tag = $matches[1];
}
}
// Otherwise, check whether this is the closing tag for $open_tag.
else {
if (preg_match("`<\/$open_tag>`i", $chunks[$i], $matches)) {
$open_tag = '';
}
}
// Tag chunk is done, so next chunk must be text.
$chunk_type = 'text';
}
}
$text = implode($chunks);
// Revert to the original comment contents
_filter_url_escape_comments('', FALSE);
return preg_replace_callback('`<!--(.*?)-->`', '_filter_url_escape_comments', $text);
}
/**
* {@inheritdoc}
*/
public function tips($long = FALSE) {
if ($long) {
return $this->t('<p>You can convert plain URLs to <drupal-url> HTML elements. Those elements are later converted to embeds using "Display embedded URLs" text filter.</p>');
}
else {
return $this->t('You can convert plain URLs to embed elements.');
}
}
}