Skip to content

Commit

Permalink
MDL-31928: Fixing bugs in repository_url
Browse files Browse the repository at this point in the history
- if the same image occurs several times on the page, list it only once
- resolve image path correctly if it has a query string
- show images included in CSS
- non-JS file picker ignores thumbnail width and height attributes (this is a temp fix until renderers are implemented)
  • Loading branch information
marinaglancy committed Apr 23, 2012
1 parent ead4f18 commit 8685679
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 47 deletions.
20 changes: 18 additions & 2 deletions repository/filepicker.php
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -139,7 +139,15 @@
echo '<table>'; echo '<table>';
foreach ($search_result['list'] as $item) { foreach ($search_result['list'] as $item) {
echo '<tr>'; echo '<tr>';
echo '<td><img src="'.$item['thumbnail'].'" />'; echo '<td>';
$style = '';
if (isset($item['thumbnail_height'])) {
$style .= 'max-height:'.$item['thumbnail_height'].'px;';
}
if (isset($item['thumbnail_width'])) {
$style .= 'max-width:'.$item['thumbnail_width'].'px;';
}
echo html_writer::empty_tag('img', array('src' => $item['thumbnail'], 'style' => $style));
echo '</td><td>'; echo '</td><td>';
if (!empty($item['url'])) { if (!empty($item['url'])) {
echo html_writer::link($item['url'], $item['title'], array('target'=>'_blank')); echo html_writer::link($item['url'], $item['title'], array('target'=>'_blank'));
Expand Down Expand Up @@ -227,7 +235,15 @@
echo '<table>'; echo '<table>';
foreach ($list['list'] as $item) { foreach ($list['list'] as $item) {
echo '<tr>'; echo '<tr>';
echo '<td><img src="'.$item['thumbnail'].'" />'; echo '<td>';
$style = '';
if (isset($item['thumbnail_height'])) {
$style .= 'max-height:'.$item['thumbnail_height'].'px;';
}
if (isset($item['thumbnail_width'])) {
$style .= 'max-width:'.$item['thumbnail_width'].'px;';
}
echo html_writer::empty_tag('img', array('src' => $item['thumbnail'], 'style' => $style));
echo '</td><td>'; echo '</td><td>';
if (!empty($item['url'])) { if (!empty($item['url'])) {
echo html_writer::link($item['url'], $item['title'], array('target'=>'_blank')); echo html_writer::link($item['url'], $item['title'], array('target'=>'_blank'));
Expand Down
141 changes: 99 additions & 42 deletions repository/url/lib.php
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
require_once(dirname(__FILE__).'/locallib.php'); require_once(dirname(__FILE__).'/locallib.php');


class repository_url extends repository { class repository_url extends repository {
var $processedfiles = array();


/** /**
* @param int $repositoryid * @param int $repositoryid
Expand All @@ -42,16 +43,6 @@ public function __construct($repositoryid, $context = SYSCONTEXTID, $options = a
$this->file_url = optional_param('file', '', PARAM_RAW); $this->file_url = optional_param('file', '', PARAM_RAW);
} }


/**
 * Downloads the file at $url into a local file and reports where it was saved.
 *
 * @param string $url URL of the remote file to fetch
 * @param string $file value handed to prepare_file() to obtain the local path
 *                     (presumably a preferred file name - confirm against prepare_file())
 * @return array array with keys 'path' (local path that was opened for writing)
 *               and 'url' (the URL that was requested)
 */
public function get_file($url, $file = '') {
global $CFG;
//$CFG->repository_no_delete = true;
// The local destination path is chosen by prepare_file().
$path = $this->prepare_file($file);
$fp = fopen($path, 'w');
$c = new curl;
// The open file handle is passed to curl as the download target.
$c->download(array(array('url'=>$url, 'file'=>$fp)));
// NOTE(review): $fp is never closed here and the download result is not checked;
// confirm whether curl::download() closes the handle and how failures should surface.
return array('path'=>$path, 'url'=>$url);
}

public function check_login() { public function check_login() {
if (!empty($this->file_url)) { if (!empty($this->file_url)) {
return true; return true;
Expand All @@ -75,6 +66,7 @@ public function print_login() {


$ret['login'] = array($url); $ret['login'] = array($url);
$ret['login_btn_label'] = get_string('download', 'repository_url'); $ret['login_btn_label'] = get_string('download', 'repository_url');
$ret['allowcaching'] = true; // indicates that login form can be cached in filepicker.js
return $ret; return $ret;
} else { } else {
echo <<<EOD echo <<<EOD
Expand All @@ -97,48 +89,113 @@ public function print_login() {
public function get_listing($path='', $page='') { public function get_listing($path='', $page='') {
global $CFG, $OUTPUT; global $CFG, $OUTPUT;
$ret = array(); $ret = array();
$ret['list'] = array();
$ret['nosearch'] = true;
$ret['norefresh'] = true;
$ret['nologin'] = true;

$this->parse_file(null, $this->file_url, $ret, true);
return $ret;
}

/**
* Parses one file (either html or css)
*
* @param string $baseurl (optional) URL of the file where link to this file was found
* @param string $relativeurl relative or absolute link to the file
* @param array $list
* @param bool $mainfile true only for the main HTML file, false for all embedded/linked files
*/
protected function parse_file($baseurl, $relativeurl, &$list, $mainfile = false) {
if (preg_match('/([\'"])(.*)\1/', $relativeurl, $matches)) {
$relativeurl = $matches[2];
}
if (empty($baseurl)) {
$url = $relativeurl;
} else {
$url = htmlspecialchars_decode(url_to_absolute($baseurl, $relativeurl));
}
if (in_array($url, $this->processedfiles)) {
// avoid endless recursion
return;
}
$this->processedfiles[] = $url;
$curl = new curl; $curl = new curl;
$msg = $curl->head($this->file_url); $msg = $curl->head($url);
$info = $curl->get_info(); $info = $curl->get_info();
if ($info['http_code'] != 200) { if ($info['http_code'] != 200) {
$ret['e'] = $msg; if ($mainfile) {
$list['error'] = $msg;
}
} else { } else {
$ret['list'] = array(); $csstoanalyze = '';
$ret['nosearch'] = true; if ($mainfile && (strstr($info['content_type'], 'text/html') || empty($info['content_type']))) {
$ret['nologin'] = true; // parse as html
$filename = $this->guess_filename($info['url'], $info['content_type']); $htmlcontent = $curl->get($info['url']);
if (strstr($info['content_type'], 'text/html') || empty($info['content_type'])) { $ddoc = new DOMDocument();
// analysis this web page, general file list @$ddoc->loadHTML($htmlcontent);
$ret['list'] = array(); // extract <img>
$content = $curl->get($info['url']); $tags = $ddoc->getElementsByTagName('img');
$this->analyse_page($info['url'], $content, $ret); foreach ($tags as $tag) {
} else { $url = $tag->getAttribute('src');
$this->add_image_to_list($info['url'], $url, $list);
}
// analyse embedded css (<style>)
$tags = $ddoc->getElementsByTagName('style');
foreach ($tags as $tag) {
if ($tag->getAttribute('type') == 'text/css') {
$csstoanalyze .= $tag->textContent."\n";
}
}
// analyse links to css (<link type='text/css' href='...'>)
$tags = $ddoc->getElementsByTagName('link');
foreach ($tags as $tag) {
if ($tag->getAttribute('type') == 'text/css' && strlen($tag->getAttribute('href'))) {
$this->parse_file($info['url'], $tag->getAttribute('href'), $list);
}
}
} else if (strstr($info['content_type'], 'css')) {
// parse as css
$csscontent = $curl->get($info['url']);
$csstoanalyze .= $csscontent."\n";
} else if (strstr($info['content_type'], 'image/')) {
// download this file // download this file
$ret['list'][] = array( $this->add_image_to_list($info['url'], $info['url'], $list);
'title'=>$filename, }
'source'=>$this->file_url,
'thumbnail' => $OUTPUT->pix_url(file_extension_icon($filename, 32))->out(false) // parse all found css styles
); if (strlen($csstoanalyze)) {
$urls = extract_css_urls($csstoanalyze);
if (!empty($urls['property'])) {
foreach ($urls['property'] as $url) {
$this->add_image_to_list($info['url'], $url, $list);
}
}
if (!empty($urls['import'])) {
foreach ($urls['import'] as $cssurl) {
$this->parse_file($info['url'], $cssurl, $list);
}
}
} }
} }
return $ret;
} }
public function analyse_page($baseurl, $content, &$list) { protected function add_image_to_list($baseurl, $url, &$list) {
global $CFG, $OUTPUT; if (empty($list['list'])) {
$urls = extract_html_urls($content); $list['list'] = array();
$images = $urls['img']['src']; }
$pattern = '#img(.+)src="?\'?([[:alnum:]:?=&@/._+-]+)"?\'?#i'; $src = url_to_absolute($baseurl, htmlspecialchars_decode($url));
if (!empty($images)) { foreach ($list['list'] as $image) {
foreach($images as $url) { if ($image['source'] == $src) {
$list['list'][] = array( return;
'title'=>$this->guess_filename($url, ''),
'source'=>url_to_absolute($baseurl, $url),
'thumbnail'=>url_to_absolute($baseurl, $url),
'thumbnail_height'=>84,
'thumbnail_width'=>84
);
} }
} }
$list['list'][] = array(
'title'=>$this->guess_filename($url, ''),
'source'=>$src,
'thumbnail'=>$src,
'thumbnail_height'=>84,
'thumbnail_width'=>84
);
} }
public function guess_filename($url, $type) { public function guess_filename($url, $type) {
$pattern = '#\/([\w_\?\-.]+)$#'; $pattern = '#\/([\w_\?\-.]+)$#';
Expand Down
54 changes: 51 additions & 3 deletions repository/url/locallib.php
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ function url_to_absolute( $baseUrl, $relativeUrl )
if ( $b === FALSE || empty( $b['scheme'] ) || empty( $b['host'] ) ) if ( $b === FALSE || empty( $b['scheme'] ) || empty( $b['host'] ) )
return FALSE; return FALSE;
$r['scheme'] = $b['scheme']; $r['scheme'] = $b['scheme'];
if (empty($b['path'])) {
$b['path'] = '';
}


// If relative URL has an authority, clean path and return. // If relative URL has an authority, clean path and return.
if ( isset( $r['host'] ) ) if ( isset( $r['host'] ) )
Expand Down Expand Up @@ -248,11 +251,11 @@ function url_remove_dot_segments( $path )
* the associative array of URL parts, or FALSE if the URL is * the associative array of URL parts, or FALSE if the URL is
* too malformed to recognize any parts. * too malformed to recognize any parts.
*/ */
function split_url( $url, $decode=TRUE ) function split_url( $url, $decode=FALSE)
{ {
// Character sets from RFC3986. // Character sets from RFC3986.
$xunressub = 'a-zA-Z\d\-._~\!$&\'()*+,;='; $xunressub = 'a-zA-Z\d\-._~\!$&\'()*+,;=';
$xpchar = $xunressub . ':@%'; $xpchar = $xunressub . ':@% ';


// Scheme from RFC3986. // Scheme from RFC3986.
$xscheme = '([a-zA-Z][a-zA-Z\d+-.]*)'; $xscheme = '([a-zA-Z][a-zA-Z\d+-.]*)';
Expand Down Expand Up @@ -382,7 +385,7 @@ function split_url( $url, $decode=TRUE )
* empty string is returned if the $parts array does not contain * empty string is returned if the $parts array does not contain
* any of the needed values. * any of the needed values.
*/ */
function join_url( $parts, $encode=TRUE ) function join_url( $parts, $encode=FALSE)
{ {
if ( $encode ) if ( $encode )
{ {
Expand Down Expand Up @@ -432,6 +435,51 @@ function join_url( $parts, $encode=TRUE )
$url .= '#' . $parts['fragment']; $url .= '#' . $parts['fragment'];
return $url; return $url;
} }

/**
 * Percent-encodes a URL so that it only contains characters allowed by RFC3986.
 *
 * The whole string is first passed through rawurlencode(), then the RFC3986
 * reserved characters (gen-delims and sub-delims) are restored to their literal
 * form so that the URL structure (scheme, path separators, query, fragment)
 * is preserved. All other disallowed characters stay replaced by their
 * respective percent encodings.
 *
 * Already encoded characters are not encoded again: '%20' stays '%20' and is
 * not turned into '%2520'. A '%' that does NOT introduce a valid escape
 * (i.e. is not followed by two hexadecimal digits, as in '100%') is encoded
 * as '%25', because a bare '%' is not valid in a URL.
 *
 * Parameters:
 * url the url to encode.
 *
 * Return values:
 * Returns the encoded URL string.
 */
function encode_url($url) {
    // Percent-encoded form => literal reserved character.
    // rawurlencode() always emits upper-case hex digits, so an exact-match
    // lookup is sufficient here.
    $reserved = array(
        '%3A' => ':',
        '%2F' => '/',
        '%3F' => '?',
        '%23' => '#',
        '%5B' => '[',
        '%5D' => ']',
        '%40' => '@',
        '%21' => '!',
        '%24' => '$',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%3B' => ';',
        '%3D' => '=',
    );

    $url = rawurlencode($url);

    // Restore reserved characters first. strtr() performs a single pass, so an
    // escape that was already present in the input (now '%25XX') is not touched.
    $url = strtr($url, $reserved);

    // Undo the double encoding of escapes that were already present in the
    // input ('%2520' -> '%20'). A '%' that was not part of a valid escape is
    // left as '%25'.
    $url = preg_replace('!%25(?=[0-9A-Fa-f]{2})!', '%', $url);

    return $url;
}

/** /**
* Extract URLs from a web page. * Extract URLs from a web page.
* *
Expand Down

0 comments on commit 8685679

Please sign in to comment.