Permalink
Switch branches/tags
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 298 lines (268 sloc) 8.11 KB
#!/usr/bin/php
<?php
/**
* Command line interface that converts a CSS file with background images into MHTML and Data URIs using base64 encoding,
* so that the separate HTTP requests for the individual images are combined into a single request.
*
* @author Joseph Chiang <josephj6802@gmail.com>
* @package dataurize
*
* ./dataurize <input> <base> [--output]
*/
/**
 * Print the command line help text and terminate the script.
 *
 * @param string $path Name of the executable, inserted into the usage and sample lines.
 */
function print_usage($path)
{
    // Each entry is one chunk of the help text, emitted in order.
    $chunks = array(
        "Usage: {$path} <input> <base> [options]\n",
        "\n",
        " -o\n",
        " --output=<output> Assign an output file. By default it overwrites your original file.\n",
        " --print Print output directly instead of generating or overwriting a file.\n",
        " By default, this option is disabled. \n",
        " --separate=<output> Separate MHTML to a single file. By default, this option is disabled. \n",
        " --no-mhtml Don't enable MHTML. Use this option if you don't want to use MHTML for legend IE.\n",
        " It however doesn't hurt browser compatibility by adding *background-image(<Original Image Path>).\n",
        " By default, this option is disabled.\n",
        " --size-limit=<bytes> It might still damage website performance if you transform an image with large file size.\n",
        " The default file size is 1024 bytes.\n",
        " -h\n",
        " --help Show this help.\n",
        "\n",
        "Sample: {$path} foo.css http://bar.com/ --output=foo2.css\n\n",
    );

    echo implode("", $chunks);
    exit;
}
/**
 * Top-level handler for uncaught exceptions: report the message and abort.
 *
 * The message keeps the "Error: <message>" format, but the process ends with
 * a non-zero exit status. die() with a string argument always exits with
 * status 0, which makes a failed run look successful to shell scripts and
 * build tools that call this command.
 *
 * @param Exception $ex The uncaught exception (any Throwable on PHP 7+).
 */
function handle_exception($ex)
{
    echo "Error: " . $ex->getMessage() . "\n";
    exit(1);
}
// Route every uncaught exception through handle_exception().
set_exception_handler("handle_exception");

// Constants.
// Boundary marker placed between the parts of the generated MHTML document.
define("SEPARATOR", "_MY_BOUNDARY_SEPARATOR");

// The first element of $argv is the script path. Once it is removed, both
// <input> and <base> must still be present; otherwise show the usage text.
$path = array_shift($argv);
if (count($argv) < 2)
{
    print_usage($path);
    exit;
}
// Make sure the input CSS is an existing, readable regular file.
// file_exists() alone also accepts directories (and unreadable files), which
// would only fail later, with a warning, when the content is read.
$input = array_shift($argv);
if ( ! is_file($input) || ! is_readable($input))
{
    throw new Exception("File you provide doesn't exist. Please check again.");
}
// Second positional argument: the base that relative image paths in the CSS
// are resolved against. It must be an http(s) URL or an existing directory.
$base = array_shift($argv);
$is_local = FALSE;
$looks_like_url = preg_match('/(https?:\/\/[^\s+\"\<\>]+)/i', $base);
if ( ! $looks_like_url)
{
    if ( ! (file_exists($base) && is_dir($base)))
    {
        throw new Exception("You must provide a URL or a directory path.");
    }
    $is_local = TRUE;
}

// Always add slash at end, so paths can simply be appended to the base.
if (substr($base, -1) !== "/")
{
    $base = $base . "/";
}

// Option defaults; the option loop may override any of them.
$output     = $input; // Output file (the input file is overwritten by default).
$size_limit = 1024;   // Maximum image size in bytes.
$is_print   = FALSE;  // Print to the console instead of writing an output file.
$is_mhtml   = TRUE;   // Enable MHTML support.
$separate   = FALSE;  // FALSE, or the name of a separate file for the MHTML part.
// Walk through the remaining command line arguments (the options).
// The explicit comparison with NULL matters: a plain truthiness test stops
// the loop at the first argument PHP treats as false, such as "0" or "",
// and every option after it would be ignored.
while (($arg = array_shift($argv)) !== NULL)
{
    // Options look like "-x", "--name" or "--name=value"; skip anything else.
    if ( ! preg_match('/(-{1,2}([^=]+))=*([^\s]+)*/', $arg, $matches))
    {
        continue;
    }
    $val = (isset($matches[3]) ? $matches[3] : "");

    switch ($matches[1])
    {
        // Get output file.
        case "-o":
        case "--output":
            // Without a value the output path would become an empty string
            // and the result could not be written anywhere.
            if ($val === "")
            {
                throw new Exception("Option {$matches[1]} requires a file name, e.g. {$matches[1]}=foo2.css.");
            }
            $output = $val;
            break;

        // Name of a separate file that receives the MHTML part.
        case "--separate":
            $separate = $val;
            break;

        // Maximum size, in bytes, of an image that may be embedded.
        case "--size-limit":
            $size_limit = intval($val);
            break;

        case "--print":
            $is_print = TRUE;
            break;

        case "--no-mhtml":
            $is_mhtml = FALSE;
            break;

        case "-h":
        case "--help":
            print_usage($path);
            exit;
    }
}
// Location that the generated "mhtml:" references point at: the separate
// MHTML file when --separate was given, otherwise the output CSS file itself.
$mhtml_path = $base . (($separate) ? $separate : basename($output));

// Read the style sheet and collect every background declaration that has a
// url(...) value. The capture groups are:
//   0 = whole declaration, 1 = property name, 2 = text before url(,
//   3 = the URL itself,    4 = text after the closing parenthesis.
$content = file_get_contents($input);
preg_match_all('/(background[^:]*):([^;}]*)url\(([^\)]+)\)([^;}]*)/', $content, $data);
list($attrs, $types, $props_start, $urls, $props_end) = $data;

// Every distinct URL only has to be fetched once.
$urls = array_unique($urls);

// Working arrays filled by the steps that follow.
$data   = array(); // Successfully encoded images (url, encode, type).
$uris   = array(); // Replacement declarations.
$mhtmls = array(); // MHTML parts, one per encoded image.
// Build a temporary directory that holds the fetched images while they are
// inspected and encoded. The name is derived from the current time.
$tmp_path = "/tmp/" . substr(md5(microtime()), 0, 8) . "/";

// Create it for the current user only and stop if that is not possible
// (for example because the directory already exists): continuing would write
// into, and later delete, a directory this run does not own.
if ( ! mkdir($tmp_path, 0700))
{
    throw new Exception("Unable to create temporary directory {$tmp_path}.");
}
// Fetch every referenced image into the temporary directory (download for
// remote targets, copy for local ones) and base64-encode those that qualify.
// $i numbers the encoded images; it is used as the MHTML Content-Location.
$i = 0;
foreach ($urls as $url)
{
    // Strip quotes and the optional whitespace CSS allows inside url( ... ).
    $url = trim(str_replace(array("\"", "'"), "", $url));

    // Ignore empty values and original DataURIs.
    if ($url === "" || strpos($url, "data:") === 0)
    {
        continue;
    }

    // Absolute http(s) URLs are used as they are; everything else is resolved
    // against $base. $base always ends with a slash, so one heading slash of
    // the relative path is removed.
    $has_protocol = (preg_match('/^https?:\/\//i', $url) === 1);
    if ($has_protocol)
    {
        $target = $url;
    }
    else
    {
        $target = $base . ((strpos($url, "/") === 0) ? substr($url, 1) : $url);
    }
    $use_curl = ($has_protocol || ! $is_local);

    $filename = basename($target);
    if ($filename === "" || $filename === "." || $filename === "..")
    {
        continue;
    }
    $tempfile = $tmp_path . $filename;

    // Get file content.
    if ($use_curl)
    {
        // The URL comes from the style sheet, so it is escaped before it is
        // handed to the shell.
        $cmd = "curl -s " . escapeshellarg($target) . " > " . escapeshellarg($tempfile);
        $return = array();
        exec($cmd, $return, $error);
    }
    else
    {
        // The existence check has to look at the source file; the temporary
        // copy cannot exist before it has been made.
        if ( ! is_file($target))
        {
            continue;
        }
        $error = copy($target, $tempfile) ? 0 : 1;
    }
    if ($error)
    {
        continue;
    }

    // Ignore image exceeds file size limit. The stat cache is cleared first
    // because the same temporary file name may have been used by an earlier
    // image and was rewritten outside of PHP's knowledge.
    clearstatcache();
    $bytes = filesize($tempfile);
    if ($bytes === FALSE || $bytes > $size_limit)
    {
        continue;
    }

    // Get the image format from ImageMagick. exec() appends to an existing
    // output array, so the array is reset; otherwise $return[0] would still
    // hold the format of the first image that was ever identified.
    $return = array();
    $cmd = "/usr/bin/identify -quiet -format \"%m\" " . escapeshellarg($tempfile);
    exec($cmd, $return, $error);
    if ($error || empty($return[0]))
    {
        continue;
    }
    // Format names are plain ASCII, so the mbstring extension is not needed.
    $type = strtolower($return[0]);

    // Get encoded string. file_get_contents() reports failure by returning
    // FALSE rather than by throwing.
    $raw = file_get_contents($tempfile);
    if ($raw === FALSE)
    {
        continue;
    }
    $encode = base64_encode($raw);

    $data[] = array(
        "url"    => $url,
        "encode" => $encode,
        "type"   => $type,
    );
    $mhtmls[] = "--" . SEPARATOR . "\nContent-Location:{$i}\nContent-Transfer-Encoding:base64\n\n" . $encode . "\n\n";
    $i++;
}
// Build the replacement for every background declaration whose image was
// encoded. $new_attrs receives the text to search for in the style sheet and
// $uris the text that replaces it; both arrays are filled in parallel.
$new_attrs = array();
foreach ($attrs as $i => $attr)
{
    // $i also indexes the parallel match arrays $types, $props_start and
    // $props_end.

    // Keep the declaration exactly as it appears in the style sheet. This is
    // the text that has to be found again later, so it must keep its quotes.
    $original = $attr;

    // A declaration that occurs more than once only needs one replacement rule.
    if (in_array($original, $new_attrs, TRUE))
    {
        continue;
    }

    // Quote-free working copy, used to build the replacement text.
    $attr = str_replace(array("\"", "'"), "", $attr);

    // Extract the URL of this declaration with the same pattern the
    // declarations were collected with, so the same url(...) is picked.
    if ( ! preg_match('/(background[^:]*):([^;}]*)url\(([^\)]+)\)([^;}]*)/', $attr, $parts))
    {
        continue;
    }
    $attr_url = trim($parts[3]);

    foreach ($data as $key => $entry)
    {
        // Compare whole URLs. A substring test would let "a.png" match a
        // declaration that actually references "aa.png".
        if (trim($entry["url"]) !== $attr_url)
        {
            continue;
        }

        // Declaration with the image inlined as a Data URI.
        $uri = str_replace(
            $entry["url"],
            "\"data:image/{$entry["type"]};base64," . $entry["encode"] . "\"",
            $attr
        );

        // Star-hack fallback declaration: either the MHTML reference or,
        // with --no-mhtml, the original image URL.
        $fallback = ($is_mhtml) ? "mhtml:{$mhtml_path}!{$key}" : $entry["url"];

        // The property name may carry whitespace in front of the colon.
        if (trim($types[$i]) === "background-image")
        {
            $uris[] = $uri . ";*background-image:url($fallback)";
        }
        else
        {
            $uris[] = $uri . ";*background:{$props_start[$i]}url($fallback){$props_end[$i]}";
        }
        $new_attrs[] = $original;
        break;
    }
}
// The images are no longer needed on disk: delete the temporary directory
// together with everything inside it.
exec("rm -rf {$tmp_path}", $return, $error);
// Assemble the MHTML document: a CSS comment holding one part per encoded
// image. implode() takes the separator first; the reversed legacy argument
// order is rejected with a TypeError on PHP 8.
$mhtml = "";
if ($is_mhtml)
{
    $mhtml = "/*\nContent-Type: multipart/related; boundary=\"" . SEPARATOR . "\"\n\n"
           . implode("", $mhtmls)
           . "--" . SEPARATOR . "--\n*/\n";
}

// New CSS file content. strtr() with a replacement map tries the longest
// declaration first and never rescans text it has just inserted, so a
// replacement cannot be altered by a later rule and a short declaration
// cannot clobber a longer one that starts with the same text.
if (count($new_attrs) > 0 && count($new_attrs) === count($uris))
{
    $content = strtr($content, array_combine($new_attrs, $uris));
}

// --print: send everything to the console and write no file.
if ($is_print)
{
    echo $mhtml . $content;
    exit;
}

// Write the result: either the MHTML part and the CSS as two files
// (--separate) or both combined in the output file.
if ($separate)
{
    $written = (file_put_contents($separate, $mhtml) !== FALSE)
            && (file_put_contents($output, $content) !== FALSE);
}
else
{
    $written = (file_put_contents($output, $mhtml . $content) !== FALSE);
}
if ( ! $written)
{
    throw new Exception("Unable to write the output file. Please check the path and its permissions.");
}
exit;
?>