From 72d4578e70dccc79cd91ef1a03213380571a0afc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BChler?= Date: Mon, 5 May 2014 10:55:34 +0200 Subject: [PATCH] [doc] add script to generate mimetypes.conf --- contrib/create-mimetypes.conf.pl | 174 +++++++++++++++++++++++++ contrib/mimetypes.conf | 216 +++++++++++++++++-------------- doc/plugin_core.xml | 2 + 3 files changed, 292 insertions(+), 100 deletions(-) create mode 100755 contrib/create-mimetypes.conf.pl diff --git a/contrib/create-mimetypes.conf.pl b/contrib/create-mimetypes.conf.pl new file mode 100755 index 00000000..aa6d4cd2 --- /dev/null +++ b/contrib/create-mimetypes.conf.pl @@ -0,0 +1,174 @@ +#!/usr/bin/perl -w + +# Based on create-mime.assign.pl in debian lighttpd (1.4.x) package +# Creates an example mimetypes.conf from /etc/mime.types + +use strict; + +# text/* subtypes to serve as "text/...; charset=utf-8" +# text/html IS NOT INCLUDED: html has its own method for defining charset +# (<meta>), but the standards specify that content-type in HTTP wins over +# the setting in the html document. 
+my %text_utf8 = map { $_ => 1 } qw( + css + csv + plain + x-bibtex + x-boo + x-c++hdr + x-c++src + x-chdr + x-csh + x-csrc + x-dsrc + x-diff + x-haskell + x-java + x-lilypond + x-literate-haskell + x-makefile + x-moc + x-pascal + x-perl + x-python + x-scala + x-sh + x-tcl + x-tex +); + +# map extension to hash which maps types to the type they should be replaced with +my %manual_conflicts_resolve = ( + '.ra' => { + 'audio/x-pn-realaudio' => 'audio/x-realaudio', + }, +); + +open MIMETYPES, "/etc/mime.types" or die "Can't open mime.types: $!"; + +my %extensions; +sub set { + my ($extension, $mimetype) = @_; + $extensions{$extension} = $mimetype; +} +sub add { + my ($extension, $mimetype) = @_; + my $have = $extensions{$extension}; + + my $r = $manual_conflicts_resolve{$extension}; + # update @_ too for calls to set + $_[1] = $mimetype = $r->{$mimetype} if $r && $r->{$mimetype}; + + # mime.types can have same extension for different mime types + if ($have) { + # application/octet-stream means we couldn't resolve another conflict + return if $have eq $mimetype || $have eq 'application/octet-stream'; + + my ($have_type, $have_subtype) = split /\//, $have, 2; + my ($type, $subtype) = split /\//, $mimetype, 2; + + my $have_x = ($have_type =~ /^x-/ || $have_subtype =~ /^x-/); + my $x = ($type =~ /^x-/ || $subtype =~ /^x-/); + + # entries without x- prefix in type/subtype win: + if ($have_x && !$x) { + return set @_; # overwrite + } elsif ($x && !$have_x) { + return; # ignore + } + + # text/ wins over application/ for same subtype + if ($subtype eq $have_subtype) { + if ($type eq "text" && $have_type eq "application") { + return set @_; # overwrite + } elsif ($have_type eq "text" && $type eq "application") { + return; # ignore + } + } + + print STDERR "Duplicate mimetype: '${extension}' => '${mimetype}' (already have '${have}'), merging to 'application/octet-stream'\n"; + set ($extension, 'application/octet-stream'); + } else { + set @_; + } +} + +sub print_type { + my 
($extension, $mimetype) = @_; + if ($mimetype =~ /^text\/(.*)$/) { + $mimetype .= "; charset=utf-8" if $text_utf8{$1}; + } + + print "\t\t\"${extension}\" => \"${mimetype}\",\n"; +} + +while () { + chomp; + s/\#.*//; + next if /^\w*$/; + if (/^([a-z0-9\/+-.]+)\s+((?:[a-z0-9.+-]+[ ]?)+)$/) { + my $mimetype = $1; + my @extensions = split / /, $2; + + foreach my $ext (@extensions) { + add(".${ext}", $mimetype); + } + } +} + +# missing in /etc/mime.types; +# from http://www.iana.org/assignments/media-types/media-types.xhtml +add(".dtd", "application/xml-dtd"); + + +print < "application/andrew-inset", ".anx" => "application/annodex", ".atom" => "application/atom+xml", @@ -29,35 +33,38 @@ setup { ".doc" => "application/msword", ".dot" => "application/msword", ".mxf" => "application/mxf", + ".asn" => "application/octet-stream", ".bin" => "application/octet-stream", + ".ent" => "application/octet-stream", ".oda" => "application/oda", ".ogx" => "application/ogg", ".one" => "application/onenote", - ".onetoc2" => "application/onenote", - ".onetmp" => "application/onenote", ".onepkg" => "application/onenote", + ".onetmp" => "application/onenote", + ".onetoc2" => "application/onenote", ".pdf" => "application/pdf", ".pgp" => "application/pgp-encrypted", ".key" => "application/pgp-keys", ".sig" => "application/pgp-signature", ".prf" => "application/pics-rules", - ".ps" => "application/postscript", ".ai" => "application/postscript", ".eps" => "application/postscript", - ".epsi" => "application/postscript", - ".epsf" => "application/postscript", ".eps2" => "application/postscript", ".eps3" => "application/postscript", + ".epsf" => "application/postscript", + ".epsi" => "application/postscript", + ".ps" => "application/postscript", ".rar" => "application/rar", ".rdf" => "application/rdf+xml", ".rtf" => "application/rtf", ".stl" => "application/sla", ".smi" => "application/smil+xml", ".smil" => "application/smil+xml", - ".xhtml" => "application/xhtml+xml", ".xht" => 
"application/xhtml+xml", + ".xhtml" => "application/xhtml+xml", ".xml" => "application/xml", ".xsd" => "application/xml", + ".dtd" => "application/xml-dtd", ".xsl" => "application/xslt+xml", ".xslt" => "application/xslt+xml", ".xspf" => "application/xspf+xml", @@ -67,14 +74,14 @@ setup { ".kml" => "application/vnd.google-earth.kml+xml", ".kmz" => "application/vnd.google-earth.kmz", ".xul" => "application/vnd.mozilla.xul+xml", - ".xls" => "application/vnd.ms-excel", ".xlb" => "application/vnd.ms-excel", + ".xls" => "application/vnd.ms-excel", ".xlt" => "application/vnd.ms-excel", ".eot" => "application/vnd.ms-fontobject", ".thmx" => "application/vnd.ms-officetheme", ".cat" => "application/vnd.ms-pki.seccat", - ".ppt" => "application/vnd.ms-powerpoint", ".pps" => "application/vnd.ms-powerpoint", + ".ppt" => "application/vnd.ms-powerpoint", ".odc" => "application/vnd.oasis.opendocument.chart", ".odb" => "application/vnd.oasis.opendocument.database", ".odf" => "application/vnd.oasis.opendocument.formula", @@ -134,13 +141,12 @@ setup { ".cab" => "application/x-cab", ".cbr" => "application/x-cbr", ".cbz" => "application/x-cbz", - ".cdf" => "application/x-cdf", ".cda" => "application/x-cdf", + ".cdf" => "application/x-cdf", ".vcd" => "application/x-cdlink", ".pgn" => "application/x-chess-pgn", ".mph" => "application/x-comsol", ".cpio" => "application/x-cpio", - ".csh" => "application/x-csh", ".deb" => "application/x-debian-package", ".udeb" => "application/x-debian-package", ".dcr" => "application/x-director", @@ -156,8 +162,8 @@ setup { ".sgf" => "application/x-go-sgf", ".gcf" => "application/x-graphing-calculator", ".gtar" => "application/x-gtar", - ".tgz" => "application/x-gtar-compressed", ".taz" => "application/x-gtar-compressed", + ".tgz" => "application/x-gtar-compressed", ".hdf" => "application/x-hdf", ".hwp" => "application/x-hwp", ".ica" => "application/x-ica", @@ -171,10 +177,10 @@ setup { ".jmz" => "application/x-jmol", ".chrt" => "application/x-kchart", 
".kil" => "application/x-killustrator", - ".skp" => "application/x-koan", ".skd" => "application/x-koan", - ".skt" => "application/x-koan", ".skm" => "application/x-koan", + ".skp" => "application/x-koan", + ".skt" => "application/x-koan", ".kpr" => "application/x-kpresenter", ".kpt" => "application/x-kpresenter", ".ksp" => "application/x-kspread", @@ -185,25 +191,25 @@ setup { ".lyx" => "application/x-lyx", ".lzh" => "application/x-lzh", ".lzx" => "application/x-lzx", - ".frm" => "application/x-maker", - ".maker" => "application/x-maker", - ".frame" => "application/x-maker", - ".fm" => "application/x-maker", - ".fb" => "application/x-maker", ".book" => "application/x-maker", + ".fb" => "application/x-maker", ".fbdoc" => "application/x-maker", + ".fm" => "application/x-maker", + ".frame" => "application/x-maker", + ".frm" => "application/x-maker", + ".maker" => "application/x-maker", ".md5" => "application/x-md5", ".mif" => "application/x-mif", ".wmd" => "application/x-ms-wmd", ".wmz" => "application/x-ms-wmz", - ".com" => "application/x-msdos-program", - ".exe" => "application/x-msdos-program", ".bat" => "application/x-msdos-program", + ".com" => "application/x-msdos-program", ".dll" => "application/x-msdos-program", + ".exe" => "application/x-msdos-program", ".msi" => "application/x-msi", ".nc" => "application/x-netcdf", - ".pac" => "application/x-ns-proxy-autoconfig", ".dat" => "application/x-ns-proxy-autoconfig", + ".pac" => "application/x-ns-proxy-autoconfig", ".nwc" => "application/x-nwc", ".o" => "application/x-object", ".oza" => "application/x-oz-application", @@ -219,10 +225,9 @@ setup { ".rpm" => "application/x-redhat-package-manager", ".rss" => "application/x-rss+xml", ".rb" => "application/x-ruby", - ".sci" => "application/x-scilab", ".sce" => "application/x-scilab", + ".sci" => "application/x-scilab", ".xcos" => "application/x-scilab-xcos", - ".sh" => "application/x-sh", ".sha1" => "application/x-sha1", ".shar" => "application/x-shar", ".swf" => 
"application/x-shockwave-flash", @@ -234,14 +239,13 @@ setup { ".sv4cpio" => "application/x-sv4cpio", ".sv4crc" => "application/x-sv4crc", ".tar" => "application/x-tar", - ".tcl" => "application/x-tcl", ".gf" => "application/x-tex-gf", ".pk" => "application/x-tex-pk", - ".texinfo" => "application/x-texinfo", ".texi" => "application/x-texinfo", + ".texinfo" => "application/x-texinfo", + ".roff" => "application/x-troff", ".t" => "application/x-troff", ".tr" => "application/x-troff", - ".roff" => "application/x-troff", ".man" => "application/x-troff-man", ".me" => "application/x-troff-me", ".ms" => "application/x-troff-ms", @@ -261,14 +265,14 @@ setup { ".orc" => "audio/csound", ".sco" => "audio/csound", ".flac" => "audio/flac", + ".kar" => "audio/midi", ".mid" => "audio/midi", ".midi" => "audio/midi", - ".kar" => "audio/midi", - ".mpga" => "audio/mpeg", - ".mpega" => "audio/mpeg", + ".m4a" => "audio/mpeg", ".mp2" => "audio/mpeg", ".mp3" => "audio/mpeg", - ".m4a" => "audio/mpeg", + ".mpega" => "audio/mpeg", + ".mpga" => "audio/mpeg", ".m3u" => "audio/mpegurl", ".oga" => "audio/ogg", ".ogg" => "audio/ogg", @@ -276,14 +280,14 @@ setup { ".spx" => "audio/ogg", ".sid" => "audio/prs.sid", ".aif" => "audio/x-aiff", - ".aiff" => "audio/x-aiff", ".aifc" => "audio/x-aiff", + ".aiff" => "audio/x-aiff", ".gsm" => "audio/x-gsm", - ".wma" => "audio/x-ms-wma", ".wax" => "audio/x-ms-wax", - ".ra" => "audio/x-pn-realaudio", - ".rm" => "audio/x-pn-realaudio", - ".ram" => "audio/x-pn-realaudio", + ".wma" => "audio/x-ms-wma", + ".ra" => "audio/x-realaudio", + ".ram" => "audio/x-realaudio", + ".rm" => "audio/x-realaudio", ".pls" => "audio/x-scpls", ".sd2" => "audio/x-sd2", ".wav" => "audio/x-wav", @@ -291,8 +295,8 @@ setup { ".cac" => "chemical/x-cache", ".cache" => "chemical/x-cache", ".csf" => "chemical/x-cache-csf", - ".cbin" => "chemical/x-cactvs-binary", ".cascii" => "chemical/x-cactvs-binary", + ".cbin" => "chemical/x-cactvs-binary", ".ctab" => "chemical/x-cactvs-binary", ".cdx" => 
"chemical/x-cdx", ".cer" => "chemical/x-cerius", @@ -303,17 +307,17 @@ setup { ".cml" => "chemical/x-cml", ".cpa" => "chemical/x-compass", ".bsd" => "chemical/x-crossfire", - ".csml" => "chemical/x-csml", ".csm" => "chemical/x-csml", + ".csml" => "chemical/x-csml", ".ctx" => "chemical/x-ctx", - ".cxf" => "chemical/x-cxf", ".cef" => "chemical/x-cxf", + ".cxf" => "chemical/x-cxf", ".emb" => "chemical/x-embl-dl-nucleotide", ".embl" => "chemical/x-embl-dl-nucleotide", ".spc" => "chemical/x-galactic-spc", - ".inp" => "chemical/x-gamess-input", ".gam" => "chemical/x-gamess-input", ".gamin" => "chemical/x-gamess-input", + ".inp" => "chemical/x-gamess-input", ".fch" => "chemical/x-gaussian-checkpoint", ".fchk" => "chemical/x-gaussian-checkpoint", ".cub" => "chemical/x-gaussian-cube", @@ -324,10 +328,10 @@ setup { ".gcg" => "chemical/x-gcg8-sequence", ".gen" => "chemical/x-genbank", ".hin" => "chemical/x-hin", - ".istr" => "chemical/x-isostar", ".ist" => "chemical/x-isostar", - ".jdx" => "chemical/x-jcamp-dx", + ".istr" => "chemical/x-isostar", ".dx" => "chemical/x-jcamp-dx", + ".jdx" => "chemical/x-jcamp-dx", ".kin" => "chemical/x-kinemage", ".mcm" => "chemical/x-macmolecule", ".mmd" => "chemical/x-macromodel-input", @@ -346,11 +350,9 @@ setup { ".zmt" => "chemical/x-mopac-input", ".moo" => "chemical/x-mopac-out", ".mvb" => "chemical/x-mopac-vib", - ".asn" => "chemical/x-ncbi-asn1", ".prt" => "chemical/x-ncbi-asn1-ascii", - ".ent" => "chemical/x-ncbi-asn1-ascii", - ".val" => "chemical/x-ncbi-asn1-binary", ".aso" => "chemical/x-ncbi-asn1-binary", + ".val" => "chemical/x-ncbi-asn1-binary", ".pdb" => "chemical/x-pdb", ".ros" => "chemical/x-rosdal", ".sw" => "chemical/x-swissprot", @@ -362,20 +364,20 @@ setup { ".ief" => "image/ief", ".jp2" => "image/jp2", ".jpg2" => "image/jp2", + ".jpe" => "image/jpeg", ".jpeg" => "image/jpeg", ".jpg" => "image/jpeg", - ".jpe" => "image/jpeg", ".jpm" => "image/jpm", - ".jpx" => "image/jpx", ".jpf" => "image/jpx", + ".jpx" => "image/jpx", 
".pcx" => "image/pcx", ".png" => "image/png", ".svg" => "image/svg+xml", ".svgz" => "image/svg+xml", - ".tiff" => "image/tiff", ".tif" => "image/tiff", - ".djvu" => "image/vnd.djvu", + ".tiff" => "image/tiff", ".djv" => "image/vnd.djvu", + ".djvu" => "image/vnd.djvu", ".ico" => "image/vnd.microsoft.icon", ".wbmp" => "image/vnd.wap.wbmp", ".cr2" => "image/x-canon-cr2", @@ -400,77 +402,80 @@ setup { ".xpm" => "image/x-xpixmap", ".xwd" => "image/x-xwindowdump", ".eml" => "message/rfc822", - ".igs" => "model/iges", ".iges" => "model/iges", - ".msh" => "model/mesh", + ".igs" => "model/iges", ".mesh" => "model/mesh", + ".msh" => "model/mesh", ".silo" => "model/mesh", - ".wrl" => "model/vrml", ".vrml" => "model/vrml", + ".wrl" => "model/vrml", + ".x3db" => "model/x3d+binary", ".x3dv" => "model/x3d+vrml", ".x3d" => "model/x3d+xml", - ".x3db" => "model/x3d+binary", ".appcache" => "text/cache-manifest", ".ics" => "text/calendar", ".icz" => "text/calendar", ".css" => "text/css; charset=utf-8", ".csv" => "text/csv; charset=utf-8", ".323" => "text/h323", - ".html" => "text/html; charset=utf-8", - ".htm" => "text/html; charset=utf-8", - ".shtml" => "text/html; charset=utf-8", + ".htm" => "text/html", + ".html" => "text/html", + ".shtml" => "text/html", ".uls" => "text/iuls", ".mml" => "text/mathml", ".asc" => "text/plain; charset=utf-8", - ".txt" => "text/plain; charset=utf-8", - ".text" => "text/plain; charset=utf-8", - ".pot" => "text/plain; charset=utf-8", ".brf" => "text/plain; charset=utf-8", + ".pot" => "text/plain; charset=utf-8", ".srt" => "text/plain; charset=utf-8", + ".text" => "text/plain; charset=utf-8", + ".txt" => "text/plain; charset=utf-8", ".rtx" => "text/richtext", ".sct" => "text/scriptlet", ".wsc" => "text/scriptlet", - ".tm" => "text/texmacs", ".tsv" => "text/tab-separated-values", + ".tm" => "text/texmacs", ".ttl" => "text/turtle", ".jad" => "text/vnd.sun.j2me.app-descriptor", ".wml" => "text/vnd.wap.wml", ".wmls" => "text/vnd.wap.wmlscript", - ".bib" => 
"text/x-bibtex", - ".boo" => "text/x-boo", - ".h++" => "text/x-c++hdr", - ".hpp" => "text/x-c++hdr", - ".hxx" => "text/x-c++hdr", - ".hh" => "text/x-c++hdr", - ".c++" => "text/x-c++src", - ".cpp" => "text/x-c++src", - ".cxx" => "text/x-c++src", - ".cc" => "text/x-c++src", - ".h" => "text/x-chdr", + ".bib" => "text/x-bibtex; charset=utf-8", + ".boo" => "text/x-boo; charset=utf-8", + ".h++" => "text/x-c++hdr; charset=utf-8", + ".hh" => "text/x-c++hdr; charset=utf-8", + ".hpp" => "text/x-c++hdr; charset=utf-8", + ".hxx" => "text/x-c++hdr; charset=utf-8", + ".c++" => "text/x-c++src; charset=utf-8", + ".cc" => "text/x-c++src; charset=utf-8", + ".cpp" => "text/x-c++src; charset=utf-8", + ".cxx" => "text/x-c++src; charset=utf-8", + ".h" => "text/x-chdr; charset=utf-8", ".htc" => "text/x-component", - ".c" => "text/x-csrc", - ".d" => "text/x-dsrc", - ".diff" => "text/x-diff", - ".patch" => "text/x-diff", - ".hs" => "text/x-haskell", - ".java" => "text/x-java", - ".ly" => "text/x-lilypond", - ".lhs" => "text/x-literate-haskell", - ".moc" => "text/x-moc", - ".p" => "text/x-pascal", - ".pas" => "text/x-pascal", + ".csh" => "text/x-csh; charset=utf-8", + ".c" => "text/x-csrc; charset=utf-8", + ".diff" => "text/x-diff; charset=utf-8", + ".patch" => "text/x-diff; charset=utf-8", + ".d" => "text/x-dsrc; charset=utf-8", + ".hs" => "text/x-haskell; charset=utf-8", + ".java" => "text/x-java; charset=utf-8", + ".ly" => "text/x-lilypond; charset=utf-8", + ".lhs" => "text/x-literate-haskell; charset=utf-8", + ".moc" => "text/x-moc; charset=utf-8", + ".p" => "text/x-pascal; charset=utf-8", + ".pas" => "text/x-pascal; charset=utf-8", ".gcd" => "text/x-pcs-gcd", - ".pl" => "text/x-perl", - ".pm" => "text/x-perl", - ".py" => "text/x-python", - ".scala" => "text/x-scala", + ".pl" => "text/x-perl; charset=utf-8", + ".pm" => "text/x-perl; charset=utf-8", + ".py" => "text/x-python; charset=utf-8", + ".scala" => "text/x-scala; charset=utf-8", ".etx" => "text/x-setext", ".sfv" => "text/x-sfv", - 
".tk" => "text/x-tcl", - ".tex" => "text/x-tex", - ".ltx" => "text/x-tex", - ".sty" => "text/x-tex", - ".cls" => "text/x-tex", + ".sh" => "text/x-sh; charset=utf-8", + ".tcl" => "text/x-tcl; charset=utf-8", + ".tk" => "text/x-tcl; charset=utf-8", + ".cls" => "text/x-tex; charset=utf-8", + ".ltx" => "text/x-tex; charset=utf-8", + ".sty" => "text/x-tex; charset=utf-8", + ".tex" => "text/x-tex; charset=utf-8", ".vcs" => "text/x-vcalendar", ".vcf" => "text/x-vcard", ".3gp" => "video/3gpp", @@ -480,18 +485,20 @@ setup { ".dv" => "video/dv", ".fli" => "video/fli", ".gl" => "video/gl", + ".mp4" => "video/mp4", + ".mpe" => "video/mpeg", ".mpeg" => "video/mpeg", ".mpg" => "video/mpeg", - ".mpe" => "video/mpeg", - ".mp4" => "video/mp4", - ".qt" => "video/quicktime", - ".mov" => "video/quicktime", ".ogv" => "video/ogg", + ".mov" => "video/quicktime", + ".qt" => "video/quicktime", ".webm" => "video/webm", ".mxu" => "video/vnd.mpegurl", ".flv" => "video/x-flv", ".lsf" => "video/x-la-asf", ".lsx" => "video/x-la-asf", + ".mkv" => "video/x-matroska", + ".mpv" => "video/x-matroska", ".mng" => "video/x-mng", ".asf" => "video/x-ms-asf", ".asx" => "video/x-ms-asf", @@ -501,15 +508,24 @@ setup { ".wvx" => "video/x-ms-wvx", ".avi" => "video/x-msvideo", ".movie" => "video/x-sgi-movie", - ".mpv" => "video/x-matroska", - ".mkv" => "video/x-matroska", ".ice" => "x-conference/x-cooltalk", ".sisx" => "x-epoc/x-sisx-app", ".vrm" => "x-world/x-vrml", - #custom + # other useful mappings - "README" => "text/plain; charset=utf-8" - ]; + ".tar.gz" => "application/x-gtar-compressed", + ".gz" => "application/x-gzip", + ".tbz" => "application/x-gtar-compressed", + ".tar.bz2" => "application/x-gtar-compressed", + ".bz2" => "application/x-bzip", + ".log" => "text/plain; charset=utf-8", + ".conf" => "text/plain; charset=utf-8", + ".spec" => "text/plain; charset=utf-8", + "README" => "text/plain; charset=utf-8", + "Makefile" => "text/x-makefile; charset=utf-8", + + # custom - put your own entries here 
(overwriting mappings above) + ]; } diff --git a/doc/plugin_core.xml b/doc/plugin_core.xml index b76cb287..ea4eba0c 100644 --- a/doc/plugin_core.xml +++ b/doc/plugin_core.xml @@ -142,6 +142,8 @@ Default MIME type is "application/octet-stream". The sources contain a "mimetypes example config":http://git.lighttpd.net/lighttpd/lighttpd2.git/tree/contrib/mimetypes.conf with many standard mappings. + + The longest matching suffix is used (@".tar.gz"@ always wins over @".gz"@), and in case of duplicate entries the last one is used.