Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revisit mime types for languages #139921

Merged
merged 1 commit into from Dec 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
228 changes: 1 addition & 227 deletions src/vs/base/common/mime.ts
Expand Up @@ -3,12 +3,7 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/

import { ParsedPattern, parse } from 'vs/base/common/glob';
import { Schemas } from 'vs/base/common/network';
import { basename, extname, posix } from 'vs/base/common/path';
import { DataUri } from 'vs/base/common/resources';
import { startsWithUTF8BOM } from 'vs/base/common/strings';
import { URI } from 'vs/base/common/uri';
import { extname } from 'vs/base/common/path';

export namespace Mimes {
export const text = 'text/plain';
Expand All @@ -18,227 +13,6 @@ export namespace Mimes {
export const latex = 'text/latex';
}

/**
 * Describes how a text mime type is associated with resources. An association
 * can match by exact file name, by extension, by glob pattern and/or by a
 * regular expression applied to the first line of the content.
 */
export interface ITextMimeAssociation {
// Identifier of the association. The matching code in this file only copies it; it is never compared.
readonly id: string;
// Mime type reported when this association matches.
readonly mime: string;
// Exact file name to match. Compared in lowercase against the lowercased base name of the path.
readonly filename?: string;
// File name suffix to match. Compared in lowercase using `endsWith`; the longest matching extension wins.
readonly extension?: string;
// Glob pattern to match. Applied to the full path when it contains a posix separator, otherwise to the base name.
readonly filepattern?: string;
// Regular expression tested against the first line of the content; only consulted when no path-based match is found.
readonly firstline?: RegExp;
// When truthy, the association is stored with the user associations, which are consulted before all others.
readonly userConfigured?: boolean;
}

/**
 * Internal registry entry: an association plus values precomputed once at
 * registration time (see `toTextMimeAssociationItem`) so lookups do not have
 * to lowercase or parse on every call.
 */
interface ITextMimeAssociationItem extends ITextMimeAssociation {
// Lowercased `filename`, or undefined when no filename was given.
readonly filenameLowercase?: string;
// Lowercased `extension`, or undefined when no extension was given.
readonly extensionLowercase?: string;
// Result of `parse()` on the lowercased `filepattern`; invoked as a function with the path or file name to test.
readonly filepatternLowercase?: ParsedPattern;
// True when `filepattern` contains a posix path separator, meaning it is matched against the full path.
readonly filepatternOnPath?: boolean;
}

// All associations in registration order (user configured and others). Used for first line matching.
let registeredAssociations: ITextMimeAssociationItem[] = [];
// Only the associations that are not user configured, in registration order.
let nonUserRegisteredAssociations: ITextMimeAssociationItem[] = [];
// Only the user configured associations, in registration order. Consulted first when guessing by path.
let userRegisteredAssociations: ITextMimeAssociationItem[] = [];

/**
 * Associate a text mime to the registry.
 *
 * The association is appended to the list of all associations and to either
 * the user configured or the non user configured list, depending on its
 * `userConfigured` flag.
 *
 * @param association The association to register.
 * @param warnOnOverwrite When true (and the association is not user
 * configured), logs a console warning for every previously registered,
 * non user configured association with a different mime that uses the same
 * extension, filename, filepattern or firstline expression.
 */
export function registerTextMime(association: ITextMimeAssociation, warnOnOverwrite = false): void {

	// Register
	const associationItem = toTextMimeAssociationItem(association);
	registeredAssociations.push(associationItem);
	if (!associationItem.userConfigured) {
		nonUserRegisteredAssociations.push(associationItem);
	} else {
		userRegisteredAssociations.push(associationItem);
	}

	// Check for conflicts unless this is a user configured association
	if (warnOnOverwrite && !associationItem.userConfigured) {
		for (const a of registeredAssociations) {
			if (a.mime === associationItem.mime || a.userConfigured) {
				continue; // same mime or userConfigured is ok (this also skips the item we just pushed)
			}

			if (associationItem.extension && a.extension === associationItem.extension) {
				console.warn(`Overwriting extension <<${associationItem.extension}>> to now point to mime <<${associationItem.mime}>>`);
			}

			if (associationItem.filename && a.filename === associationItem.filename) {
				console.warn(`Overwriting filename <<${associationItem.filename}>> to now point to mime <<${associationItem.mime}>>`);
			}

			if (associationItem.filepattern && a.filepattern === associationItem.filepattern) {
				console.warn(`Overwriting filepattern <<${associationItem.filepattern}>> to now point to mime <<${associationItem.mime}>>`);
			}

			// RegExp objects are compared by identity with `===`, so two separately created
			// expressions with the same pattern would never be detected as a conflict.
			// Compare the pattern text and flags instead.
			if (
				associationItem.firstline && a.firstline &&
				a.firstline.source === associationItem.firstline.source &&
				a.firstline.flags === associationItem.firstline.flags
			) {
				console.warn(`Overwriting firstline <<${associationItem.firstline}>> to now point to mime <<${associationItem.mime}>>`);
			}
		}
	}
}

/**
 * Converts a public association into the internal registry entry by copying
 * its fields and precomputing the lowercased / parsed variants used during
 * lookup. Empty or missing strings yield `undefined` for the derived values.
 */
function toTextMimeAssociationItem(association: ITextMimeAssociation): ITextMimeAssociationItem {
	const { id, mime, filename, extension, filepattern, firstline, userConfigured } = association;

	// Lowercase a value only when it is a non-empty string
	const lowercased = (value: string | undefined) => value ? value.toLowerCase() : undefined;

	const filenameLowercase = lowercased(filename);
	const extensionLowercase = lowercased(extension);
	const filepatternLowercase = filepattern ? parse(filepattern.toLowerCase()) : undefined;

	// A pattern containing a path separator has to be matched against the full path
	const filepatternOnPath = filepattern ? filepattern.includes(posix.sep) : false;

	return {
		id,
		mime,
		filename,
		extension,
		filepattern,
		firstline,
		userConfigured,
		filenameLowercase,
		extensionLowercase,
		filepatternLowercase,
		filepatternOnPath
	};
}

/**
 * Clear text mimes from the registry.
 *
 * @param onlyUserConfigured When truthy, only user configured associations
 * are removed; otherwise every association is removed.
 */
export function clearTextMimes(onlyUserConfigured?: boolean): void {
	if (onlyUserConfigured) {
		// Keep everything that was not configured by the user
		registeredAssociations = registeredAssociations.filter(entry => !entry.userConfigured);
	} else {
		registeredAssociations = [];
		nonUserRegisteredAssociations = [];
	}

	// User configured associations are dropped in both modes
	userRegisteredAssociations = [];
}

/**
 * Given a file, return the best matching mime type for it.
 *
 * @param resource The resource to inspect. Its path is taken from `fsPath`
 * for file resources, from the label metadata for data URIs and from `path`
 * for everything else.
 * @param firstLine Optional first line of the content, used as a last resort
 * when nothing matches on the path.
 * @returns `[mime, Mimes.text]` on a match, otherwise `[Mimes.unknown]`.
 */
export function guessMimeTypes(resource: URI | null, firstLine?: string): string[] {
	let path: string | undefined;
	if (resource) {
		const scheme = resource.scheme;
		if (scheme === Schemas.file) {
			path = resource.fsPath;
		} else if (scheme === Schemas.data) {
			path = DataUri.parseMetaData(resource).get(DataUri.META_DATA_LABEL);
		} else {
			path = resource.path;
		}
	}

	if (!path) {
		return [Mimes.unknown];
	}

	// All path based matching is done in lowercase
	const lowercasePath = path.toLowerCase();
	const filename = basename(lowercasePath);

	// 1.) User configured mappings have highest priority
	// 2.) Registered mappings have middle priority
	const pathMime =
		guessMimeTypeByPath(lowercasePath, filename, userRegisteredAssociations) ||
		guessMimeTypeByPath(lowercasePath, filename, nonUserRegisteredAssociations);
	if (pathMime) {
		return [pathMime, Mimes.text];
	}

	// 3.) Firstline has lowest priority
	const firstlineMime = firstLine ? guessMimeTypeByFirstline(firstLine) : null;

	return firstlineMime ? [firstlineMime, Mimes.text] : [Mimes.unknown];
}

/**
 * Finds the mime of the best matching association for a path.
 *
 * Priority: exact file name, then longest matching glob pattern, then longest
 * matching extension. Among equally specific candidates the one registered
 * last wins.
 *
 * @param path The (already lowercased) full path.
 * @param filename The (already lowercased) base name of the path.
 * @param associations The associations to search, in registration order.
 * @returns The matching mime or `null` when nothing matches.
 */
function guessMimeTypeByPath(path: string, filename: string, associations: ITextMimeAssociationItem[]): string | null {
	let bestPattern: ITextMimeAssociationItem | null = null;
	let bestExtension: ITextMimeAssociationItem | null = null;

	// Walk from the most recently registered association to the oldest so that the last
	// registered association wins over all others (https://github.com/microsoft/vscode/issues/20074)
	let index = associations.length;
	while (index-- > 0) {
		const candidate = associations[index];

		// An exact name match beats everything else: take it right away
		if (candidate.filenameLowercase === filename) {
			return candidate.mime;
		}

		// Longest pattern match (only strictly longer patterns may replace the current one)
		if (candidate.filepattern && (!bestPattern || candidate.filepattern.length > bestPattern.filepattern!.length)) {
			// match on full path if pattern contains path separator
			const subject = candidate.filepatternOnPath ? path : filename;
			if (candidate.filepatternLowercase?.(subject)) {
				bestPattern = candidate;
			}
		}

		// Longest extension match (only strictly longer extensions may replace the current one)
		if (candidate.extension && (!bestExtension || candidate.extension.length > bestExtension.extension!.length)) {
			if (filename.endsWith(candidate.extensionLowercase!)) {
				bestExtension = candidate;
			}
		}
	}

	// No exact name match: prefer a pattern match over an extension match
	const winner = bestPattern || bestExtension;

	return winner ? winner.mime : null;
}

/**
 * Finds the mime of the most recently registered association whose
 * `firstline` expression matches the given first line of content.
 * A leading UTF-8 BOM is ignored.
 *
 * @param firstLine The first line of the content.
 * @returns The matching mime or `null` when nothing matches.
 */
function guessMimeTypeByFirstline(firstLine: string): string | null {
	// Strip the BOM character so that expressions anchored at the start still match
	const line = startsWithUTF8BOM(firstLine) ? firstLine.substring(1) : firstLine;
	if (line.length === 0) {
		return null;
	}

	// Walk from the most recently registered association to the oldest so that the last
	// registered association wins over all others (https://github.com/microsoft/vscode/issues/20074)
	let index = registeredAssociations.length;
	while (index-- > 0) {
		const candidate = registeredAssociations[index];
		if (!candidate.firstline) {
			continue;
		}

		const found = line.match(candidate.firstline);
		if (found && found.length > 0) {
			return candidate.mime;
		}
	}

	return null;
}

/**
 * Dictionary with string keys and string values.
 * NOTE(review): judging by the name this presumably maps a file extension to
 * a media mime type; its usage is not visible in this part of the file, so confirm.
 */
interface MapExtToMediaMimes {
[index: string]: string;
}
Expand Down
121 changes: 1 addition & 120 deletions src/vs/base/test/common/mime.test.ts
Expand Up @@ -4,129 +4,10 @@
*--------------------------------------------------------------------------------------------*/

import * as assert from 'assert';
import { guessMimeTypes, normalizeMimeType, registerTextMime } from 'vs/base/common/mime';
import { URI } from 'vs/base/common/uri';
import { normalizeMimeType } from 'vs/base/common/mime';

suite('Mime', () => {

test('Dynamically Register Text Mime', () => {
let guess = guessMimeTypes(URI.file('foo.monaco'));
assert.deepStrictEqual(guess, ['application/unknown']);

registerTextMime({ id: 'monaco', extension: '.monaco', mime: 'text/monaco' });
guess = guessMimeTypes(URI.file('foo.monaco'));
assert.deepStrictEqual(guess, ['text/monaco', 'text/plain']);

guess = guessMimeTypes(URI.file('.monaco'));
assert.deepStrictEqual(guess, ['text/monaco', 'text/plain']);

registerTextMime({ id: 'codefile', filename: 'Codefile', mime: 'text/code' });
guess = guessMimeTypes(URI.file('Codefile'));
assert.deepStrictEqual(guess, ['text/code', 'text/plain']);

guess = guessMimeTypes(URI.file('foo.Codefile'));
assert.deepStrictEqual(guess, ['application/unknown']);

registerTextMime({ id: 'docker', filepattern: 'Docker*', mime: 'text/docker' });
guess = guessMimeTypes(URI.file('Docker-debug'));
assert.deepStrictEqual(guess, ['text/docker', 'text/plain']);

guess = guessMimeTypes(URI.file('docker-PROD'));
assert.deepStrictEqual(guess, ['text/docker', 'text/plain']);

registerTextMime({ id: 'niceregex', mime: 'text/nice-regex', firstline: /RegexesAreNice/ });
guess = guessMimeTypes(URI.file('Randomfile.noregistration'), 'RegexesAreNice');
assert.deepStrictEqual(guess, ['text/nice-regex', 'text/plain']);

guess = guessMimeTypes(URI.file('Randomfile.noregistration'), 'RegexesAreNotNice');
assert.deepStrictEqual(guess, ['application/unknown']);

guess = guessMimeTypes(URI.file('Codefile'), 'RegexesAreNice');
assert.deepStrictEqual(guess, ['text/code', 'text/plain']);
});

test('Mimes Priority', () => {
registerTextMime({ id: 'monaco', extension: '.monaco', mime: 'text/monaco' });
registerTextMime({ id: 'foobar', mime: 'text/foobar', firstline: /foobar/ });

let guess = guessMimeTypes(URI.file('foo.monaco'));
assert.deepStrictEqual(guess, ['text/monaco', 'text/plain']);

guess = guessMimeTypes(URI.file('foo.monaco'), 'foobar');
assert.deepStrictEqual(guess, ['text/monaco', 'text/plain']);

registerTextMime({ id: 'docker', filename: 'dockerfile', mime: 'text/winner' });
registerTextMime({ id: 'docker', filepattern: 'dockerfile*', mime: 'text/looser' });
guess = guessMimeTypes(URI.file('dockerfile'));
assert.deepStrictEqual(guess, ['text/winner', 'text/plain']);

registerTextMime({ id: 'azure-looser', mime: 'text/azure-looser', firstline: /azure/ });
registerTextMime({ id: 'azure-winner', mime: 'text/azure-winner', firstline: /azure/ });
guess = guessMimeTypes(URI.file('azure'), 'azure');
assert.deepStrictEqual(guess, ['text/azure-winner', 'text/plain']);
});

test('Specificity priority 1', () => {
registerTextMime({ id: 'monaco2', extension: '.monaco2', mime: 'text/monaco2' });
registerTextMime({ id: 'monaco2', filename: 'specific.monaco2', mime: 'text/specific-monaco2' });

assert.deepStrictEqual(guessMimeTypes(URI.file('specific.monaco2')), ['text/specific-monaco2', 'text/plain']);
assert.deepStrictEqual(guessMimeTypes(URI.file('foo.monaco2')), ['text/monaco2', 'text/plain']);
});

test('Specificity priority 2', () => {
registerTextMime({ id: 'monaco3', filename: 'specific.monaco3', mime: 'text/specific-monaco3' });
registerTextMime({ id: 'monaco3', extension: '.monaco3', mime: 'text/monaco3' });

assert.deepStrictEqual(guessMimeTypes(URI.file('specific.monaco3')), ['text/specific-monaco3', 'text/plain']);
assert.deepStrictEqual(guessMimeTypes(URI.file('foo.monaco3')), ['text/monaco3', 'text/plain']);
});

test('Mimes Priority - Longest Extension wins', () => {
registerTextMime({ id: 'monaco', extension: '.monaco', mime: 'text/monaco' });
registerTextMime({ id: 'monaco', extension: '.monaco.xml', mime: 'text/monaco-xml' });
registerTextMime({ id: 'monaco', extension: '.monaco.xml.build', mime: 'text/monaco-xml-build' });

let guess = guessMimeTypes(URI.file('foo.monaco'));
assert.deepStrictEqual(guess, ['text/monaco', 'text/plain']);

guess = guessMimeTypes(URI.file('foo.monaco.xml'));
assert.deepStrictEqual(guess, ['text/monaco-xml', 'text/plain']);

guess = guessMimeTypes(URI.file('foo.monaco.xml.build'));
assert.deepStrictEqual(guess, ['text/monaco-xml-build', 'text/plain']);
});

test('Mimes Priority - User configured wins', () => {
registerTextMime({ id: 'monaco', extension: '.monaco.xnl', mime: 'text/monaco', userConfigured: true });
registerTextMime({ id: 'monaco', extension: '.monaco.xml', mime: 'text/monaco-xml' });

let guess = guessMimeTypes(URI.file('foo.monaco.xnl'));
assert.deepStrictEqual(guess, ['text/monaco', 'text/plain']);
});

test('Mimes Priority - Pattern matches on path if specified', () => {
registerTextMime({ id: 'monaco', filepattern: '**/dot.monaco.xml', mime: 'text/monaco' });
registerTextMime({ id: 'other', filepattern: '*ot.other.xml', mime: 'text/other' });

let guess = guessMimeTypes(URI.file('/some/path/dot.monaco.xml'));
assert.deepStrictEqual(guess, ['text/monaco', 'text/plain']);
});

test('Mimes Priority - Last registered mime wins', () => {
registerTextMime({ id: 'monaco', filepattern: '**/dot.monaco.xml', mime: 'text/monaco' });
registerTextMime({ id: 'other', filepattern: '**/dot.monaco.xml', mime: 'text/other' });

let guess = guessMimeTypes(URI.file('/some/path/dot.monaco.xml'));
assert.deepStrictEqual(guess, ['text/other', 'text/plain']);
});

test('Data URIs', () => {
registerTextMime({ id: 'data', extension: '.data', mime: 'text/data' });

assert.deepStrictEqual(guessMimeTypes(URI.parse(`data:;label:something.data;description:data,`)), ['text/data', 'text/plain']);
});

test('normalize', () => {
assert.strictEqual(normalizeMimeType('invalid'), 'invalid');
assert.strictEqual(normalizeMimeType('invalid', true), undefined);
Expand Down