Skip to content

Commit

Permalink
Language tag matching expands to extended form
Browse files Browse the repository at this point in the history
  • Loading branch information
ptr727 committed Apr 6, 2023
1 parent f3a22b9 commit 7ca575f
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 10 deletions.
20 changes: 16 additions & 4 deletions PlexCleaner/Language.cs
Expand Up @@ -152,11 +152,9 @@ public static bool IsUndefined(string language)
return string.IsNullOrEmpty(language) || language.Equals(Undefined, StringComparison.OrdinalIgnoreCase);
}

public static bool IsMatch(string prefix, string language)
public bool IsMatch(string prefix, string language)
{
// TODO: Is there an easy to use C# BCP 47 matcher?
// https://r12a.github.io/app-subtags/
// https://www.loc.gov/standards/iso639-2/php/langcodes-search.php

// zh match: zh: zh, zh-Hant, zh-Hans, zh-cmn-Hant
// zho not: zh
Expand All @@ -176,11 +174,25 @@ public static bool IsMatch(string prefix, string language)
return true;
}

// Get the extended format of the language
// E.g. cmn-Hant should be expanded to zh-cmn-Hant else zh will not match

// Find a matching RFC 5646 record
var rfc5646 = Rfc5646.Find(language, false);
if (rfc5646 != null)
{
// If the lookup is different then rematch
if (!string.Equals(language, rfc5646.TagAny, StringComparison.OrdinalIgnoreCase))
{
return IsMatch(prefix, rfc5646.TagAny);
}
}

// No match
return false;
}

public static bool IsMatch(string language, IEnumerable<string> prefixList)
public bool IsMatch(string language, IEnumerable<string> prefixList)
{
// Match language with any of the prefixes
return prefixList.Any(prefix => IsMatch(prefix, language));
Expand Down
2 changes: 1 addition & 1 deletion PlexCleaner/ProcessFile.cs
Expand Up @@ -1910,7 +1910,7 @@ public SelectMediaInfo FindUnwantedLanguageTracks()
// Select tracks with wanted languages, or the original language if set to keep
// Selected is Keep
// NotSelected is Remove
SelectMediaInfo selectMediaInfo = new(MkvMergeInfo, item => Language.IsMatch(item.LanguageIetf, Program.Config.ProcessOptions.KeepLanguages) ||
SelectMediaInfo selectMediaInfo = new(MkvMergeInfo, item => Language.Singleton.IsMatch(item.LanguageIetf, Program.Config.ProcessOptions.KeepLanguages) ||
(Program.Config.ProcessOptions.KeepOriginalLanguage && item.Flags.HasFlag(TrackInfo.FlagsType.Original)));

// Keep at least one video track if any
Expand Down
7 changes: 4 additions & 3 deletions PlexCleanerTests/LanguageTests.cs
Expand Up @@ -27,11 +27,12 @@ public void Convert_Iso_To_Ietf(string tag, string ietf)
[InlineData("en", "en-US")]
[InlineData("en", "en-GB")]
[InlineData("en-GB", "en-GB")]
[InlineData("zh", "zh-Hant")]
[InlineData("zh", "zh-cmn-Hant")]
[InlineData("zh", "cmn-Hant")]
[InlineData("sr-Latn", "sr-Latn-RS")]
public void Match_Language_Tags(string prefix, string tag)
{
Assert.True(Language.IsMatch(prefix, tag));
Assert.True(Language.Singleton.IsMatch(prefix, tag));
}

[Theory]
Expand All @@ -40,7 +41,7 @@ public void Match_Language_Tags(string prefix, string tag)
[InlineData("zh-Hant", "zh-Hans")]
public void NotMatch_Language_Tags(string prefix, string tag)
{
Assert.False(Language.IsMatch(prefix, tag));
Assert.False(Language.Singleton.IsMatch(prefix, tag));
}

[Theory]
Expand Down
3 changes: 1 addition & 2 deletions README.md
Expand Up @@ -27,7 +27,7 @@ Docker images are published on [Docker Hub](https://hub.docker.com/u/ptr727/plex

- Version 3.0:
- Switched docker base image from `ubuntu:latest` to `archlinux:latest`.
- The up to date FFmpeg and HandBrake PPA installations provided by Rob Savoury are [no longer freely available](https://launchpad.net/~savoury1), a big historic thank you to Rob.
- The always up to date FFmpeg and HandBrake PPA installations provided by Rob Savoury are [no longer generally available](https://launchpad.net/~savoury1), a big historic thank you to Rob.
- Switched to [Arch Linux](https://archlinux.org/) with up to date media tools found in the [Arch Linux Package Repository](https://archlinux.org/packages/).
- Switched from .NET 6 to .NET 7.
- Utilizing some new capabilities, e.g. `GeneratedRegex` and `LibraryImport`.
Expand All @@ -51,7 +51,6 @@ Docker images are published on [Docker Hub](https://hub.docker.com/u/ptr727/plex
- If you care and can, please do communicate the need for IETF language support to the FFmpeg and HandBrake development teams.
- Added warnings and attempt to repair when the Language and LanguageIetf are set and are invalid or do not match.
- `MkvMerge --identify` added the `--normalize-language-ietf extlang` option to report e.g. `zh-cmn-Hant` vs. the normalized `cmn-Hant`.
- Old MkvMerge sidecar information will be out of date; recreating sidecar files using the `createsidecar` option is recommended.
- Added `ProcessOptions:KeepOriginalLanguage` to keep tracks marked as [original language](https://www.ietf.org/archive/id/draft-ietf-cellar-matroska-15.html#name-original-flag).
- Added `ProcessOptions:RemoveClosedCaptions` to conditionally vs. always remove closed captions.
- Added `ProcessOptions:SetTrackFlags` to set track flags based on track title keywords, e.g. `SDH` -> `HearingImpaired`.
Expand Down

0 comments on commit 7ca575f

Please sign in to comment.