Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

external_files: detect locales with a region like en-US #13916

Merged
merged 4 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
55 changes: 54 additions & 1 deletion misc/language.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <stdint.h>

#include "common/common.h"
#include "misc/bstr.h"
#include "misc/ctype.h"
kasper93 marked this conversation as resolved.
Show resolved Hide resolved

#define L(s) { #s, sizeof(#s) - 1 }

Expand Down Expand Up @@ -296,3 +296,56 @@ int mp_match_lang(char **langs, const char *lang)
talloc_free(ta_ctx);
return best_score;
}

// Guess an IETF-style language tag from the end of a file name.
//
// The extension is removed and the result is stripped, then the name is
// scanned backwards for a tag such as "en", "eng", "en-US" or
// "gsw-u-sd-chzh". The tag has to sit at the very end of the stripped name,
// either introduced by '.' or enclosed in "(...)" or "[...]", and the
// delimiter in front of it must not be the first character of the name.
//
// name:       file name including its extension
// lang_start: if not NULL, receives the index of the delimiter preceding the
//             tag, relative to the stripped name; left untouched on failure
// returns:    the tag as a slice pointing into name (no copy is made), or an
//             empty bstr if no tag was recognized
bstr mp_guess_lang_from_filename(bstr name, int *lang_start)
{
    name = bstr_strip(bstr_strip_ext(name));

    if (name.len < 2)
        return (bstr){0};

    int lang_length = 0;
    int i = name.len - 1;
    int suffixes_length = 0;

    // A trailing closing bracket selects the matching opening bracket as the
    // delimiter. Only a single closer is consumed, so mismatched pairs such
    // as "[en])" are not treated as a bracketed tag.
    char delimiter = '.';
    if (name.start[i] == ')') {
        delimiter = '(';
        i--;
    } else if (name.start[i] == ']') {
        delimiter = '[';
        i--;
    }

    // Walk backwards over "-subtag" groups. lang_length counts every tag
    // character seen so far (letters and hyphens); suffixes_length is the
    // part of that count which belongs to already completed suffix subtags.
    while (true) {
        while (i >= 0 && mp_isalpha(name.start[i])) {
            lang_length++;
            i--;
        }

        // According to
        // https://en.wikipedia.org/wiki/IETF_language_tag#Syntax_of_language_tags
        // subtags after the first are composed of 1 to 8 letters.
        if (lang_length < suffixes_length + 1 || lang_length > suffixes_length + 8)
            return (bstr){0};

        if (i >= 0 && name.start[i] == '-') {
            lang_length++;
            i--;
            suffixes_length = lang_length;
        } else {
            break;
        }
    }

    // The primary subtag can have 2 or 3 letters.
    // i <= 0 also covers i == -1 (the scan consumed the whole name), so
    // name.start[i] is never read out of bounds.
    if (lang_length < suffixes_length + 2 || lang_length > suffixes_length + 3 ||
        i <= 0 || name.start[i] != delimiter)
        return (bstr){0};

    if (lang_start)
        *lang_start = i;

    return (bstr){name.start + i + 1, lang_length};
}
3 changes: 3 additions & 0 deletions misc/language.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@
#ifndef MP_LANGUAGE_H
#define MP_LANGUAGE_H

#include "misc/bstr.h"

// Result numerically higher => better match. 0 == no match.
int mp_match_lang(char **langs, const char *lang);
char **mp_get_user_langs(void);
// Guess a language tag (e.g. "en", "eng", "en-US") from the end of a file
// name. The extension is removed first; the tag must then be the last
// component, preceded by '.' or enclosed in "(...)" or "[...]".
// Returns a slice pointing into name (not a copy), or an empty bstr if no tag
// was found. If lang_start is not NULL and a tag was found, it is set to the
// index of the delimiter in front of the tag.
bstr mp_guess_lang_from_filename(bstr name, int *lang_start);

#endif /* MP_LANGUAGE_H */
3 changes: 2 additions & 1 deletion player/command.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
#include "options/path.h"
#include "screenshot.h"
#include "misc/dispatch.h"
#include "misc/language.h"
#include "misc/node.h"
#include "misc/thread_pool.h"
#include "misc/thread_tools.h"
Expand Down Expand Up @@ -6005,7 +6006,7 @@ static void cmd_track_reload(void *p)
struct track *nt = mpctx->tracks[nt_num];

if (!nt->lang)
nt->lang = mp_guess_lang_from_filename(nt, nt->external_filename);
nt->lang = bstrto0(nt, mp_guess_lang_from_filename(bstr0(nt->external_filename), NULL));

mp_switch_track(mpctx, nt->type, nt, 0);
print_track_list(mpctx, "Reloaded:");
Expand Down
50 changes: 4 additions & 46 deletions player/external_files.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
#include "common/common.h"
#include "common/global.h"
#include "common/msg.h"
#include "misc/ctype.h"
#include "misc/charset_conv.h"
#include "misc/language.h"
#include "options/options.h"
#include "options/path.h"
#include "external_files.h"
Expand Down Expand Up @@ -108,46 +108,6 @@ static int compare_sub_priority(const void *a, const void *b)
return strcoll(s1->fname, s2->fname);
}

// Guess a 2 or 3 letter language code from the end of name.
//
// The code must be the last component of name, either introduced by '.' or
// enclosed in "(...)" or "[...]", and the delimiter in front of it must not
// be the first character of name.
//
// name:     file name to inspect (callers pass it without its extension)
// fn_start: receives the index of the delimiter preceding the code; only
//           written when a code is found
// returns:  the code as a slice pointing into name, or an empty bstr
static struct bstr guess_lang_from_filename(struct bstr name, int *fn_start)
{
    if (name.len < 2)
        return (struct bstr){NULL, 0};

    int n = 0;
    int i = name.len - 1;

    // A trailing closing bracket selects the matching opening bracket as the
    // delimiter. Only a single closer is consumed, so mismatched pairs such
    // as "[en])" are rejected.
    char thing = '.';
    if (name.start[i] == ')') {
        thing = '(';
        i--;
    } else if (name.start[i] == ']') {
        thing = '[';
        i--;
    }

    // Count the trailing letters; more than 3 cannot be a language code.
    while (i >= 0 && mp_isalpha(name.start[i])) {
        n++;
        if (n > 3)
            return (struct bstr){NULL, 0};
        i--;
    }

    // i <= 0 rejects both a delimiter at index 0 and i == -1, which happens
    // when the whole name consists of letters (e.g. "en"). Without this,
    // name.start[-1] would be read.
    if (n < 2 || i <= 0 || name.start[i] != thing)
        return (struct bstr){NULL, 0};

    *fn_start = i;
    return (struct bstr){name.start + i + 1, n};
}

// Convenience wrapper around guess_lang_from_filename(): drops the extension
// from filename, guesses the language code and returns it as a C string
// produced by bstrto0() with ctx passed as its first argument.
char *mp_guess_lang_from_filename(void* ctx, const char *filename)
{
    // The delimiter position is only of interest to append_dir_subtitles();
    // it is collected here merely to satisfy the helper's signature.
    int delim_pos = 0;
    bstr stem = bstr_strip_ext(bstr0(filename));
    return bstrto0(ctx, guess_lang_from_filename(stem, &delim_pos));
}

static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts,
struct subfn **slist, int *nsub,
struct bstr path, const char *fname,
Expand All @@ -160,7 +120,6 @@ static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts,
struct bstr f_fname = mp_iconv_to_utf8(log, f_fbname,
"UTF-8-MAC", MP_NO_LATIN1_FALLBACK);
struct bstr f_fname_noext = bstrdup(tmpmem, bstr_strip_ext(f_fname));
bstr_lower(f_fname_noext);
struct bstr f_fname_trim = bstr_strip(f_fname_noext);

if (f_fbname.start != f_fname.start)
Expand All @@ -183,7 +142,6 @@ static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts,
"UTF-8-MAC", MP_NO_LATIN1_FALLBACK);
// retrieve various parts of the filename
struct bstr tmp_fname_noext = bstrdup(tmpmem2, bstr_strip_ext(dename));
bstr_lower(tmp_fname_noext);
struct bstr tmp_fname_ext = bstr_get_ext(dename);
struct bstr tmp_fname_trim = bstr_strip(tmp_fname_noext);

Expand Down Expand Up @@ -215,13 +173,13 @@ static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts,
// higher prio -> auto-selection may prefer it (0 = not loaded)
int prio = 0;

if (bstrcmp(tmp_fname_trim, f_fname_trim) == 0)
if (bstrcasecmp(tmp_fname_trim, f_fname_trim) == 0)
prio |= 32; // exact movie name match

bstr lang = {0};
int start = 0;
lang = guess_lang_from_filename(tmp_fname_trim, &start);
if (bstr_startswith(tmp_fname_trim, f_fname_trim)) {
lang = mp_guess_lang_from_filename(dename, &start);
if (bstr_case_startswith(tmp_fname_trim, f_fname_trim)) {
if (lang.len && start == f_fname_trim.len)
prio |= 16; // exact movie name + followed by lang

Expand Down
1 change: 0 additions & 1 deletion player/external_files.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,5 @@ struct subfn *find_external_files(struct mpv_global *global, const char *fname,

bool mp_might_be_subtitle_file(const char *filename);
void mp_update_subtitle_exts(struct MPOpts *opts);
char *mp_guess_lang_from_filename(void *talloc_ctx, const char *filename);

#endif /* MPLAYER_FINDFILES_H */
23 changes: 23 additions & 0 deletions test/language.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,27 @@ int main(void)
assert_int_equal(mp_match_lang(LANGS("ax") , NULL) , 0);
assert_int_equal(mp_match_lang(LANGS("") , "ax") , 0);
assert_int_equal(mp_match_lang((char*[]){NULL} , "ax") , 0);

void *ta_ctx = talloc_new(NULL);
int start; // this is actually the position of the delimiter.

assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en.srt"), &start)), "en");
assert_int_equal(start, 3);
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.eng.srt"), NULL)), "eng");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.e.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.engg.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.00.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0(NULL), NULL)), "");

assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en-US.srt"), NULL)), "en-US");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en-simple.srt"), NULL)), "en-simple");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.sgn-FSL.srt"), NULL)), "sgn-FSL");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.gsw-u-sd-chzh.srt"), NULL)), "gsw-u-sd-chzh");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en-.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en-US-.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en-aaaaaaaaa.srt"), NULL)), "");
assert_string_equal(bstrto0(ta_ctx, mp_guess_lang_from_filename(bstr0("foo.en-0.srt"), NULL)), "");
kasper93 marked this conversation as resolved.
Show resolved Hide resolved

talloc_free(ta_ctx);
}