Skip to content
Open
11 changes: 11 additions & 0 deletions docs/LANGUAGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ The contents of the file should be as follows:
```json
{
"name": string,
"quotationMarks": {
"primary": tuple[string, string],
"secondary": tuple[string, string]
},
"rightToLeft": boolean,
"joiningScript": boolean,
"orderedByFrequency": boolean,
Expand All @@ -29,6 +33,13 @@ The contents of the file should be as follows:
```

It is recommended that you familiarize yourself with JSON before adding a language. For the `name` field, put the name of your language.
`quotationMarks` indicates the quotation marks used for that language: `primary` for top-level quotations (depth 1) and `secondary` for quotations nested inside them (depth 2). Each entry is a pair consisting of the left (opening) quotation mark followed by the right (closing) one. For instance, American English uses `“”` at depth 1 and `‘’` at depth 2. In that case it would look like:
```json
"quotationMarks": {
"primary": ["“", "”"],
"secondary": ["‘", "’"]
},
```
`rightToLeft` indicates how the language is written. If it is written right to left then put `true`, otherwise put `false`.
`joiningScript` indicates whether the language requires joining letters to render correctly. Set it to `true` if characters must join with surrounding characters or if their shapes change based on position in a word (initial, medial, final, or isolated), or if they use connecting marks (matras/vowel signs) that reshape the base characters. Otherwise, set it to `false`.
For `bcp47`, put your language's [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag).
Expand Down
18 changes: 9 additions & 9 deletions frontend/__tests__/test/british-english.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,23 +40,23 @@ describe("british-english", () => {
});

it("should convert double quotes to single quotes", async () => {
await expect(replace('"hello"', "")).resolves.toEqual("'hello'");
await expect(replace('"test"', "")).resolves.toEqual("'test'");
await expect(replace('"Hello World"', "")).resolves.toEqual(
"'Hello World'",
await expect(replace("“hello”", "")).resolves.toEqual("hello");
await expect(replace("“test”", "")).resolves.toEqual("test");
await expect(replace("“Hello World”", "")).resolves.toEqual(
"Hello World",
);
});

it("should convert double quotes and replace words", async () => {
await expect(replace('"color"', "")).resolves.toEqual("'colour'");
await expect(replace('"math"', "")).resolves.toEqual("'maths'");
await expect(replace('"Color"', "")).resolves.toEqual("'Colour'");
await expect(replace("“color”", "")).resolves.toEqual("colour");
await expect(replace("“math”", "")).resolves.toEqual("maths");
await expect(replace("“Color”", "")).resolves.toEqual("Colour");
});

it("should handle multiple double quotes in a word", async () => {
await expect(
replace('He said "hello" and "goodbye"', ""),
).resolves.toEqual("He said 'hello' and 'goodbye'");
replace("He said hello and goodbye”", ""),
).resolves.toEqual("He said hello and goodbye");
});

it("should not affect words without double quotes", async () => {
Expand Down
13 changes: 10 additions & 3 deletions frontend/src/ts/test/british-english.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,16 @@ export async function replace(
previousWord: string,
): Promise<string> {
// Swap American-style and British-style curly quotes: double quotes (“ ”) become single quotes (‘ ’) and vice versa
if (word.includes('"')) {
word = word.replace(/"/g, "'");
}
const us2ukQuotes = {
"“": "‘",
"”": "’",
"‘": "“",
"’": "”",
};
word = word.replace(
/[“”‘’]/g,
(char) => us2ukQuotes[char as keyof typeof us2ukQuotes],
);

if (word.includes("-")) {
//this handles hyphenated words (for example "cream-colored") to make sure
Expand Down
32 changes: 15 additions & 17 deletions frontend/src/ts/test/words-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ export async function punctuateWord(
currentWord: string,
index: number,
maxindex: number,
language: LanguageObject,
): Promise<string> {
let word = currentWord;

const currentLanguage = Config.language.split("_")[0];
const currentLanguage = language.name.split("_")[0];

const lastChar = Strings.getLastChar(previousWord);

Expand Down Expand Up @@ -140,22 +141,18 @@ export async function punctuateWord(
}
}
}
} else if (
random() < 0.01 &&
lastChar !== "," &&
lastChar !== "." &&
currentLanguage !== "russian"
) {
word = `"${word}"`;
} else if (
random() < 0.011 &&
lastChar !== "," &&
lastChar !== "." &&
currentLanguage !== "russian" &&
currentLanguage !== "ukrainian" &&
currentLanguage !== "slovak"
) {
word = `'${word}'`;
} else if (random() < 0.01 && lastChar !== "," && lastChar !== ".") {
if (language?.quotationMarks !== undefined) {
word = `${language.quotationMarks.primary[0]}${word}${language.quotationMarks.primary[1]}`;
} else {
word = `"${word}"`;
}
} else if (random() < 0.011 && lastChar !== "," && lastChar !== ".") {
if (language?.quotationMarks !== undefined) {
word = `${language.quotationMarks.secondary[0]}${word}${language.quotationMarks.secondary[1]}`;
} else {
word = `'${word}'`;
}
} else if (random() < 0.012 && lastChar !== "," && lastChar !== ".") {
if (currentLanguage === "code") {
const r = random();
Expand Down Expand Up @@ -941,6 +938,7 @@ export async function getNextWord(
randomWord,
wordIndex,
wordsBound,
currentLanguage,
);
}

Expand Down
19 changes: 17 additions & 2 deletions frontend/src/ts/utils/strings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,23 @@ export function isWordRightToLeft(
}

export const CHAR_EQUIVALENCE_SETS = [
new Set(["’", "‘", "'", "ʼ", "׳", "ʻ", "᾽", "᾽"]),
new Set([`"`, "”", "“", "„"]),
new Set([
"’",
"‘",
"'",
"ʼ",
"׳",
"ʻ",
"᾽",
"᾽",
"‛",
"‚",
"‹",
"›",
"『",
"』",
]),
new Set([`"`, "”", "“", "‟", "„", "⹂", "«", "»", "《", "》", "「", "」"]),
new Set(["–", "—", "-", "‐"]),
new Set([",", "‚"]),
];
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/afrikaans_10k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "afrikaans_10k",
"quotationMarks": {
"primary": ["“", "”"],
"secondary": ["‘", "’"]
},
"words": [
"aan",
"aanbetref",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/afrikaans_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "afrikaans_1k",
"quotationMarks": {
"primary": ["“", "”"],
"secondary": ["‘", "’"]
},
"noLazyMode": true,
"words": [
"sterker",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/albanian.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "albanian",
"quotationMarks": {
"primary": ["„", "”"],
"secondary": ["‘", "’"]
},
"words": [
"të",
"e",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/albanian_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "albanian_1k",
"quotationMarks": {
"primary": ["„", "”"],
"secondary": ["‘", "’"]
},
"words": [
"I",
"e tij",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/amharic.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "amharic",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["‹", "›"]
},
"bcp47": "am-ET",
"words": [
"እግዚአብሔር",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/amharic_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "amharic_1k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["‹", "›"]
},
"bcp47": "am-ET",
"words": [
"መለየት",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/amharic_5k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "amharic_5k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["‹", "›"]
},
"bcp47": "am-ET",
"words": [
"ሙዚቀኝነት",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/arabic.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "arabic",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"rightToLeft": true,
"joiningScript": true,
"bcp47": "ar-SA",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/arabic_10k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "arabic_10k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"rightToLeft": true,
"joiningScript": true,
"bcp47": "ar-SA",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/arabic_egypt.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "arabic_egypt",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"rightToLeft": true,
"joiningScript": true,
"bcp47": "ar-EG",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/arabic_egypt_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "arabic_egypt_1k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"rightToLeft": true,
"joiningScript": true,
"bcp47": "ar-EG",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/arabic_morocco.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "arabic_morocco",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"rightToLeft": true,
"joiningScript": true,
"orderedByFrequency": false,
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/armenian.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "armenian",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"noLazyMode": true,
"orderedByFrequency": false,
"words": [
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/armenian_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "armenian_1k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"noLazyMode": true,
"orderedByFrequency": false,
"words": [
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/armenian_western.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "armenian_western",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"bcp47": "hyw",
"words": [
"կանանց",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/armenian_western_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "armenian_western_1k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["", ""]
},
"bcp47": "hyw",
"words": [
"թարգմանուած",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/azerbaijani.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "azerbaijani",
"quotationMarks": {
"primary": ["“", "”"],
"secondary": ["\"", "\""]
},
"bcp47": "az-AZ",
"words": [
"dərs",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/azerbaijani_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "azerbaijani_1k",
"quotationMarks": {
"primary": ["“", "”"],
"secondary": ["\"", "\""]
},
"bcp47": "az-AZ",
"words": [
"dərs",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/belarusian.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "belarusian",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["„", "“"]
},
"noLazyMode": true,
"bcp47": "be-BY",
"words": [
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/belarusian_100k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "belarusian_100k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["„", "“"]
},
"bcp47": "be-BY",
"words": [
"а",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/belarusian_10k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "belarusian_10k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["„", "“"]
},
"bcp47": "be-BY",
"words": [
"а",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/belarusian_1k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "belarusian_1k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["„", "“"]
},
"noLazyMode": true,
"bcp47": "be-BY",
"words": [
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/belarusian_25k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "belarusian_25k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["„", "“"]
},
"bcp47": "be-BY",
"words": [
"а",
Expand Down
4 changes: 4 additions & 0 deletions frontend/static/languages/belarusian_50k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"name": "belarusian_50k",
"quotationMarks": {
"primary": ["«", "»"],
"secondary": ["„", "“"]
},
"bcp47": "be-BY",
"words": [
"а",
Expand Down
Loading
Loading