Skip to content

Commit

Permalink
Merge pull request #29 from mdaubie/master
Browse files Browse the repository at this point in the history
Releasing v1.0.0
  • Loading branch information
Matthieu Daubié committed May 4, 2023
2 parents e082737 + abb326b commit 3b36202
Show file tree
Hide file tree
Showing 7 changed files with 345 additions and 41 deletions.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,30 @@ Library for parsing subtitles files using regexes
- Currently: SubRip
- Upcoming: MicroDVD, WebVTT, SSA, ASS

### How to use

- Parse your file with the `Parser` class; you will need the format of the file and its path.
- Apply your changes: for example, fix the overlapping subtitles in the file with the dedicated tool.
- Serialize the file back with the `Serializer` class; you will need the format of the file and the path of the new file.

```java
public class Main {
public static void main(String[] args) throws IOException {
// 1. Parse: the Parser needs the format of the file and the file itself
File mySubtitlesFile = new File("path/myFile");
SubRipFile parsedFile = new Parser<>(SUB_RIP_FORMAT).parseFile(mySubtitlesFile);
// 2. Apply your changes: here, fix overlapping subtitles using the MERGE strategy
FixOverlappingSubtitles.fixOverlapping(parsedFile, FixOverlappingSubtitles.STRATEGY.MERGE);
// 3. Serialize: the Serializer needs the format and the file to write to
new Serializer<>(SUB_RIP_FORMAT).writeToFile(parsedFile, new File("path/myFileFixed"));
}
}
```

### Upcoming features

- New subtitles formats: MicroDVD, WebVTT, SSA, ASS
- New tool: subtitles shifting
- Format conversion: parse a SubRip file and serialize it as an SSA file to customize it for example
- Format auto-detection: might be done simply on file extension, or by pattern recognition

### Related projects

I am working on a web app project to handle my collection of downloaded movies and series, you can find it
Expand Down
70 changes: 70 additions & 0 deletions src/main/java/io/github/mdaubie/subtitlesparser/Serializer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package io.github.mdaubie.subtitlesparser;

import io.github.mdaubie.subtitlesparser.model.Format;
import io.github.mdaubie.subtitlesparser.model.PatternedObject;
import io.github.mdaubie.subtitlesparser.model.SubtitlesFile;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.Field;
import java.rmi.UnexpectedException;
import java.time.LocalTime;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Serializes a {@link SubtitlesFile} back to text.
 * <p>
 * For every {@link PatternedObject}, the regex obtained from {@link PatternHolder} is used as a template:
 * each named group {@code (?<name>...)} is replaced by the serialized value of the public field called
 * {@code name}, and the text between the groups is copied as is.
 *
 * @param format the format of the file; its timestamps formatter is used to print {@link LocalTime} fields
 * @param <SF>   the type of subtitles file described by {@code format}
 */
public record Serializer<SF extends SubtitlesFile>(Format<SF> format) {
    /** Matches one named capturing group of a regex; compiled once instead of on every call. */
    private static final Pattern NAMED_GROUP = Pattern.compile("(\\(\\?<(?<groupName>.*?)>.*?\\))");

    /**
     * Serializes the given subtitles file and writes the result to {@code file}.
     *
     * @param subtitlesFile the object to serialize
     * @param file          the destination file
     * @throws IOException if the serialization fails or the file cannot be written
     */
    public void writeToFile(SubtitlesFile subtitlesFile, File file) throws IOException {
        // Serialize before opening the file so that a failed serialization does not truncate it.
        String serialized = serialize(subtitlesFile);
        // try-with-resources closes the writer even when the write fails.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            writer.append(serialized);
        }
    }

    /**
     * Serializes the given subtitles file to a String.
     *
     * @param sf the object to serialize
     * @return the textual representation of {@code sf}
     * @throws UnexpectedException if a field has a type not handled by the serializer, or cannot be read
     */
    public String serialize(SubtitlesFile sf) throws UnexpectedException {
        return dynamicSerialize(sf);
    }

    /**
     * Fills the pattern of the object's class with the object's field values, in a single pass over the regex.
     * Values are appended to the output and never scanned again, so a value that happens to contain
     * a field name (e.g. a dialogue containing the word "end") is left untouched.
     */
    private String dynamicSerialize(PatternedObject object) throws UnexpectedException {
        String regex = PatternHolder.getPattern(object.getClass()).pattern();
        Matcher groups = NAMED_GROUP.matcher(regex);
        StringBuilder output = new StringBuilder();
        int literalStart = 0;
        while (groups.find()) {
            output.append(unescapeLiteral(regex.substring(literalStart, groups.start())));
            output.append(serializeGroup(object, groups.group("groupName")));
            literalStart = groups.end();
        }
        output.append(unescapeLiteral(regex.substring(literalStart)));
        return output.toString();
    }

    /**
     * Serializes the public field of {@code object} named after the given regex group.
     *
     * @return the serialized value, or the group name itself when the class has no public field with that name
     * @throws UnexpectedException if the field type is not handled or the field cannot be read
     */
    private String serializeGroup(Object object, String groupName) throws UnexpectedException {
        Field field;
        try {
            field = object.getClass().getField(groupName);
        } catch (NoSuchFieldException ignored) {
            // No public field backs this group: leave the group name in place.
            return groupName;
        }
        try {
            // A public field of a non-public class may still need this; the result is checked by field.get().
            field.trySetAccessible();
            return serializeAttribute(object, field);
        } catch (IllegalAccessException e) {
            // Fail loudly instead of returning null, which used to end up as the text "null" in the output.
            throw new UnexpectedException(
                    String.format("Field %s of %s cannot be read by Serializer", groupName, object.getClass()), e);
        }
    }

    /**
     * Serializes the value of one field according to its declared type.
     *
     * @throws UnexpectedException    if the declared type of the field is not handled
     * @throws IllegalAccessException if the field cannot be read
     */
    private String serializeAttribute(Object object, Field field) throws UnexpectedException, IllegalAccessException {
        Class<?> type = field.getType();
        if (type == String.class) return String.valueOf(field.get(object));
        if (type == Integer.class || type == int.class) return String.valueOf(field.get(object));
        if (type == LocalTime.class) return ((LocalTime) field.get(object)).format(format.timestampsFormat());
        // Also accept fields declared with a List implementation (e.g. ArrayList).
        if (List.class.isAssignableFrom(type)) return serializeList(object, field);
        throw new UnexpectedException(String.format("Type %s is not handled by Serializer", type));
    }

    /**
     * Serializes every element of a List field and concatenates the results.
     */
    private String serializeList(Object object, Field field) throws UnexpectedException, IllegalAccessException {
        StringBuilder content = new StringBuilder();
        // The element type is erased at runtime; elements are expected to be PatternedObject.
        @SuppressWarnings("unchecked")
        List<PatternedObject> list = (List<PatternedObject>) field.get(object);
        //TODO include primitive objects (non-patterned)
        for (PatternedObject o : list) content.append(dynamicSerialize(o));
        return content.toString();
    }

    /**
     * Transforms a regex Pattern into the corresponding String template, where every named group is replaced
     * by its name (the placeholder) and every escaped line feed by a real one.
     *
     * @param pattern the pattern to transform
     * @return the template, e.g. {@code "(?<a>.*) - (?<b>.*)"} gives {@code "a - b"}
     */
    // TODO we should actually use something similar as the pattern holder, to not reprocess it everytime
    public static String patternToStringTemplate(Pattern pattern) {
        return unescapeLiteral(NAMED_GROUP.matcher(pattern.pattern()).replaceAll("${groupName}"));
    }

    /** Turns the regex escape sequence {@code \n} found in literal pattern text into an actual line feed. */
    private static String unescapeLiteral(String literal) {
        return literal.replace("\\n", "\n");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package io.github.mdaubie.subtitlesparser.toolbox;

import io.github.mdaubie.subtitlesparser.model.Subtitle;
import io.github.mdaubie.subtitlesparser.model.SubtitlesFile;

import java.security.InvalidParameterException;
import java.util.List;

/**
 * Tool fixing subtitles that overlap in time within a {@link SubtitlesFile}.
 */
public class FixOverlappingSubtitles {
    /**
     * The available strategies
     */
    public enum STRATEGY {
        /**
         * Merges two consecutive subtitles if they are overlapping
         */
        MERGE,
        /**
         * Shorten the length of the first subtitle of two consecutive subtitles if they are overlapping
         */
        SHORTEN_FIRST,
        /**
         * Delay the start of the second subtitle of two consecutive subtitles if they are overlapping
         */
        DELAY_SECOND,
    }

    /**
     * Fix overlapping subtitles in SubtitlesFile object.
     * <p>
     * The subtitles are first sorted by start time (in place, keeping the relative order of subtitles
     * starting at the same time), then each pair of consecutive subtitles is fixed with the given strategy,
     * and the subtitles are renumbered from 1. A file without subtitles is left untouched.
     *
     * @param sf       The SubtitleFile object for which to fix the subtitles
     * @param strategy The strategy with which to fix the subtitles
     * @throws InvalidParameterException if an overlap is found and the strategy is not a known one
     */
    public static void fixOverlapping(SubtitlesFile sf, STRATEGY strategy) {
        List<? extends Subtitle> subtitles = sf.getSubtitles();
        if (subtitles.isEmpty()) return;
        // Swapping local variables does not reorder the list, so order the list itself up front.
        sortByStart(subtitles);
        subtitles.get(0).number = 1;
        int i = 0;
        while (i < subtitles.size() - 1) {
            Subtitle current = subtitles.get(i);
            Subtitle next = subtitles.get(i + 1);
            boolean merged = false;
            // Same test as overlaps(), inlined because DELAY_SECOND may push a start past the following
            // subtitle's start, in which case overlaps() would throw instead of letting us fix the pair.
            if (next.start.compareTo(current.end) < 0) {
                switch (strategy) {
                    case SHORTEN_FIRST -> current.end = next.start;
                    case DELAY_SECOND -> {
                        next.start = current.end;
                        // A subtitle fully contained in the previous one must not end before it starts.
                        if (next.end.compareTo(next.start) < 0) next.end = next.start;
                    }
                    case MERGE -> {
                        // Keep the later end: the second subtitle may finish before the first one.
                        if (next.end.compareTo(current.end) > 0) current.end = next.end;
                        current.content = current.content + System.lineSeparator() + next.content;
                        subtitles.remove(i + 1);
                        merged = true;
                    }
                    default -> throw new InvalidParameterException("Invalid strategy provided");
                }
            }
            // After a merge, stay on the same subtitle: it may also overlap its new successor.
            if (!merged) {
                next.number = current.number + 1;
                i++;
            }
        }
    }

    /**
     * Sorts the subtitles by start time; generic so that the wildcard type of the caller's list is captured.
     */
    private static <T extends Subtitle> void sortByStart(List<T> subtitles) {
        subtitles.sort((a, b) -> a.start.compareTo(b.start));
    }

    /**
     * Check whether the two subtitles provided overlap
     *
     * @param first  first Subtitle, is expected to be prior to the second
     * @param second second Subtitle, is expected to be later than the first
     * @return boolean: true if the second subtitle starts before the first one ends
     * @throws InvalidParameterException if the first subtitle starts after the second one
     */
    protected static boolean overlaps(Subtitle first, Subtitle second) {
        if (first.start.compareTo(second.start) > 0)
            throw new InvalidParameterException("first Subtitle should be prior to second Subtitle");
        return second.start.compareTo(first.end) < 0;
    }
}
47 changes: 6 additions & 41 deletions src/test/java/io/github/mdaubie/subtitlesparser/ParserTest.java
Original file line number Diff line number Diff line change
@@ -1,64 +1,29 @@
package io.github.mdaubie.subtitlesparser;

import io.github.mdaubie.subtitlesparser.model.Format;
import io.github.mdaubie.subtitlesparser.model.SubRipFile;
import io.github.mdaubie.subtitlesparser.model.SubRipSubtitle;
import io.github.mdaubie.subtitlesparser.model.SubtitlesFile;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.time.LocalTime;
import java.util.ArrayList;
import java.util.function.Supplier;
import java.util.stream.Stream;

import static io.github.mdaubie.subtitlesparser.constants.SUB_FILE_FORMATS.*;
import static io.github.mdaubie.subtitlesparser.constants.SUB_FILE_FORMATS.SUB_RIP_FORMAT;
import static org.assertj.core.api.Assertions.assertThat;


/**
 * Parameterized test of Parser: parses a text with a given format and compares the result
 * with an expected object using a recursive (field by field) comparison.
 * <p>
 * NOTE(review): this listing appears to interleave the removed and the added lines of a diff
 * (two signatures and two assertion forms for parse, two Arguments.of entries) - confirm against the real file.
 */
class ParserTest {
@ParameterizedTest
@MethodSource("parse")
void parse(String text, Format<? extends SubtitlesFile> format, SubtitlesFile result) throws Exception {
void parse(Format<? extends SubtitlesFile> format, String text, Supplier<SubtitlesFile> expectedResult) throws Exception {
SubtitlesFile actualResult = new Parser<>(format).parse(text);
Assertions.assertThat(actualResult)
.usingRecursiveComparison()
.isEqualTo(result);
assertThat(actualResult).usingRecursiveComparison().isEqualTo(expectedResult.get());
}

// Argument provider for the parse test, referenced by name in @MethodSource("parse")
public static Stream<Arguments> parse() {
String fileContent = """
1
00:00:51,093 --> 00:00:52,635
(ALARM RINGING)
2
00:00:56,473 --> 00:00:58,266
(FOOTSTEPS THUDDING)
""";
SubRipFile sf = new SubRipFile();
sf.subtitles = new ArrayList<>() {{
add(getSubRipSubtitle(1,
LocalTime.of(0, 0, 51, 93 * 1000000),
LocalTime.of(0, 0, 52, 635 * 1000000),
"(ALARM RINGING)"));
add(getSubRipSubtitle(2,
LocalTime.of(0, 0, 56, 473 * 1000000),
LocalTime.of(0, 0, 58, 266 * 1000000),
"(FOOTSTEPS THUDDING)"));
}};
return Stream.of(
Arguments.of(fileContent, SUB_RIP_FORMAT, sf)
Arguments.of(SUB_RIP_FORMAT, TestObjects.SubRip.text1, TestObjects.SubRip.object1)
);
}

// Builds a SubRipSubtitle by assigning its number, start, end and content fields
private static SubRipSubtitle getSubRipSubtitle(int number, LocalTime start, LocalTime end, String content) {
SubRipSubtitle subRipSubtitle = new SubRipSubtitle();
subRipSubtitle.number = number;
subRipSubtitle.start = start;
subRipSubtitle.end = end;
subRipSubtitle.content = content;
return subRipSubtitle;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package io.github.mdaubie.subtitlesparser;

import io.github.mdaubie.subtitlesparser.model.Format;
import io.github.mdaubie.subtitlesparser.model.SubtitlesFile;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import java.util.function.Supplier;
import java.util.regex.Pattern;
import java.util.stream.Stream;

import static io.github.mdaubie.subtitlesparser.constants.SUB_FILE_FORMATS.SUB_RIP_FORMAT;
import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests of Serializer: full serialization of a subtitles file, and conversion of a regex into a template.
 */
class SerializerTest {
    // Cases for the serialize test: format, supplier of the object to serialize, expected text
    public static Stream<Arguments> serialize() {
        Arguments subRipCase = Arguments.of(SUB_RIP_FORMAT, TestObjects.SubRip.object1, TestObjects.SubRip.text1);
        return Stream.of(subRipCase);
    }

    // Cases for the patternToStringTemplate test: regex, expected template
    public static Stream<Arguments> patternToStringTemplate() {
        Arguments singleGroup = Arguments.of("(?<subtitles>.*)", "subtitles");
        Arguments severalGroups = Arguments.of(
                "(?<number>[0-9]+)\\n(?<start>.*?) --> (?<end>.*?)\\n(?<content>.*?)\\n\\n",
                "number\nstart --> end\ncontent\n\n");
        return Stream.of(singleGroup, severalGroups);
    }

    @ParameterizedTest
    @MethodSource("serialize")
    void serialize(Format<? extends SubtitlesFile> format, Supplier<SubtitlesFile> input, String expectedResult) throws Exception {
        SubtitlesFile toSerialize = input.get();
        assertEquals(expectedResult, new Serializer<>(format).serialize(toSerialize));
    }

    @ParameterizedTest
    @MethodSource("patternToStringTemplate")
    void patternToStringTemplate(Pattern pattern, String expectedResult) {
        assertEquals(expectedResult, Serializer.patternToStringTemplate(pattern));
    }
}
68 changes: 68 additions & 0 deletions src/test/java/io/github/mdaubie/subtitlesparser/TestObjects.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package io.github.mdaubie.subtitlesparser;

import io.github.mdaubie.subtitlesparser.model.SubRipFile;
import io.github.mdaubie.subtitlesparser.model.SubRipSubtitle;

import java.time.LocalTime;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Supplier;

//Using supplier to deep copy and keep the objects independent between tests
/**
 * Shared fixtures for the tests. Objects are exposed through suppliers, so every call to {@code get()}
 * builds a fresh instance that a test can modify freely.
 */
public class TestObjects {
    /**
     * SubRip fixtures: three subtitles (the second and the third overlap), files built from them,
     * their expected state after fixing, and the textual form of {@link #object1}.
     */
    public static class SubRip {
        public static final Supplier<SubRipSubtitle> sub1 = () -> getSubRipSubtitle(1,
                LocalTime.of(0, 0, 51, 93 * 1000000),
                LocalTime.of(0, 0, 52, 635 * 1000000),
                "This is the first dialogue");
        public static final Supplier<SubRipSubtitle> sub2 = () -> getSubRipSubtitle(2,
                LocalTime.of(0, 0, 56, 473 * 1000000),
                LocalTime.of(0, 0, 58, 266 * 1000000),
                "This is the second dialogue");
        // Starts before sub2 ends
        public static final Supplier<SubRipSubtitle> sub3 = () -> getSubRipSubtitle(3,
                LocalTime.of(0, 0, 57, 908 * 1000000),
                LocalTime.of(0, 0, 59, 701 * 1000000),
                "This is the third dialogue");
        // sub3 with its start moved to the end of sub2
        public static final Supplier<SubRipSubtitle> sub3FixedDelay = () -> getSubRipSubtitle(3,
                LocalTime.of(0, 0, 58, 266 * 1000000),
                LocalTime.of(0, 0, 59, 701 * 1000000),
                "This is the third dialogue");
        // sub2 and sub3 merged: start of sub2, end of sub3, both contents
        public static final Supplier<SubRipSubtitle> sub3FixedMerge = () -> getSubRipSubtitle(2,
                LocalTime.of(0, 0, 56, 473 * 1000000),
                LocalTime.of(0, 0, 59, 701 * 1000000),
                "This is the second dialogue" + System.lineSeparator() + "This is the third dialogue");
        public static final Supplier<SubRipFile> object1 = () -> getSubRipFile(sub1.get(), sub2.get(), sub3.get());
        public static final Supplier<SubRipFile> object1FixedDelay = () -> getSubRipFile(sub1.get(), sub2.get(), sub3FixedDelay.get());
        // Same subtitles as object1, with the first two out of order
        public static final Supplier<SubRipFile> object2 = () -> getSubRipFile(sub2.get(), sub1.get(), sub3.get());
        public static final Supplier<SubRipFile> object2FixedMerge = () -> getSubRipFile(sub1.get(), sub3FixedMerge.get());
        // Textual form of object1. Every subtitle block, including the last one, is followed by a blank line.
        public static final String text1 = """
                1
                00:00:51,093 --> 00:00:52,635
                This is the first dialogue

                2
                00:00:56,473 --> 00:00:58,266
                This is the second dialogue

                3
                00:00:57,908 --> 00:00:59,701
                This is the third dialogue

                """;

        // Builds a file holding the given subtitles in a mutable list
        private static SubRipFile getSubRipFile(SubRipSubtitle... subtitles) {
            SubRipFile file = new SubRipFile();
            file.subtitles = new ArrayList<>(List.of(subtitles));
            return file;
        }

        // Builds a subtitle by assigning its four fields
        private static SubRipSubtitle getSubRipSubtitle(int number, LocalTime start, LocalTime end, String content) {
            SubRipSubtitle subRipSubtitle = new SubRipSubtitle();
            subRipSubtitle.number = number;
            subRipSubtitle.start = start;
            subRipSubtitle.end = end;
            subRipSubtitle.content = content;
            return subRipSubtitle;
        }
    }
}
Loading

0 comments on commit 3b36202

Please sign in to comment.