Skip to content

Commit

Permalink
Merge pull request #39 from mdaubie/master
Browse files Browse the repository at this point in the history
Releasing v1.0.2
  • Loading branch information
Matthieu Daubié committed May 7, 2023
2 parents 58a36cc + edef0db commit 6407dd5
Show file tree
Hide file tree
Showing 16 changed files with 261 additions and 44 deletions.
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# Subtitles Parser
<div style="text-align: center;">
<h1>Subtitles Parser</h1>

[![version](https://img.shields.io/github/v/release/mdaubie/subtitles-parser?display_name=tag)](https://github.com/mdaubie/subtitles-parser/releases/latest)
[![release status](https://github.com/mdaubie/subtitles-parser/actions/workflows/release.yml/badge.svg)](https://github.com/mdaubie/subtitles-parser/actions/workflows/release.yml)
[![checks status](https://github.com/mdaubie/subtitles-parser/actions/workflows/test-all.yml/badge.svg)](https://github.com/mdaubie/subtitles-parser/actions/workflows/test-all.yml)
[![release status](https://github.com/mdaubie/subtitles-parser/actions/workflows/release.yml/badge.svg)](https://github.com/mdaubie/subtitles-parser/actions/workflows/release.yml)
[![publish status](https://github.com/mdaubie/subtitles-parser/actions/workflows/publish.yml/badge.svg)](https://github.com/mdaubie/subtitles-parser/actions/workflows/publish.yml)
<br>
[![license](https://img.shields.io/github/license/mdaubie/subtitles-parser)](https://github.com/mdaubie/subtitles-parser/blob/master/LICENSE)

[![version](https://img.shields.io/github/v/release/mdaubie/subtitles-parser?display_name=tag)](https://github.com/mdaubie/subtitles-parser/releases/latest)
[![Maven Central](https://maven-badges.herokuapp.com/maven-central/io.github.mdaubie/subtitles-parser/badge.svg)](https://maven-badges.herokuapp.com/maven-central/io.github.mdaubie/subtitles-parser)
</div>
Library for parsing subtitles files using regexes

### Handled formats
Expand Down
58 changes: 49 additions & 9 deletions src/main/java/io/github/mdaubie/subtitlesparser/Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,38 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* A class to parse the content of a subtitles file into a SubtitlesFile object.
* The constructor requires a Format object, corresponding to the type of SubtitlesFile to parse.
* @see Format
* @param <SF> The type of SubtitlesFile to parse
*/
public record Parser<SF extends SubtitlesFile>(Format<SF> format) {

/**
 * Reads the whole content of a subtitles file and parses it into a SubtitlesFile object.
 *
 * @param file The file to read and parse
 * @return The object parsed from the content of the file
 * @throws IOException If the file cannot be read, or if {@link #parse(String)} rejects its content
 */
@SuppressWarnings("unused")
public SF parseFile(File file) throws IOException {
    String content = Files.readString(file.toPath());
    return parse(content);
}

/**
 * Parses a text into a SubtitlesFile object.
 * The whole text has to match the pattern registered for the base class of the format.
 *
 * @param text The file content to parse
 * @return The object parsed from the text
 * @throws UnexpectedException If the file content is not structured as expected by the defined format
 */
public SF parse(String text) throws UnexpectedException {
    Class<SF> rootClass = format.baseClass();
    Matcher fileMatcher = PatternHolder.getPattern(rootClass).matcher(text);
    if (fileMatcher.matches()) {
        return dynamicParse(fileMatcher, rootClass);
    }
    throw new UnexpectedException(String.format("Parser could not parse %s with format %s", text, format));
}

private <T> T dynamicParse(Matcher matcher, Class<T> type) throws UnexpectedException {
private <T extends PatternedObject> T dynamicParse(Matcher matcher, Class<T> type) throws UnexpectedException {
try {
T object = type.getConstructor().newInstance();
for (Field field : type.getFields()) {
Expand All @@ -42,28 +60,50 @@ private <T> T dynamicParse(Matcher matcher, Class<T> type) throws UnexpectedExce
}
return object;
} catch (ReflectiveOperationException e) {
e.printStackTrace();
return null;
throw new UnexpectedException("Reflective operation unexpectedly failed", e);
}
}

/**
* Parse the provided text into an object corresponding to the type of the field specified
*
* @param value The text to parse
* @param field The field in which the parsed object will be injected
* @return The parsed object
* @throws UnexpectedException If the field type is not handled by the parser
*/
//TODO we might want to register some Functions<> to handle the types
private Object parseObject(String value, Field field) throws UnexpectedException {
Class<?> type = field.getType();
if (type == String.class) return value;
if (type == Integer.class) return Integer.parseInt(value);
if (type == LocalTime.class) return LocalTime.parse(value, format.timestampsFormat());
if (type == List.class) return parseList(value, (ParameterizedType) field.getGenericType());
if (type == List.class)
return parseList(value, ((ParameterizedType) field.getGenericType()).getActualTypeArguments()[0]);
throw new UnexpectedException(String.format("Type %s is not handled by parser", type));
}

private List<Object> parseList(String value, ParameterizedType type) throws UnexpectedException {
Type elementType = type.getActualTypeArguments()[0];
/**
* Parse the provided String into a list of the specified type (expected to be a PatternedObject)
*
* @param value The text to parse
* @param type The type of the list elements
* @param <T> The generic type of the list's elements
* @return The parsed list
* @throws UnexpectedException If the specified type does not correspond to the generic type or if a parsing exception happens recursively
*/
private <T extends PatternedObject> List<T> parseList(String value, Type type) throws UnexpectedException {
//TODO might need to implement basic types handling for some of the formats
Class<? extends PatternedObject> elementClass = (Class<? extends PatternedObject>) elementType;
Class<T> elementClass;
try {
//noinspection unchecked
elementClass = (Class<T>) type;
} catch (ClassCastException e) {
throw new UnexpectedException(String.format("Provided type %s does not correspond to expected PatternedObject class", type));
}
Pattern pattern = PatternHolder.getPattern(elementClass);
Matcher matcher = pattern.matcher(value);
List<Object> list = new ArrayList<>();
List<T> list = new ArrayList<>();
while (matcher.find())
list.add(dynamicParse(matcher, elementClass));
return list;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,21 @@
import java.util.HashMap;
import java.util.regex.Pattern;

/**
* A class registering and distributing the model patterns
*/
public class PatternHolder extends HashMap<Class<? extends PatternedObject>, Pattern> {
private static final PatternHolder instance = new PatternHolder();

private PatternHolder() {
super();
}

/**
* Map the class of a patterned object to its corresponding pattern
* @param patternedObjectClass The class of the patterned object
* @return The corresponding pattern
*/
public static Pattern getPattern(Class<? extends PatternedObject> patternedObjectClass) {
if (!instance.containsKey(patternedObjectClass)) {
try {
Expand Down
98 changes: 72 additions & 26 deletions src/main/java/io/github/mdaubie/subtitlesparser/Serializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,52 +15,98 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* A class to serialize a SubtitlesFile object into a text, ready to be written to a file.
* The constructor requires a Format object, corresponding to the type of SubtitlesFile to serialize.
* @see Format
* @param <SF> The type of SubtitlesFile to serialize
*/
public record Serializer<SF extends SubtitlesFile>(Format<SF> format) {
public void writeToFile(SubtitlesFile subtitlesFile, File file) throws IOException {
/**
 * Serialize the provided SubtitlesFile and write it to the provided File
 *
 * @param subtitlesFile Subtitles file to serialize
 * @param file          File to which to write the serialized result
 * @throws IOException If the subtitles file cannot be serialized, the provided file is not valid or an I/O error occurs
 */
@SuppressWarnings("unused")
public void writeToFile(SF subtitlesFile, File file) throws IOException {
    // Serialize before opening the writer: opening the FileWriter truncates the target file,
    // so a serialization failure would otherwise leave an empty file behind.
    String content = serialize(subtitlesFile);
    // NOTE(review): FileWriter(File) uses the JVM default charset, while Parser#parseFile reads
    // with Files.readString (UTF-8) — consider passing an explicit charset; confirm before changing.
    // try-with-resources closes the writer even when the write fails.
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
        writer.append(content);
    }
}

public String serialize(SubtitlesFile sf) throws UnexpectedException {
/**
 * Turns a SubtitlesFile object into its textual representation.
 *
 * @param sf The subtitles file object to serialize
 * @return The serialized content, ready to be written to a file
 * @throws UnexpectedException If a field type or a list element type is not handled by the serializer,
 *                             or if a field cannot be read through reflection
 */
public String serialize(SF sf) throws UnexpectedException {
    String serialized = dynamicSerialize(sf);
    return serialized;
}

private String dynamicSerialize(PatternedObject object) throws UnexpectedException {
String template = patternToStringTemplate(PatternHolder.getPattern(object.getClass()));
try {
Class<?> type = object.getClass();
for (Field field : type.getFields()) {
boolean isAccessible = field.canAccess(object);
if (!isAccessible) field.setAccessible(true);
template = template.replace(field.getName(), serializeAttribute(object, field));
if (!isAccessible) field.setAccessible(false);
}
return template;
} catch (IllegalAccessException e) {
e.printStackTrace();
return null;
}
Class<?> type = object.getClass();
for (Field field : type.getFields())
template = template.replace(field.getName(), serializeAttribute(object, field));
return template;
}

private String serializeAttribute(Object object, Field field) throws UnexpectedException, IllegalAccessException {
/**
* Serialize the specified field of a given patterned object
*
* @param patternedObject The object being serialized
* @param field The field to serialize
* @return The serialized field
* @throws UnexpectedException If the type of the field is not supported by the serializer (can happen if you are using custom patterned objects)
*/
private String serializeAttribute(PatternedObject patternedObject, Field field) throws UnexpectedException {
Class<?> type = field.getType();
if (type == String.class) return String.valueOf(field.get(object));
if (type == Integer.class) return String.valueOf(field.get(object));
if (type == LocalTime.class) return ((LocalTime) field.get(object)).format(format.timestampsFormat());
if (type == List.class) return serializeList(object, field);
throw new UnexpectedException(String.format("Type %s is not handled by Serializer", type));
Object fieldObject;
try {
boolean isAccessible = field.canAccess(patternedObject);
if (!isAccessible) field.setAccessible(true);
fieldObject = field.get(patternedObject);
if (!isAccessible) field.setAccessible(false);
} catch (IllegalAccessException e) {
throw new UnexpectedException("reflect library failed to make field accessible", e);
}
if (type == String.class) return String.valueOf(fieldObject);
if (type == Integer.class) return String.valueOf(fieldObject);
if (type == LocalTime.class) return ((LocalTime) fieldObject).format(format.timestampsFormat());
if (type == List.class) return serializeList(fieldObject);
throw new UnexpectedException(String.format("Type %s is not handled by serializer when serializing class %s", type, fieldObject.getClass()));
}

private String serializeList(Object object, Field field) throws UnexpectedException, IllegalAccessException {
/**
* Cast the provided object into a list and serialize its elements (for now only lists of PatternedObject are supported)
*
* @param object The object to serialize as a list
* @return The serialized object
* @throws UnexpectedException If the type of the list elements is not PatternedObject
*/
private String serializeList(Object object) throws UnexpectedException {
StringBuilder content = new StringBuilder();
List<PatternedObject> list = (List<PatternedObject>) field.get(object);
//TODO include primitive objects (non-patterned)
for (PatternedObject o : list) content.append(dynamicSerialize(o));
List<?> list = (List<?>) object;
for (Object o : list) {
if (o instanceof PatternedObject)
content.append(dynamicSerialize((PatternedObject) o));
else
//TODO include primitive objects (non-patterned)
throw new UnexpectedException(String.format("Unexpected type %s found when serializing list for format %s", object.getClass(), format));
}
return content.toString();
}

// method to transform regex Pattern into corresponding String template, with placeholders to be replaced by actual values
/**
* Reverse engineer a regex Pattern into its corresponding String template, with placeholders to be replaced by actual values
*
* @param pattern The pattern to convert into a template
* @return The template
*/
// TODO we should actually use something similar to the pattern holder, to not reprocess it every time
public static String patternToStringTemplate(Pattern pattern) {
String regex = "(\\(\\?<(?<groupName>.*?)>.*?\\))";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
import io.github.mdaubie.subtitlesparser.model.Format;
import io.github.mdaubie.subtitlesparser.model.SubRipFile;

/**
* A collection of the pre-built formats handled by the library.
* Each constant is a Format, which is what the Parser and Serializer records take as their component.
*/
public final class SUB_FILE_FORMATS {
/**
* Format for SubRip files: named "SubRip", with the "srt" extension, based on the SubRipFile class,
* and using TIMESTAMP_FORMATS.ISO_8601_COMA for its timestamps.
*/
public static final Format<SubRipFile> SUB_RIP_FORMAT = new Format<>("SubRip", "srt", SubRipFile.class, TIMESTAMP_FORMATS.ISO_8601_COMA.value);
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

import java.time.format.DateTimeFormatter;

/**
* An object describing a subtitles format and providing the rules for it to be parsed and serialized within the library
* @param <SF> The base class of the format
* @see io.github.mdaubie.subtitlesparser.constants.SUB_FILE_FORMATS
*/
public record Format<SF extends SubtitlesFile>(String name, String extension,
Class<SF> baseClass,
DateTimeFormatter timestampsFormat) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

import java.util.regex.Pattern;

/**
* Base class of the model, any component of a subtitles format should extend this class.
* Parser and Serializer look up the pattern of a component from its class, through PatternHolder.
* NOTE(review): the Serializer replaces public field names in a template built from the pattern,
* so the named groups of the pattern presumably have to match the public field names — confirm.
*/
public abstract class PatternedObject {
/**
* @return The pattern with which to parse or serialize the component
*/
public abstract Pattern getPattern();
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import java.util.List;
import java.util.regex.Pattern;

/**
* A class describing SubRip subtitles file
*/
public class SubRipFile extends SubtitlesFile {
public List<SubRipSubtitle> subtitles;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import java.util.regex.Pattern;

/**
* A class describing a SubRip subtitle
*/
public class SubRipSubtitle extends Subtitle {
@Override
public Pattern getPattern() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,24 @@

import java.time.LocalTime;

/**
* A base class to describe a subtitle component.
* The fields are public: Parser and Serializer enumerate them with Class#getFields(),
* which only returns public fields.
*/
public abstract class Subtitle extends PatternedObject {
/**
* The index of the subtitle (starting from 1)
*/
public Integer number;
/**
* The timestamp to start displaying the subtitle
*/
public LocalTime start;
/**
* The timestamp to end displaying the subtitle
*/
public LocalTime end;
/**
* The content of the subtitle
*/
public String content;
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@

import java.util.List;

/**
* A base class to describe a subtitles file.
* It is the upper bound of the base class of a Format, and of the type handled by Parser and Serializer.
* @see Format
*/
public abstract class SubtitlesFile extends PatternedObject {
/**
* @return The list of subtitles of the file
*/
public abstract List<? extends Subtitle> getSubtitles();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package io.github.mdaubie.subtitlesparser.toolbox;

import io.github.mdaubie.subtitlesparser.model.Subtitle;
import io.github.mdaubie.subtitlesparser.model.SubtitlesFile;

import java.security.InvalidParameterException;

/**
 * Toolbox utility shifting all the subtitles of a subtitles file in time.
 */
public class ShiftSubtitles {
    /**
     * Shifts the timestamps of every subtitle by a given offset.
     * Does nothing when the file has no subtitles.
     *
     * @param sf           The SubtitleFile object for which to shift the subtitles
     * @param millisOffset The offset, in milliseconds, by which the subtitles are shifted, can be negative
     * @throws InvalidParameterException if the offset provided is negative and larger than the first timestamp (resulting in a negative timestamp)
     */
    static void shift(SubtitlesFile sf, long millisOffset) {
        // Nothing to shift; this also avoids reading index 0 of an empty list below.
        if (sf.getSubtitles().isEmpty()) {
            return;
        }
        long nanosOffset = millisOffset * 1_000_000L;
        // Only the first subtitle is checked: subtitles are assumed to be in chronological order — TODO confirm
        if (millisOffset < 0 && sf.getSubtitles().get(0).start.toNanoOfDay() + nanosOffset < 0) {
            throw new InvalidParameterException("Too large negative offset provided");
        }
        // NOTE(review): LocalTime.plusNanos wraps around midnight, so a large positive offset is not rejected.
        // Every subtitle is shifted, including the last one.
        for (Subtitle subtitle : sf.getSubtitles()) {
            subtitle.start = subtitle.start.plusNanos(nanosOffset);
            subtitle.end = subtitle.end.plusNanos(nanosOffset);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
class ParserTest {
@ParameterizedTest
@MethodSource("parse")
void parse(Format<? extends SubtitlesFile> format, String text, Supplier<SubtitlesFile> expectedResult) throws Exception {
<SF extends SubtitlesFile> void parse(Format<SF> format, String text, Supplier<SF> expectedResult) throws Exception {
SubtitlesFile actualResult = new Parser<>(format).parse(text);
assertThat(actualResult).usingRecursiveComparison().isEqualTo(expectedResult.get());
}
Expand Down
Loading

0 comments on commit 6407dd5

Please sign in to comment.