Skip to content

Commit

Permalink
Fix an issue where the charset in the link preview of some pages was not identified correctly.
Browse files Browse the repository at this point in the history
  • Loading branch information
razieli authored and clark-signal committed Jul 12, 2023
1 parent 5ca0255 commit 23ef8c7
Showing 1 changed file with 22 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -1,19 +1,24 @@
package org.thoughtcrime.securesms.util;

import androidx.annotation.NonNull;
import androidx.core.text.HtmlCompat;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import okhttp3.MediaType;
import okhttp3.ResponseBody;

public final class OkHttpUtil {

private static final Pattern CHARSET_PATTERN = Pattern.compile("charset=[\"']?([a-zA-Z0-9\\\\-]+)[\"']?");

private OkHttpUtil() {}

public static byte[] readAsBytes(@NonNull InputStream bodyStream, long sizeLimit) throws IOException {
Expand Down Expand Up @@ -41,8 +46,24 @@ public static String readAsString(@NonNull ResponseBody body, long sizeLimit) th

byte[] data = readAsBytes(body.byteStream(), sizeLimit);
MediaType contentType = body.contentType();
Charset charset = contentType != null ? contentType.charset(StandardCharsets.UTF_8) : StandardCharsets.UTF_8;
Charset charset = contentType != null ? contentType.charset(null) : null;

charset = charset == null ? getHtmlCharset(new String(data)) : charset;

return new String(data, Objects.requireNonNull(charset));
}

/**
 * Determines the charset declared inside an HTML document.
 *
 * Looks for the first {@code charset=...} declaration matched by {@link #CHARSET_PATTERN},
 * runs the captured name through {@link #fromDoubleEncoded(String)} and resolves the result
 * with {@link Charset#forName(String)}.
 *
 * @param html the document text to scan
 * @return the declared charset, or UTF-8 when no declaration is found or the declared name
 *         cannot be resolved
 */
private static @NonNull Charset getHtmlCharset(String html) {
  Matcher declaration = CHARSET_PATTERN.matcher(html);

  // Nothing that looks like a charset declaration: use the default.
  if (!declaration.find()) {
    return StandardCharsets.UTF_8;
  }

  try {
    // Group 1 is the bare charset name, without any surrounding quotes.
    String charsetName = fromDoubleEncoded(declaration.group(1));
    return Objects.requireNonNull(Charset.forName(charsetName));
  } catch (Exception ignored) {
    // Best effort: an illegal or unsupported charset name (or any failure while decoding it)
    // is treated the same as a missing declaration.
    return StandardCharsets.UTF_8;
  }
}

/**
 * Runs the given text through {@link HtmlCompat#fromHtml(String, int)} twice, so that input
 * which has been HTML-escaped two times over is reduced to plain text.
 *
 * @param html the (possibly double-encoded) text to decode
 * @return the text after two decoding passes
 */
private static @NonNull String fromDoubleEncoded(@NonNull String html) {
  // Flags value 0 is passed on both passes, exactly as before.
  String decodedOnce  = HtmlCompat.fromHtml(html, 0).toString();
  String decodedTwice = HtmlCompat.fromHtml(decodedOnce, 0).toString();

  return decodedTwice;
}
}

0 comments on commit 23ef8c7

Please sign in to comment.