Skip to content

Commit

Permalink
Parse message entities, export in JSON.
Browse files Browse the repository at this point in the history
  • Loading branch information
john-preston committed Jun 21, 2018
1 parent 1e254b9 commit 1a24ba8
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 5 deletions.
76 changes: 75 additions & 1 deletion Telegram/SourceFiles/export/data/export_data_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,76 @@ Utf8String ParseString(const MTPstring &data) {
return data.v;
}

// Splits a message string into an ordered list of parts: plain text
// fragments and fragments covered by message entities.
//
// `data` is the UTF-8 message text, `entities` are its entities. Entity
// offsets and lengths are applied to the QString decoded from `data`, so
// they are counted in UTF-16 code units and not in UTF-8 bytes.
//
// An entity is skipped when it starts before the end of the previously
// accepted entity, has a non-positive length or runs past the end of the
// decoded text. Text before, between and after accepted entities is
// returned as parts with the default TextPart type.
std::vector<TextPart> ParseText(
		const MTPstring &data,
		const QVector<MTPMessageEntity> &entities) {
	using Type = TextPart::Type;
	const auto text = QString::fromUtf8(data.v);
	// Bounds must be measured on the decoded string. The UTF-8 byte count
	// is larger than the QString length for any non-ASCII text, which let
	// out-of-range entities pass validation and appended an empty trailing
	// text part after an entity that ended exactly at the end of the text.
	const auto size = int(text.size());
	const auto mid = [&](int from, int count) {
		return text.mid(from, count).toUtf8();
	};
	auto result = std::vector<TextPart>();
	// End of the last emitted part, in UTF-16 code units.
	auto offset = 0;
	const auto addTextPart = [&](int till) {
		if (till <= offset) {
			return;
		}
		auto part = TextPart();
		part.text = mid(offset, till - offset);
		result.push_back(std::move(part));
		offset = till;
	};
	for (const auto &entity : entities) {
		const auto start = entity.match([](const auto &entityData) {
			return entityData.voffset.v;
		});
		const auto length = entity.match([](const auto &entityData) {
			return entityData.vlength.v;
		});

		// `start < offset` also rejects negative offsets, because offset
		// never goes below zero. The length is compared against the
		// remaining room instead of computing `start + length`, which
		// could overflow for malformed values.
		if (start < offset || length <= 0 || length > size - start) {
			continue;
		}

		// Emit the plain text that precedes this entity, if any.
		addTextPart(start);

		auto part = TextPart();
		part.type = entity.match(
			[](const MTPDmessageEntityUnknown&) { return Type::Unknown; },
			[](const MTPDmessageEntityMention&) { return Type::Mention; },
			[](const MTPDmessageEntityHashtag&) { return Type::Hashtag; },
			[](const MTPDmessageEntityBotCommand&) {
				return Type::BotCommand; },
			[](const MTPDmessageEntityUrl&) { return Type::Url; },
			[](const MTPDmessageEntityEmail&) { return Type::Email; },
			[](const MTPDmessageEntityBold&) { return Type::Bold; },
			[](const MTPDmessageEntityItalic&) { return Type::Italic; },
			[](const MTPDmessageEntityCode&) { return Type::Code; },
			[](const MTPDmessageEntityPre&) { return Type::Pre; },
			[](const MTPDmessageEntityTextUrl&) { return Type::TextUrl; },
			[](const MTPDmessageEntityMentionName&) {
				return Type::MentionName; },
			[](const MTPDinputMessageEntityMentionName&) {
				return Type::MentionName; },
			[](const MTPDmessageEntityPhone&) { return Type::Phone; },
			[](const MTPDmessageEntityCashtag&) { return Type::Cashtag; });
		part.text = mid(start, length);
		// Only some entity kinds carry an extra value; all the others get
		// an empty string.
		part.additional = entity.match(
		[](const MTPDmessageEntityPre &entityData) {
			return ParseString(entityData.vlanguage);
		}, [](const MTPDmessageEntityTextUrl &entityData) {
			return ParseString(entityData.vurl);
		}, [](const MTPDmessageEntityMentionName &entityData) {
			return NumberToString(entityData.vuser_id.v);
		}, [](const auto &) { return Utf8String(); });

		result.push_back(std::move(part));
		offset = start + length;
	}
	// Emit whatever plain text remains after the last accepted entity.
	addTextPart(size);
	return result;
}

Utf8String FillLeft(const Utf8String &data, int length, char filler) {
if (length <= data.size()) {
return data;
Expand Down Expand Up @@ -812,7 +882,11 @@ Message ParseMessage(
mediaFolder);
context.botId = 0;
}
result.text = ParseString(data.vmessage);
result.text = ParseText(
data.vmessage,
(data.has_entities()
? data.ventities.v
: QVector<MTPMessageEntity>{}));
}, [&](const MTPDmessageService &data) {
result.id = data.vid.v;
const auto peerId = ParsePeerId(data.vto_id);
Expand Down
25 changes: 24 additions & 1 deletion Telegram/SourceFiles/export/data/export_data_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,29 @@ ServiceAction ParseServiceAction(
const MTPMessageAction &data,
const QString &mediaFolder);

// One contiguous fragment of a message text: either plain text or the
// text covered by a single message entity.
struct TextPart {
// Kind of the fragment. `Text` is the default and is what ParseText leaves
// on fragments that are not covered by any entity; the other values are
// assigned by ParseText from the matching message entity kind.
enum class Type {
Text,
Unknown,
Mention,
Hashtag,
BotCommand,
Url,
Email,
Bold,
Italic,
Code,
Pre,
TextUrl,
MentionName,
Phone,
Cashtag,
};
Type type = Type::Text;
// The fragment's own text.
Utf8String text;
// Extra value attached to some kinds. As filled by ParseText: the language
// for Pre, the url for TextUrl, the user id converted to a string for
// MentionName (only for the messageEntityMentionName variant), and empty
// for everything else.
Utf8String additional;
};

struct Message {
int32 id = 0;
int32 chatId = 0;
Expand All @@ -425,7 +448,7 @@ struct Message {
Utf8String signature;
int32 viaBotId = 0;
int32 replyToMsgId = 0;
Utf8String text;
std::vector<TextPart> text;
Media media;
ServiceAction action;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ struct Result;
class Stats;

// Presumably the set of output formats an export writer can produce
// (inferred from the enumerator names) — TODO confirm.
// NOTE(review): `Json` is listed twice below because this text comes from a
// rendered diff: one occurrence is the removed line and the other is the
// added line. An enum cannot declare the same enumerator twice, so only one
// of them exists in the real header — confirm which position is final. The
// position matters because the first enumerator is what `Format()` yields.
enum class Format {
Json,
Text,
Yaml,
Html,
Json,
};

class AbstractWriter {
Expand Down
60 changes: 59 additions & 1 deletion Telegram/SourceFiles/export/output/export_output_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,64 @@ QByteArray SerializeArray(
return result;
}

// Serializes the parts of a message text for the JSON output.
//
// - No parts: the result of SerializeString("").
// - Exactly one part of type Text: that part's text through
//   SerializeString, without an enclosing array.
// - Anything else: SerializeArray over all parts, where a Text part is a
//   serialized string and every other part is a SerializeObject with
//   "type", "text" and one extra field whose name depends on the type
//   ("user_id", "language", "href", or "none" with an empty value).
QByteArray SerializeText(
		Context &context,
		const std::vector<Data::TextPart> &data) {
	using Type = Data::TextPart::Type;

	if (data.empty()) {
		return SerializeString("");
	}
	if (data.size() == 1 && data.front().type == Type::Text) {
		return SerializeString(data.front().text);
	}

	// Maps an entity type to the name written in the "type" field.
	const auto typeName = [](Type type) -> const char* {
		switch (type) {
		case Type::Unknown: return "unknown";
		case Type::Mention: return "mention";
		case Type::Hashtag: return "hashtag";
		case Type::BotCommand: return "bot_command";
		case Type::Url: return "link";
		case Type::Email: return "email";
		case Type::Bold: return "bold";
		case Type::Italic: return "italic";
		case Type::Code: return "code";
		case Type::Pre: return "pre";
		case Type::TextUrl: return "text_link";
		case Type::MentionName: return "mention_name";
		case Type::Phone: return "phone";
		case Type::Cashtag: return "cashtag";
		}
		Unexpected("Type in SerializeText.");
	};

	auto items = std::vector<QByteArray>();
	items.reserve(data.size());
	for (const auto &part : data) {
		if (part.type == Type::Text) {
			items.push_back(SerializeString(part.text));
			continue;
		}
		const auto kind = typeName(part.type);

		// Type-specific extra field. The user id is written as is, the
		// language and the link go through SerializeString.
		const char *extraName = "none";
		auto extraValue = QByteArray();
		switch (part.type) {
		case Type::MentionName:
			extraName = "user_id";
			extraValue = part.additional;
			break;
		case Type::Pre:
			extraName = "language";
			extraValue = SerializeString(part.additional);
			break;
		case Type::TextUrl:
			extraName = "href";
			extraValue = SerializeString(part.additional);
			break;
		default:
			break;
		}

		items.push_back(SerializeObject(context, {
			{ "type", SerializeString(kind) },
			{ "text", SerializeString(part.text) },
			{ extraName, extraValue },
		}));
	}
	return SerializeArray(context, items);
}

// Returns the username prefixed with '@'; an empty username is returned
// unchanged.
Data::Utf8String FormatUsername(const Data::Utf8String &username) {
	if (username.isEmpty()) {
		return username;
	}
	return '@' + username;
}
Expand Down Expand Up @@ -493,7 +551,7 @@ QByteArray SerializeMessage(
Unexpected("Unsupported message.");
}, [](const base::none_type &) {});

push("text", message.text);
pushBare("text", SerializeText(context, message.text));

return serialized();
}
Expand Down
7 changes: 6 additions & 1 deletion Telegram/SourceFiles/export/output/export_output_text.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,12 @@ QByteArray SerializeMessage(
Unexpected("Unsupported message.");
}, [](const base::none_type &) {});

push("Text", message.text);
auto value = JoinList(QByteArray(), ranges::view::all(
message.text
) | ranges::view::transform([](const Data::TextPart &part) {
return part.text;
}) | ranges::to_vector);
push("Text", value);

return SerializeKeyValue(std::move(values));
}
Expand Down

0 comments on commit 1a24ba8

Please sign in to comment.