Skip to content

Commit

Permalink
Don't use UTF8 conversion for non-UTF8 encoded files
Browse files Browse the repository at this point in the history
  • Loading branch information
Tim Parsons committed Jun 5, 2015
1 parent 3e96dfe commit 1344e20
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 2 deletions.
46 changes: 44 additions & 2 deletions SRT to VTT Converter/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include <codecvt>
#include <dirent.h>
#include <iostream>
#include "Utils.h"
#include "text_encoding_detect.h"

Expand All @@ -31,6 +32,7 @@ void Utils::openFile(const string& filepath, wifstream& stream)
fin.close();

AutoIt::TextEncodingDetect encodingDetector;
dumpEncodingType(buffer, bytes);
switch (encodingDetector.DetectEncoding(buffer, bytes)) {
// UTF-16
case AutoIt::TextEncodingDetect::UTF16_LE_BOM:
Expand All @@ -45,14 +47,54 @@ void Utils::openFile(const string& filepath, wifstream& stream)
throw runtime_error("Converting UTF-16 encoded files is not supported on your platform.");
#endif

// ASCII, ANSI, UTF-8, none (treat as UTF-8)
default:
case AutoIt::TextEncodingDetect::UTF8_BOM:
case AutoIt::TextEncodingDetect::UTF8_NOBOM:
stream.open(filepath);
stream.imbue(locale(fin.getloc(), new codecvt_utf8<wchar_t, 0x10ffff, consume_header>));
break;

// ASCII, ANSI, none
default:
stream.open(filepath);
stream.imbue(locale(fin.getloc(), new codecvt<wchar_t, char, mbstate_t>));
break;
}
}

void Utils::dumpEncodingType(const unsigned char *pBuffer, size_t size)
{
AutoIt::TextEncodingDetect encodingDetector;
switch (encodingDetector.DetectEncoding(pBuffer, size)) {
case AutoIt::TextEncodingDetect::None:
cout << " No text encoding detected. ";
break;
case AutoIt::TextEncodingDetect::ANSI:
cout << " ANSI text encoding detected. ";
break;
case AutoIt::TextEncodingDetect::ASCII:
cout << " ASCII text encoding detected. ";
break;
case AutoIt::TextEncodingDetect::UTF8_BOM:
cout << " UTF8_BOM text encoding detected. ";
break;
case AutoIt::TextEncodingDetect::UTF8_NOBOM:
cout << " UTF8_NOBOM text encoding detected. ";
break;
case AutoIt::TextEncodingDetect::UTF16_LE_BOM:
cout << " UTF16_LE_BOM text encoding detected. ";
break;
case AutoIt::TextEncodingDetect::UTF16_LE_NOBOM:
cout << " UTF16_LE_NOBOM text encoding detected. ";
break;
case AutoIt::TextEncodingDetect::UTF16_BE_BOM:
cout << " UTF16_BE_BOM text encoding detected. ";
break;
case AutoIt::TextEncodingDetect::UTF16_BE_NOBOM:
cout << " UTF16_BE_NOBOM text encoding detected. ";
break;
}
}

bool Utils::isDir(const string& path)
{
#if defined(_WIN32) || defined(WIN32)
Expand Down
6 changes: 6 additions & 0 deletions SRT to VTT Converter/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ class Utils
* @returns A reference to the input string (subject).
*/
static std::wstring& wstr_replace(std::wstring& subject, std::wstring search, std::wstring replace);

private:
/**
* Prints out the detected encoding type.
*/
static void dumpEncodingType(const unsigned char *pBuffer, size_t size);
};

#endif
Binary file modified bin/Mac-OSX/srt-vtt
Binary file not shown.

0 comments on commit 1344e20

Please sign in to comment.