Skip to content

Commit

Permalink
Merge pull request JDimproved#1277 from JDimproved/add-guess-encoding-from-text-option
Browse files Browse the repository at this point in the history

add guess encoding from text option
  • Loading branch information
ma8ma committed Oct 21, 2023
2 parents 058a6eb + 42db53c commit 7e23cb3
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 16 deletions.
23 changes: 17 additions & 6 deletions src/board/preference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,25 +211,33 @@ Preferences::Preferences( Gtk::Window* parent, const std::string& url, const std
m_radio_encoding_http_header.set_group( m_group_encoding );
m_radio_encoding_http_header.set_label( "HTTPヘッダーのエンコーディング情報を使う" );

m_radio_encoding_guess.set_group( m_group_encoding );
m_radio_encoding_guess.set_label( "テキストからエンコーディングを推測する" );

m_vbox_encoding_analysis_method.set_margin_start( 30 );
m_vbox_encoding_analysis_method.set_margin_end( 30 );
m_vbox_encoding_analysis_method.set_margin_top( 10 );
m_vbox_encoding_analysis_method.set_margin_bottom( 10 );
m_vbox_encoding_analysis_method.pack_start( m_label_encoding_analysis_method, Gtk::PACK_SHRINK );
m_vbox_encoding_analysis_method.pack_start( m_radio_encoding_default, Gtk::PACK_SHRINK );
m_vbox_encoding_analysis_method.pack_start( m_radio_encoding_http_header, Gtk::PACK_SHRINK );
m_vbox_encoding_analysis_method.pack_start( m_radio_encoding_guess, Gtk::PACK_SHRINK );

m_revealer_encoding.add( m_vbox_encoding_analysis_method );

m_vbox_encoding.pack_start( m_hbox_encoding, Gtk::PACK_SHRINK );
m_vbox_encoding.pack_start( m_revealer_encoding, Gtk::PACK_SHRINK );

if( const int method = DBTREE::board_encoding_analysis_method( get_url() );
method == EncodingAnalysisMethod::http_header ) {
m_radio_encoding_http_header.set_active( true );
}
else {
m_radio_encoding_default.set_active( true );
switch( DBTREE::board_encoding_analysis_method( get_url() ) ) {
case EncodingAnalysisMethod::http_header:
m_radio_encoding_http_header.set_active( true );
break;
case EncodingAnalysisMethod::guess:
m_radio_encoding_guess.set_active( true );
break;
default:
m_radio_encoding_default.set_active( true );
break;
}
}

Expand Down Expand Up @@ -624,6 +632,9 @@ void Preferences::slot_ok_clicked()
if( m_radio_encoding_http_header.get_active() ) {
DBTREE::board_set_encoding_analysis_method( get_url(), EncodingAnalysisMethod::http_header );
}
else if( m_radio_encoding_guess.get_active() ) {
DBTREE::board_set_encoding_analysis_method( get_url(), EncodingAnalysisMethod::guess );
}
else {
DBTREE::board_set_encoding_analysis_method( get_url(), EncodingAnalysisMethod::use_default );
}
Expand Down
1 change: 1 addition & 0 deletions src/board/preference.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ namespace BOARD
Gtk::RadioButtonGroup m_group_encoding;
Gtk::RadioButton m_radio_encoding_default;
Gtk::RadioButton m_radio_encoding_http_header;
Gtk::RadioButton m_radio_encoding_guess;

// ネットワーク設定
Gtk::Box m_vbox_network;
Expand Down
3 changes: 2 additions & 1 deletion src/jdencoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ struct EncodingAnalysisMethod
{
// Integer constants that select how a board's text encoding is determined.
static constexpr const int use_default = 0; ///< Use the default setting
static constexpr const int http_header = 1; ///< Use the encoding information from the HTTP header
// NOTE(review): `max` is listed twice in this diff view; presumably the line
// directly below is the removed (pre-change) definition and the later one is
// its replacement -- confirm against the actual header.
static constexpr const int max = http_header;
static constexpr const int guess = 2; ///< Guess the encoding from the text
static constexpr const int max = guess;
};

#endif
35 changes: 26 additions & 9 deletions src/skeleton/loadable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ enum class Loadable::CharsetDetection
{
// States of the charset detection performed while loading.
// NOTE(review): `finished` is listed twice in this diff view; presumably the
// first `finished` / `use_default` pair is the removed (pre-change) version and
// the second `finished` / `guess` pair is its replacement -- confirm against
// the actual source file.
parse_header, ///< Parse the HTTP header
parse_meta, ///< Parse the HTML meta element
finished, ///< Detection has finished
use_default, ///< Use the default setting
finished, ///< Detection has finished, or the default setting is used
guess, ///< Guess the encoding from the text
};


Expand Down Expand Up @@ -113,12 +113,17 @@ bool Loadable::start_load( const JDLIB::LOADERDATA& data )

// 情報初期化
// m_date_modified, m_cookie は初期化しない
if( data.encoding_analysis_method == EncodingAnalysisMethod::http_header ) {
m_charset_det = CharsetDetection::parse_header;
}
else {
m_charset_det = CharsetDetection::use_default;
set_encoding( m_default_encoding );
switch( data.encoding_analysis_method ) {
case EncodingAnalysisMethod::http_header:
m_charset_det = CharsetDetection::parse_header;
break;
case EncodingAnalysisMethod::guess:
m_charset_det = CharsetDetection::guess;
break;
default:
m_charset_det = CharsetDetection::finished;
set_encoding( m_default_encoding );
break;
}
m_code = HTTP_INIT;
m_str_code = std::string();
Expand Down Expand Up @@ -156,7 +161,19 @@ void Loadable::receive( const char* data, size_t size )
if( ! m_total_length && m_code != HTTP_INIT ) m_total_length = get_loader_length();
m_current_length += size;

if( m_charset_det == CharsetDetection::parse_header ) {
if( m_charset_det == CharsetDetection::finished ) {
receive_data( std::string_view{ data, size } );
return;
}

if( m_charset_det == CharsetDetection::guess ) {
const Encoding enc = MISC::detect_encoding( std::string_view{ data, size } );
if( enc != Encoding::unknown ) {
set_encoding( enc );
m_charset_det = CharsetDetection::finished;
}
}
else if( m_charset_det == CharsetDetection::parse_header ) {
const Encoding enc = get_loader_content_charset();
if( enc != Encoding::unknown ) {
set_encoding( enc );
Expand Down

0 comments on commit 7e23cb3

Please sign in to comment.