Skip to content

Commit

Permalink
Fixed UTF-8 printing on Windows
Browse files (browse the repository at this point in the history)
  • Loading branch information
qinwf committed Nov 6, 2014
1 parent ddd75e6 commit 295de8b
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 46 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -9,3 +9,4 @@ packrat/lib*/
.Rprofile
*src-i386/
*src-x64/
*.txt
12 changes: 8 additions & 4 deletions R/keywords.R
Expand Up @@ -34,7 +34,10 @@ keywords <- function(code, jiebar) {
}

keyl <- function(code, jiebar, encoding) {

if (.Platform$OS.type == "windows"){
old.locale <- Sys.getlocale("LC_CTYPE")
Sys.setlocale(category = "LC_CTYPE", locale = "chs")
}
input.r <- file(code, open = "r")
OUT <- FALSE
tryCatch({
Expand All @@ -52,6 +55,9 @@ keyl <- function(code, jiebar, encoding) {
return(out.lines)
}, finally = {
try(close(input.r), silent = TRUE)
if (.Platform$OS.type == "windows"){
Sys.setlocale(category = "LC_CTYPE", locale = old.locale)
}
})
}

Expand All @@ -66,8 +72,6 @@ keyw <- function(code, jiebar) {
result <- jiebar$worker$tag(code)

if (.Platform$OS.type == "windows") {

return(iconv(result, "UTF-8", "GBK"))
}
Encoding(result)<-"UTF-8"}
return(result)
}
12 changes: 8 additions & 4 deletions R/segment.R
Expand Up @@ -59,13 +59,14 @@ segment <- function(code, jiebar) {
}

cutl <- function(code, jiebar, symbol, lines, output, encoding, write_file,FILESMODE) {
if (.Platform$OS.type == "windows"){
old.locale <- Sys.getlocale("LC_CTYPE")
Sys.setlocale(category = "LC_CTYPE", locale = "chs")
}
nlines <- lines
input.r <- file(code, open = "r")
if(write_file==T){
if (.Platform$OS.type == "windows"){
old.locale <- Sys.getlocale("LC_CTYPE")
Sys.setlocale(category = "LC_CTYPE", locale = "chs")
}

if (.Platform$OS.type == "windows") {
output.w <- file(output, open = "ab", encoding = "UTF-8")
} else {
Expand Down Expand Up @@ -127,6 +128,9 @@ cutl <- function(code, jiebar, symbol, lines, output, encoding, write_file,FILES
}
, finally = {
try(close(input.r), silent = TRUE)
if (.Platform$OS.type == "windows"){
Sys.setlocale(category = "LC_CTYPE", locale = old.locale)
}
})
return(result)
}
Expand Down
24 changes: 17 additions & 7 deletions R/simhash.R
Expand Up @@ -36,7 +36,10 @@ simhash <- function(code, jiebar) {
}

simhashl <- function(code, jiebar, encoding) {

if (.Platform$OS.type == "windows"){
old.locale <- Sys.getlocale("LC_CTYPE")
Sys.setlocale(category = "LC_CTYPE", locale = "chs")
}
input.r <- file(code, open = "r")
OUT <- FALSE
tryCatch({
Expand All @@ -53,6 +56,9 @@ simhashl <- function(code, jiebar, encoding) {
return(out.lines)
}, finally = {
try(close(input.r), silent = TRUE)
if (.Platform$OS.type == "windows"){
Sys.setlocale(category = "LC_CTYPE", locale = old.locale)
}
})
}

Expand All @@ -67,9 +73,7 @@ simhashw <- function(code, jiebar) {
result <- jiebar$worker$simhash(code,jiebar$topn)

if (.Platform$OS.type == "windows") {
result$keyword<-iconv(result$keyword, "UTF-8", "GBK")

return(result)
Encoding(result$keyword)<-"UTF-8"
}
return(result)
}
Expand Down Expand Up @@ -111,15 +115,18 @@ distance<-function(codel,coder,jiebar){
}
result<-jiebar$worker$distance(codel,coder,jiebar$topn)
if (.Platform$OS.type == "windows") {
result$lhs<-iconv(result$lhs, "UTF-8", "GBK")
result$rhs<-iconv(result$rhs, "UTF-8", "GBK")
Encoding(result$rhs)<-"UTF-8"
Encoding(result$lhs)<-"UTF-8"
}
result
}


distancel <- function(code, jiebar, encoding) {

if (.Platform$OS.type == "windows"){
old.locale <- Sys.getlocale("LC_CTYPE")
Sys.setlocale(category = "LC_CTYPE", locale = "chs")
}
input.r <- file(code, open = "r")
OUT <- FALSE
tryCatch({
Expand All @@ -140,5 +147,8 @@ distancel <- function(code, jiebar, encoding) {
return(tmp.lines)
}, finally = {
try(close(input.r), silent = TRUE)
if (.Platform$OS.type == "windows"){
Sys.setlocale(category = "LC_CTYPE", locale = old.locale)
}
})
}
16 changes: 12 additions & 4 deletions R/tagger.R
Expand Up @@ -42,6 +42,10 @@ tag<- function(code, jiebar) {
}

tagl <- function(code, jiebar, symbol, lines, output, encoding, write_file,FILESMODE) {
if (.Platform$OS.type == "windows"){
old.locale <- Sys.getlocale("LC_CTYPE")
Sys.setlocale(category = "LC_CTYPE", locale = "chs")
}
nlines <- lines
input.r <- file(code, open = "r")
if(write_file==T){
Expand Down Expand Up @@ -76,6 +80,9 @@ tagl <- function(code, jiebar, symbol, lines, output, encoding, write_file,FILES
, finally = {
try(close(input.r), silent = TRUE)
try(close(output.w), silent = TRUE)
if (.Platform$OS.type == "windows"){
Sys.setlocale(category = "LC_CTYPE", locale = old.locale)
}
})
OUT <- TRUE
cat(paste("Output file: ", output, "\n"))
Expand Down Expand Up @@ -103,6 +110,9 @@ tagl <- function(code, jiebar, symbol, lines, output, encoding, write_file,FILES
}
, finally = {
try(close(input.r), silent = TRUE)
if (.Platform$OS.type == "windows"){
Sys.setlocale(category = "LC_CTYPE", locale = old.locale)
}
})

return(result)
Expand All @@ -126,9 +136,7 @@ tagw <- function(code, jiebar, symbol, FILESMODE) {
if (symbol == F && FILESMODE ==F) {
result <- grep("[^[:space:]]", result, value = T)
}
if (.Platform$OS.type == "windows" && FILESMODE == F) {

return(iconv(result, "UTF-8", "GBK"))
}
if (.Platform$OS.type == "windows") {
Encoding(result)<-"UTF-8"}
return(result)
}
27 changes: 0 additions & 27 deletions RR.txt

This file was deleted.

0 comments on commit 295de8b

Please sign in to comment.