Skip to content

Commit

Permalink
Add basic encoding support and String byte operations
Browse files Browse the repository at this point in the history
  • Loading branch information
meh committed Oct 23, 2013
1 parent 77b6f82 commit 423af6c
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 1 deletion.
123 changes: 123 additions & 0 deletions corelib/encoding.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Base class and registry for the encodings known to the runtime.
#
# Each encoding is the single instance of an anonymous subclass created by
# Encoding.register, and is published as one constant per name
# (for example "UTF-8" becomes Encoding::UTF_8).
class Encoding
  # Creates an encoding and stores it under a constant for every name.
  #
  # name    - canonical name, e.g. "UTF-8"
  # options - :aliases (Array of extra names), :ascii and :dummy (flags,
  #           both default to false)
  # block   - evaluated as the body of the new subclass; this is where the
  #           per-encoding methods are defined
  #
  # Returns the Array of all names (the value of names.each).
  def self.register(name, options = {}, &block)
    names    = [name] + (options[:aliases] || [])
    encoding = Class.new(self, &block).
      new(name, names, options[:ascii] || false, options[:dummy] || false)

    names.each { |encoding_name|
      # Constant names cannot contain hyphens, so every hyphen is replaced,
      # not only the first one ("ISO-8859-1" must become ISO_8859_1).
      const_set encoding_name.gsub('-', '_'), encoding
    }
  end

  # Looks up an encoding.
  #
  # name - an Encoding instance (returned unchanged) or a String matching the
  #        canonical name or one of the aliases of a registered encoding
  #
  # Raises ArgumentError when no registered encoding matches.
  def self.find(name)
    return name if self === name

    constants.each { |const|
      encoding = const_get(const)

      if encoding.name == name || encoding.names.include?(name)
        return encoding
      end
    }

    raise ArgumentError, "unknown encoding name - #{name}"
  end

  # name  - canonical name
  # names - canonical name followed by the aliases
  attr_reader :name, :names

  # Stores the values handed over by Encoding.register.
  def initialize(name, names, ascii, dummy)
    @name  = name
    @names = names
    @ascii = ascii
    @dummy = dummy
  end

  # Returns the :ascii flag the encoding was registered with.
  def ascii_compatible?
    @ascii
  end

  # Returns the :dummy flag the encoding was registered with.
  def dummy?
    @dummy
  end

  # Returns the canonical name.
  def to_s
    @name
  end

  # Returns "#<Encoding:NAME>", with " (dummy)" appended for dummy encodings.
  def inspect
    "#<Encoding:#{@name}#{" (dummy)" if @dummy}>"
  end

  # methods to implement per encoding; these defaults raise NotImplementedError
  # until a registered encoding overrides them
  def each_byte(*)
    raise NotImplementedError
  end

  def getbyte(*)
    raise NotImplementedError
  end

  def bytesize(*)
    raise NotImplementedError
  end
end

# UTF-8 (alias CP65001), registered as ASCII compatible.
Encoding.register "UTF-8", aliases: ["CP65001"], ascii: true do
  # Yields the UTF-8 bytes of +string+, one Integer at a time.
  #
  # Code units up to 0x7f are yielded as they are. Anything else is run
  # through encodeURIComponent, whose percent-escapes are the UTF-8 bytes in
  # hexadecimal. A high surrogate followed by a low surrogate is encoded
  # together with its partner, because encodeURIComponent throws on one half
  # of a pair.
  def each_byte(string, &block)
    %x{
      for (var i = 0, length = string.length; i < length; i++) {
        var code = string.charCodeAt(i);

        if (code <= 0x7f) {
          #{yield `code`};
        }
        else {
          var character = string.charAt(i);

          if (code >= 0xd800 && code <= 0xdbff && i + 1 < length) {
            var next = string.charCodeAt(i + 1);

            if (next >= 0xdc00 && next <= 0xdfff) {
              character += string.charAt(++i);
            }
          }

          var encoded = encodeURIComponent(character).substr(1).split('%');

          for (var j = 0, encoded_length = encoded.length; j < encoded_length; j++) {
            #{yield `parseInt(encoded[j], 16)`};
          }
        }
      }
    }
  end

  # Returns the byte at position +idx+ of +string+ (Array#[] semantics on the
  # list of bytes, so an index outside the list gives nil).
  def getbyte(string, idx)
    string.bytes[idx]
  end

  # Returns how many bytes +string+ occupies in this encoding.
  #
  # String#bytesize passes the string as an argument, so it has to be
  # accepted here; the encoding object itself has no bytes of its own.
  def bytesize(string)
    string.bytes.length
  end
end

# UTF-16LE, registered without aliases and without the ASCII flag.
Encoding.register "UTF-16LE" do
  # Yields two bytes per UTF-16 code unit of +string+: the low byte first,
  # then the high byte.
  def each_byte(string, &block)
    %x{
      for (var i = 0, length = string.length; i < length; i++) {
        var code = string.charCodeAt(i);

        #{yield `code & 0xff`};
        #{yield `code >> 8`};
      }
    }
  end

  # Returns the byte at position +idx+ of +string+ (Array#[] semantics on the
  # list of bytes, so an index outside the list gives nil).
  def getbyte(string, idx)
    string.bytes[idx]
  end

  # Returns how many bytes +string+ occupies in this encoding.
  #
  # String#bytesize passes the string as an argument, so it has to be
  # accepted here; the encoding object itself has no bytes of its own.
  def bytesize(string)
    string.bytes.length
  end
end

# ASCII-8BIT (alias BINARY), registered as ASCII compatible.
Encoding.register "ASCII-8BIT", aliases: ["BINARY"], ascii: true do
  # Yields one byte per code unit of +string+: the code unit masked down to
  # its low 8 bits.
  def each_byte(string, &block)
    %x{
      for (var i = 0, length = string.length; i < length; i++) {
        #{yield `string.charCodeAt(i) & 0xff`};
      }
    }
  end

  # Returns the byte at position +idx+ of +string+ (Array#[] semantics on the
  # list of bytes, so an index outside the list gives nil).
  def getbyte(string, idx)
    string.bytes[idx]
  end

  # Returns how many bytes +string+ occupies in this encoding.
  #
  # String#bytesize passes the string as an argument, so it has to be
  # accepted here; the encoding object itself has no bytes of its own.
  def bytesize(string)
    string.bytes.length
  end
end

# Gives strings a default encoding.
class String
# Inline JavaScript: assigns the Encoding::UTF_16LE object to the `encoding`
# property of `def`.
# NOTE(review): `def` is presumably the prototype object the compiler exposes
# inside a class body, which would make this the default for every string —
# confirm against the compiler output.
`def.encoding = #{Encoding::UTF_16LE}`
end
1 change: 1 addition & 0 deletions corelib/opal.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
require 'hash'
require 'string'
require 'match_data'
require 'encoding'
require 'numeric'
require 'proc'
require 'range'
Expand Down
35 changes: 34 additions & 1 deletion corelib/string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,14 @@ def [](index, length = undefined)
}
end

# Returns an Array holding every value produced by each_byte, in order.
def bytes
  collected = []
  each_byte { |byte| collected << byte }
  collected
end

# Delegates to the bytesize method of the object in @encoding, passing the
# receiver as the only argument, and returns whatever that call returns.
def bytesize
@encoding.bytesize(self)
end

# Returns a string made of the receiver's first character upper-cased followed
# by the rest of the receiver lower-cased. Implemented as a single inline
# JavaScript expression.
def capitalize
`self.charAt(0).toUpperCase() + self.substr(1).toLowerCase()`
end
Expand Down Expand Up @@ -227,6 +235,14 @@ def downcase
`self.toLowerCase()`
end

# Hands the given block to the each_byte method of the object in @encoding,
# together with the receiver.
#
# Returns the receiver when a block is given, otherwise the result of
# enum_for(:each_byte).
def each_byte(&block)
  if block_given?
    @encoding.each_byte(self, &block)
    self
  else
    enum_for(:each_byte)
  end
end

def each_char(&block)
return enum_for :each_char unless block_given?

Expand Down Expand Up @@ -264,6 +280,10 @@ def empty?
`self.length === 0`
end

# Returns the object stored in @encoding.
def encoding
@encoding
end

def end_with?(*suffixes)
%x{
for (var i = 0, length = suffixes.length; i < length; i++) {
Expand All @@ -281,8 +301,21 @@ def end_with?(*suffixes)
# eql? shares the implementation of ==, and equal? shares the implementation
# of ===.
alias eql? ==
alias equal? ===

# Returns this string tagged with the given encoding.
#
# encoding - passed straight to Encoding.find, so anything that method
#            accepts; errors raised by Encoding.find propagate to the caller
#
# Returns the receiver itself when the resolved encoding equals @encoding.
# Otherwise returns a new object built from the receiver whose `encoding`
# property is set to the resolved encoding; this method does not assign
# anything on the receiver.
def force_encoding(encoding)
# Resolve the argument to an encoding object first.
encoding = Encoding.find(encoding)

# Nothing to do when the encoding is already the current one.
return self if encoding == @encoding

# Inline JavaScript; it reads the Ruby local `encoding` directly.
# NOTE(review): `native_string` is not defined in the visible code —
# presumably the native JavaScript String constructor; confirm.
%x{
var result = new native_string(self);
result.encoding = encoding;
return result;
}
end

# Returns the byte at +idx+ as reported by the string's encoding.
#
# idx - forwarded unchanged to the getbyte method of the object in @encoding,
#       together with the receiver
#
# The lookup is left entirely to the encoding; the former inline
# charCodeAt expression was evaluated and thrown away, so it is gone.
def getbyte(idx)
  @encoding.getbyte(self, idx)
end

def gsub(pattern, replace = undefined, &block)
Expand Down

0 comments on commit 423af6c

Please sign in to comment.