diff --git a/Gemfile b/Gemfile deleted file mode 100644 index 8e3af3b..0000000 --- a/Gemfile +++ /dev/null @@ -1,4 +0,0 @@ -source "http://rubygems.org" - -# Specify the gem's dependencies in twitter-text.gemspec -gemspec diff --git a/Gemfile.lock b/Gemfile.lock deleted file mode 100644 index 381a08a..0000000 --- a/Gemfile.lock +++ /dev/null @@ -1,38 +0,0 @@ -PATH - remote: . - specs: - twitter-text (1.4.8) - activesupport - -GEM - remote: http://rubygems.org/ - specs: - activesupport (3.0.3) - diff-lcs (1.1.2) - nokogiri (1.4.4) - nokogiri (1.4.4-java) - weakling (>= 0.0.3) - rake (0.8.7) - rspec (2.3.0) - rspec-core (~> 2.3.0) - rspec-expectations (~> 2.3.0) - rspec-mocks (~> 2.3.0) - rspec-core (2.3.1) - rspec-expectations (2.3.0) - diff-lcs (~> 1.1.2) - rspec-mocks (2.3.0) - simplecov (0.3.7) - simplecov-html (>= 0.3.7) - simplecov-html (0.3.9) - weakling (0.0.4-java) - -PLATFORMS - java - ruby - -DEPENDENCIES - nokogiri - rake - rspec - simplecov - twitter-text! diff --git a/LICENSE b/LICENSE deleted file mode 100644 index ae31f94..0000000 --- a/LICENSE +++ /dev/null @@ -1,188 +0,0 @@ -Copyright 2011 Twitter, Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this work except in compliance with the License. -You may obtain a copy of the License below, or at: - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. diff --git a/README.rdoc b/README.rdoc index d0885f5..e232b91 100644 --- a/README.rdoc +++ b/README.rdoc @@ -1,115 +1,4 @@ == twitter-text -A gem that provides text processing routines for Twitter Tweets. The major -reason for this is to unify the various auto-linking and extraction of -usernames, lists, hashtags and URLs. - -== Extraction Examples - - # Extraction - class MyClass - include Twitter::Extractor - usernames = extract_mentioned_screen_names("Mentioning @twitter and @jack") - # usernames = ["twitter", "jack"] - end - - # Extraction with a block argument - class MyClass - include Twitter::Extractor - extract_reply_screen_name("@twitter are you hiring?").do |username| - # username = "twitter" - end - end - -== Auto-linking Examples - - # Auto-link - class MyClass - include Twitter::Autolink - - html = auto_link("link @user, please #request") - end - - # For Ruby on Rails you want to add this to app/helpers/application_helper.rb - module ApplicationHelper - include Twitter::Autolink - end - - # Now the auto_link function is available in every view. So in index.html.erb: - <%= auto_link("link @user, please #request") %> - -=== Usernames - -Username extraction and linking matches all valid Twitter usernames but does -not verify that the username is a valid Twitter account. - -=== Lists - -Auto-link and extract list names when they are written in @user/list-name -format. - -=== Hashtags - -Auto-link and extract hashtags, where a hashtag can contain most letters or -numbers but cannot be solely numbers and cannot contain punctuation. - -=== URLs - -Auto-linking and extraction of URLs differs from the Rails default so that it -will work correctly in Tweets written in languages that do not include spaces -between words. - -=== International - -Special care has been taken to be sure that auto-linking and extraction work -in Tweets of all languages. This means that languages without spaces between -words should work equally well. - -=== Hit Highlighting - -Use to provide emphasis around the "hits" returned from the Search API, built -to work against text that has been auto-linked already. - -=== Conformance - -To run the Conformance suite, you'll need to add that project as a git submodule. From the root twitter-text-rb directory, run: - - git submodule add git@github.com:twitter/twitter-text-conformance.git test/twitter-text-conformance/ - git submodule init - git submodule update - -=== Thanks - -Thanks to everybody who has filed issues, provided feedback or contributed patches. Patches courtesy of: - -* At Twitter … - * Matt Sanford - http://github.com/mzsanford - * Raffi Krikorian - http://github.com/r - * Ben Cherry - http://github.com/bcherry - * Patrick Ewing - http://github.com/hoverbird - * Jeff Smick - http://github.com/sprsquish - * Kenneth Kufluk - https://github.com/kennethkufluk - * Keita Fujii - https://github.com/keitaf - * Yoshimasa Niwa - https://github.com/niw - -* Patches from the community … - * Jean-Philippe Bougie - http://github.com/jpbougie - * Erik Michaels-Ober - https://github.com/sferik - -* Anyone who has filed an issue. It helps. Really. - -=== Copyright and License - - Copyright 2011 Twitter, Inc. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this work except in compliance with the License. - You may obtain a copy of the License in the LICENSE file, or at: - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file +The canonical version of twitter-text-rb can now be found at +https://github.com/twitter/twitter-text-rb diff --git a/Rakefile b/Rakefile deleted file mode 100644 index 692e774..0000000 --- a/Rakefile +++ /dev/null @@ -1,62 +0,0 @@ -require 'bundler' -Bundler::GemHelper.install_tasks - -task :default => ["spec", "test:conformance"] - -require 'rspec/core/rake_task' -RSpec::Core::RakeTask.new(:spec) - -def conformance_version(dir) - require 'digest' - Dir[File.join(dir, '*')].inject(Digest::SHA1.new){|digest, file| digest.update(Digest::SHA1.file(file).hexdigest) } -end - -namespace :test do - namespace :conformance do - desc "Update conformance testing data" - task :update do - puts "Updating conformance data ... " - system("git submodule init") || raise("Failed to init submodule") - system("git submodule update") || raise("Failed to update submodule") - puts "Updating conformance data ... DONE" - end - - desc "Change conformance test data to the lastest version" - task :latest => ['conformance:update'] do - current_dir = File.dirname(__FILE__) - submodule_dir = File.join(File.dirname(__FILE__), "test", "twitter-text-conformance") - version_before = conformance_version(submodule_dir) - system("cd #{submodule_dir} && git pull origin master") || raise("Failed to pull submodule version") - system("cd #{current_dir}") - if conformance_version(submodule_dir) != version_before - system("cd #{current_dir} && git add #{submodule_dir}") || raise("Failed to add upgrade files") - system("git commit -m \"Upgraded to the latest conformance suite\" #{submodule_dir}") || raise("Failed to commit upgraded conformacne data") - puts "Upgraded conformance suite." - else - puts "No conformance suite changes." - end - end - - desc "Run conformance test suite" - task :run do - ruby '-rubygems', "test/conformance_test.rb" - end - end - - desc "Run conformance test suite" - task :conformance => ['conformance:latest', 'conformance:run'] do - end -end - -require 'rake/rdoctask' -namespace :doc do - Rake::RDocTask.new do |rd| - rd.main = "README.rdoc" - rd.rdoc_dir = 'doc' - rd.rdoc_files.include("README.rdoc", "lib/**/*.rb") - end -end - -desc "Run cruise control build" -task :cruise => [:spec, 'test:conformance'] do -end diff --git a/TODO b/TODO deleted file mode 100644 index f115a1d..0000000 --- a/TODO +++ /dev/null @@ -1,3 +0,0 @@ -TODO: - - * @mentions preceded by a dash should work. "I am great -@greatguy" diff --git a/lib/autolink.rb b/lib/autolink.rb deleted file mode 100644 index e4e4693..0000000 --- a/lib/autolink.rb +++ /dev/null @@ -1,183 +0,0 @@ -require 'set' - -module Twitter - # A module for including Tweet auto-linking in a class. The primary use of this is for helpers/views so they can auto-link - # usernames, lists, hashtags and URLs. - module Autolink extend self - # Default CSS class for auto-linked URLs - DEFAULT_URL_CLASS = "tweet-url" - # Default CSS class for auto-linked lists (along with the url class) - DEFAULT_LIST_CLASS = "list-slug" - # Default CSS class for auto-linked usernames (along with the url class) - DEFAULT_USERNAME_CLASS = "username" - # Default CSS class for auto-linked hashtags (along with the url class) - DEFAULT_HASHTAG_CLASS = "hashtag" - # Default target for auto-linked urls (nil will not add a target attribute) - DEFAULT_TARGET = nil - # HTML attribute for robot nofollow behavior (default) - HTML_ATTR_NO_FOLLOW = " rel=\"nofollow\"" - # Options which should not be passed as HTML attributes - OPTIONS_NOT_ATTRIBUTES = [:url_class, :list_class, :username_class, :hashtag_class, - :username_url_base, :list_url_base, :hashtag_url_base, - :username_url_block, :list_url_block, :hashtag_url_block, :link_url_block, - :suppress_lists, :suppress_no_follow] - - HTML_ENTITIES = { - '&' => '&', - '>' => '>', - '<' => '<', - '"' => '"', - "'" => ''' - } - - def html_escape(text) - text && text.to_s.gsub(/[&"'><]/) do |character| - HTML_ENTITIES[character] - end - end - - # Add tags around the usernames, lists, hashtags and URLs in the provided text. The - # tags can be controlled with the following entries in the options - # hash: - # - # :url_class:: class to add to all tags - # :list_class:: class to add to list tags - # :username_class:: class to add to username tags - # :hashtag_class:: class to add to hashtag tags - # :username_url_base:: the value for href attribute on username links. The @username (minus the @) will be appended at the end of this. - # :list_url_base:: the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this. - # :hashtag_url_base:: the value for href attribute on hashtag links. The #hashtag (minus the #) will be appended at the end of this. - # :suppress_lists:: disable auto-linking to lists - # :suppress_no_follow:: Do not add rel="nofollow" to auto-linked items - # :target:: add target="window_name" to auto-linked items - def auto_link(text, options = {}) - auto_link_usernames_or_lists( - auto_link_urls_custom( - auto_link_hashtags(text, options), - options), - options) - end - - # Add tags around the usernames and lists in the provided text. The - # tags can be controlled with the following entries in the options - # hash: - # - # :url_class:: class to add to all tags - # :list_class:: class to add to list tags - # :username_class:: class to add to username tags - # :username_url_base:: the value for href attribute on username links. The @username (minus the @) will be appended at the end of this. - # :list_url_base:: the value for href attribute on list links. The @username/list (minus the @) will be appended at the end of this. - # :suppress_lists:: disable auto-linking to lists - # :suppress_no_follow:: Do not add rel="nofollow" to auto-linked items - # :target:: add target="window_name" to auto-linked items - def auto_link_usernames_or_lists(text, options = {}) # :yields: list_or_username - options = options.dup - options[:url_class] ||= DEFAULT_URL_CLASS - options[:list_class] ||= DEFAULT_LIST_CLASS - options[:username_class] ||= DEFAULT_USERNAME_CLASS - options[:username_url_base] ||= "http://twitter.com/" - options[:list_url_base] ||= "http://twitter.com/" - options[:target] ||= DEFAULT_TARGET - - extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow] - - Twitter::Rewriter.rewrite_usernames_or_lists(text) do |at, username, slash_listname| - name = "#{username}#{slash_listname}" - chunk = block_given? ? yield(name) : name - - if slash_listname && !options[:suppress_lists] - href = if options[:list_url_block] - options[:list_url_block].call(name.downcase) - else - "#{html_escape(options[:list_url_base])}#{html_escape(name.downcase)}" - end - %(#{at}#{html_escape(chunk)}) - else - href = if options[:username_url_block] - options[:username_url_block].call(chunk) - else - "#{html_escape(options[:username_url_base])}#{html_escape(chunk)}" - end - %(#{at}#{html_escape(chunk)}) - end - end - end - - # Add tags around the hashtags in the provided text. The - # tags can be controlled with the following entries in the options - # hash: - # - # :url_class:: class to add to all tags - # :hashtag_class:: class to add to hashtag tags - # :hashtag_url_base:: the value for href attribute. The hashtag text (minus the #) will be appended at the end of this. - # :suppress_no_follow:: Do not add rel="nofollow" to auto-linked items - # :target:: add target="window_name" to auto-linked items - def auto_link_hashtags(text, options = {}) # :yields: hashtag_text - options = options.dup - options[:url_class] ||= DEFAULT_URL_CLASS - options[:hashtag_class] ||= DEFAULT_HASHTAG_CLASS - options[:hashtag_url_base] ||= "http://twitter.com/search?q=%23" - options[:target] ||= DEFAULT_TARGET - extra_html = HTML_ATTR_NO_FOLLOW unless options[:suppress_no_follow] - - Twitter::Rewriter.rewrite_hashtags(text) do |hash, hashtag| - hashtag = yield(hashtag) if block_given? - href = if options[:hashtag_url_block] - options[:hashtag_url_block].call(hashtag) - else - "#{options[:hashtag_url_base]}#{html_escape(hashtag)}" - end - %(#{html_escape(hash)}#{html_escape(hashtag)}) - end - end - - # Add tags around the URLs in the provided text. Any - # elements in the href_options hash will be converted to HTML attributes - # and place in the tag. Unless href_options contains :suppress_no_follow - # the rel="nofollow" attribute will be added. - def auto_link_urls_custom(text, href_options = {}) - options = href_options.dup - options[:rel] = "nofollow" unless options.delete(:suppress_no_follow) - options[:class] = options.delete(:url_class) - html_attrs = html_attrs_for_options(options) - - Twitter::Rewriter.rewrite_urls(text) do |url| - href = if options[:link_url_block] - options.delete(:link_url_block).call(url) - else - html_escape(url) - end - %(#{html_escape(url)}) - end - end - - private - - BOOLEAN_ATTRIBUTES = Set.new([:disabled, :readonly, :multiple, :checked]).freeze - - def html_attrs_for_options(options) - html_attrs options.reject{|k, v| OPTIONS_NOT_ATTRIBUTES.include?(k)} - end - - def html_attrs(options) - options.inject("") do |attrs, (key, value)| - if BOOLEAN_ATTRIBUTES.include?(key) - value = value ? key : nil - end - if !value.nil? - attrs << %( #{html_escape(key)}="#{html_escape(value)}") - end - attrs - end - end - - def target_tag(options) - target_option = options[:target].to_s - if target_option.empty? - "" - else - "target=\"#{html_escape(target_option)}\"" - end - end - end -end diff --git a/lib/extractor.rb b/lib/extractor.rb deleted file mode 100644 index 6b01a0a..0000000 --- a/lib/extractor.rb +++ /dev/null @@ -1,207 +0,0 @@ -class String - # Helper function to count the character length by first converting to an - # array. This is needed because with unicode strings, the return value - # of length may be incorrect - def char_length - if respond_to? :codepoints - length - else - chars.kind_of?(Enumerable) ? chars.to_a.size : chars.size - end - end - - # Helper function to convert this string into an array of unicode characters. - def to_char_a - @to_char_a ||= if chars.kind_of?(Enumerable) - chars.to_a - else - char_array = [] - 0.upto(char_length - 1) { |i| char_array << [chars.slice(i)].pack('U') } - char_array - end - end -end - -# Helper functions to return character offsets instead of byte offsets. -class MatchData - def char_begin(n) - if string.respond_to? :codepoints - self.begin(n) - else - string[0, self.begin(n)].char_length - end - end - - def char_end(n) - if string.respond_to? :codepoints - self.end(n) - else - string[0, self.end(n)].char_length - end - end -end - -module Twitter - # A module for including Tweet parsing in a class. This module provides function for the extraction and processing - # of usernames, lists, URLs and hashtags. - module Extractor extend self - - # Extracts a list of all usernames mentioned in the Tweet text. If the - # text is nil or contains no username mentions an empty array - # will be returned. - # - # If a block is given then it will be called for each username. - def extract_mentioned_screen_names(text) # :yields: username - screen_names_only = extract_mentioned_screen_names_with_indices(text).map{|mention| mention[:screen_name] } - screen_names_only.each{|mention| yield mention } if block_given? - screen_names_only - end - - # Extracts a list of all usernames mentioned in the Tweet text - # along with the indices for where the mention ocurred. If the - # text is nil or contains no username mentions, an empty array - # will be returned. - # - # If a block is given, then it will be called with each username, the start - # index, and the end index in the text. - def extract_mentioned_screen_names_with_indices(text) # :yields: username, start, end - return [] unless text - - possible_screen_names = [] - text.to_s.scan(Twitter::Regex[:extract_mentions]) do |before, sn, after| - extract_mentions_match_data = $~ - unless after =~ Twitter::Regex[:end_screen_name_match] - start_position = extract_mentions_match_data.char_begin(2) - 1 - end_position = extract_mentions_match_data.char_end(2) - possible_screen_names << { - :screen_name => sn, - :indices => [start_position, end_position] - } - end - end - if block_given? - possible_screen_names.each do |mention| - yield mention[:screen_name], mention[:indices].first, mention[:indices].last - end - end - possible_screen_names - end - - # Extracts a list of all usernames or lists mentioned in the Tweet text - # along with the indices for where the mention ocurred. If the - # text is nil or contains no username or list mentions, an empty array - # will be returned. - # - # If a block is given, then it will be called with each username, list slug, the start - # index, and the end index in the text. The list_slug will be an empty stirng - # if this is a username mention. - def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end - return [] unless text - - possible_entries = [] - text.to_s.scan(Twitter::Regex[:extract_mentions_or_lists]) do |before, sn, list_slug, after| - extract_mentions_match_data = $~ - unless after =~ Twitter::Regex[:end_screen_name_match] - start_position = extract_mentions_match_data.char_begin(2) - 1 - end_position = extract_mentions_match_data.char_end(list_slug.nil? ? 2 : 3) - possible_entries << { - :screen_name => sn, - :list_slug => list_slug || "", - :indices => [start_position, end_position] - } - end - end - - if block_given? - possible_entries.each do |mention| - yield mention[:screen_name], mention[:list_slug], mention[:indices].first, mention[:indices].last - end - end - - possible_entries - end - - # Extracts the username username replied to in the Tweet text. If the - # text is nil or is not a reply nil will be returned. - # - # If a block is given then it will be called with the username replied to (if any) - def extract_reply_screen_name(text) # :yields: username - return nil unless text - - possible_screen_name = text.match(Twitter::Regex[:extract_reply]) - return unless possible_screen_name.respond_to?(:captures) - screen_name = possible_screen_name.captures.first - yield screen_name if block_given? - screen_name - end - - # Extracts a list of all URLs included in the Tweet text. If the - # text is nil or contains no URLs an empty array - # will be returned. - # - # If a block is given then it will be called for each URL. - def extract_urls(text) # :yields: url - urls_only = extract_urls_with_indices(text).map{|url| url[:url] } - urls_only.each{|url| yield url } if block_given? - urls_only - end - - # Extracts a list of all URLs included in the Tweet text along - # with the indices. If the text is nil or contains no - # URLs an empty array will be returned. - # - # If a block is given then it will be called for each URL. - def extract_urls_with_indices(text) # :yields: url, start, end - return [] unless text - urls = [] - position = 0 - text.to_s.scan(Twitter::Regex[:valid_url]) do |all, before, url, protocol, domain, path, query| - valid_url_match_data = $~ - if protocol && !protocol.empty? - start_position = valid_url_match_data.char_begin(3) - end_position = valid_url_match_data.char_end(3) - urls << { - :url => url, - :indices => [start_position, end_position] - } - end - end - urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last } if block_given? - urls - end - - # Extracts a list of all hashtags included in the Tweet text. If the - # text is nil or contains no hashtags an empty array - # will be returned. The array returned will not include the leading # - # character. - # - # If a block is given then it will be called for each hashtag. - def extract_hashtags(text) # :yields: hashtag_text - hashtags_only = extract_hashtags_with_indices(text).map{|hash| hash[:hashtag] } - hashtags_only.each{|hash| yield hash } if block_given? - hashtags_only - end - - # Extracts a list of all hashtags included in the Tweet text. If the - # text is nil or contains no hashtags an empty array - # will be returned. The array returned will not include the leading # - # character. - # - # If a block is given then it will be called for each hashtag. - def extract_hashtags_with_indices(text) # :yields: hashtag_text, start, end - return [] unless text - - tags = [] - text.scan(Twitter::Regex[:auto_link_hashtags]) do |before, hash, hash_text| - start_position = $~.char_begin(2) - end_position = $~.char_end(3) - tags << { - :hashtag => hash_text, - :indices => [start_position, end_position] - } - end - tags.each{|tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last } if block_given? - tags - end - end -end diff --git a/lib/hithighlighter.rb b/lib/hithighlighter.rb deleted file mode 100644 index eb6e67e..0000000 --- a/lib/hithighlighter.rb +++ /dev/null @@ -1,88 +0,0 @@ -module Twitter - # Module for doing "hit highlighting" on tweets that have been auto-linked already. - # Useful with the results returned from the Search API. - module HitHighlighter extend self - # Default Tag used for hit highlighting - DEFAULT_HIGHLIGHT_TAG = "em" - - # Add tags around the hits provided in the text. The - # hits should be an array of (start, end) index pairs, relative to the original - # text, before auto-linking (but the text may already be auto-linked if desired) - # - # The tags can be overridden using the :tag option. For example: - # - # irb> hit_highlight("test hit here", [[5, 8]], :tag => 'strong') - # => "test hit here" - def hit_highlight(text, hits = [], options = {}) - if hits.empty? - return text - end - - tag_name = options[:tag] || DEFAULT_HIGHLIGHT_TAG - tags = ["<" + tag_name + ">", ""] - - chunks = text.split(/[<>]/) - - result = "" - chunk_index, chunk = 0, chunks[0] - chunk_chars = chunk.respond_to?("mb_chars") ? chunk.mb_chars : chunk.respond_to?("chars") && chunk.chars.respond_to?("[]") ? chunk.chars : chunk - prev_chunks_len = 0 - chunk_cursor = 0 - start_in_chunk = false - for hit, index in hits.flatten.each_with_index do - tag = tags[index % 2] - - placed = false - until chunk.nil? || hit < prev_chunks_len + chunk.length do - result << chunk_chars[chunk_cursor..-1] - if start_in_chunk && hit == prev_chunks_len + chunk_chars.length - result << tag - placed = true - end - - # correctly handle highlights that end on the final character. - if tag_text = chunks[chunk_index+1] - result << "<#{tag_text}>" - end - - prev_chunks_len += chunk_chars.length - chunk_cursor = 0 - chunk_index += 2 - chunk = chunks[chunk_index] - chunk_chars = chunk.respond_to?("mb_chars") ? chunk.mb_chars : chunk.respond_to?("chars") && chunk.chars.respond_to?("[]") ? chunk.chars : chunk - start_in_chunk = false - end - - if !placed && !chunk.nil? - hit_spot = hit - prev_chunks_len - result << chunk_chars[chunk_cursor...hit_spot].to_s + tag - chunk_cursor = hit_spot - if index % 2 == 0 - start_in_chunk = true - else - start_in_chunk = false - end - placed = true - end - - # ultimate fallback, hits that run off the end get a closing tag - if !placed - result << tag - end - end - - if chunk - if chunk_cursor < chunk_chars.length - result << chunk_chars[chunk_cursor..-1] - end - (chunk_index+1).upto(chunks.length-1).each do |index| - result << (index.even? ? chunks[index] : "<#{chunks[index]}>") - end - end - - result - rescue - text - end - end -end diff --git a/lib/regex.rb b/lib/regex.rb deleted file mode 100644 index 6a59bad..0000000 --- a/lib/regex.rb +++ /dev/null @@ -1,261 +0,0 @@ -# encoding: utf-8 -module Twitter - # A collection of regular expressions for parsing Tweet text. The regular expression - # list is frozen at load time to ensure immutability. These reular expressions are - # used throughout the Twitter classes. Special care has been taken to make - # sure these reular expressions work with Tweets in all languages. - class Regex - REGEXEN = {} # :nodoc: - - def self.regex_range(from, to = nil) # :nodoc: - if $RUBY_1_9 - if to - "\\u{#{from.to_s(16).rjust(4, '0')}}-\\u{#{to.to_s(16).rjust(4, '0')}}" - else - "\\u{#{from.to_s(16).rjust(4, '0')}}" - end - else - if to - [from].pack('U') + '-' + [to].pack('U') - else - [from].pack('U') - end - end - end - - # Space is more than %20, U+3000 for example is the full-width space used with Kanji. Provide a short-hand - # to access both the list of characters and a pattern suitible for use with String#split - # Taken from: ActiveSupport::Multibyte::Handlers::UTF8Handler::UNICODE_WHITESPACE - UNICODE_SPACES = [ - (0x0009..0x000D).to_a, # White_Space # Cc [5] .. - 0x0020, # White_Space # Zs SPACE - 0x0085, # White_Space # Cc - 0x00A0, # White_Space # Zs NO-BREAK SPACE - 0x1680, # White_Space # Zs OGHAM SPACE MARK - 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR - (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE - 0x2028, # White_Space # Zl LINE SEPARATOR - 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR - 0x202F, # White_Space # Zs NARROW NO-BREAK SPACE - 0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE - 0x3000, # White_Space # Zs IDEOGRAPHIC SPACE - ].flatten.freeze - SPACE_CHAR_CLASS_VALUE = Regexp.new(UNICODE_SPACES.collect{ |e| [e].pack 'U*' }.join('')) - REGEXEN[:spaces] = Regexp.new(UNICODE_SPACES.collect{ |e| [e].pack 'U*' }.join('|')) - - REGEXEN[:at_signs] = /[@@]/ - REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(?=(.|$))/o - REGEXEN[:extract_mentions_or_lists] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?(?=(.|$))/o - REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o - - major, minor, patch = RUBY_VERSION.split('.') - if major.to_i >= 2 || major.to_i == 1 && minor.to_i >= 9 || (defined?(RUBY_ENGINE) && ["jruby", "rbx"].include?(RUBY_ENGINE)) - REGEXEN[:list_name] = /[a-zA-Z][a-zA-Z0-9_\-\u0080-\u00ff]{0,24}/ - else - # This line barfs at compile time in Ruby 1.9, JRuby, or Rubinius. - REGEXEN[:list_name] = eval("/[a-zA-Z][a-zA-Z0-9_\\-\x80-\xff]{0,24}/") - end - - # Latin accented characters - # Excludes 0xd7 from the range (the multiplication sign, confusable with "x"). - # Also excludes 0xf7, the division sign - LATIN_ACCENTS = [ - regex_range(0xc0, 0xd6), - regex_range(0xd8, 0xf6), - regex_range(0xf8, 0xff), - regex_range(0x015f) - ].join('').freeze - - NON_LATIN_HASHTAG_CHARS = [ - # Cyrillic (Russian, Ukrainian, etc.) - regex_range(0x0400, 0x04ff), # Cyrillic - regex_range(0x0500, 0x0527), # Cyrillic Supplement - regex_range(0x2de0, 0x2dff), # Cyrillic Extended A - regex_range(0xa640, 0xa69f), # Cyrillic Extended B - # Hangul (Korean) - regex_range(0x1100, 0x11ff), # Hangul Jamo - regex_range(0x3130, 0x3185), # Hangul Compatibility Jamo - regex_range(0xA960, 0xA97F), # Hangul Jamo Extended-A - regex_range(0xAC00, 0xD7AF), # Hangul Syllables - regex_range(0xD7B0, 0xD7FF), # Hangul Jamo Extended-B - regex_range(0xFFA1, 0xFFDC) # Half-width Hangul - ].join('').freeze - REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o - - REGEXEN[:end_screen_name_match] = /^(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o - - CJ_HASHTAG_CHARACTERS = [ - regex_range(0x30A1, 0x30FA), regex_range(0x30FC, 0x30FE), # Katakana (full-width) - regex_range(0xFF66, 0xFF9F), # Katakana (half-width) - regex_range(0xFF10, 0xFF19), regex_range(0xFF21, 0xFF3A), regex_range(0xFF41, 0xFF5A), # Latin (full-width) - regex_range(0x3041, 0x3096), regex_range(0x3099, 0x309E), # Hiragana - regex_range(0x3400, 0x4DBF), # Kanji (CJK Extension A) - regex_range(0x4E00, 0x9FFF), # Kanji (Unified) - regex_range(0x20000, 0x2A6DF), # Kanji (CJK Extension B) - regex_range(0x2A700, 0x2B73F), # Kanji (CJK Extension C) - regex_range(0x2B740, 0x2B81F), # Kanji (CJK Extension D) - regex_range(0x2F800, 0x2FA1F), regex_range(0x3005), regex_range(0x303B) # Kanji (CJK supplement) - ].join('').freeze - - HASHTAG_BOUNDARY = /(?:\A|\z|#{REGEXEN[:spaces]}|「|」|。|、|\.|!|\?|!|?|,)/ - - # A hashtag must contain latin characters, numbers and underscores, but not all numbers. - HASHTAG_ALPHA = /[a-z_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/io - HASHTAG_ALPHANUMERIC = /[a-z0-9_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/io - - HASHTAG = /(#{HASHTAG_BOUNDARY})(#|#)(#{HASHTAG_ALPHANUMERIC}*#{HASHTAG_ALPHA}#{HASHTAG_ALPHANUMERIC}*)(?=#{HASHTAG_BOUNDARY})/io - - REGEXEN[:auto_link_hashtags] = /#{HASHTAG}/io - - REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^|RT:?)([@@]+)([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?/o - REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\<\|:~\(|\}:o\{|:\-\[|\>o\<|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/ - - # URL related hash regex collection - REGEXEN[:valid_preceding_chars] = /(?:[^-\/"':!=A-Z0-9_@@]|^|\:)/i - - DOMAIN_EXCLUDE_PART = "[:punct:][:space:][:blank:]#{[0x00A0].pack('U')}" - REGEXEN[:valid_subdomain] = /(?:[^#{DOMAIN_EXCLUDE_PART}](?:[_-]|[^#{DOMAIN_EXCLUDE_PART}])*)?[^#{DOMAIN_EXCLUDE_PART}]\./ - REGEXEN[:valid_domain_name] = /(?:[^#{DOMAIN_EXCLUDE_PART}](?:[-]|[^#{DOMAIN_EXCLUDE_PART}])*)?[^#{DOMAIN_EXCLUDE_PART}]/ - REGEXEN[:valid_domain] = /#{REGEXEN[:valid_subdomain]}*#{REGEXEN[:valid_domain_name]}\.(?:xn--[a-z0-9]{2,}|[a-z]{2,})(?::[0-9]+)?/i - - REGEXEN[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~|#{LATIN_ACCENTS}]/i - # Allow URL paths to contain balanced parens - # 1. Used in Wikipedia URLs like /Primer_(film) - # 2. Used in IIS sessions like /S(dfd346)/ - REGEXEN[:wikipedia_disambiguation] = /(?:\(#{REGEXEN[:valid_general_url_path_chars]}+\))/i - # Allow @ in a url, but only in the middle. Catch things like http://example.com/@user - REGEXEN[:valid_url_path_chars] = /(?: - #{REGEXEN[:wikipedia_disambiguation]}| - @#{REGEXEN[:valid_general_url_path_chars]}+\/| - [\.,]#{REGEXEN[:valid_general_url_path_chars]}?| - #{REGEXEN[:valid_general_url_path_chars]}+ - )/ix - # Valid end-of-path chracters (so /foo. does not gobble the period). - # 1. Allow =&# for empty URL parameters and other URL-join artifacts - REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=_#\/\+\-#{LATIN_ACCENTS}]|#{REGEXEN[:wikipedia_disambiguation]}/io - REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i - REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#\/]/i - REGEXEN[:valid_url] = %r{ - ( # $1 total match - (#{REGEXEN[:valid_preceding_chars]}) # $2 Preceeding chracter - ( # $3 URL - (https?:\/\/) # $4 Protocol - (#{REGEXEN[:valid_domain]}) # $5 Domain(s) and optional post number - (/ - (?: - #{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}| # 1+ path chars and a valid last char - #{REGEXEN[:valid_url_path_chars]}+#{REGEXEN[:valid_url_path_ending_chars]}?| # Optional last char to handle /@foo/ case - #{REGEXEN[:valid_url_path_ending_chars]} # Just a # case - )? - )? # $6 URL Path and anchor - (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $7 Query String - ) - ) - }iox; - - # These URL validation pattern strings are based on the ABNF from RFC 3986 - REGEXEN[:validate_url_unreserved] = /[a-z0-9\-._~]/i - REGEXEN[:validate_url_pct_encoded] = /(?:%[0-9a-f]{2})/i - REGEXEN[:validate_url_sub_delims] = /[!$&'()*+,;=]/i - REGEXEN[:validate_url_pchar] = /(?: - #{REGEXEN[:validate_url_unreserved]}| - #{REGEXEN[:validate_url_pct_encoded]}| - #{REGEXEN[:validate_url_sub_delims]}| - :|@ - )/iox - - REGEXEN[:validate_url_scheme] = /(?:[a-z][a-z0-9+\-.]*)/i - REGEXEN[:validate_url_userinfo] = /(?: - #{REGEXEN[:validate_url_unreserved]}| - #{REGEXEN[:validate_url_pct_encoded]}| - #{REGEXEN[:validate_url_sub_delims]}| - : - )*/iox - - REGEXEN[:validate_url_dec_octet] = /(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))/i - REGEXEN[:validate_url_ipv4] = - /(?:#{REGEXEN[:validate_url_dec_octet]}(?:\.#{REGEXEN[:validate_url_dec_octet]}){3})/iox - - # Punting on real IPv6 validation for now - REGEXEN[:validate_url_ipv6] = /(?:\[[a-f0-9:\.]+\])/i - - # Also punting on IPvFuture for now - REGEXEN[:validate_url_ip] = /(?: - #{REGEXEN[:validate_url_ipv4]}| - #{REGEXEN[:validate_url_ipv6]} - )/iox - - # This is more strict than the rfc specifies - REGEXEN[:validate_url_subdomain_segment] = /(?:[a-z0-9](?:[a-z0-9_\-]*[a-z0-9])?)/i - REGEXEN[:validate_url_domain_segment] = /(?:[a-z0-9](?:[a-z0-9\-]*[a-z0-9])?)/i - REGEXEN[:validate_url_domain_tld] = /(?:[a-z](?:[a-z0-9\-]*[a-z0-9])?)/i - REGEXEN[:validate_url_domain] = /(?:(?:#{REGEXEN[:validate_url_subdomain_segment]}\.)* - (?:#{REGEXEN[:validate_url_domain_segment]}\.) - #{REGEXEN[:validate_url_domain_tld]})/iox - - REGEXEN[:validate_url_host] = /(?: - #{REGEXEN[:validate_url_ip]}| - #{REGEXEN[:validate_url_domain]} - )/iox - - # Unencoded internationalized domains - this doesn't check for invalid UTF-8 sequences - REGEXEN[:validate_url_unicode_subdomain_segment] = - /(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9_\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix - REGEXEN[:validate_url_unicode_domain_segment] = - /(?:(?:[a-z0-9]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix - REGEXEN[:validate_url_unicode_domain_tld] = - /(?:(?:[a-z]|[^\x00-\x7f])(?:(?:[a-z0-9\-]|[^\x00-\x7f])*(?:[a-z0-9]|[^\x00-\x7f]))?)/ix - REGEXEN[:validate_url_unicode_domain] = /(?:(?:#{REGEXEN[:validate_url_unicode_subdomain_segment]}\.)* - (?:#{REGEXEN[:validate_url_unicode_domain_segment]}\.) - #{REGEXEN[:validate_url_unicode_domain_tld]})/iox - - REGEXEN[:validate_url_unicode_host] = /(?: - #{REGEXEN[:validate_url_ip]}| - #{REGEXEN[:validate_url_unicode_domain]} - )/iox - - REGEXEN[:validate_url_port] = /[0-9]{1,5}/ - - REGEXEN[:validate_url_unicode_authority] = %r{ - (?:(#{REGEXEN[:validate_url_userinfo]})@)? # $1 userinfo - (#{REGEXEN[:validate_url_unicode_host]}) # $2 host - (?::(#{REGEXEN[:validate_url_port]}))? # $3 port - }iox - - REGEXEN[:validate_url_authority] = %r{ - (?:(#{REGEXEN[:validate_url_userinfo]})@)? # $1 userinfo - (#{REGEXEN[:validate_url_host]}) # $2 host - (?::(#{REGEXEN[:validate_url_port]}))? # $3 port - }iox - - REGEXEN[:validate_url_path] = %r{(/#{REGEXEN[:validate_url_pchar]}*)*}i - REGEXEN[:validate_url_query] = %r{(#{REGEXEN[:validate_url_pchar]}|/|\?)*}i - REGEXEN[:validate_url_fragment] = %r{(#{REGEXEN[:validate_url_pchar]}|/|\?)*}i - - # Modified version of RFC 3986 Appendix B - REGEXEN[:validate_url_unencoded] = %r{ - \A # Full URL - (?: - ([^:/?#]+): # $1 Scheme - ) - (?:// - ([^/?#]*) # $2 Authority - ) - ([^?#]*) # $3 Path - (?: - \?([^#]*) # $4 Query - )? - (?: - \#(.*) # $5 Fragment - )?\Z - }ix - - REGEXEN.each_pair{|k,v| v.freeze } - - # Return the regular expression for a given key. If the key - # is not a known symbol a nil will be returned. - def self.[](key) - REGEXEN[key] - end - end -end diff --git a/lib/rewriter.rb b/lib/rewriter.rb deleted file mode 100644 index a4f8aab..0000000 --- a/lib/rewriter.rb +++ /dev/null @@ -1,63 +0,0 @@ -module Twitter - # A module provides base methods to rewrite usernames, lists, hashtags and URLs. - module Rewriter extend self - def rewrite(text, options = {}) - [:hashtags, :urls, :usernames_or_lists].inject(text) do |key| - send("rewrite_#{key}", text, &options[key]) if options[key] - end - end - - def rewrite_usernames_or_lists(text) - new_text = "" - - # this -1 flag allows strings ending in ">" to work - text.to_s.split(/[<>]/, -1).each_with_index do |chunk, index| - if index != 0 - new_text << ((index % 2 == 0) ? ">" : "<") - end - - if index % 4 != 0 - new_text << chunk - else - new_text << chunk.gsub(Twitter::Regex[:auto_link_usernames_or_lists]) do - before, at, user, slash_listname, after = $1, $2, $3, $4, $' - if slash_listname - # the link is a list - "#{before}#{yield(at, user, slash_listname)}" - else - if after =~ Twitter::Regex[:end_screen_name_match] - # Followed by something that means we don't autolink - "#{before}#{at}#{user}#{slash_listname}" - else - # this is a screen name - "#{before}#{yield(at, user, nil)}#{slash_listname}" - end - end - end - end - end - - new_text - end - - def rewrite_hashtags(text) - text.to_s.gsub(Twitter::Regex[:auto_link_hashtags]) do - before = $1 - hash = $2 - hashtag = $3 - "#{before}#{yield(hash, hashtag)}" - end - end - - def rewrite_urls(text) - text.to_s.gsub(Twitter::Regex[:valid_url]) do - all, before, url, protocol, domain, path, query_string = $1, $2, $3, $4, $5, $6, $7 - if protocol && !protocol.empty? - "#{before}#{yield(url)}" - else - all - end - end - end - end -end diff --git a/lib/twitter-text.rb b/lib/twitter-text.rb deleted file mode 100644 index bfd38af..0000000 --- a/lib/twitter-text.rb +++ /dev/null @@ -1,20 +0,0 @@ -major, minor, patch = RUBY_VERSION.split('.') - -$RUBY_1_9 = if major.to_i == 1 && minor.to_i < 9 - # Ruby 1.8 KCODE check. Not needed on 1.9 and later. - raise("twitter-text requires the $KCODE variable be set to 'UTF8' or 'u'") unless $KCODE[0].chr =~ /u/i - false -else - true -end - -require 'active_support' -require 'active_support/core_ext/string/multibyte.rb' - -require File.join(File.dirname(__FILE__), 'regex') -require File.join(File.dirname(__FILE__), 'rewriter') -require File.join(File.dirname(__FILE__), 'autolink') -require File.join(File.dirname(__FILE__), 'extractor') -require File.join(File.dirname(__FILE__), 'unicode') -require File.join(File.dirname(__FILE__), 'validation') -require File.join(File.dirname(__FILE__), 'hithighlighter') diff --git a/lib/unicode.rb b/lib/unicode.rb deleted file mode 100644 index b4b3422..0000000 --- a/lib/unicode.rb +++ /dev/null @@ -1,26 +0,0 @@ -module Twitter - # This module lazily defines constants of the form Uxxxx for all Unicode - # codepoints from U0000 to U10FFFF. The value of each constant is the - # UTF-8 string for the codepoint. - # Examples: - # copyright = Unicode::U00A9 - # euro = Unicode::U20AC - # infinity = Unicode::U221E - # - module Unicode - CODEPOINT_REGEX = /^U_?([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})$/ - - def self.const_missing(name) - # Check that the constant name is of the right form: U0000 to U10FFFF - if name.to_s =~ CODEPOINT_REGEX - # Convert the codepoint to an immutable UTF-8 string, - # define a real constant for that value and return the value - #p name, name.class - const_set(name, [$1.to_i(16)].pack("U").freeze) - else # Raise an error for constants that are not Unicode. - raise NameError, "Uninitialized constant: Unicode::#{name}" - end - end - end - -end diff --git a/lib/validation.rb b/lib/validation.rb deleted file mode 100644 index ee946c8..0000000 --- a/lib/validation.rb +++ /dev/null @@ -1,102 +0,0 @@ -module Twitter - module Validation extend self - MAX_LENGTH = 140 - - # Character not allowed in Tweets - INVALID_CHARACTERS = [ - 0xFFFE, 0xFEFF, # BOM - 0xFFFF, # Special - 0x202A, 0x202B, 0x202C, 0x202D, 0x202E # Directional change - ].map{|cp| [cp].pack('U') }.freeze - - # Returns the length of the string as it would be displayed. This is equivilent to the length of the Unicode NFC - # (See: http://www.unicode.org/reports/tr15). This is needed in order to consistently calculate the length of a - # string no matter which actual form was transmitted. For example: - # - # U+0065 Latin Small Letter E - # + U+0301 Combining Acute Accent - # ---------- - # = 2 bytes, 2 characters, displayed as é (1 visual glyph) - # … The NFC of {U+0065, U+0301} is {U+00E9}, which is a single chracter and a +display_length+ of 1 - # - # The string could also contain U+00E9 already, in which case the canonicalization will not change the value. - # - def tweet_length(text) - ActiveSupport::Multibyte::Chars.new(text).normalize(:c).length - end - - # Check the text for any reason that it may not be valid as a Tweet. This is meant as a pre-validation - # before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation - # will allow quicker feedback. - # - # Returns false if this text is valid. Otherwise one of the following Symbols will be returned: - # - # :too_long:: if the text is too long - # :empty:: if the text is nil or empty - # :invalid_characters:: if the text contains non-Unicode or any of the disallowed Unicode characters - def tweet_invalid?(text) - return :empty if !text || text.empty? - begin - return :too_long if tweet_length(text) > MAX_LENGTH - return :invalid_characters if INVALID_CHARACTERS.any?{|invalid_char| text.include?(invalid_char) } - rescue ArgumentError, ActiveSupport::Multibyte::EncodingError => e - # non-Unicode value. - return :invalid_characters - end - - return false - end - - def valid_tweet_text?(text) - !tweet_invalid?(text) - end - - def valid_username?(username) - return false if !username || username.empty? - - extracted = Twitter::Extractor.extract_mentioned_screen_names(username) - # Should extract the username minus the @ sign, hence the [1..-1] - extracted.size == 1 && extracted.first == username[1..-1] - end - - VALID_LIST_RE = /\A#{Twitter::Regex[:auto_link_usernames_or_lists]}\z/o - def valid_list?(username_list) - match = username_list.match(VALID_LIST_RE) - # Must have matched and had nothing before or after - !!(match && match[1] == "" && match[4] && !match[4].empty?) - end - - def valid_hashtag?(hashtag) - return false if !hashtag || hashtag.empty? - - extracted = Twitter::Extractor.extract_hashtags(hashtag) - # Should extract the hashtag minus the # sign, hence the [1..-1] - extracted.size == 1 && extracted.first == hashtag[1..-1] - end - - def valid_url?(url, unicode_domains=true) - return false if !url || url.empty? - - url_parts = url.match(Twitter::Regex[:validate_url_unencoded]) - return false unless (url_parts && url_parts.to_s == url) - - scheme, authority, path, query, fragment = url_parts.captures - - return false unless (valid_match?(scheme, Twitter::Regex[:validate_url_scheme]) && scheme.match(/\Ahttps?\Z/i) && - valid_match?(path, Twitter::Regex[:validate_url_path]) && - valid_match?(query, Twitter::Regex[:validate_url_query], true) && - valid_match?(fragment, Twitter::Regex[:validate_url_fragment], true)) - - return (unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_unicode_authority])) || - (!unicode_domains && valid_match?(authority, Twitter::Regex[:validate_url_authority])) - end - - private - - def valid_match?(string, regex, optional=false) - return (string && string.match(regex) && $~.to_s == string) unless optional - - !(string && (!string.match(regex) || $~.to_s != string)) - end - end -end diff --git a/script/destroy b/script/destroy deleted file mode 100755 index 40901a8..0000000 --- a/script/destroy +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby -APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..')) - -begin - require 'rubigen' -rescue LoadError - require 'rubygems' - require 'rubigen' -end -require 'rubigen/scripts/destroy' - -ARGV.shift if ['--help', '-h'].include?(ARGV[0]) -RubiGen::Base.use_component_sources! [:newgem_simple, :test_unit] -RubiGen::Scripts::Destroy.new.run(ARGV) diff --git a/script/generate b/script/generate deleted file mode 100755 index 5c8ed01..0000000 --- a/script/generate +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby -APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..')) - -begin - require 'rubigen' -rescue LoadError - require 'rubygems' - require 'rubigen' -end -require 'rubigen/scripts/generate' - -ARGV.shift if ['--help', '-h'].include?(ARGV[0]) -RubiGen::Base.use_component_sources! [:newgem_simple, :test_unit] -RubiGen::Scripts::Generate.new.run(ARGV) diff --git a/spec/autolinking_spec.rb b/spec/autolinking_spec.rb deleted file mode 100644 index bb88ed5..0000000 --- a/spec/autolinking_spec.rb +++ /dev/null @@ -1,620 +0,0 @@ -#encoding: UTF-8 -# require File.dirname(__FILE__) + '/spec_helper' -require 'spec_helper' - -class TestAutolink - include Twitter::Autolink -end - -describe Twitter::Autolink do - def original_text; end - def url; end - - describe "auto_link_custom" do - before do - @autolinked_text = TestAutolink.new.auto_link(original_text) if original_text - end - - describe "username autolinking" do - context "username preceded by a space" do - def original_text; "hello @jacob"; end - - it "should be linked" do - @autolinked_text.should link_to_screen_name('jacob') - end - end - - context "username at beginning of line" do - def original_text; "@jacob you're cool"; end - - it "should be linked" do - @autolinked_text.should link_to_screen_name('jacob') - end - end - - context "username preceded by word character" do - def original_text; "meet@the beach"; end - - it "should not be linked" do - Nokogiri::HTML(@autolinked_text).search('a').should be_empty - end - end - - context "username preceded by non-word character" do - def original_text; "great.@jacob"; end - - it "should be linked" do - @autolinked_text.should link_to_screen_name('jacob') - end - end - - context "username containing non-word characters" do - def original_text; "@zach&^$%^"; end - - it "should not be linked" do - @autolinked_text.should link_to_screen_name('zach') - end - end - - context "username over twenty characters" do - def original_text - @twenty_character_username = "zach" * 5 - "@" + @twenty_character_username + "1" - end - - it "should not be linked" do - @autolinked_text.should link_to_screen_name(@twenty_character_username) - end - end - - context "username followed by japanese" do - def original_text; "@jacobの"; end - - it "should be linked" do - @autolinked_text.should link_to_screen_name('jacob') - end - end - - context "username preceded by japanese" do - def original_text; "あ@matz"; end - - it "should be linked" do - @autolinked_text.should link_to_screen_name('matz') - end - end - - context "username surrounded by japanese" do - def original_text; "あ@yoshimiの"; end - - it "should be linked" do - @autolinked_text.should link_to_screen_name('yoshimi') - end - end - - context "username using full-width at-sign" do - def original_text - "#{[0xFF20].pack('U')}jacob" - end - - it "should be linked" do - @autolinked_text.should link_to_screen_name('jacob') - end - end - end - - describe "list path autolinking" do - - context "when List is not available" do - it "should not be linked" do - @autolinked_text = TestAutolink.new.auto_link_usernames_or_lists("hello @jacob/my-list", :suppress_lists => true) - @autolinked_text.should_not link_to_list_path('jacob/my-list') - @autolinked_text.should include('my-list') - end - end - - context "slug preceded by a space" do - def original_text; "hello @jacob/my-list"; end - - it "should be linked" do - @autolinked_text.should link_to_list_path('jacob/my-list') - end - end - - context "username followed by a slash but no list" do - def original_text; "hello @jacob/ my-list"; end - - it "should NOT be linked" do - @autolinked_text.should_not link_to_list_path('jacob/my-list') - @autolinked_text.should link_to_screen_name('jacob') - end - end - - context "empty username followed by a list" do - def original_text; "hello @/my-list"; end - - it "should NOT be linked" do - Nokogiri::HTML(@autolinked_text).search('a').should be_empty - end - end - - context "list slug at beginning of line" do - def original_text; "@jacob/my-list"; end - - it "should be linked" do - @autolinked_text.should link_to_list_path('jacob/my-list') - end - end - - context "username preceded by alpha-numeric character" do - def original_text; "meet@the/beach"; end - - it "should not be linked" do - Nokogiri::HTML(@autolinked_text).search('a').should be_empty - end - end - - context "username preceded by non-word character" do - def original_text; "great.@jacob/my-list"; end - - it "should be linked" do - @autolinked_text = TestAutolink.new.auto_link("great.@jacob/my-list") - @autolinked_text.should link_to_list_path('jacob/my-list') - end - end - - context "username containing non-word characters" do - def original_text; "@zach/test&^$%^"; end - - it "should be linked" do - @autolinked_text.should link_to_list_path('zach/test') - end - end - - context "username over twenty characters" do - def original_text - @twentyfive_character_list = "jack/" + ("a" * 25) - "@#{@twentyfive_character_list}12345" - end - - it "should be linked" do - @autolinked_text.should link_to_list_path(@twentyfive_character_list) - end - end - end - - describe "hashtag autolinking" do - context "with an all numeric hashtag" do - def original_text; "#123"; end - - it "should not be linked" do - @autolinked_text.should_not have_autolinked_hashtag('#123') - end - end - - context "with a hashtag with alphanumeric characters" do - def original_text; "#ab1d"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_hashtag('#ab1d') - end - end - - context "with a hashtag with underscores" do - def original_text; "#a_b_c_d"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_hashtag(original_text) - end - end - - context "with a hashtag that is preceded by a word character" do - def original_text; "ab#cd"; end - - it "should not be linked" do - @autolinked_text.should_not have_autolinked_hashtag(original_text) - end - end - - context "with a page anchor in a url" do - def original_text; "Here's my url: http://foobar.com/#home"; end - - it "should not link the hashtag" do - @autolinked_text.should_not have_autolinked_hashtag('#home') - end - - it "should link the url" do - @autolinked_text.should have_autolinked_url('http://foobar.com/#home') - end - end - - context "with a hashtag that starts with a number but has word characters" do - def original_text; "#2ab"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_hashtag(original_text) - end - end - - context "with multiple valid hashtags" do - def original_text; "I'm frickin' awesome #ab #cd #ef"; end - - it "links each hashtag" do - @autolinked_text.should have_autolinked_hashtag('#ab') - @autolinked_text.should have_autolinked_hashtag('#cd') - @autolinked_text.should have_autolinked_hashtag('#ef') - end - end - - context "with a hashtag preceded by a ." do - def original_text; "ok, great.#abc"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_hashtag('#abc') - end - end - - context "with a hashtag preceded by a &" do - def original_text; "&#nbsp;"; end - - it "should not be linked" do - @autolinked_text.should_not have_autolinked_hashtag('#nbsp;') - end - end - - context "with a hashtag that ends in an !" do - def original_text; "#great!"; end - - it "should be linked, but should not include the !" do - @autolinked_text.should have_autolinked_hashtag('#great') - end - end - - context "with a hashtag followed by Japanese" do - def original_text; "#twj_devの"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_hashtag('#twj_devの') - end - end - - context "with a hashtag preceded by a full-width space" do - def original_text; "#{[0x3000].pack('U')}#twj_dev"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_hashtag('#twj_dev') - end - end - - context "with a hashtag followed by a full-width space" do - def original_text; "#twj_dev#{[0x3000].pack('U')}"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_hashtag('#twj_dev') - end - end - - context "with a hashtag using full-width hash" do - def original_text; "#{[0xFF03].pack('U')}twj_dev"; end - - it "should be linked" do - link = Nokogiri::HTML(@autolinked_text).search('a') - (link.inner_text.respond_to?(:force_encoding) ? link.inner_text.force_encoding("utf-8") : link.inner_text).should == "#{[0xFF03].pack('U')}twj_dev" - link.first['href'].should == 'http://twitter.com/search?q=%23twj_dev' - end - end - - context "with a hashtag containing an accented latin character" do - def original_text - # the hashtag is #éhashtag - "##{[0x00e9].pack('U')}hashtag" - end - - it "should be linked" do - @autolinked_text.should == "#éhashtag" - end - end - - end - - describe "URL autolinking" do - def url; "http://www.google.com"; end - - context "when embedded in plain text" do - def original_text; "On my search engine #{url} I found good links."; end - - it "should be linked" do - @autolinked_text.should have_autolinked_url(url) - end - end - - context "when surrounded by Japanese;" do - def original_text; "いまなにしてる#{url}いまなにしてる"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_url(url) - end - end - - context "with a path surrounded by parentheses;" do - def original_text; "I found a neatness (#{url})"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_url(url) - end - - context "when the URL ends with a slash;" do - def url; "http://www.google.com/"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_url(url) - end - end - - context "when the URL has a path;" do - def url; "http://www.google.com/fsdfasdf"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_url(url) - end - end - end - - context "when path contains parens" do - def original_text; "I found a neatness (#{url})"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_url(url) - end - - context "wikipedia" do - def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_url(url) - end - end - - context "IIS session" do - def url; "http://msdn.com/S(deadbeef)/page.htm"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_url(url) - end - end - - context "unbalanced parens" do - def url; "http://example.com/i_has_a_("; end - - it "should be linked" do - @autolinked_text.should have_autolinked_url("http://example.com/i_has_a_") - end - end - - context "balanced parens with a double quote inside" do - def url; "http://foo.bar/foo_(\")_bar" end - - it "should be linked" do - @autolinked_text.should have_autolinked_url("http://foo.bar/foo_") - end - end - - context "balanced parens hiding XSS" do - def url; 'http://x.xx/("style="color:red"onmouseover="alert(1)' end - - it "should be linked" do - @autolinked_text.should have_autolinked_url("http://x.xx/") - end - end - end - - context "when preceded by a :" do - def original_text; "Check this out @hoverbird:#{url}"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_url(url) - end - end - - context "with a URL ending in allowed punctuation" do - it "does not consume ending punctuation" do - matcher = TestAutolink.new - %w| ? ! , . : ; ] ) } = \ ' |.each do |char| - matcher.auto_link("#{url}#{char}").should have_autolinked_url(url) - end - end - end - - context "with a URL preceded in forbidden characters" do - it "should not be linked" do - matcher = TestAutolink.new - %w| \ ' / ! = |.each do |char| - matcher.auto_link("#{char}#{url}").should_not have_autolinked_url(url) - end - end - end - - context "when embedded in a link tag" do - def original_text; "#{url}"; end - - it "should be linked" do - @autolinked_text.should have_autolinked_url(url) - end - end - - context "with multiple URLs" do - def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end - - it "should autolink each one" do - @autolinked_text.should have_autolinked_url('http://www.links.org') - @autolinked_text.should have_autolinked_url('http://www.foo.org') - end - end - - context "with multiple URLs in different formats" do - def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end - - it "should autolink each one, in the proper order" do - @autolinked_text.should have_autolinked_url('http://foo.com') - @autolinked_text.should have_autolinked_url('https://bar.com') - @autolinked_text.should have_autolinked_url('http://mail.foobar.org') - end - end - - context "with a URL having a long TLD" do - def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end - - it "should autolink it" do - @autolinked_text.should have_autolinked_url('http://golem.mobi/0912/71607.html') - end - end - - context "with a url lacking the protocol" do - def original_text; "I like www.foobar.com dudes"; end - - it "does not link at all" do - link = Nokogiri::HTML(@autolinked_text).search('a') - link.should be_empty - end - end - - context "with a @ in a URL" do - context "with XSS attack" do - def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end - - it "should not allow XSS follwing @" do - @autolinked_text.should have_autolinked_url('http://x.xx/') - end - end - - context "with a username not followed by a /" do - def original_text; 'http://example.com/@foobar'; end - - it "should link small url and username" do - @autolinked_text.should have_autolinked_url('http://example.com/') - @autolinked_text.should link_to_screen_name('foobar') - end - end - - context "with a username followed by a /" do - def original_text; 'http://example.com/@foobar/'; end - - it "should not link the username but link full url" do - @autolinked_text.should have_autolinked_url('http://example.com/@foobar/') - @autolinked_text.should_not link_to_screen_name('foobar') - end - end - end - - context "regex engine quirks" do - context "does not spiral out of control on repeated periods" do - def original_text; "Test a ton of periods http://example.com/path.........................................."; end - - it "should autolink" do - @autolinked_text.should have_autolinked_url('http://example.com/path') - end - end - - context "does not spiral out of control on repeated dashes" do - def original_text; "Single char file ext http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188"; end - - it "should autolink" do - @autolinked_text.should have_autolinked_url('http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188') - end - end - end - - end - - describe "Autolink all" do - before do - @linker = TestAutolink.new - end - - it "should allow url/hashtag overlap" do - auto_linked = @linker.auto_link("http://twitter.com/#search") - auto_linked.should have_autolinked_url('http://twitter.com/#search') - end - - it "should not add invalid option in HTML tags" do - auto_linked = @linker.auto_link("http://twitter.com/ is a URL, not a hashtag", :hashtag_class => 'hashtag_classname') - auto_linked.should have_autolinked_url('http://twitter.com/') - auto_linked.should_not include('hashtag_class') - auto_linked.should_not include('hashtag_classname') - end - - end - - end - - describe "autolinking options" do - it "should apply :url_class as a CSS class" do - linked = TestAutolink.new.auto_link("http://example.com/", :url_class => 'myclass') - linked.should have_autolinked_url('http://example.com/') - linked.should match(/myclass/) - end - - it "should add rel=nofollow by default" do - linked = TestAutolink.new.auto_link("http://example.com/") - linked.should have_autolinked_url('http://example.com/') - linked.should match(/nofollow/) - end - - it "should not add rel=nofollow when passed :suppress_no_follow" do - linked = TestAutolink.new.auto_link("http://example.com/", :suppress_no_follow => true) - linked.should have_autolinked_url('http://example.com/') - linked.should_not match(/nofollow/) - end - - it "should not add a target attribute by default" do - linked = TestAutolink.new.auto_link("http://example.com/") - linked.should have_autolinked_url('http://example.com/') - linked.should_not match(/target=/) - end - - it "should respect the :target option" do - linked = TestAutolink.new.auto_link("http://example.com/", :target => 'mywindow') - linked.should have_autolinked_url('http://example.com/') - linked.should match(/target="mywindow"/) - end - - it "should customize href by username_url_block option" do - linked = TestAutolink.new.auto_link("@test", :username_url_block => lambda{|a| "dummy"}) - linked.should have_autolinked_url('dummy', 'test') - end - - it "should customize href by list_url_block option" do - linked = TestAutolink.new.auto_link("@test/list", :list_url_block => lambda{|a| "dummy"}) - linked.should have_autolinked_url('dummy', 'test/list') - end - - it "should customize href by hashtag_url_block option" do - linked = TestAutolink.new.auto_link("#hashtag", :hashtag_url_block => lambda{|a| "dummy"}) - linked.should have_autolinked_url('dummy', '#hashtag') - end - - it "should customize href by link_url_block option" do - linked = TestAutolink.new.auto_link("http://example.com/", :link_url_block => lambda{|a| "dummy"}) - linked.should have_autolinked_url('dummy', 'http://example.com/') - end - end - - describe "html_escape" do - before do - @linker = TestAutolink.new - end - it "should escape html entities properly" do - @linker.html_escape("&").should == "&" - @linker.html_escape(">").should == ">" - @linker.html_escape("<").should == "<" - @linker.html_escape("\"").should == """ - @linker.html_escape("'").should == "'" - @linker.html_escape("&<>\"").should == "&<>"" - @linker.html_escape("
").should == "<div>" - @linker.html_escape("a&b").should == "a&b" - @linker.html_escape("twitter & friends").should == "<a href="http://twitter.com" target="_blank">twitter & friends</a>" - @linker.html_escape("&").should == "&amp;" - @linker.html_escape(nil).should == nil - end - end - -end diff --git a/spec/extractor_spec.rb b/spec/extractor_spec.rb deleted file mode 100644 index 3a9c8a5..0000000 --- a/spec/extractor_spec.rb +++ /dev/null @@ -1,299 +0,0 @@ -#encoding: UTF-8 -require File.dirname(__FILE__) + '/spec_helper' - -class TestExtractor - include Twitter::Extractor -end - -describe Twitter::Extractor do - before do - @extractor = TestExtractor.new - end - - describe "mentions" do - context "single screen name alone " do - it "should be linked" do - @extractor.extract_mentioned_screen_names("@alice").should == ["alice"] - end - - it "should be linked with _" do - @extractor.extract_mentioned_screen_names("@alice_adams").should == ["alice_adams"] - end - - it "should be linked if numeric" do - @extractor.extract_mentioned_screen_names("@1234").should == ["1234"] - end - end - - context "multiple screen names" do - it "should both be linked" do - @extractor.extract_mentioned_screen_names("@alice @bob").should == ["alice", "bob"] - end - end - - context "screen names embedded in text" do - it "should be linked in Latin text" do - @extractor.extract_mentioned_screen_names("waiting for @alice to arrive").should == ["alice"] - end - - it "should be linked in Japanese text" do - @extractor.extract_mentioned_screen_names("の@aliceに到着を待っている").should == ["alice"] - end - end - - it "should accept a block arugment and call it in order" do - needed = ["alice", "bob"] - @extractor.extract_mentioned_screen_names("@alice @bob") do |sn| - sn.should == needed.shift - end - needed.should == [] - end - end - - describe "mentions with indices" do - context "single screen name alone " do - it "should be linked and the correct indices" do - @extractor.extract_mentioned_screen_names_with_indices("@alice").should == [{:screen_name => "alice", :indices => [0, 6]}] - end - - it "should be linked with _ and the correct indices" do - @extractor.extract_mentioned_screen_names_with_indices("@alice_adams").should == [{:screen_name => "alice_adams", :indices => [0, 12]}] - end - - it "should be linked if numeric and the correct indices" do - @extractor.extract_mentioned_screen_names_with_indices("@1234").should == [{:screen_name => "1234", :indices => [0, 5]}] - end - end - - context "multiple screen names" do - it "should both be linked with the correct indices" do - @extractor.extract_mentioned_screen_names_with_indices("@alice @bob").should == - [{:screen_name => "alice", :indices => [0, 6]}, - {:screen_name => "bob", :indices => [7, 11]}] - end - - it "should be linked with the correct indices even when repeated" do - @extractor.extract_mentioned_screen_names_with_indices("@alice @alice @bob").should == - [{:screen_name => "alice", :indices => [0, 6]}, - {:screen_name => "alice", :indices => [7, 13]}, - {:screen_name => "bob", :indices => [14, 18]}] - end - end - - context "screen names embedded in text" do - it "should be linked in Latin text with the correct indices" do - @extractor.extract_mentioned_screen_names_with_indices("waiting for @alice to arrive").should == [{:screen_name => "alice", :indices => [12, 18]}] - end - - it "should be linked in Japanese text with the correct indices" do - @extractor.extract_mentioned_screen_names_with_indices("の@aliceに到着を待っている").should == [{:screen_name => "alice", :indices => [1, 7]}] - end - end - - it "should accept a block arugment and call it in order" do - needed = [{:screen_name => "alice", :indices => [0, 6]}, {:screen_name => "bob", :indices => [7, 11]}] - @extractor.extract_mentioned_screen_names_with_indices("@alice @bob") do |sn, start_index, end_index| - data = needed.shift - sn.should == data[:screen_name] - start_index.should == data[:indices].first - end_index.should == data[:indices].last - end - needed.should == [] - end - end - - describe "replies" do - context "should be extracted from" do - it "should extract from lone name" do - @extractor.extract_reply_screen_name("@alice").should == "alice" - end - - it "should extract from the start" do - @extractor.extract_reply_screen_name("@alice reply text").should == "alice" - end - - it "should extract preceded by a space" do - @extractor.extract_reply_screen_name(" @alice reply text").should == "alice" - end - - it "should extract preceded by a full-width space" do - @extractor.extract_reply_screen_name("#{[0x3000].pack('U')}@alice reply text").should == "alice" - end - end - - context "should not be extracted from" do - it "should not be extracted when preceded by text" do - @extractor.extract_reply_screen_name("reply @alice text").should == nil - end - - it "should not be extracted when preceded by puctuation" do - %w(. / _ - + # ! @).each do |punct| - @extractor.extract_reply_screen_name("#{punct}@alice text").should == nil - end - end - end - - context "should accept a block arugment" do - it "should call the block on match" do - @extractor.extract_reply_screen_name("@alice") do |sn| - sn.should == "alice" - end - end - - it "should not call the block on no match" do - calls = 0 - @extractor.extract_reply_screen_name("not a reply") do |sn| - calls += 1 - end - calls.should == 0 - end - end - end - - describe "urls" do - describe "matching URLS" do - TestUrls::VALID.each do |url| - it "should extract the URL #{url} and prefix it with a protocol if missing" do - @extractor.extract_urls(url).first.should include(url) - end - - it "should match the URL #{url} when it's embedded in other text" do - text = "Sweet url: #{url} I found. #awesome" - @extractor.extract_urls(text).first.should include(url) - end - end - end - - describe "invalid URLS" do - it "does not link urls with invalid domains" do - @extractor.extract_urls("http://tld-too-short.x").should == [] - end - end - end - - describe "urls with indices" do - describe "matching URLS" do - TestUrls::VALID.each do |url| - it "should extract the URL #{url} and prefix it with a protocol if missing" do - extracted_urls = @extractor.extract_urls_with_indices(url) - extracted_urls.size.should == 1 - extracted_url = extracted_urls.first - extracted_url[:url].should include(url) - extracted_url[:indices].first.should == 0 - extracted_url[:indices].last.should == url.chars.to_a.size - end - - it "should match the URL #{url} when it's embedded in other text" do - text = "Sweet url: #{url} I found. #awesome" - extracted_urls = @extractor.extract_urls_with_indices(text) - extracted_urls.size.should == 1 - extracted_url = extracted_urls.first - extracted_url[:url].should include(url) - extracted_url[:indices].first.should == 11 - extracted_url[:indices].last.should == 11 + url.chars.to_a.size - end - end - end - - describe "invalid URLS" do - it "does not link urls with invalid domains" do - @extractor.extract_urls_with_indices("http://tld-too-short.x").should == [] - end - end - end - - describe "hashtags" do - context "extracts latin/numeric hashtags" do - %w(text text123 123text).each do |hashtag| - it "should extract ##{hashtag}" do - @extractor.extract_hashtags("##{hashtag}").should == [hashtag] - end - - it "should extract ##{hashtag} within text" do - @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag] - end - end - end - - context "international hashtags" do - context "should allow accents" do - %w(mañana café münchen).each do |hashtag| - it "should extract ##{hashtag}" do - @extractor.extract_hashtags("##{hashtag}").should == [hashtag] - end - - it "should extract ##{hashtag} within text" do - @extractor.extract_hashtags("pre-text ##{hashtag} post-text").should == [hashtag] - end - end - - it "should not allow the multiplication character" do - @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00D7}post").should == [] - end - - it "should not allow the division character" do - @extractor.extract_hashtags("#pre#{Twitter::Unicode::U00F7}post").should == [] - end - end - - end - - it "should not extract numeric hashtags" do - @extractor.extract_hashtags("#1234").should == [] - end - end - - describe "hashtags with indices" do - def match_hashtag_in_text(hashtag, text, offset = 0) - extracted_hashtags = @extractor.extract_hashtags_with_indices(text) - extracted_hashtags.size.should == 1 - extracted_hashtag = extracted_hashtags.first - extracted_hashtag[:hashtag].should == hashtag - extracted_hashtag[:indices].first.should == offset - extracted_hashtag[:indices].last.should == offset + hashtag.chars.to_a.size + 1 - end - - def not_match_hashtag_in_text(text) - extracted_hashtags = @extractor.extract_hashtags_with_indices(text) - extracted_hashtags.size.should == 0 - end - - context "extracts latin/numeric hashtags" do - %w(text text123 123text).each do |hashtag| - it "should extract ##{hashtag}" do - match_hashtag_in_text(hashtag, "##{hashtag}") - end - - it "should extract ##{hashtag} within text" do - match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9) - end - end - end - - context "international hashtags" do - context "should allow accents" do - %w(mañana café münchen).each do |hashtag| - it "should extract ##{hashtag}" do - match_hashtag_in_text(hashtag, "##{hashtag}") - end - - it "should extract ##{hashtag} within text" do - match_hashtag_in_text(hashtag, "pre-text ##{hashtag} post-text", 9) - end - end - - it "should not allow the multiplication character" do - not_match_hashtag_in_text("#pre#{[0xd7].pack('U')}post") - end - - it "should not allow the division character" do - not_match_hashtag_in_text("#pre#{[0xf7].pack('U')}post") - end - end - end - - it "should not extract numeric hashtags" do - not_match_hashtag_in_text("#1234") - end - end -end \ No newline at end of file diff --git a/spec/hithighlighter_spec.rb b/spec/hithighlighter_spec.rb deleted file mode 100644 index 6156122..0000000 --- a/spec/hithighlighter_spec.rb +++ /dev/null @@ -1,92 +0,0 @@ -#encoding: UTF-8 -require File.dirname(__FILE__) + '/spec_helper' - -class TestHitHighlighter - include Twitter::HitHighlighter -end - -describe Twitter::HitHighlighter do - describe "highlight" do - before do - @highlighter = TestHitHighlighter.new - end - - context "with options" do - before do - @original = "Testing this hit highliter" - @hits = [[13,16]] - end - - it "should default to tags" do - @highlighter.hit_highlight(@original, @hits).should == "Testing this hit highliter" - end - - it "should allow tag override" do - @highlighter.hit_highlight(@original, @hits, :tag => 'b').should == "Testing this hit highliter" - end - end - - context "without links" do - before do - @original = "Hey! this is a test tweet" - end - - it "should return original when no hits are provided" do - @highlighter.hit_highlight(@original).should == @original - end - - it "should highlight one hit" do - @highlighter.hit_highlight(@original, hits = [[5, 9]]).should == "Hey! this is a test tweet" - end - - it "should highlight two hits" do - @highlighter.hit_highlight(@original, hits = [[5, 9], [15, 19]]).should == "Hey! this is a test tweet" - end - - it "should correctly highlight first-word hits" do - @highlighter.hit_highlight(@original, hits = [[0, 3]]).should == "Hey! this is a test tweet" - end - - it "should correctly highlight last-word hits" do - @highlighter.hit_highlight(@original, hits = [[20, 25]]).should == "Hey! this is a test tweet" - end - end - - context "with links" do - it "should highlight with a single link" do - @highlighter.hit_highlight("@bcherry this was a test tweet", [[9, 13]]).should == "@bcherry this was a test tweet" - end - - it "should highlight with link at the end" do - @highlighter.hit_highlight("test test test", [[5, 9]]).should == "test test test" - end - - it "should highlight with a link at the beginning" do - @highlighter.hit_highlight("test test test", [[5, 9]]).should == "test test test" - end - - it "should highlight an entire link" do - @highlighter.hit_highlight("test test test", [[5, 9]]).should == "test test test" - end - - it "should highlight within a link" do - @highlighter.hit_highlight("test test test", [[6, 8]]).should == "test test test" - end - - it "should highlight around a link" do - @highlighter.hit_highlight("test test test", [[3, 11]]).should == "test test test" - end - - it "should fail gracefully with bad hits" do - @highlighter.hit_highlight("test test", [[5, 20]]).should == "test test" - end - - it "should not mess up with touching tags" do - @highlighter.hit_highlight("foofoo", [[3,6]]).should == "foofoo" - end - - end - - end - -end diff --git a/spec/regex_spec.rb b/spec/regex_spec.rb deleted file mode 100644 index a519731..0000000 --- a/spec/regex_spec.rb +++ /dev/null @@ -1,38 +0,0 @@ -# encoding: utf-8 -require File.dirname(__FILE__) + '/spec_helper' - -describe "Twitter::Regex regular expressions" do - describe "matching URLS" do - TestUrls::VALID.each do |url| - it "should match the URL #{url}" do - url.should match_autolink_expression - end - - it "should match the URL #{url} when it's embedded in other text" do - text = "Sweet url: #{url} I found. #awesome" - url.should match_autolink_expression_in(text) - end - end - end - - describe "invalid URLS" do - it "does not link urls with invalid characters" do - TestUrls::INVALID.each {|url| url.should_not match_autolink_expression} - end - end - - describe "matching List names" do - it "should match if less than 25 characters" do - name = "Shuffleboard Community" - name.length.should < 25 - name.should match(Twitter::Regex::REGEXEN[:list_name]) - end - - it "should not match if greater than 25 characters" do - name = "Most Glorious Shady Meadows Shuffleboard Community" - name.length.should > 25 - name.should match(Twitter::Regex[:list_name]) - end - - end -end diff --git a/spec/rewriter_spec.rb b/spec/rewriter_spec.rb deleted file mode 100644 index b7f8a1b..0000000 --- a/spec/rewriter_spec.rb +++ /dev/null @@ -1,558 +0,0 @@ -# encoding: UTF-8 - -require 'spec_helper' - -describe Twitter::Rewriter do - def original_text; end - def url; end - - def block(*args) - if Array === @block_args - unless Array === @block_args.first - @block_args = [@block_args] - end - @block_args << args - else - @block_args = args - end - "[rewritten]" - end - - describe "rewrite usernames" do #{{{ - before do - @rewritten_text = Twitter::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block)) - end - - context "username preceded by a space" do - def original_text; "hello @jacob"; end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", nil] - @rewritten_text.should == "hello [rewritten]" - end - end - - context "username at beginning of line" do - def original_text; "@jacob you're cool"; end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", nil] - @rewritten_text.should == "[rewritten] you're cool" - end - end - - context "username preceded by word character" do - def original_text; "meet@the beach"; end - - it "should not be rewritten" do - @block_args.should be_nil - @rewritten_text.should == "meet@the beach" - end - end - - context "username preceded by non-word character" do - def original_text; "great.@jacob"; end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", nil] - @rewritten_text.should == "great.[rewritten]" - end - end - - context "username containing non-word characters" do - def original_text; "@jacob&^$%^"; end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", nil] - @rewritten_text.should == "[rewritten]&^$%^" - end - end - - context "username over twenty characters" do - def original_text - @twenty_character_username = "zach" * 5 - "@" + @twenty_character_username + "1" - end - - it "should be rewritten" do - @block_args.should == ["@", @twenty_character_username, nil] - @rewritten_text.should == "[rewritten]1" - end - end - - context "username followed by japanese" do - def original_text; "@jacobの"; end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", nil] - @rewritten_text.should == "[rewritten]の" - end - end - - context "username preceded by japanese" do - def original_text; "あ@jacob"; end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", nil] - @rewritten_text.should == "あ[rewritten]" - end - end - - context "username surrounded by japanese" do - def original_text; "あ@jacobの"; end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", nil] - @rewritten_text.should == "あ[rewritten]の" - end - end - - context "username using full-width at-sign" do - def original_text - "#{[0xFF20].pack('U')}jacob" - end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", nil] - @rewritten_text.should == "[rewritten]" - end - end - end #}}} - - describe "rewrite lists" do #{{{ - before do - @rewritten_text = Twitter::Rewriter.rewrite_usernames_or_lists(original_text, &method(:block)) - end - - context "slug preceded by a space" do - def original_text; "hello @jacob/my-list"; end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", "/my-list"] - @rewritten_text.should == "hello [rewritten]" - end - end - - context "username followed by a slash but no list" do - def original_text; "hello @jacob/ my-list"; end - - it "should not be rewritten" do - @block_args.should == ["@", "jacob", nil] - @rewritten_text.should == "hello [rewritten]/ my-list" - end - end - - context "empty username followed by a list" do - def original_text; "hello @/my-list"; end - - it "should not be rewritten" do - @block_args.should be_nil - @rewritten_text.should == "hello @/my-list" - end - end - - context "list slug at beginning of line" do - def original_text; "@jacob/my-list"; end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", "/my-list"] - @rewritten_text.should == "[rewritten]" - end - end - - context "username preceded by alpha-numeric character" do - def original_text; "meet@jacob/my-list"; end - - it "should not be rewritten" do - @block_args.should be_nil - @rewritten_text.should == "meet@jacob/my-list" - end - end - - context "username preceded by non-word character" do - def original_text; "great.@jacob/my-list"; end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", "/my-list"] - @rewritten_text.should == "great.[rewritten]" - end - end - - context "username containing non-word characters" do - def original_text; "@jacob/my-list&^$%^"; end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", "/my-list"] - @rewritten_text.should == "[rewritten]&^$%^" - end - end - - context "username over twenty characters" do - def original_text - @twentyfive_character_list = "a" * 25 - "@jacob/#{@twentyfive_character_list}12345" - end - - it "should be rewritten" do - @block_args.should == ["@", "jacob", "/#{@twentyfive_character_list}"] - @rewritten_text.should == "[rewritten]12345" - end - end - end #}}} - - describe "rewrite hashtags" do #{{{ - before do - @rewritten_text = Twitter::Rewriter.rewrite_hashtags(original_text, &method(:block)) - end - - context "with an all numeric hashtag" do - def original_text; "#123"; end - - it "should not be rewritten" do - @block_args.should be_nil - @rewritten_text.should == "#123" - end - end - - context "with a hashtag with alphanumeric characters" do - def original_text; "#ab1d"; end - - it "should be rewritten" do - @block_args.should == ["#", "ab1d"] - @rewritten_text.should == "[rewritten]" - end - end - - context "with a hashtag with underscores" do - def original_text; "#a_b_c_d"; end - - it "should be rewritten" do - @block_args.should == ["#", "a_b_c_d"] - @rewritten_text.should == "[rewritten]" - end - end - - context "with a hashtag that is preceded by a word character" do - def original_text; "ab#cd"; end - - it "should not be rewritten" do - @block_args.should be_nil - @rewritten_text.should == "ab#cd" - end - end - - context "with a page anchor in a url" do - def original_text; "Here's my url: http://foobar.com/#home"; end - - it "should not link the hashtag" do - @block_args.should be_nil - @rewritten_text.should == "Here's my url: http://foobar.com/#home" - end - end - - context "with a hashtag that starts with a number but has word characters" do - def original_text; "#2ab"; end - - it "should be rewritten" do - @block_args.should == ["#", "2ab"] - @rewritten_text.should == "[rewritten]" - end - end - - context "with multiple valid hashtags" do - def original_text; "I'm frickin' awesome #ab #cd #ef"; end - - it "rewrites each hashtag" do - @block_args.should == [["#", "ab"], ["#", "cd"], ["#", "ef"]] - @rewritten_text.should == "I'm frickin' awesome [rewritten] [rewritten] [rewritten]" - end - end - - context "with a hashtag preceded by a ." do - def original_text; "ok, great.#abc"; end - - it "should be rewritten" do - @block_args.should == ["#", "abc"] - @rewritten_text.should == "ok, great.[rewritten]" - end - end - - context "with a hashtag preceded by a &" do - def original_text; "&#nbsp;"; end - - it "should not be rewritten" do - @block_args.should be_nil - @rewritten_text.should == "&#nbsp;" - end - end - - context "with a hashtag that ends in an !" do - def original_text; "#great!"; end - - it "should be rewritten, but should not include the !" do - @block_args.should == ["#", "great"]; - @rewritten_text.should == "[rewritten]!" - end - end - - context "with a hashtag followed by Japanese" do - def original_text; "#twj_devの"; end - - it "should be rewritten" do - @block_args.should == ["#", "twj_devの"]; - @rewritten_text.should == "[rewritten]" - end - end - - context "with a hashtag preceded by a full-width space" do - def original_text; "#{[0x3000].pack('U')}#twj_dev"; end - - it "should be rewritten" do - @block_args.should == ["#", "twj_dev"]; - @rewritten_text.should == " [rewritten]" - end - end - - context "with a hashtag followed by a full-width space" do - def original_text; "#twj_dev#{[0x3000].pack('U')}"; end - - it "should be rewritten" do - @block_args.should == ["#", "twj_dev"]; - @rewritten_text.should == "[rewritten] " - end - end - - context "with a hashtag using full-width hash" do - def original_text; "#{[0xFF03].pack('U')}twj_dev"; end - - it "should be rewritten" do - @block_args.should == ["#", "twj_dev"]; - @rewritten_text.should == "[rewritten]" - end - end - - context "with a hashtag containing an accented latin character" do - def original_text - # the hashtag is #éhashtag - "##{[0x00e9].pack('U')}hashtag" - end - - it "should be rewritten" do - @block_args.should == ["#", "éhashtag"]; - @rewritten_text.should == "[rewritten]" - end - end - end #}}} - - describe "rewrite urls" do #{{{ - def url; "http://www.google.com"; end - - before do - @rewritten_text = Twitter::Rewriter.rewrite_urls(original_text, &method(:block)) - end - - context "when embedded in plain text" do - def original_text; "On my search engine #{url} I found good links."; end - - it "should be rewritten" do - @block_args.should == [url]; - @rewritten_text.should == "On my search engine [rewritten] I found good links." - end - end - - context "when surrounded by Japanese;" do - def original_text; "いまなにしてる#{url}いまなにしてる"; end - - it "should be rewritten" do - @block_args.should == [url]; - @rewritten_text.should == "いまなにしてる[rewritten]いまなにしてる" - end - end - - context "with a path surrounded by parentheses;" do - def original_text; "I found a neatness (#{url})"; end - - it "should be rewritten" do - @block_args.should == [url]; - @rewritten_text.should == "I found a neatness ([rewritten])" - end - - context "when the URL ends with a slash;" do - def url; "http://www.google.com/"; end - - it "should be rewritten" do - @block_args.should == [url]; - @rewritten_text.should == "I found a neatness ([rewritten])" - end - end - - context "when the URL has a path;" do - def url; "http://www.google.com/fsdfasdf"; end - - it "should be rewritten" do - @block_args.should == [url]; - @rewritten_text.should == "I found a neatness ([rewritten])" - end - end - end - - context "when path contains parens" do - def original_text; "I found a neatness (#{url})"; end - - it "should be rewritten" do - @block_args.should == [url]; - @rewritten_text.should == "I found a neatness ([rewritten])" - end - - context "wikipedia" do - def url; "http://en.wikipedia.org/wiki/Madonna_(artist)"; end - - it "should be rewritten" do - @block_args.should == [url]; - @rewritten_text.should == "I found a neatness ([rewritten])" - end - end - - context "IIS session" do - def url; "http://msdn.com/S(deadbeef)/page.htm"; end - - it "should be rewritten" do - @block_args.should == [url]; - @rewritten_text.should == "I found a neatness ([rewritten])" - end - end - - context "unbalanced parens" do - def url; "http://example.com/i_has_a_("; end - - it "should be rewritten" do - @block_args.should == ["http://example.com/i_has_a_"]; - @rewritten_text.should == "I found a neatness ([rewritten]()" - end - end - - context "balanced parens with a double quote inside" do - def url; "http://foo.bar/foo_(\")_bar" end - - it "should be rewritten" do - @block_args.should == ["http://foo.bar/foo_"]; - @rewritten_text.should == "I found a neatness ([rewritten](\")_bar)" - end - end - - context "balanced parens hiding XSS" do - def url; 'http://x.xx/("style="color:red"onmouseover="alert(1)' end - - it "should be rewritten" do - @block_args.should == ["http://x.xx/"]; - @rewritten_text.should == 'I found a neatness ([rewritten]("style="color:red"onmouseover="alert(1))' - end - end - end - - context "when preceded by a :" do - def original_text; "Check this out @hoverbird:#{url}"; end - - it "should be rewritten" do - @block_args.should == [url]; - @rewritten_text.should == "Check this out @hoverbird:[rewritten]" - end - end - - context "with a URL ending in allowed punctuation" do - it "does not consume ending punctuation" do - %w| ? ! , . : ; ] ) } = \ ' |.each do |char| - Twitter::Rewriter.rewrite_urls("#{url}#{char}") do |url| - url.should == url; "[rewritten]" - end.should == "[rewritten]#{char}" - end - end - end - - context "with a URL preceded in forbidden characters" do - it "should not be rewritten" do - %w| \ ' / ! = |.each do |char| - Twitter::Rewriter.rewrite_urls("#{char}#{url}") do |url| - "[rewritten]" # should not be called here. - end.should == "#{char}#{url}" - end - end - end - - context "when embedded in a link tag" do - def original_text; "#{url}"; end - - it "should be rewritten" do - @block_args.should == [url]; - @rewritten_text.should == "[rewritten]" - end - end - - context "with multiple URLs" do - def original_text; "http://www.links.org link at start of page, link at end http://www.foo.org"; end - - it "should autolink each one" do - @block_args.should == [["http://www.links.org"], ["http://www.foo.org"]]; - @rewritten_text.should == "[rewritten] link at start of page, link at end [rewritten]" - end - end - - context "with multiple URLs in different formats" do - def original_text; "http://foo.com https://bar.com http://mail.foobar.org"; end - - it "should autolink each one, in the proper order" do - @block_args.should == [["http://foo.com"], ["https://bar.com"], ["http://mail.foobar.org"]]; - @rewritten_text.should == "[rewritten] [rewritten] [rewritten]" - end - end - - context "with a URL having a long TLD" do - def original_text; "Yahoo integriert Facebook http://golem.mobi/0912/71607.html"; end - - it "should autolink it" do - @block_args.should == ["http://golem.mobi/0912/71607.html"] - @rewritten_text.should == "Yahoo integriert Facebook [rewritten]" - end - end - - context "with a url lacking the protocol" do - def original_text; "I like www.foobar.com dudes"; end - - it "does not link at all" do - @block_args.should be_nil - @rewritten_text.should == "I like www.foobar.com dudes" - end - end - - context "with a @ in a URL" do - context "with XSS attack" do - def original_text; 'http://x.xx/@"style="color:pink"onmouseover=alert(1)//'; end - - it "should not allow XSS follwing @" do - @block_args.should == ["http://x.xx/"] - @rewritten_text.should == '[rewritten]@"style="color:pink"onmouseover=alert(1)//' - end - end - - context "with a username not followed by a /" do - def original_text; "http://example.com/@foobar"; end - - it "should link small url and username" do - @block_args.should == ["http://example.com/"] - @rewritten_text.should == "[rewritten]@foobar" - end - end - - context "with a username followed by a /" do - def original_text; "http://example.com/@foobar/"; end - - it "should not link the username but link full url" do - @block_args.should == ["http://example.com/@foobar/"] - @rewritten_text.should == "[rewritten]" - end - end - end - end #}}} -end - -# vim: foldmethod=marker diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb deleted file mode 100644 index 40a371d..0000000 --- a/spec/spec_helper.rb +++ /dev/null @@ -1,107 +0,0 @@ -$TESTING=true - -# Ruby 1.8 encoding check -major, minor, patch = RUBY_VERSION.split('.') -if major.to_i == 1 && minor.to_i < 9 - $KCODE='u' -end - -$:.push File.join(File.dirname(__FILE__), '..', 'lib') - -require 'nokogiri' -require 'simplecov' -SimpleCov.start do - add_group 'Libraries', 'lib' -end - -require File.expand_path('../../lib/twitter-text', __FILE__) -require File.expand_path('../test_urls', __FILE__) - -RSpec.configure do |config| - config.include TestUrls -end - -RSpec::Matchers.define :match_autolink_expression do - match do |string| - Twitter::Regex[:valid_url].match(string) - end -end - -RSpec::Matchers.define :match_autolink_expression_in do |text| - match do |url| - @match_data = Twitter::Regex[:valid_url].match(text) - @match_data && @match_data.to_s.strip == url - end - - failure_message_for_should do |url| - "Expected to find url '#{url}' in text '#{text}', but the match was #{@match_data.captures}'" - end -end - -RSpec::Matchers.define :have_autolinked_url do |url, inner_text| - match do |text| - @link = Nokogiri::HTML(text).search("a[@href='#{url}']") - @link && - @link.inner_text && - (inner_text && @link.inner_text == inner_text) || (!inner_text && @link.inner_text == url) - end - - failure_message_for_should do |text| - "Expected url '#{url}'#{", inner_text '#{inner_text}'" if inner_text} to be autolinked in '#{text}'" - end -end - -RSpec::Matchers.define :link_to_screen_name do |screen_name| - match do |text| - @link = Nokogiri::HTML(text).search("a.username") - @link && @link.inner_text == screen_name && "http://twitter.com/#{screen_name}".downcase.should == @link.first['href'] - end - - failure_message_for_should do |text| - "expected link #{@link.inner_text} with href #{@link['href']} to match screen_name #{@screen_name}, but it does not" - end - - failure_message_for_should_not do |text| - "expected link #{@link.inner_text} with href #{@link['href']} not to match screen_name #{@screen_name}, but it does" - end - - description do - "contain a link with the name and href pointing to the expected screen_name" - end -end - -RSpec::Matchers.define :link_to_list_path do |list_path| - match do |text| - @link = Nokogiri::HTML(text).search("a.list-slug") - !@link.nil? && @link.inner_text == list_path && "http://twitter.com/#{list_path}".downcase.should == @link.first['href'] - end - - failure_message_for_should do |text| - "expected link #{@link.inner_text} with href #{@link['href']} to match the list path #{list_path}, but it does not" - end - - failure_message_for_should_not do |text| - "expected link #{@link.inner_text} with href #{@link['href']} not to match the list path #{@list_path}, but it does" - end - - description do - "contain a link with the list title and an href pointing to the list path" - end -end - -RSpec::Matchers.define :have_autolinked_hashtag do |hashtag| - match do |text| - @link = Nokogiri::HTML(text).search("a[@href='http://twitter.com/search?q=#{hashtag.sub(/^#/, '%23')}']") - @link && - @link.inner_text && - @link.inner_text == hashtag - end - - failure_message_for_should do |text| - if @link - "Expected link text to be [#{hashtag}], but it was [#{@link.inner_text}] in #{text}" - else - "Expected hashtag #{hashtag} to be autolinked in '#{text}', but no link was found." - end - end -end diff --git a/spec/test_urls.rb b/spec/test_urls.rb deleted file mode 100644 index e1234f1..0000000 --- a/spec/test_urls.rb +++ /dev/null @@ -1,49 +0,0 @@ -#encoding: UTF-8 -module TestUrls - VALID = [ - "http://google.com", - "http://foobar.com/#", - "http://google.com/#foo", - "http://google.com/#search?q=iphone%20-filter%3Alinks", - "http://twitter.com/#search?q=iphone%20-filter%3Alinks", - "http://somedomain.com/index.php?path=/abc/def/", - "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html", - "http://somehost.com:3000", - "http://xo.com/~matthew+%-x", - "http://en.wikipedia.org/wiki/Primer_(film)", - "http://www.ams.org/bookstore-getitem/item=mbk-59", - "http://chilp.it/?77e8fd", - "http://tell.me/why", - "http://longtlds.info", - "http://✪df.ws/ejp", - "http://日本.com", - "http://search.twitter.com/search?q=avro&lang=en", - "http://mrs.domain-dash.biz", - "http://x.com/has/one/char/domain", - "http://t.co/nwcLTFF", - "http://sub_domain-dash.twitter.com", - "http://a.b.cd", - "http://a_b.c-d.com", - "http://a-b.b.com", - "http://twitter-dash.com", - # "t.co/nwcLTFF" - ] unless defined?(TestUrls::VALID) - - INVALID = [ - "http://no-tld", - "http://tld-too-short.x", - "www.foobar.com", - "WWW.FOOBAR.COM", - "http://-doman_dash.com", - "http://_leadingunderscore.twitter.com", - "http://trailingunderscore_.twitter.com", - "http://-leadingdash.twitter.com", - "http://trailingdash-.twitter.com", - "http://-leadingdash.com", - "http://trailingdash-.com", - "http://no_underscores.com", - "http://test.c_o_m", - "http://test.c-o-m" - ] unless defined?(TestUrls::INVALID) - -end diff --git a/spec/twitter_text_spec.rb b/spec/twitter_text_spec.rb deleted file mode 100644 index 6c298e4..0000000 --- a/spec/twitter_text_spec.rb +++ /dev/null @@ -1,20 +0,0 @@ -require File.dirname(__FILE__) + '/spec_helper' - -major, minor, patch = RUBY_VERSION.split('.') -if major.to_i == 1 && minor.to_i < 9 - describe "base" do - before do - $KCODE = 'NONE' - end - - after do - $KCODE = 'u' - end - - it "should raise with invalid KCODE on Ruby < 1.9" do - lambda do - require 'twitter-text' - end.should raise_error - end - end -end diff --git a/spec/unicode_spec.rb b/spec/unicode_spec.rb deleted file mode 100644 index 8705b5d..0000000 --- a/spec/unicode_spec.rb +++ /dev/null @@ -1,31 +0,0 @@ -#encoding: UTF-8 -require File.dirname(__FILE__) + '/spec_helper' - -describe Twitter::Unicode do - - it "should lazy-init constants" do - Twitter::Unicode.const_defined?(:UFEB6).should == false - Twitter::Unicode::UFEB6.should_not be_nil - Twitter::Unicode::UFEB6.should be_kind_of(String) - Twitter::Unicode.const_defined?(:UFEB6).should == true - end - - it "should return corresponding character" do - Twitter::Unicode::UFEB6.should == [0xfeb6].pack('U') - end - - it "should allow lowercase notation" do - Twitter::Unicode::Ufeb6.should == Twitter::Unicode::UFEB6 - Twitter::Unicode::Ufeb6.should === Twitter::Unicode::UFEB6 - end - - it "should allow underscore notation" do - Twitter::Unicode::U_FEB6.should == Twitter::Unicode::UFEB6 - Twitter::Unicode::U_FEB6.should === Twitter::Unicode::UFEB6 - end - - it "should raise on invalid codepoints" do - lambda { Twitter::Unicode::FFFFFF }.should raise_error(NameError) - end - -end \ No newline at end of file diff --git a/spec/validation_spec.rb b/spec/validation_spec.rb deleted file mode 100644 index a87dc77..0000000 --- a/spec/validation_spec.rb +++ /dev/null @@ -1,43 +0,0 @@ -#encoding: BINARY -require File.dirname(__FILE__) + '/spec_helper' - -class TestValidation - include Twitter::Validation -end - -describe Twitter::Validation do - - it "should disallow invalid BOM character" do - TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFE}").should == :invalid_characters - TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFEFF}").should == :invalid_characters - end - - it "should disallow invalid U+FFFF character" do - TestValidation.new.tweet_invalid?("Bom:#{Twitter::Unicode::UFFFF}").should == :invalid_characters - end - - it "should disallow direction change characters" do - [0x202A, 0x202B, 0x202C, 0x202D, 0x202E].map{|cp| [cp].pack('U') }.each do |char| - TestValidation.new.tweet_invalid?("Invalid:#{char}").should == :invalid_characters - end - end - - it "should disallow non-Unicode" do - TestValidation.new.tweet_invalid?("not-Unicode:\xfff0").should == :invalid_characters - end - - it "should allow <= 140 combined accent characters" do - char = [0x65, 0x0301].pack('U') - TestValidation.new.tweet_invalid?(char * 139).should == false - TestValidation.new.tweet_invalid?(char * 140).should == false - TestValidation.new.tweet_invalid?(char * 141).should == :too_long - end - - it "should allow <= 140 multi-byte characters" do - char = [ 0x1d106 ].pack('U') - TestValidation.new.tweet_invalid?(char * 139).should == false - TestValidation.new.tweet_invalid?(char * 140).should == false - TestValidation.new.tweet_invalid?(char * 141).should == :too_long - end - -end \ No newline at end of file diff --git a/test/conformance_test.rb b/test/conformance_test.rb deleted file mode 100644 index 069dc1f..0000000 --- a/test/conformance_test.rb +++ /dev/null @@ -1,176 +0,0 @@ -require 'test/unit' -require 'yaml' - -# Ruby 1.8 encoding check -major, minor, patch = RUBY_VERSION.split('.') -if major.to_i == 1 && minor.to_i < 9 - $KCODE='u' -end - -require File.expand_path(File.dirname(__FILE__) + '/../lib/twitter-text') - -class ConformanceTest < Test::Unit::TestCase - include Twitter::Extractor - include Twitter::Autolink - include Twitter::HitHighlighter - include Twitter::Validation - - def setup - @conformance_dir = ENV['CONFORMANCE_DIR'] || File.join(File.dirname(__FILE__), 'twitter-text-conformance') - end - - module ExtractorConformance - def test_replies_extractor_conformance - run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :replies) do |description, expected, input| - assert_equal expected, extract_reply_screen_name(input), description - end - end - - def test_mentions_extractor_conformance - run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :mentions) do |description, expected, input| - assert_equal expected, extract_mentioned_screen_names(input), description - end - end - - def test_mentions_with_indices_extractor_conformance - run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :mentions_with_indices) do |description, expected, input| - expected = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} } - assert_equal expected, extract_mentioned_screen_names_with_indices(input), description - end - end - - def test_mentions_or_lists_with_indices_conformance - run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :mentions_or_lists_with_indices) do |description, expected, input| - expected = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} } - assert_equal expected, extract_mentions_or_lists_with_indices(input), description - end - end - - def test_url_extractor_conformance - run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :urls) do |description, expected, input| - assert_equal expected, extract_urls(input), description - expected.each do |expected_url| - assert_equal true, valid_url?(expected_url), "expected url [#{expected_url}] not valid" - end - end - end - - def test_urls_with_indices_extractor_conformance - run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :urls_with_indices) do |description, expected, input| - expected = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} } - assert_equal expected, extract_urls_with_indices(input), description - end - end - - def test_hashtag_extractor_conformance - run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :hashtags) do |description, expected, input| - assert_equal expected, extract_hashtags(input), description - end - end - - def test_hashtags_with_indices_extractor_conformance - run_conformance_test(File.join(@conformance_dir, 'extract.yml'), :hashtags_with_indices) do |description, expected, input| - expected = expected.map{|elem| elem.inject({}){|h, (k,v)| h[k.to_sym] = v; h} } - assert_equal expected, extract_hashtags_with_indices(input), description - end - end - end - include ExtractorConformance - - module AutolinkConformance - def test_users_autolink_conformance - run_conformance_test(File.join(@conformance_dir, 'autolink.yml'), :usernames) do |description, expected, input| - assert_equal expected, auto_link_usernames_or_lists(input, :suppress_no_follow => true), description - end - end - - def test_lists_autolink_conformance - run_conformance_test(File.join(@conformance_dir, 'autolink.yml'), :lists) do |description, expected, input| - assert_equal expected, auto_link_usernames_or_lists(input, :suppress_no_follow => true), description - end - end - - def test_urls_autolink_conformance - run_conformance_test(File.join(@conformance_dir, 'autolink.yml'), :urls) do |description, expected, input| - assert_equal expected, auto_link_urls_custom(input, :suppress_no_follow => true), description - end - end - - def test_hashtags_autolink_conformance - run_conformance_test(File.join(@conformance_dir, 'autolink.yml'), :hashtags) do |description, expected, input| - assert_equal expected, auto_link_hashtags(input, :suppress_no_follow => true), description - end - end - - def test_all_autolink_conformance - run_conformance_test(File.join(@conformance_dir, 'autolink.yml'), :all) do |description, expected, input| - assert_equal expected, auto_link(input, :suppress_no_follow => true), description - end - end - end - include AutolinkConformance - - module HitHighlighterConformance - - def test_plain_text_conformance - run_conformance_test(File.join(@conformance_dir, 'hit_highlighting.yml'), :plain_text, true) do |config| - assert_equal config['expected'], hit_highlight(config['text'], config['hits']), config['description'] - end - end - - def test_with_links_conformance - run_conformance_test(File.join(@conformance_dir, 'hit_highlighting.yml'), :with_links, true) do |config| - assert_equal config['expected'], hit_highlight(config['text'], config['hits']), config['description'] - end - end - end - include HitHighlighterConformance - - module ValidationConformance - def test_tweet_validation_conformance - run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :tweets) do |description, expected, input| - assert_equal expected, valid_tweet_text?(input), description - end - end - - def test_users_validation_conformance - run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :usernames) do |description, expected, input| - assert_equal expected, valid_username?(input), description - end - end - - def test_lists_validation_conformance - run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :lists) do |description, expected, input| - assert_equal expected, valid_list?(input), description - end - end - - def test_urls_validation_conformance - run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :urls) do |description, expected, input| - assert_equal expected, valid_url?(input), description - end - end - - def test_hashtags_validation_conformance - run_conformance_test(File.join(@conformance_dir, 'validate.yml'), :hashtags) do |description, expected, input| - assert_equal expected, valid_hashtag?(input), description - end - end - end - include ValidationConformance - - private - - def run_conformance_test(file, test_type, hash_config = false, &block) - yaml = YAML.load_file(file) - assert yaml["tests"][test_type.to_s], "No such test suite: #{test_type.to_s}" - - yaml["tests"][test_type.to_s].each do |test_info| - if hash_config - yield test_info - else - yield test_info['description'], test_info['expected'], test_info['text'] - end - end - end -end diff --git a/test/twitter-text-conformance b/test/twitter-text-conformance deleted file mode 160000 index b186e84..0000000 --- a/test/twitter-text-conformance +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b186e84e29cfa66829bc2feb39db8907a3c66db3 diff --git a/twitter-text.gemspec b/twitter-text.gemspec deleted file mode 100644 index 68d466f..0000000 --- a/twitter-text.gemspec +++ /dev/null @@ -1,25 +0,0 @@ -spec = Gem::Specification.new do |s| - s.name = "twitter-text" - s.version = "1.4.9" - s.authors = ["Matt Sanford", "Patrick Ewing", "Ben Cherry", "Britt Selvitelle", - "Raffi Krikorian", "J.P. Cummins", "Yoshimasa Niwa"] - s.email = ["matt@twitter.com", "patrick.henry.ewing@gmail.com", "bcherry@gmail.com", "bs@brittspace.com", - "raffi@twitter.com", "jcummins@twitter.com", "niw@niw.at"] - s.homepage = "http://twitter.com" - s.description = s.summary = "A gem that provides text handling for Twitter" - - s.platform = Gem::Platform::RUBY - s.has_rdoc = true - s.summary = "Twitter text handling library" - - s.add_development_dependency "nokogiri" - s.add_development_dependency "rake" - s.add_development_dependency "rspec" - s.add_development_dependency "simplecov" - s.add_runtime_dependency "activesupport" - - s.files = `git ls-files`.split("\n") - s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") - s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } - s.require_paths = ["lib"] -end